Files
actions-runner-controller/controllers/runnerreplicaset_controller.go

268 lines
8.0 KiB
Go
Raw Permalink Normal View History

/*
Copyright 2020 The actions-runner-controller authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controllers
import (
"context"
"errors"
"fmt"
"time"
gogithub "github.com/google/go-github/v33/github"
"github.com/go-logr/logr"
kerrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
"github.com/summerwind/actions-runner-controller/github"
)
// RunnerReplicaSetReconciler reconciles a Runner object
type RunnerReplicaSetReconciler struct {
client.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
GitHubClient *github.Client
Name string
}
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
func (r *RunnerReplicaSetReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
ctx := context.Background()
log := r.Log.WithValues("runnerreplicaset", req.NamespacedName)
var rs v1alpha1.RunnerReplicaSet
if err := r.Get(ctx, req.NamespacedName, &rs); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if !rs.ObjectMeta.DeletionTimestamp.IsZero() {
return ctrl.Result{}, nil
}
Manage runner with label (#355) * Update RunnerDeploymentSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerReplicaSetSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add CloneSelectorAndAddLabel to add Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix tests Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use label to find RunnerReplicaSet/Runner Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Update controller-gen versions in CRD Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update autoscaler to list Pods with labels Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add debug log Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Modify RunnerDeployment tests Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify RunnerReplicaset test Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify integration test Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use RunnerDeployment Template Labels as the default selector for backward compatibility * Fix labeling Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update func in Eventually to return (int, error) Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerDeployment controller not to use label selector Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix potential replicaset controller breakage on replicaset created before v0.17.0 * Fix errors on existing runner replica sets * Ensure RunnerReplicaSet Spec Selector addition does not break controller * Ensure RunnerDeployment Template.Spec.Labels change does result in template hash change * Fix comment Co-authored-by: binoue <banji-inoue@cybozu.co.jp> Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-03-05 10:15:39 +09:00
selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector)
if err != nil {
return ctrl.Result{}, err
}
// Get the Runners managed by the target RunnerReplicaSet
var allRunners v1alpha1.RunnerList
Manage runner with label (#355) * Update RunnerDeploymentSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerReplicaSetSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add CloneSelectorAndAddLabel to add Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix tests Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use label to find RunnerReplicaSet/Runner Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Update controller-gen versions in CRD Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update autoscaler to list Pods with labels Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add debug log Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Modify RunnerDeployment tests Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify RunnerReplicaset test Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify integration test Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use RunnerDeployment Template Labels as the default selector for backward compatibility * Fix labeling Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update func in Eventually to return (int, error) Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerDeployment controller not to use label selector Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix potential replicaset controller breakage on replicaset created before v0.17.0 * Fix errors on existing runner replica sets * Ensure RunnerReplicaSet Spec Selector addition does not break controller * Ensure RunnerDeployment Template.Spec.Labels change does result in template hash change * Fix comment Co-authored-by: binoue <banji-inoue@cybozu.co.jp> Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-03-05 10:15:39 +09:00
if err := r.List(
ctx,
&allRunners,
client.InNamespace(req.Namespace),
client.MatchingLabelsSelector{Selector: selector},
); err != nil {
if !kerrors.IsNotFound(err) {
return ctrl.Result{}, err
}
}
var myRunners []v1alpha1.Runner
Manage runner with label (#355) * Update RunnerDeploymentSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerReplicaSetSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add CloneSelectorAndAddLabel to add Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix tests Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use label to find RunnerReplicaSet/Runner Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Update controller-gen versions in CRD Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update autoscaler to list Pods with labels Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add debug log Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Modify RunnerDeployment tests Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify RunnerReplicaset test Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify integration test Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use RunnerDeployment Template Labels as the default selector for backward compatibility * Fix labeling Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update func in Eventually to return (int, error) Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerDeployment controller not to use label selector Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix potential replicaset controller breakage on replicaset created before v0.17.0 * Fix errors on existing runner replica sets * Ensure RunnerReplicaSet Spec Selector addition does not break controller * Ensure RunnerDeployment Template.Spec.Labels change does result in template hash change * Fix comment Co-authored-by: binoue <banji-inoue@cybozu.co.jp> Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-03-05 10:15:39 +09:00
var (
available int
ready int
)
for _, r := range allRunners.Items {
Manage runner with label (#355) * Update RunnerDeploymentSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerReplicaSetSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add CloneSelectorAndAddLabel to add Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix tests Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use label to find RunnerReplicaSet/Runner Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Update controller-gen versions in CRD Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update autoscaler to list Pods with labels Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add debug log Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Modify RunnerDeployment tests Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify RunnerReplicaset test Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify integration test Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use RunnerDeployment Template Labels as the default selector for backward compatibility * Fix labeling Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update func in Eventually to return (int, error) Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerDeployment controller not to use label selector Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix potential replicaset controller breakage on replicaset created before v0.17.0 * Fix errors on existing runner replica sets * Ensure RunnerReplicaSet Spec Selector addition does not break controller * Ensure RunnerDeployment Template.Spec.Labels change does result in template hash change * Fix comment Co-authored-by: binoue <banji-inoue@cybozu.co.jp> Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-03-05 10:15:39 +09:00
// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
// to not treat all the runners in the namespace as its children.
if metav1.IsControlledBy(&r, &rs) {
myRunners = append(myRunners, r)
available += 1
if r.Status.Phase == string(corev1.PodRunning) {
ready += 1
}
}
}
var desired int
if rs.Spec.Replicas != nil {
desired = *rs.Spec.Replicas
} else {
desired = 1
}
if available > desired {
n := available - desired
log.V(0).Info(fmt.Sprintf("Deleting %d runners", n), "desired", desired, "available", available, "ready", ready)
// get runners that are currently offline/not busy/timed-out to register
var deletionCandidates []v1alpha1.Runner
Manage runner with label (#355) * Update RunnerDeploymentSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerReplicaSetSpec to have Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add CloneSelectorAndAddLabel to add Selector field Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix tests Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use label to find RunnerReplicaSet/Runner Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Update controller-gen versions in CRD Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update autoscaler to list Pods with labels Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Add debug log Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Modify RunnerDeployment tests Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify RunnerReplicaset test Signed-off-by: binoue <banji-inoue@cybozu.co.jp> * Modify integration test Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Use RunnerDeployment Template Labels as the default selector for backward compatibility * Fix labeling Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update func in Eventually to return (int, error) Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Update RunnerDeployment controller not to use label selector Signed-off-by: Hiroshi Muraoka <h.muraoka714@gmail.com> * Fix potential replicaset controller breakage on replicaset created before v0.17.0 * Fix errors on existing runner replica sets * Ensure RunnerReplicaSet Spec Selector addition does not break controller * Ensure RunnerDeployment Template.Spec.Labels change does result in template hash change * Fix comment Co-authored-by: binoue <banji-inoue@cybozu.co.jp> Co-authored-by: Yusuke Kuoka <ykuoka@gmail.com>
2021-03-05 10:15:39 +09:00
for _, runner := range allRunners.Items {
busy, err := r.GitHubClient.IsRunnerBusy(ctx, runner.Spec.Enterprise, runner.Spec.Organization, runner.Spec.Repository, runner.Name)
if err != nil {
notRegistered := false
offline := false
var notFoundException *github.RunnerNotFound
var offlineException *github.RunnerOffline
if errors.As(err, &notFoundException) {
log.V(1).Info("Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time.", "runnerName", runner.Name)
notRegistered = true
} else if errors.As(err, &offlineException) {
offline = true
} else {
var e *gogithub.RateLimitError
if errors.As(err, &e) {
// We log the underlying error when we failed calling GitHub API to list or unregisters,
// or the runner is still busy.
log.Error(
err,
fmt.Sprintf(
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls",
retryDelayOnGitHubAPIRateLimitError,
),
)
return ctrl.Result{RequeueAfter: retryDelayOnGitHubAPIRateLimitError}, err
}
return ctrl.Result{}, err
}
registrationTimeout := 15 * time.Minute
currentTime := time.Now()
registrationDidTimeout := currentTime.Sub(runner.CreationTimestamp.Add(registrationTimeout)) > 0
if notRegistered && registrationDidTimeout {
log.Info(
"Runner failed to register itself to GitHub in timely manner. "+
"Marking the runner for scale down. "+
"CAUTION: If you see this a lot, you should investigate the root cause. "+
"See https://github.com/summerwind/actions-runner-controller/issues/288",
"runnerCreationTimestamp", runner.CreationTimestamp,
"currentTime", currentTime,
"configuredRegistrationTimeout", registrationTimeout,
)
deletionCandidates = append(deletionCandidates, runner)
}
// offline runners should always be a great target for scale down
if offline {
deletionCandidates = append(deletionCandidates, runner)
}
} else if !busy {
deletionCandidates = append(deletionCandidates, runner)
}
}
if len(deletionCandidates) < n {
n = len(deletionCandidates)
}
for i := 0; i < n; i++ {
if err := r.Client.Delete(ctx, &deletionCandidates[i]); client.IgnoreNotFound(err) != nil {
log.Error(err, "Failed to delete runner resource")
return ctrl.Result{}, err
}
r.Recorder.Event(&rs, corev1.EventTypeNormal, "RunnerDeleted", fmt.Sprintf("Deleted runner '%s'", deletionCandidates[i].Name))
log.Info("Deleted runner")
}
} else if desired > available {
n := desired - available
log.V(0).Info(fmt.Sprintf("Creating %d runner(s)", n), "desired", desired, "available", available, "ready", ready)
for i := 0; i < n; i++ {
newRunner, err := r.newRunner(rs)
if err != nil {
log.Error(err, "Could not create runner")
return ctrl.Result{}, err
}
if err := r.Client.Create(ctx, &newRunner); err != nil {
log.Error(err, "Failed to create runner resource")
return ctrl.Result{}, err
}
}
}
if rs.Status.AvailableReplicas != available || rs.Status.ReadyReplicas != ready {
updated := rs.DeepCopy()
updated.Status.AvailableReplicas = available
updated.Status.ReadyReplicas = ready
if err := r.Status().Update(ctx, updated); err != nil {
log.Info("Failed to update status. Retrying immediately", "error", err.Error())
Compact excessive error message on runnerreplicaset status update conflict (#350) We occasionally see logs like the below: ``` 2021-02-24T02:48:26.769ZERRORFailed to update runner status{"runnerreplicaset": "testns-244ol/example-runnerdeploy-j5wzf", "error": "Operation cannot be fulfilled on runnerreplicasets.actions.summerwind.dev \"example-runnerdeploy-j5wzf\": the object has been modified; please apply your changes to the latest version and try again"} github.com/go-logr/zapr.(*zapLogger).Error /home/runner/go/pkg/mod/github.com/go-logr/zapr@v0.1.0/zapr.go:128 github.com/summerwind/actions-runner-controller/controllers.(*RunnerReplicaSetReconciler).Reconcile /home/runner/work/actions-runner-controller/actions-runner-controller/controllers/runnerreplicaset_controller.go:207 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:256 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:232 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).worker /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:211 k8s.io/apimachinery/pkg/util/wait.JitterUntil.func1 /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:152 k8s.io/apimachinery/pkg/util/wait.JitterUntil /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:153 k8s.io/apimachinery/pkg/util/wait.Until /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:88 2021-02-24T02:48:26.769ZERRORcontroller-runtime.controllerReconciler error{"controller": "testns-244olrunnerreplicaset", "request": "testns-244ol/example-runnerdeploy-j5wzf", "error": "Operation cannot be fulfilled on runnerreplicasets.actions.summerwind.dev \"example-runnerdeploy-j5wzf\": the object has been modified; please apply your changes to the latest version and try again"} github.com/go-logr/zapr.(*zapLogger).Error /home/runner/go/pkg/mod/github.com/go-logr/zapr@v0.1.0/zapr.go:128 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:258 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:232 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).worker /home/runner/go/pkg/mod/sigs.k8s.io/controller-runtime@v0.4.0/pkg/internal/controller/controller.go:211 k8s.io/apimachinery/pkg/util/wait.JitterUntil.func1 /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:152 k8s.io/apimachinery/pkg/util/wait.JitterUntil /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:153 k8s.io/apimachinery/pkg/util/wait.Until /home/runner/go/pkg/mod/k8s.io/apimachinery@v0.0.0-20190913080033-27d36303b655/pkg/util/wait/wait.go:88 ``` which can be compacted into one-liner, without the useless stack trace, without double-logging the same error from the logger and the controller.
2021-02-25 09:01:02 +09:00
return ctrl.Result{
Requeue: true,
}, nil
}
}
return ctrl.Result{}, nil
}
func (r *RunnerReplicaSetReconciler) newRunner(rs v1alpha1.RunnerReplicaSet) (v1alpha1.Runner, error) {
objectMeta := rs.Spec.Template.ObjectMeta.DeepCopy()
2020-03-15 21:50:45 +09:00
objectMeta.GenerateName = rs.ObjectMeta.Name + "-"
objectMeta.Namespace = rs.ObjectMeta.Namespace
runner := v1alpha1.Runner{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: *objectMeta,
Spec: rs.Spec.Template.Spec,
}
if err := ctrl.SetControllerReference(&rs, &runner, r.Scheme); err != nil {
return runner, err
}
return runner, nil
}
func (r *RunnerReplicaSetReconciler) SetupWithManager(mgr ctrl.Manager) error {
name := "runnerreplicaset-controller"
if r.Name != "" {
name = r.Name
}
r.Recorder = mgr.GetEventRecorderFor(name)
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.RunnerReplicaSet{}).
Owns(&v1alpha1.Runner{}).
Named(name).
Complete(r)
}