feat: Use HorizontalRunnerAutoscaler for autoscaling

This commit is contained in:
Yusuke Kuoka
2020-07-19 18:42:06 +09:00
parent eca6917c6a
commit ae30648985
12 changed files with 668 additions and 206 deletions

View File

@@ -8,13 +8,11 @@ import (
"strings"
)
func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment) (*int, error) {
if rd.Spec.Replicas != nil {
return nil, fmt.Errorf("bug: determineDesiredReplicas should not be called for deplomeny with specific replicas")
} else if rd.Spec.MinReplicas == nil {
return nil, fmt.Errorf("runnerdeployment %s/%s is missing minReplicas", rd.Namespace, rd.Name)
} else if rd.Spec.MaxReplicas == nil {
return nil, fmt.Errorf("runnerdeployment %s/%s is missing maxReplicas", rd.Namespace, rd.Name)
func (r *HorizontalRunnerAutoscalerReconciler) determineDesiredReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
if hra.Spec.MinReplicas == nil {
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing minReplicas", hra.Namespace, hra.Name)
} else if hra.Spec.MaxReplicas == nil {
return nil, fmt.Errorf("horizontalrunnerautoscaler %s/%s is missing maxReplicas", hra.Namespace, hra.Name)
}
var repos [][]string
@@ -26,12 +24,12 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner
return nil, fmt.Errorf("asserting runner deployment spec to detect bug: spec.template.organization should not be empty on this code path")
}
metrics := rd.Spec.Autoscaling.Metrics
metrics := hra.Spec.Metrics
if len(metrics) == 0 {
return nil, fmt.Errorf("validating autoscaling metrics: one or more metrics is required")
} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns {
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns)
} else if tpe := metrics[0].Type; tpe != v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns {
return nil, fmt.Errorf("validting autoscaling metrics: unsupported metric type %q: only supported value is %s", tpe, v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns)
} else if len(metrics[0].RepositoryNames) == 0 {
return nil, errors.New("validating autoscaling metrics: spec.autoscaling.metrics[].repositoryNames is required and must have one more more entries for organizational runner deployment")
}
@@ -74,8 +72,8 @@ func (r *RunnerDeploymentReconciler) determineDesiredReplicas(rd v1alpha1.Runner
}
}
minReplicas := *rd.Spec.MinReplicas
maxReplicas := *rd.Spec.MaxReplicas
minReplicas := *hra.Spec.MinReplicas
maxReplicas := *hra.Spec.MaxReplicas
necessaryReplicas := queued + inProgress
var desiredReplicas int

View File

@@ -115,9 +115,12 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
},
// fixed at 3
{
repo: "test/valid",
fixed: intPtr(3),
want: 3,
repo: "test/valid",
min: intPtr(1),
max: intPtr(3),
fixed: intPtr(3),
workflowRuns: `{"total_count": 4, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
want: 3,
},
}
@@ -137,7 +140,7 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
defer server.Close()
client := newGithubClient(server)
r := &RunnerDeploymentReconciler{
h := &HorizontalRunnerAutoscalerReconciler{
Log: log,
GitHubClient: client,
Scheme: scheme,
@@ -145,23 +148,34 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
rd := v1alpha1.RunnerDeployment{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{
Name: "testrd",
},
Spec: v1alpha1.RunnerDeploymentSpec{
Template: v1alpha1.RunnerTemplate{
Spec: v1alpha1.RunnerSpec{
Repository: tc.repo,
},
},
Replicas: tc.fixed,
Replicas: tc.fixed,
},
Status: v1alpha1.RunnerDeploymentStatus{
Replicas: tc.sReplicas,
},
}
hra := v1alpha1.HorizontalRunnerAutoscaler{
Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{
MaxReplicas: tc.max,
MinReplicas: tc.min,
},
Status: v1alpha1.RunnerDeploymentStatus{
Replicas: tc.sReplicas,
Status: v1alpha1.HorizontalRunnerAutoscalerStatus{
DesiredReplicas: tc.sReplicas,
LastSuccessfulScaleOutTime: tc.sTime,
},
}
rs, err := r.newRunnerReplicaSetWithAutoscaling(rd)
got, err := h.computeReplicas(rd, hra)
if err != nil {
if tc.err == "" {
t.Fatalf("unexpected error: expected none, got %v", err)
@@ -171,8 +185,6 @@ func TestDetermineDesiredReplicas_RepositoryRunner(t *testing.T) {
return
}
got := rs.Spec.Replicas
if got == nil {
t.Fatalf("unexpected value of rs.Spec.Replicas: nil")
}
@@ -278,16 +290,23 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
},
// fixed at 3
{
org: "test",
repos: []string{"valid"},
fixed: intPtr(3),
want: 3,
org: "test",
repos: []string{"valid"},
fixed: intPtr(1),
min: intPtr(1),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
want: 3,
},
// org runner, fixed at 3
{
org: "test",
fixed: intPtr(3),
want: 3,
org: "test",
repos: []string{"valid"},
fixed: intPtr(1),
min: intPtr(1),
max: intPtr(3),
workflowRuns: `{"total_count": 2, "workflow_runs":[{"status":"in_progress"}, {"status":"in_progress"}, {"status":"in_progress"}, {"status":"completed"}]}"`,
want: 3,
},
// org runner, 1 demanded, min at 1, no repos
{
@@ -315,39 +334,50 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
defer server.Close()
client := newGithubClient(server)
r := &RunnerDeploymentReconciler{
h := &HorizontalRunnerAutoscalerReconciler{
Log: log,
GitHubClient: client,
Scheme: scheme,
GitHubClient: client,
}
rd := v1alpha1.RunnerDeployment{
TypeMeta: metav1.TypeMeta{},
ObjectMeta: metav1.ObjectMeta{
Name: "testrd",
},
Spec: v1alpha1.RunnerDeploymentSpec{
Template: v1alpha1.RunnerTemplate{
Spec: v1alpha1.RunnerSpec{
Organization: tc.org,
},
},
Autoscaling: v1alpha1.AutoscalingSpec{
Metrics: []v1alpha1.MetricSpec{
{
Type: v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndProgressingWorkflowRuns,
RepositoryNames: tc.repos,
},
},
},
Replicas: tc.fixed,
MaxReplicas: tc.max,
MinReplicas: tc.min,
Replicas: tc.fixed,
},
Status: v1alpha1.RunnerDeploymentStatus{
Replicas: tc.sReplicas,
Replicas: tc.sReplicas,
},
}
hra := v1alpha1.HorizontalRunnerAutoscaler{
Spec: v1alpha1.HorizontalRunnerAutoscalerSpec{
ScaleTargetRef: v1alpha1.ScaleTargetRef{
Name: "testrd",
},
MaxReplicas: tc.max,
MinReplicas: tc.min,
Metrics: []v1alpha1.MetricSpec{
{
Type: v1alpha1.AutoscalingMetricTypeTotalNumberOfQueuedAndInProgressWorkflowRuns,
RepositoryNames: tc.repos,
},
},
},
Status: v1alpha1.HorizontalRunnerAutoscalerStatus{
DesiredReplicas: tc.sReplicas,
LastSuccessfulScaleOutTime: tc.sTime,
},
}
rs, err := r.newRunnerReplicaSetWithAutoscaling(rd)
got, err := h.computeReplicas(rd, hra)
if err != nil {
if tc.err == "" {
t.Fatalf("unexpected error: expected none, got %v", err)
@@ -357,10 +387,8 @@ func TestDetermineDesiredReplicas_OrganizationalRunner(t *testing.T) {
return
}
got := rs.Spec.Replicas
if got == nil {
t.Fatalf("unexpected value of rs.Spec.Replicas: nil")
t.Fatalf("unexpected value of rs.Spec.Replicas: nil, wanted %v", tc.want)
}
if *got != tc.want {

View File

@@ -0,0 +1,184 @@
/*
Copyright 2020 The actions-runner-controller authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controllers
import (
"context"
"time"
"github.com/summerwind/actions-runner-controller/github"
"k8s.io/apimachinery/pkg/types"
"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
)
const (
DefaultScaleDownDelay = 10 * time.Minute
)
// HorizontalRunnerAutoscalerReconciler reconciles a HorizontalRunnerAutoscaler object
type HorizontalRunnerAutoscalerReconciler struct {
client.Client
GitHubClient *github.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
}
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;update;patch
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=horizontalrunnerautoscalers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
func (r *HorizontalRunnerAutoscalerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
ctx := context.Background()
log := r.Log.WithValues("horizontalrunnerautoscaler", req.NamespacedName)
var hra v1alpha1.HorizontalRunnerAutoscaler
if err := r.Get(ctx, req.NamespacedName, &hra); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if !hra.ObjectMeta.DeletionTimestamp.IsZero() {
return ctrl.Result{}, nil
}
var rd v1alpha1.RunnerDeployment
if err := r.Get(ctx, types.NamespacedName{
Namespace: req.Namespace,
Name: hra.Spec.ScaleTargetRef.Name,
}, &rd); err != nil {
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if !rd.ObjectMeta.DeletionTimestamp.IsZero() {
return ctrl.Result{}, nil
}
replicas, err := r.computeReplicas(rd, hra)
if err != nil {
r.Recorder.Event(&hra, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
log.Error(err, "Could not compute replicas")
return ctrl.Result{}, err
}
const defaultReplicas = 1
currentDesiredReplicas := getIntOrDefault(rd.Spec.Replicas, defaultReplicas)
newDesiredReplicas := getIntOrDefault(replicas, defaultReplicas)
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
if currentDesiredReplicas != newDesiredReplicas {
copy := rd.DeepCopy()
copy.Spec.Replicas = &newDesiredReplicas
if err := r.Client.Update(ctx, copy); err != nil {
log.Error(err, "Failed to update runnerderployment resource")
return ctrl.Result{}, err
}
return ctrl.Result{}, err
}
if hra.Status.DesiredReplicas == nil || *hra.Status.DesiredReplicas != *replicas {
updated := hra.DeepCopy()
if (hra.Status.DesiredReplicas == nil && *replicas > 1) ||
(hra.Status.DesiredReplicas != nil && *replicas > *hra.Status.DesiredReplicas) {
updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()}
}
updated.Status.DesiredReplicas = replicas
if err := r.Status().Update(ctx, updated); err != nil {
log.Error(err, "Failed to update horizontalrunnerautoscaler status")
return ctrl.Result{}, err
}
}
return ctrl.Result{}, nil
}
func (r *HorizontalRunnerAutoscalerReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.Recorder = mgr.GetEventRecorderFor("runnerdeployment-controller")
if err := mgr.GetFieldIndexer().IndexField(&v1alpha1.RunnerReplicaSet{}, runnerSetOwnerKey, func(rawObj runtime.Object) []string {
runnerSet := rawObj.(*v1alpha1.RunnerReplicaSet)
owner := metav1.GetControllerOf(runnerSet)
if owner == nil {
return nil
}
if owner.APIVersion != v1alpha1.GroupVersion.String() || owner.Kind != "RunnerDeployment" {
return nil
}
return []string{owner.Name}
}); err != nil {
return err
}
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.RunnerDeployment{}).
Owns(&v1alpha1.RunnerReplicaSet{}).
Complete(r)
}
func (r *HorizontalRunnerAutoscalerReconciler) computeReplicas(rd v1alpha1.RunnerDeployment, hra v1alpha1.HorizontalRunnerAutoscaler) (*int, error) {
var computedReplicas *int
replicas, err := r.determineDesiredReplicas(rd, hra)
if err != nil {
return nil, err
}
var scaleDownDelay time.Duration
if hra.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
scaleDownDelay = time.Duration(*hra.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
} else {
scaleDownDelay = DefaultScaleDownDelay
}
now := time.Now()
if hra.Status.DesiredReplicas == nil ||
*hra.Status.DesiredReplicas < *replicas ||
hra.Status.LastSuccessfulScaleOutTime == nil ||
hra.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) {
computedReplicas = replicas
} else {
computedReplicas = hra.Status.DesiredReplicas
}
return computedReplicas, nil
}

View File

@@ -23,7 +23,6 @@ import (
"sort"
"time"
"github.com/summerwind/actions-runner-controller/github"
"k8s.io/apimachinery/pkg/types"
"github.com/davecgh/go-spew/spew"
@@ -49,10 +48,9 @@ const (
// RunnerDeploymentReconciler reconciles a Runner object
type RunnerDeploymentReconciler struct {
client.Client
GitHubClient *github.Client
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
Log logr.Logger
Recorder record.EventRecorder
Scheme *runtime.Scheme
}
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerdeployments,verbs=get;list;watch;create;update;patch;delete
@@ -97,7 +95,7 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
oldSets = myRunnerReplicaSets[1:]
}
desiredRS, err := r.newRunnerReplicaSetWithAutoscaling(rd)
desiredRS, err := r.newRunnerReplicaSet(rd)
if err != nil {
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerAutoscalingFailure", err.Error())
@@ -193,12 +191,6 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
updated := rd.DeepCopy()
updated.Status.Replicas = desiredRS.Spec.Replicas
if (rd.Status.Replicas == nil && *desiredRS.Spec.Replicas > 1) ||
(rd.Status.Replicas != nil && *desiredRS.Spec.Replicas > *rd.Status.Replicas) {
updated.Status.LastSuccessfulScaleOutTime = &metav1.Time{Time: time.Now()}
}
if err := r.Status().Update(ctx, updated); err != nil {
log.Error(err, "Failed to update runnerdeployment status")
@@ -263,7 +255,7 @@ func CloneAndAddLabel(labels map[string]string, labelKey, labelValue string) map
return newLabels
}
func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment, computedReplicas *int) (*v1alpha1.RunnerReplicaSet, error) {
func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) {
newRSTemplate := *rd.Spec.Template.DeepCopy()
templateHash := ComputeHash(&newRSTemplate)
// Add template hash label to selector.
@@ -284,10 +276,6 @@ func (r *RunnerDeploymentReconciler) newRunnerReplicaSet(rd v1alpha1.RunnerDeplo
},
}
if computedReplicas != nil {
rs.Spec.Replicas = computedReplicas
}
if err := ctrl.SetControllerReference(&rd, &rs, r.Scheme); err != nil {
return &rs, err
}
@@ -319,36 +307,3 @@ func (r *RunnerDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) error {
Owns(&v1alpha1.RunnerReplicaSet{}).
Complete(r)
}
func (r *RunnerDeploymentReconciler) newRunnerReplicaSetWithAutoscaling(rd v1alpha1.RunnerDeployment) (*v1alpha1.RunnerReplicaSet, error) {
var computedReplicas *int
if rd.Spec.Replicas == nil {
replicas, err := r.determineDesiredReplicas(rd)
if err != nil {
return nil, err
}
var scaleDownDelay time.Duration
if rd.Spec.ScaleDownDelaySecondsAfterScaleUp != nil {
scaleDownDelay = time.Duration(*rd.Spec.ScaleDownDelaySecondsAfterScaleUp) * time.Second
} else {
scaleDownDelay = 10 * time.Minute
}
now := time.Now()
if rd.Status.Replicas == nil ||
*rd.Status.Replicas < *replicas ||
rd.Status.LastSuccessfulScaleOutTime == nil ||
rd.Status.LastSuccessfulScaleOutTime.Add(scaleDownDelay).Before(now) {
computedReplicas = replicas
} else {
computedReplicas = rd.Status.Replicas
}
}
return r.newRunnerReplicaSet(rd, computedReplicas)
}