2020-02-21 03:01:52 +09:00
/*
Copyright 2020 The actions-runner-controller authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controllers
import (
"context"
2021-02-09 10:17:52 +09:00
"errors"
2020-02-21 03:01:52 +09:00
"fmt"
2021-02-09 10:17:52 +09:00
"time"
2020-03-15 18:08:11 +09:00
2021-02-25 00:38:55 +01:00
gogithub "github.com/google/go-github/v33/github"
2020-02-21 03:01:52 +09:00
"github.com/go-logr/logr"
2021-02-09 10:17:52 +09:00
kerrors "k8s.io/apimachinery/pkg/api/errors"
2020-02-21 03:01:52 +09:00
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/summerwind/actions-runner-controller/api/v1alpha1"
2020-10-05 01:06:37 +01:00
"github.com/summerwind/actions-runner-controller/github"
2020-02-21 03:01:52 +09:00
)
2020-03-10 09:14:11 +09:00
// RunnerReplicaSetReconciler reconciles a Runner object
type RunnerReplicaSetReconciler struct {
2020-02-21 03:01:52 +09:00
client . Client
2020-10-05 01:06:37 +01:00
Log logr . Logger
Recorder record . EventRecorder
Scheme * runtime . Scheme
GitHubClient * github . Client
2021-02-19 10:33:04 +09:00
Name string
2020-02-21 03:01:52 +09:00
}
2020-03-15 18:08:11 +09:00
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets,verbs=get;list;watch;create;update;patch;delete
2020-10-06 09:23:03 +09:00
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/finalizers,verbs=get;list;watch;create;update;patch;delete
2020-03-15 18:08:11 +09:00
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runnerreplicasets/status,verbs=get;update;patch
2020-02-21 03:01:52 +09:00
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=actions.summerwind.dev,resources=runners/status,verbs=get;update;patch
2020-03-27 23:25:37 +09:00
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
2020-02-21 03:01:52 +09:00
2020-03-10 09:14:11 +09:00
func ( r * RunnerReplicaSetReconciler ) Reconcile ( req ctrl . Request ) ( ctrl . Result , error ) {
2020-02-21 03:01:52 +09:00
ctx := context . Background ( )
2021-01-24 10:58:35 +09:00
log := r . Log . WithValues ( "runnerreplicaset" , req . NamespacedName )
2020-02-21 03:01:52 +09:00
2020-03-10 09:14:11 +09:00
var rs v1alpha1 . RunnerReplicaSet
2020-02-21 03:01:52 +09:00
if err := r . Get ( ctx , req . NamespacedName , & rs ) ; err != nil {
return ctrl . Result { } , client . IgnoreNotFound ( err )
}
if ! rs . ObjectMeta . DeletionTimestamp . IsZero ( ) {
return ctrl . Result { } , nil
}
2021-03-05 10:15:39 +09:00
selector , err := metav1 . LabelSelectorAsSelector ( rs . Spec . Selector )
if err != nil {
return ctrl . Result { } , err
}
// Get the Runners managed by the target RunnerReplicaSet
2020-02-21 03:01:52 +09:00
var allRunners v1alpha1 . RunnerList
2021-03-05 10:15:39 +09:00
if err := r . List (
ctx ,
& allRunners ,
client . InNamespace ( req . Namespace ) ,
client . MatchingLabelsSelector { Selector : selector } ,
) ; err != nil {
2021-02-09 10:17:52 +09:00
if ! kerrors . IsNotFound ( err ) {
2020-02-21 03:01:52 +09:00
return ctrl . Result { } , err
}
}
var myRunners [ ] v1alpha1 . Runner
2021-03-05 10:15:39 +09:00
var (
available int
ready int
)
2020-02-21 03:01:52 +09:00
for _ , r := range allRunners . Items {
2021-03-05 10:15:39 +09:00
// This guard is required to avoid the RunnerReplicaSet created by the controller v0.17.0 or before
// to not treat all the runners in the namespace as its children.
2020-02-21 03:01:52 +09:00
if metav1 . IsControlledBy ( & r , & rs ) {
myRunners = append ( myRunners , r )
available += 1
if r . Status . Phase == string ( corev1 . PodRunning ) {
ready += 1
}
}
}
var desired int
if rs . Spec . Replicas != nil {
desired = * rs . Spec . Replicas
} else {
desired = 1
}
if available > desired {
n := available - desired
2021-03-20 07:34:25 +09:00
log . V ( 0 ) . Info ( fmt . Sprintf ( "Deleting %d runners" , n ) , "desired" , desired , "available" , available , "ready" , ready )
// get runners that are currently offline/not busy/timed-out to register
var deletionCandidates [ ] v1alpha1 . Runner
2021-03-05 10:15:39 +09:00
for _ , runner := range allRunners . Items {
2021-02-09 10:17:52 +09:00
busy , err := r . GitHubClient . IsRunnerBusy ( ctx , runner . Spec . Enterprise , runner . Spec . Organization , runner . Spec . Repository , runner . Name )
2020-10-05 01:06:37 +01:00
if err != nil {
2021-02-15 01:36:49 +01:00
notRegistered := false
2021-02-22 02:08:04 +01:00
offline := false
2021-02-15 01:36:49 +01:00
2021-02-22 02:08:04 +01:00
var notFoundException * github . RunnerNotFound
var offlineException * github . RunnerOffline
if errors . As ( err , & notFoundException ) {
log . V ( 1 ) . Info ( "Failed to check if runner is busy. Either this runner has never been successfully registered to GitHub or it still needs more time." , "runnerName" , runner . Name )
2021-02-15 01:36:49 +01:00
notRegistered = true
2021-02-22 02:08:04 +01:00
} else if errors . As ( err , & offlineException ) {
offline = true
2021-02-15 01:36:49 +01:00
} else {
var e * gogithub . RateLimitError
if errors . As ( err , & e ) {
// We log the underlying error when we failed calling GitHub API to list or unregisters,
// or the runner is still busy.
log . Error (
err ,
fmt . Sprintf (
"Failed to check if runner is busy due to GitHub API rate limit. Retrying in %s to avoid excessive GitHub API calls" ,
retryDelayOnGitHubAPIRateLimitError ,
) ,
)
return ctrl . Result { RequeueAfter : retryDelayOnGitHubAPIRateLimitError } , err
2021-02-09 10:17:52 +09:00
}
2021-02-15 01:36:49 +01:00
return ctrl . Result { } , err
2021-02-09 10:17:52 +09:00
}
registrationTimeout := 15 * time . Minute
currentTime := time . Now ( )
2021-02-12 10:00:20 +09:00
registrationDidTimeout := currentTime . Sub ( runner . CreationTimestamp . Add ( registrationTimeout ) ) > 0
2021-02-09 10:17:52 +09:00
2021-02-15 01:36:49 +01:00
if notRegistered && registrationDidTimeout {
2021-02-09 10:17:52 +09:00
log . Info (
"Runner failed to register itself to GitHub in timely manner. " +
2021-02-22 02:08:04 +01:00
"Marking the runner for scale down. " +
2021-02-09 10:17:52 +09:00
"CAUTION: If you see this a lot, you should investigate the root cause. " +
"See https://github.com/summerwind/actions-runner-controller/issues/288" ,
"runnerCreationTimestamp" , runner . CreationTimestamp ,
"currentTime" , currentTime ,
"configuredRegistrationTimeout" , registrationTimeout ,
)
2021-03-20 07:34:25 +09:00
deletionCandidates = append ( deletionCandidates , runner )
2021-02-09 10:17:52 +09:00
}
2021-02-22 02:08:04 +01:00
// offline runners should always be a great target for scale down
if offline {
2021-03-20 07:34:25 +09:00
deletionCandidates = append ( deletionCandidates , runner )
2021-02-22 02:08:04 +01:00
}
2021-02-09 10:17:52 +09:00
} else if ! busy {
2021-03-20 07:34:25 +09:00
deletionCandidates = append ( deletionCandidates , runner )
2020-10-05 01:06:37 +01:00
}
}
2021-03-20 07:34:25 +09:00
if len ( deletionCandidates ) < n {
n = len ( deletionCandidates )
2020-10-05 01:06:37 +01:00
}
2020-02-21 03:01:52 +09:00
for i := 0 ; i < n ; i ++ {
2021-03-20 07:34:25 +09:00
if err := r . Client . Delete ( ctx , & deletionCandidates [ i ] ) ; client . IgnoreNotFound ( err ) != nil {
2020-02-21 03:01:52 +09:00
log . Error ( err , "Failed to delete runner resource" )
return ctrl . Result { } , err
}
2021-03-20 07:34:25 +09:00
r . Recorder . Event ( & rs , corev1 . EventTypeNormal , "RunnerDeleted" , fmt . Sprintf ( "Deleted runner '%s'" , deletionCandidates [ i ] . Name ) )
log . Info ( "Deleted runner" )
2020-02-21 03:01:52 +09:00
}
} else if desired > available {
n := desired - available
2021-03-20 07:34:25 +09:00
log . V ( 0 ) . Info ( fmt . Sprintf ( "Creating %d runner(s)" , n ) , "desired" , desired , "available" , available , "ready" , ready )
2020-02-21 03:01:52 +09:00
for i := 0 ; i < n ; i ++ {
newRunner , err := r . newRunner ( rs )
if err != nil {
log . Error ( err , "Could not create runner" )
return ctrl . Result { } , err
}
if err := r . Client . Create ( ctx , & newRunner ) ; err != nil {
log . Error ( err , "Failed to create runner resource" )
return ctrl . Result { } , err
}
}
}
if rs . Status . AvailableReplicas != available || rs . Status . ReadyReplicas != ready {
updated := rs . DeepCopy ( )
updated . Status . AvailableReplicas = available
updated . Status . ReadyReplicas = ready
if err := r . Status ( ) . Update ( ctx , updated ) ; err != nil {
2021-02-25 09:30:32 +09:00
log . Info ( "Failed to update status. Retrying immediately" , "error" , err . Error ( ) )
2021-02-25 09:01:02 +09:00
return ctrl . Result {
Requeue : true ,
} , nil
2020-02-21 03:01:52 +09:00
}
}
return ctrl . Result { } , nil
}
2020-03-10 09:14:11 +09:00
func ( r * RunnerReplicaSetReconciler ) newRunner ( rs v1alpha1 . RunnerReplicaSet ) ( v1alpha1 . Runner , error ) {
2020-02-26 21:23:23 +09:00
objectMeta := rs . Spec . Template . ObjectMeta . DeepCopy ( )
2020-03-15 21:50:45 +09:00
objectMeta . GenerateName = rs . ObjectMeta . Name + "-"
2020-02-26 21:23:23 +09:00
objectMeta . Namespace = rs . ObjectMeta . Namespace
2020-02-21 03:01:52 +09:00
runner := v1alpha1 . Runner {
2020-02-26 21:23:23 +09:00
TypeMeta : metav1 . TypeMeta { } ,
ObjectMeta : * objectMeta ,
Spec : rs . Spec . Template . Spec ,
2020-02-21 03:01:52 +09:00
}
if err := ctrl . SetControllerReference ( & rs , & runner , r . Scheme ) ; err != nil {
return runner , err
}
return runner , nil
}
2020-03-10 09:14:11 +09:00
func ( r * RunnerReplicaSetReconciler ) SetupWithManager ( mgr ctrl . Manager ) error {
2021-02-16 18:51:33 +09:00
name := "runnerreplicaset-controller"
2021-02-19 10:33:04 +09:00
if r . Name != "" {
name = r . Name
}
2021-02-16 18:51:33 +09:00
r . Recorder = mgr . GetEventRecorderFor ( name )
2020-02-21 03:01:52 +09:00
return ctrl . NewControllerManagedBy ( mgr ) .
2020-03-10 09:14:11 +09:00
For ( & v1alpha1 . RunnerReplicaSet { } ) .
2020-02-21 03:01:52 +09:00
Owns ( & v1alpha1 . Runner { } ) .
2021-02-16 18:51:33 +09:00
Named ( name ) .
2020-02-21 03:01:52 +09:00
Complete ( r )
}