2020-01-28 21:56:54 +09:00
|
|
|
#!/bin/bash
|
2022-04-12 23:02:06 +02:00
|
|
|
source logger.bash
|
2020-01-28 21:56:54 +09:00
|
|
|
|
2022-03-18 08:40:52 +01:00
|
|
|
# Location of the runner assets; /runnertmp is used when the variable is unset or empty.
: "${RUNNER_ASSETS_DIR:=/runnertmp}"
|
2021-08-02 10:11:48 +01:00
|
|
|
# Runner home directory; /runner is used when the variable is unset or empty.
: "${RUNNER_HOME:=/runner}"
|
|
|
|
|
|
2022-07-09 23:11:29 -07:00
|
|
|
# Let GitHub runner execute these hooks. These environment variables are used by GitHub's Runner as described here
|
|
|
|
|
# https://github.com/actions/runner/blob/main/docs/adrs/1751-runner-job-hooks.md
|
|
|
|
|
# Scripts referenced in the ACTIONS_RUNNER_HOOK_ environment variables must end in .sh or .ps1
|
|
|
|
|
# for it to become a valid hook script, otherwise GitHub will fail to run the hook
|
|
|
|
|
ACTIONS_RUNNER_HOOK_JOB_STARTED=/etc/arc/hooks/job-started.sh
export ACTIONS_RUNNER_HOOK_JOB_STARTED
|
|
|
|
|
ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/etc/arc/hooks/job-completed.sh
export ACTIONS_RUNNER_HOOK_JOB_COMPLETED
|
|
|
|
|
|
2021-07-30 11:58:04 +01:00
|
|
|
# Optional startup delay: when STARTUP_DELAY_IN_SECONDS is non-empty, log the
# delay and sleep for that long before continuing.
if [[ -n "${STARTUP_DELAY_IN_SECONDS:-}" ]]; then
  log.notice "Delaying startup by ${STARTUP_DELAY_IN_SECONDS} seconds"
  # Quoted so the value reaches sleep as exactly one argument (no word
  # splitting or globbing of the user-supplied value).
  sleep "${STARTUP_DELAY_IN_SECONDS}"
fi
|
|
|
|
|
|
2020-10-28 15:15:53 +02:00
|
|
|
# Resolve the GitHub endpoint. A caller-supplied GITHUB_URL is kept, with a
# trailing slash appended when its last character is not already "/";
# without one, the public GitHub URL is used.
if [[ -n "${GITHUB_URL}" ]]; then
  case "${GITHUB_URL}" in
    */) ;;                                # already slash-terminated
    *) GITHUB_URL="${GITHUB_URL}/" ;;
  esac
  log.debug "Github endpoint URL ${GITHUB_URL}"
else
  log.debug 'Working with public GitHub'
  GITHUB_URL="https://github.com/"
fi
|
|
|
|
|
|
2020-01-28 21:56:54 +09:00
|
|
|
# RUNNER_NAME is mandatory: log an error and exit with status 1 when it is empty.
[[ -n "${RUNNER_NAME}" ]] || {
  log.error 'RUNNER_NAME must be set'
  exit 1
}
|
|
|
|
|
|
2021-02-05 02:31:06 +02:00
|
|
|
# Work out ATTACH, the path that is later appended to GITHUB_URL when
# registering the runner. Branches are evaluated in this order:
#   1. org, repo and enterprise all set -> "<org>/<repo>"
#   2. org set                          -> "<org>"
#   3. repo set                         -> "<repo>"
#   4. enterprise set                   -> "enterprises/<enterprise>"
# If none of the three variables is set, the script exits with status 1.
if [[ -n "${RUNNER_ORG}" && -n "${RUNNER_REPO}" && -n "${RUNNER_ENTERPRISE}" ]]; then
  ATTACH="${RUNNER_ORG}/${RUNNER_REPO}"
elif [[ -n "${RUNNER_ORG}" ]]; then
  ATTACH="${RUNNER_ORG}"
elif [[ -n "${RUNNER_REPO}" ]]; then
  ATTACH="${RUNNER_REPO}"
elif [[ -n "${RUNNER_ENTERPRISE}" ]]; then
  ATTACH="enterprises/${RUNNER_ENTERPRISE}"
else
  log.error 'At least one of RUNNER_ORG, RUNNER_REPO, or RUNNER_ENTERPRISE must be set'
  exit 1
fi
|
|
|
|
|
|
2020-01-28 21:56:54 +09:00
|
|
|
# RUNNER_TOKEN is mandatory: log an error and exit with status 1 when it is empty.
[[ -n "${RUNNER_TOKEN}" ]] || {
  log.error 'RUNNER_TOKEN must be set'
  exit 1
}
|
|
|
|
|
|
2021-03-08 01:18:23 +01:00
|
|
|
# Carry RUNNER_GROUP over into RUNNER_GROUPS, but only when a group was given
# and no RUNNER_REPO is set.
if [[ -n "${RUNNER_GROUP}" && -z "${RUNNER_REPO}" ]]; then
  RUNNER_GROUPS="${RUNNER_GROUP}"
fi
|
|
|
|
|
|
2021-06-22 17:55:06 +09:00
|
|
|
# Hack due to https://github.com/actions-runner-controller/actions-runner-controller/issues/252#issuecomment-758338483
# RUNNER_HOME must already exist as a directory; otherwise log an error and
# exit with status 1.
[[ -d "${RUNNER_HOME}" ]] || {
  log.error "$RUNNER_HOME should be an emptyDir mount. Please fix the pod spec."
  exit 1
}
|
|
|
|
|
|
2021-08-02 10:11:48 +01:00
|
|
|
# Skipped entirely when UNITTEST is set to a non-empty value.
if [[ -z "${UNITTEST:-}" ]]; then
  sudo chown -R runner:docker "${RUNNER_HOME}"

  # dotglob makes "*" match dotfiles as well, so a provided ".env" file is
  # copied too; loading a .env from the root of the service is part of the
  # actions/runner logic.
  shopt -s dotglob
  # cp rather than mv: avoids issues when source and destination are on
  # different devices.
  cp -r "${RUNNER_ASSETS_DIR}"/* "${RUNNER_HOME}"/
  shopt -u dotglob
fi
|
2021-01-24 10:58:35 +09:00
|
|
|
|
2021-08-02 10:11:48 +01:00
|
|
|
# Enter the runner home. The path is quoted so it is passed to cd as a single
# word, and a failed cd aborts the script instead of letting the relative
# paths used below resolve against the wrong directory.
cd "${RUNNER_HOME}" || {
  log.error "Unable to change directory to ${RUNNER_HOME}"
  exit 1
}
# past this point, all paths are relative to RUNNER_HOME (/runner by default)
|
feat: Workflow job based ephemeral runner scaling (#721)
This adds support for two upcoming enhancements on the GitHub side of self-hosted runners: ephemeral runners and `workflow_job` events. You can't use these yet.
**These features are not yet generally available to all GitHub users**. Please take this pull request as a preparation to make it available to actions-runner-controller users as soon as possible after GitHub released the necessary features on their end.
**Ephemeral runners**:
The former, ephemeral runners, is basically the reliable alternative to `--once`, which we've been using when you enabled `ephemeral: true` (default in actions-runner-controller).
`--once` has been suffering from a race issue #466. `--ephemeral` fixes that.
To enable ephemeral runners with `actions/runner`, you give `--ephemeral` to `config.sh`. This updated version of `actions-runner-controller` does it for you, by using `--ephemeral` instead of `--once` when you set `RUNNER_FEATURE_FLAG_EPHEMERAL=true`.
Please read the section `Ephemeral Runners` in the updated version of our README for more information.
Note that ephemeral runners is not released on GitHub yet. And `RUNNER_FEATURE_FLAG_EPHEMERAL=true` won't work at all until the feature gets released on GitHub. Stay tuned for an announcement from GitHub!
**`workflow_job` events**:
`workflow_job` is the additional webhook event that corresponds to each GitHub Actions workflow job run. It provides `actions-runner-controller` a solid foundation to improve our webhook-based autoscale.
Formerly, we've been exploiting webhook events like `check_run` for autoscaling. However, as none of our supported events has included `labels`, you had to configure an HRA to only match relevant `check_run` events. It wasn't trivial.
In contrast, a `workflow_job` event payload contains `labels` of runners requested. `actions-runner-controller` is able to automatically decide which HRA to scale by filtering the corresponding RunnerDeployment by `labels` included in the webhook payload. So all you need to use webhook-based autoscale will be to enable `workflow_job` on GitHub and expose actions-runner-controller's webhook server to the internet.
Note that the current implementation of `workflow_job` support works in two ways: increment and decrement. An increment happens when the webhook server receives a `workflow_job` event of `queued` status. A decrement happens when it receives a `workflow_job` event of `completed` status. The latter is used to make scaling down faster so that you waste less money than before. You still don't suffer from flapping, as a scale-down is still subject to `scaleDownDelaySecondsAfterScaleOut`.
Please read the section `Example 3: Scale on each `workflow_job` event` in the updated version of our README for more information on its usage.
2021-08-11 09:52:04 +09:00
|
|
|
|
|
|
|
|
# Collect the optional flags that are later appended to the config.sh call.
#
# Globals read:    RUNNER_FEATURE_FLAG_ONCE, RUNNER_EPHEMERAL, DISABLE_RUNNER_UPDATE
# Globals written: config_args (array, reset on every call)
build_config_args() {
  config_args=()

  # --ephemeral is added when RUNNER_EPHEMERAL is "true" and
  # RUNNER_FEATURE_FLAG_ONCE is not "true". [[ ... && ... ]] replaces the
  # deprecated, ambiguous "[ ... -a ... ]" form.
  if [[ "${RUNNER_FEATURE_FLAG_ONCE:-}" != "true" && "${RUNNER_EPHEMERAL:-}" == "true" ]]; then
    config_args+=(--ephemeral)
    log.debug 'Passing --ephemeral to config.sh to enable the ephemeral runner.'
  fi

  # --disableupdate is added when DISABLE_RUNNER_UPDATE is "true".
  if [[ "${DISABLE_RUNNER_UPDATE:-}" == "true" ]]; then
    config_args+=(--disableupdate)
    log.debug 'Passing --disableupdate to config.sh to disable automatic runner updates.'
  fi
}

build_config_args
|
feat: Workflow job based ephemeral runner scaling (#721)
This adds support for two upcoming enhancements on the GitHub side of self-hosted runners: ephemeral runners and `workflow_job` events. You can't use these yet.
**These features are not yet generally available to all GitHub users**. Please take this pull request as a preparation to make it available to actions-runner-controller users as soon as possible after GitHub released the necessary features on their end.
**Ephemeral runners**:
The former, ephemeral runners, is basically the reliable alternative to `--once`, which we've been using when you enabled `ephemeral: true` (default in actions-runner-controller).
`--once` has been suffering from a race issue #466. `--ephemeral` fixes that.
To enable ephemeral runners with `actions/runner`, you give `--ephemeral` to `config.sh`. This updated version of `actions-runner-controller` does it for you, by using `--ephemeral` instead of `--once` when you set `RUNNER_FEATURE_FLAG_EPHEMERAL=true`.
Please read the section `Ephemeral Runners` in the updated version of our README for more information.
Note that ephemeral runners is not released on GitHub yet. And `RUNNER_FEATURE_FLAG_EPHEMERAL=true` won't work at all until the feature gets released on GitHub. Stay tuned for an announcement from GitHub!
**`workflow_job` events**:
`workflow_job` is the additional webhook event that corresponds to each GitHub Actions workflow job run. It provides `actions-runner-controller` a solid foundation to improve our webhook-based autoscale.
Formerly, we've been exploiting webhook events like `check_run` for autoscaling. However, as none of our supported events has included `labels`, you had to configure an HRA to only match relevant `check_run` events. It wasn't trivial.
In contrast, a `workflow_job` event payload contains `labels` of runners requested. `actions-runner-controller` is able to automatically decide which HRA to scale by filtering the corresponding RunnerDeployment by `labels` included in the webhook payload. So all you need to use webhook-based autoscale will be to enable `workflow_job` on GitHub and expose actions-runner-controller's webhook server to the internet.
Note that the current implementation of `workflow_job` support works in two ways: increment and decrement. An increment happens when the webhook server receives a `workflow_job` event of `queued` status. A decrement happens when it receives a `workflow_job` event of `completed` status. The latter is used to make scaling down faster so that you waste less money than before. You still don't suffer from flapping, as a scale-down is still subject to `scaleDownDelaySecondsAfterScaleOut`.
Please read the section `Example 3: Scale on each `workflow_job` event` in the updated version of our README for more information on its usage.
2021-08-11 09:52:04 +09:00
|
|
|
|
2022-07-09 23:11:29 -07:00
|
|
|
update-status "Registering"

# Run config.sh up to ten times, one second apart. The exit status of
# config.sh is not consulted: an attempt counts as successful when the
# .runner file exists afterwards.
retries_left=10
until [[ ${retries_left} -le 0 ]]; do
  log.debug 'Configuring the runner.'
  ./config.sh --unattended --replace \
    --name "${RUNNER_NAME}" \
    --url "${GITHUB_URL}${ATTACH}" \
    --token "${RUNNER_TOKEN}" \
    --runnergroup "${RUNNER_GROUPS}" \
    --labels "${RUNNER_LABELS}" \
    --work "${RUNNER_WORKDIR}" "${config_args[@]}"

  if [[ -f .runner ]]; then
    log.debug 'Runner successfully configured.'
    break
  fi

  log.debug 'Configuration failed. Retrying'
  retries_left=$((retries_left - 1))
  sleep 1
done
|
|
|
|
|
|
2021-08-02 10:11:48 +01:00
|
|
|
# Without a .runner file the runner could not be configured and registered,
# so there is no point continuing: log an error and exit with status 2.
[[ -f .runner ]] || {
  log.error 'Configuration failed!'
  exit 2
}
|
|
|
|
|
|
2021-07-27 15:11:43 +01:00
|
|
|
# Print the .runner file (its presence was verified just above) to stdout.
cat .runner
|
|
|
|
|
# Note: the `.runner` file's content should be something like the below:
|
|
|
|
|
#
|
|
|
|
|
# $ cat /runner/.runner
|
|
|
|
|
# {
|
|
|
|
|
# "agentId": 171, #=> corresponds to the ID of the runner
|
|
|
|
|
# "agentName": "THE_RUNNER_POD_NAME",
|
|
|
|
|
# "poolId": 1,
|
|
|
|
|
# "poolName": "Default",
|
|
|
|
|
# "serverUrl": "https://pipelines.actions.githubusercontent.com/SOME_RANDOM_ID",
|
|
|
|
|
# "gitHubUrl": "https://github.com/USER/REPO",
|
|
|
|
|
# "workFolder": "/some/work/dir" #=> corresponds to Runner.Spec.WorkDir
|
|
|
|
|
# }
|
|
|
|
|
#
|
|
|
|
|
# Especially `agentId` is important, as other than listing all the runners in the repo,
|
|
|
|
|
# this is the only chance we could get the exact runner ID which can be useful for further
|
|
|
|
|
# GitHub API call like the below. Note that 171 is the agentId seen above.
|
|
|
|
|
# curl \
|
|
|
|
|
# -H "Accept: application/vnd.github.v3+json" \
|
|
|
|
|
# -H "Authorization: bearer ${GITHUB_TOKEN}" \
|
|
|
|
|
# https://api.github.com/repos/USER/REPO/actions/runners/171
|
|
|
|
|
|
2022-05-16 02:37:37 -07:00
|
|
|
# Hack due to the DinD volumes
# Outside of unit tests, when ./externalstmp exists, make sure ./externals
# exists and move everything matched by ./externalstmp/* into it.
if [[ -z "${UNITTEST:-}" && -e ./externalstmp ]]; then
  mkdir -p ./externals
  mv ./externalstmp/* ./externals/
fi
|
2020-10-05 01:58:20 +02:00
|
|
|
|
2022-06-30 03:31:50 +01:00
|
|
|
# Wait for the Docker daemon when DOCKER_ENABLED is "true" and the wait has
# not been switched off via DISABLE_WAIT_FOR_DOCKER="true".
if [[ "${DISABLE_WAIT_FOR_DOCKER:-}" != "true" && "${DOCKER_ENABLED:-}" == "true" ]]; then
  log.debug 'Docker enabled runner detected and Docker daemon wait is enabled'
  log.debug 'Waiting until Docker is available or the timeout is reached'
  # Poll "docker ps" once per second for at most 120 seconds. A failed wait
  # used to go unnoticed; it is now logged, and startup still continues as
  # before.
  if ! timeout 120s bash -c 'until docker ps ;do sleep 1; done'; then
    log.error 'Docker did not become available within 120 seconds, continuing with entrypoint'
  fi
else
  log.notice 'Docker wait check skipped. Either Docker is disabled or the wait is disabled, continuing with entrypoint'
fi
|
|
|
|
|
|
2022-03-22 19:02:51 +00:00
|
|
|
# Unset entrypoint environment variables so they don't leak into the runner environment
|
|
|
|
|
unset \
  RUNNER_NAME \
  RUNNER_REPO \
  RUNNER_TOKEN \
  STARTUP_DELAY_IN_SECONDS \
  DISABLE_WAIT_FOR_DOCKER
|
2022-03-18 08:40:52 +01:00
|
|
|
|
|
|
|
|
# Docker ignores PAM and thus never loads the system environment variables that
|
|
|
|
|
# are meant to be set in every environment of every user. We emulate the PAM
|
|
|
|
|
# behavior by reading the environment variables without interpreting them.
|
|
|
|
|
#
|
|
|
|
|
# https://github.com/actions-runner-controller/actions-runner-controller/issues/1135
|
|
|
|
|
# https://github.com/actions/runner/issues/1703
|
2022-03-22 19:02:51 +00:00
|
|
|
|
|
|
|
|
# Read an environment file into the global "env" array, one entry per line.
#
# Lines are taken verbatim (no expansion or unquoting), but blank lines and
# lines starting with "#" are dropped: every entry is later handed to
# `env` as an argument, and `env` treats the first argument without "=" as
# the command to run.
#
# Arguments: $1 - file to read (default: /etc/environment)
# Globals:   env (written; reset to an empty array first)
# Returns:   0, also when the file is missing or unreadable (env stays empty)
load_system_environment() {
  local env_file=${1:-/etc/environment}
  local comment_re='^[[:space:]]*#'
  local line

  env=()
  [[ -r "${env_file}" ]] || return 0

  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    [[ -z "${line//[[:space:]]/}" ]] && continue
    [[ "${line}" =~ ${comment_re} ]] && continue
    env+=("${line}")
  done <"${env_file}"
  return 0
}

# /etc/environment may not exist when running unit tests depending on the platform being used
# (e.g. Mac OS) so we just skip the mapping entirely
if [[ -z "${UNITTEST:-}" ]]; then
  load_system_environment
fi
|
2022-07-09 23:11:29 -07:00
|
|
|
# Invoke update-status with "Idle". NOTE(review): update-status is not defined
# in this file — presumably a helper available on PATH; verify.
update-status "Idle"
|
2022-06-29 21:49:52 +09:00
|
|
|
# Replace this shell with ./run.sh, launched through `env` so that every
# element of the "env" array is passed to it as an argument ahead of the
# command (the array is never populated when UNITTEST is set).
exec env -- "${env[@]}" ./run.sh
|