batch processing w/ exponential backoff (#79)

Signed-off-by: Brian DeHamer <bdehamer@github.com>
This commit is contained in:
Brian DeHamer
2024-06-03 07:56:25 -07:00
committed by GitHub
parent a0652efe33
commit 9e752e3d76
8 changed files with 140 additions and 66 deletions

View File

@@ -65,7 +65,7 @@ See [action.yml](action.yml)
with:
# Path to the artifact serving as the subject of the attestation. Must
# specify exactly one of "subject-path" or "subject-digest". May contain
# a glob pattern or list of paths (total subject count cannot exceed 64).
# a glob pattern or list of paths (total subject count cannot exceed 2500).
subject-path:
# SHA256 digest of the subject for the attestation. Must be in the form
@@ -115,6 +115,15 @@ If multiple subjects are being attested at the same time, each attestation will
be written to the output file on a separate line (using the [JSON Lines][7]
format).
## Attestation Limits
### Subject Limits
No more than 2500 subjects can be attested at the same time. Subjects will be
processed in batches of 50. After the initial group of 50, each subsequent batch
will incur an exponentially increasing amount of delay (capped at 1 minute of
delay per batch) to avoid overwhelming the attestation API.
## Examples
### Identify Subject by Path
@@ -175,8 +184,8 @@ fully-qualified image name (e.g. "ghcr.io/user/app" or
"acme.azurecr.io/user/app"). Do NOT include a tag as part of the image name --
the specific image being attested is identified by the supplied digest.
> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the
> registry portion of the image name.
> **NOTE**: When pushing to Docker Hub, please use "docker.io" as the registry
> portion of the image name.
```yaml
name: build-attested-image

View File

@@ -46,8 +46,7 @@ const defaultInputs: main.RunInputs = {
pushToRegistry: false,
githubToken: '',
privateSigning: false,
batchSize: 50,
batchDelay: 5000
batchSize: 50
}
describe('action', () => {
@@ -355,8 +354,7 @@ describe('action', () => {
predicateType,
predicate,
githubToken: 'gh-token',
batchSize: 2,
batchDelay: 500
batchSize: 2
}
await main.run(inputs)
@@ -377,6 +375,54 @@ describe('action', () => {
expect(scope.isDone()).toBe(true)
})
})
describe('when the subject count exceeds the max', () => {
  // One subject more than the 2500 named in the expected error message
  const subjectCount = 2501
  const filename = 'subject'
  const content = 'file content'
  let dir = ''

  beforeEach(async () => {
    // Set-up temp directory beneath the resolved system temp location
    const tmpRoot = await fs.realpath(os.tmpdir())
    dir = await fs.mkdtemp(tmpRoot + path.sep)

    // Add files for glob testing, written one at a time
    const indices = Array.from({ length: subjectCount }, (_, n) => n)
    for (const n of indices) {
      await fs.writeFile(path.join(dir, `${filename}-${n}`), content)
    }

    // Set the GH context with private repository visibility and a repo owner.
    setGHContext({
      payload: { repository: { visibility: 'private' } },
      repo: { owner: 'foo', repo: 'bar' }
    })
  })

  afterEach(async () => {
    // Clean-up temp directory
    await fs.rm(dir, { recursive: true })
  })

  it('sets a failed status', async () => {
    const expectedError = new Error(
      'Too many subjects specified. The maximum number of subjects is 2500.'
    )

    // Glob over every generated file so the subject count exceeds the limit
    const inputs: main.RunInputs = {
      ...defaultInputs,
      subjectPath: path.join(dir, `${filename}-*`),
      predicateType,
      predicate,
      githubToken: 'gh-token'
    }

    await main.run(inputs)

    expect(runMock).toHaveReturned()
    expect(setFailedMock).toHaveBeenCalledWith(expectedError)
  })
})
})
// Stubbing the GitHub context is a bit tricky. We need to use

View File

@@ -151,7 +151,7 @@ describe('subjectFromInputs', () => {
})
})
describe('when the file eixts', () => {
describe('when the file exists', () => {
let dir = ''
const filename = 'subject'
const content = 'file content'

View File

@@ -10,7 +10,7 @@ inputs:
description: >
Path to the artifact serving as the subject of the attestation. Must
specify exactly one of "subject-path" or "subject-digest". May contain a
glob pattern or list of paths (total subject count cannot exceed 64).
glob pattern or list of paths (total subject count cannot exceed 2500).
required: false
subject-digest:
description: >

58
dist/index.js generated vendored
View File

@@ -80002,7 +80002,6 @@ Object.defineProperty(exports, "__esModule", ({ value: true }));
const core = __importStar(__nccwpck_require__(42186));
const main_1 = __nccwpck_require__(70399);
const DEFAULT_BATCH_SIZE = 50;
const DEFAULT_BATCH_DELAY = 5000;
const inputs = {
subjectPath: core.getInput('subject-path'),
subjectName: core.getInput('subject-name'),
@@ -80015,8 +80014,7 @@ const inputs = {
// undocumented -- not part of public interface
privateSigning: ['true', 'True', 'TRUE', '1'].includes(core.getInput('private-signing')),
// internal only
batchSize: DEFAULT_BATCH_SIZE,
batchDelay: DEFAULT_BATCH_DELAY
batchSize: DEFAULT_BATCH_SIZE
};
// eslint-disable-next-line @typescript-eslint/no-floating-promises
(0, main_1.run)(inputs);
@@ -80068,6 +80066,8 @@ const predicate_1 = __nccwpck_require__(72103);
const style = __importStar(__nccwpck_require__(41583));
const subject_1 = __nccwpck_require__(95206);
const ATTESTATION_FILE_NAME = 'attestation.jsonl';
const DELAY_INTERVAL_MS = 75;
const DELAY_MAX_MS = 1200;
/* istanbul ignore next */
const logHandler = (level, ...args) => {
// Send any HTTP-related log events to the GitHub Actions debug log
@@ -80101,17 +80101,18 @@ async function run(inputs) {
const outputPath = path_1.default.join(tempDir(), ATTESTATION_FILE_NAME);
core.setOutput('bundle-path', outputPath);
const subjectChunks = chunkArray(subjects, inputs.batchSize);
let chunkCount = 0;
// Generate attestations for each subject serially, working in batches
for (const subjectChunk of subjectChunks) {
// Delay between batches (only when chunkCount > 0)
if (chunkCount++) {
await new Promise(resolve => setTimeout(resolve, inputs.batchDelay));
}
for (let i = 0; i < subjectChunks.length; i++) {
if (subjectChunks.length > 1) {
core.info(`Processing subject batch ${chunkCount}/${subjectChunks.length}`);
core.info(`Processing subject batch ${i + 1}/${subjectChunks.length}`);
}
for (const subject of subjectChunk) {
// Calculate the delay time for this batch
const delayTime = delay(i);
for (const subject of subjectChunks[i]) {
// Delay between attestations (skipped for the first batch)
if (i > 0) {
await new Promise(resolve => setTimeout(resolve, delayTime));
}
const att = await (0, attest_1.createAttestation)(subject, predicate, {
sigstoreInstance,
pushToRegistry: inputs.pushToRegistry,
@@ -80190,6 +80191,8 @@ const tempDir = () => {
// Split an array into consecutive slices holding at most chunkSize elements
// each; the final slice carries whatever remains.
const chunkArray = (array, chunkSize) => {
    const chunkTotal = Math.ceil(array.length / chunkSize);
    const sliceAt = (_, position) => {
        const begin = position * chunkSize;
        const end = (position + 1) * chunkSize;
        return array.slice(begin, end);
    };
    return Array.from({ length: chunkTotal }, sliceAt);
};
// Calculate the delay time for a given iteration: the base interval is
// doubled once per iteration and the result is capped at DELAY_MAX_MS.
const delay = (iteration) => {
    const backoff = DELAY_INTERVAL_MS * Math.pow(2, iteration);
    return Math.min(backoff, DELAY_MAX_MS);
};
// Build the URL of the attestation with the given ID from the server URL,
// repository owner and repository name found in the GitHub context.
const attestationURL = (id) => {
    const { serverUrl, repo } = github.context;
    const { owner, repo: repoName } = repo;
    return `${serverUrl}/${owner}/${repoName}/attestations/${id}`;
};
@@ -80287,6 +80290,7 @@ const crypto_1 = __importDefault(__nccwpck_require__(6113));
const sync_1 = __nccwpck_require__(74393);
const fs_1 = __importDefault(__nccwpck_require__(57147));
const path_1 = __importDefault(__nccwpck_require__(71017));
const MAX_SUBJECT_COUNT = 2500;
const DIGEST_ALGORITHM = 'sha256';
// Returns the subject specified by the action's inputs. The subject may be
// specified as a path to a file or as a digest. If a path is provided, the
@@ -80317,27 +80321,31 @@ exports.subjectFromInputs = subjectFromInputs;
// Returns the subject specified by the path to a file. The file's digest is
// calculated and returned along with the subject's name.
const getSubjectFromPath = async (subjectPath, subjectName) => {
const subjects = [];
const digestedSubjects = [];
const files = [];
// Parse the list of subject paths
const subjectPaths = parseList(subjectPath);
// Expand the globbed paths to a list of files
for (const subPath of subjectPaths) {
// Expand the globbed path to a list of files
/* eslint-disable-next-line github/no-then */
const files = await glob.create(subPath).then(async (g) => g.glob());
for (const file of files) {
// Skip anything that is NOT a file
if (!fs_1.default.statSync(file).isFile()) {
continue;
}
const name = subjectName || path_1.default.parse(file).base;
const digest = await digestFile(DIGEST_ALGORITHM, file);
subjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } });
}
files.push(...(await glob.create(subPath).then(async (g) => g.glob())));
}
if (subjects.length === 0) {
if (files.length > MAX_SUBJECT_COUNT) {
throw new Error(`Too many subjects specified. The maximum number of subjects is ${MAX_SUBJECT_COUNT}.`);
}
for (const file of files) {
// Skip anything that is NOT a file
if (!fs_1.default.statSync(file).isFile()) {
continue;
}
const name = subjectName || path_1.default.parse(file).base;
const digest = await digestFile(DIGEST_ALGORITHM, file);
digestedSubjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } });
}
if (digestedSubjects.length === 0) {
throw new Error(`Could not find subject at path ${subjectPath}`);
}
return Promise.all(subjects);
return digestedSubjects;
};
// Returns the subject specified by the digest of a file. The digest is returned
// along with the subject's name.

View File

@@ -5,7 +5,6 @@ import * as core from '@actions/core'
import { run, RunInputs } from './main'
const DEFAULT_BATCH_SIZE = 50
const DEFAULT_BATCH_DELAY = 5000
const inputs: RunInputs = {
subjectPath: core.getInput('subject-path'),
@@ -21,8 +20,7 @@ const inputs: RunInputs = {
core.getInput('private-signing')
),
// internal only
batchSize: DEFAULT_BATCH_SIZE,
batchDelay: DEFAULT_BATCH_DELAY
batchSize: DEFAULT_BATCH_SIZE
}
// eslint-disable-next-line @typescript-eslint/no-floating-promises

View File

@@ -10,6 +10,8 @@ import * as style from './style'
import { SubjectInputs, subjectFromInputs } from './subject'
const ATTESTATION_FILE_NAME = 'attestation.jsonl'
const DELAY_INTERVAL_MS = 75
const DELAY_MAX_MS = 1200
export type RunInputs = SubjectInputs &
PredicateInputs & {
@@ -17,7 +19,6 @@ export type RunInputs = SubjectInputs &
githubToken: string
privateSigning: boolean
batchSize: number
batchDelay: number
}
/* istanbul ignore next */
@@ -62,22 +63,22 @@ export async function run(inputs: RunInputs): Promise<void> {
core.setOutput('bundle-path', outputPath)
const subjectChunks = chunkArray(subjects, inputs.batchSize)
let chunkCount = 0
// Generate attestations for each subject serially, working in batches
for (const subjectChunk of subjectChunks) {
// Delay between batches (only when chunkCount > 0)
if (chunkCount++) {
await new Promise(resolve => setTimeout(resolve, inputs.batchDelay))
}
for (let i = 0; i < subjectChunks.length; i++) {
if (subjectChunks.length > 1) {
core.info(
`Processing subject batch ${chunkCount}/${subjectChunks.length}`
)
core.info(`Processing subject batch ${i + 1}/${subjectChunks.length}`)
}
for (const subject of subjectChunk) {
// Calculate the delay time for this batch
const delayTime = delay(i)
for (const subject of subjectChunks[i]) {
// Delay between attestations (skipped for the first batch)
if (i > 0) {
await new Promise(resolve => setTimeout(resolve, delayTime))
}
const att = await createAttestation(subject, predicate, {
sigstoreInstance,
pushToRegistry: inputs.pushToRegistry,
@@ -197,5 +198,9 @@ const chunkArray = <T>(array: T[], chunkSize: number): T[][] => {
)
}
// Calculate the delay time for a given iteration: the base interval is
// doubled once per iteration and the result is capped at DELAY_MAX_MS.
const delay = (iteration: number): number => {
  const backoff = DELAY_INTERVAL_MS * Math.pow(2, iteration)
  return Math.min(backoff, DELAY_MAX_MS)
}
// Build the URL of the attestation with the given ID from the server URL,
// repository owner and repository name found in the GitHub context.
const attestationURL = (id: string): string => {
  const { serverUrl, repo } = github.context
  const { owner, repo: repoName } = repo
  return `${serverUrl}/${owner}/${repoName}/attestations/${id}`
}

View File

@@ -6,6 +6,7 @@ import path from 'path'
import type { Subject } from '@actions/attest'
const MAX_SUBJECT_COUNT = 2500
const DIGEST_ALGORITHM = 'sha256'
export type SubjectInputs = {
@@ -54,34 +55,41 @@ const getSubjectFromPath = async (
subjectPath: string,
subjectName?: string
): Promise<Subject[]> => {
const subjects: Subject[] = []
const digestedSubjects: Subject[] = []
const files: string[] = []
// Parse the list of subject paths
const subjectPaths = parseList(subjectPath)
// Expand the globbed paths to a list of files
for (const subPath of subjectPaths) {
// Expand the globbed path to a list of files
/* eslint-disable-next-line github/no-then */
const files = await glob.create(subPath).then(async g => g.glob())
for (const file of files) {
// Skip anything that is NOT a file
if (!fs.statSync(file).isFile()) {
continue
}
const name = subjectName || path.parse(file).base
const digest = await digestFile(DIGEST_ALGORITHM, file)
subjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } })
}
files.push(...(await glob.create(subPath).then(async g => g.glob())))
}
if (subjects.length === 0) {
if (files.length > MAX_SUBJECT_COUNT) {
throw new Error(
`Too many subjects specified. The maximum number of subjects is ${MAX_SUBJECT_COUNT}.`
)
}
for (const file of files) {
// Skip anything that is NOT a file
if (!fs.statSync(file).isFile()) {
continue
}
const name = subjectName || path.parse(file).base
const digest = await digestFile(DIGEST_ALGORITHM, file)
digestedSubjects.push({ name, digest: { [DIGEST_ALGORITHM]: digest } })
}
if (digestedSubjects.length === 0) {
throw new Error(`Could not find subject at path ${subjectPath}`)
}
return Promise.all(subjects)
return digestedSubjects
}
// Returns the subject specified by the digest of a file. The digest is returned