9 Commits
v2.0.6 ... v2

Author SHA1 Message Date
Stephanie Giang
e09e659817 Merge pull request #173 from GitPaulo/main
Support passing max_tokens and max_completion_tokens
2026-02-24 10:40:15 -05:00
Paulo Santos
e608d2ba8a update dist 2026-02-15 00:26:15 +00:00
Paulo Santos
27965bc3a4 updated docs for missing prompt.yml model parameters 2026-02-15 00:23:47 +00:00
Paulo Santos
a8bddad5e5 update dist 2026-02-13 12:41:36 +00:00
Paulo Santos
672ba8a3ac missed comment 2026-02-13 12:38:48 +00:00
Paulo Santos
3a80d137e1 update comments 2026-02-13 12:36:47 +00:00
Paulo Santos
074e8b294d copilot review: add test for coverage of no params passed 2026-02-13 12:31:45 +00:00
Paulo Santos
f1ca66fc66 build dist 2026-02-13 12:16:03 +00:00
Paulo Santos
6360e0db9b implement passing two action input properties to cover all model scenarios 2026-02-13 12:15:12 +00:00
11 changed files with 175 additions and 39 deletions

View File

@@ -123,6 +123,33 @@ supplied via the `input` parameter in YAML format. Additionally, you can
provide file-based variables via `file_input`, where each key maps to a file provide file-based variables via `file_input`, where each key maps to a file
path. path.
### Prompt.yml with model parameters
You can specify model parameters directly in your `.prompt.yml` files using the
`modelParameters` key:
```yaml
messages:
- role: system
content: Be as concise as possible
- role: user
content: 'Compare {{a}} and {{b}}, please'
model: openai/gpt-4o
modelParameters:
maxCompletionTokens: 500
temperature: 0.7
```
| Key | Type | Description |
| --------------------- | ------ | ----------------------------------------------------- |
| `maxCompletionTokens` | number | The maximum number of tokens to generate |
| `maxTokens` | number | The maximum number of tokens to generate (deprecated) |
| `temperature` | number | The sampling temperature to use (0-1) |
| `topP` | number | The nucleus sampling parameter to use (0-1) |
> [!NOTE]
> Parameters set in `modelParameters` take precedence over the corresponding action inputs.
### Using a system prompt file ### Using a system prompt file
In addition to the regular prompt, you can provide a system prompt file instead In addition to the regular prompt, you can provide a system prompt file instead
@@ -276,23 +303,24 @@ perform actions like searching issues and PRs.
Various inputs are defined in [`action.yml`](action.yml) to let you configure Various inputs are defined in [`action.yml`](action.yml) to let you configure
the action: the action:
| Name | Description | Default | | Name | Description | Default |
| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------ | | ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------ |
| `token` | Token to use for inference. Typically the GITHUB_TOKEN secret | `github.token` | | `token` | Token to use for inference. Typically the GITHUB_TOKEN secret | `github.token` |
| `prompt` | The prompt to send to the model | N/A | | `prompt` | The prompt to send to the model | N/A |
| `prompt-file` | Path to a file containing the prompt (supports .txt and .prompt.yml formats). If both `prompt` and `prompt-file` are provided, `prompt-file` takes precedence | `""` | | `prompt-file` | Path to a file containing the prompt (supports .txt and .prompt.yml formats). If both `prompt` and `prompt-file` are provided, `prompt-file` takes precedence | `""` |
| `input` | Template variables in YAML format for .prompt.yml files (e.g., `var1: value1` on separate lines) | `""` | | `input` | Template variables in YAML format for .prompt.yml files (e.g., `var1: value1` on separate lines) | `""` |
| `file_input` | Template variables in YAML where values are file paths. The file contents are read and used for templating | `""` | | `file_input` | Template variables in YAML where values are file paths. The file contents are read and used for templating | `""` |
| `system-prompt` | The system prompt to send to the model | `"You are a helpful assistant"` | | `system-prompt` | The system prompt to send to the model | `"You are a helpful assistant"` |
| `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` | | `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` |
| `model` | The model to use for inference. Must be available in the [GitHub Models](https://github.com/marketplace?type=models) catalog | `openai/gpt-4o` | | `model` | The model to use for inference. Must be available in the [GitHub Models](https://github.com/marketplace?type=models) catalog | `openai/gpt-4o` |
| `endpoint` | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` | | `endpoint` | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` |
| `max-tokens` | The max number of tokens to generate | 200 | | `max-tokens` | The maximum number of tokens to generate (deprecated, use `max-completion-tokens` instead) | 200 |
| `temperature` | The sampling temperature to use (0-1) | `""` | | `max-completion-tokens` | The maximum number of tokens to generate | `""` |
| `top-p` | The nucleus sampling parameter to use (0-1) | `""` | | `temperature` | The sampling temperature to use (0-1) | `""` |
| `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` | | `top-p` | The nucleus sampling parameter to use (0-1) | `""` |
| `github-mcp-token` | Token to use for GitHub MCP server (defaults to the main token if not specified). | `""` | | `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` |
| `custom-headers` | Custom HTTP headers to include in API requests. Supports both YAML format (`header1: value1`) and JSON format (`{"header1": "value1"}`). Useful for API Management platforms, rate limiting, and request tracking. | `""` | | `github-mcp-token` | Token to use for GitHub MCP server (defaults to the main token if not specified). | `""` |
| `custom-headers` | Custom HTTP headers to include in API requests. Supports both YAML format (`header1: value1`) and JSON format (`{"header1": "value1"}`). Useful for API Management platforms, rate limiting, and request tracking. | `""` |
## Outputs ## Outputs

View File

@@ -109,6 +109,7 @@ describe('helpers.ts - inference request building', () => {
undefined, undefined,
undefined, undefined,
100, 100,
undefined,
'https://api.test.com', 'https://api.test.com',
'test-token', 'test-token',
) )
@@ -122,6 +123,7 @@ describe('helpers.ts - inference request building', () => {
temperature: undefined, temperature: undefined,
topP: undefined, topP: undefined,
maxTokens: 100, maxTokens: 100,
maxCompletionTokens: undefined,
endpoint: 'https://api.test.com', endpoint: 'https://api.test.com',
token: 'test-token', token: 'test-token',
responseFormat: { responseFormat: {
@@ -143,6 +145,7 @@ describe('helpers.ts - inference request building', () => {
undefined, undefined,
undefined, undefined,
100, 100,
undefined,
'https://api.test.com', 'https://api.test.com',
'test-token', 'test-token',
) )
@@ -156,6 +159,7 @@ describe('helpers.ts - inference request building', () => {
temperature: undefined, temperature: undefined,
topP: undefined, topP: undefined,
maxTokens: 100, maxTokens: 100,
maxCompletionTokens: undefined,
endpoint: 'https://api.test.com', endpoint: 'https://api.test.com',
token: 'test-token', token: 'test-token',
responseFormat: undefined, responseFormat: undefined,

View File

@@ -31,7 +31,7 @@ describe('inference.ts', () => {
{role: 'user' as const, content: 'Hello, AI!'}, {role: 'user' as const, content: 'Hello, AI!'},
], ],
modelName: 'gpt-4', modelName: 'gpt-4',
maxTokens: 100, maxCompletionTokens: 100,
endpoint: 'https://api.test.com', endpoint: 'https://api.test.com',
token: 'test-token', token: 'test-token',
} }
@@ -633,4 +633,64 @@ describe('inference.ts', () => {
expect(result).toBe('{"immediate": "result"}') expect(result).toBe('{"immediate": "result"}')
}) })
}) })
// Checks which token-limit property simpleInference forwards to the mocked
// chat-completions create call, depending on which limits the request carries.
describe('token param routing', () => {
// Case 1: only the deprecated maxTokens limit is present on the request.
it('sends max_tokens when only maxTokens is set', async () => {
// Explicitly clear maxCompletionTokens so that only maxTokens remains set.
const requestWithMaxTokens = {
...mockRequest,
maxCompletionTokens: undefined,
maxTokens: 100,
}
// Minimal response stub: a single choice carrying the message content.
const mockResponse = {
choices: [
{
message: {
content: 'Direct max_tokens response',
},
},
],
}
mockCreate.mockResolvedValueOnce(mockResponse)
const result = await simpleInference(requestWithMaxTokens)
expect(result).toBe('Direct max_tokens response')
expect(mockCreate).toHaveBeenCalledTimes(1)
// The first argument of the first create call must carry max_tokens and
// must not carry max_completion_tokens at all (not even as undefined).
expect(mockCreate.mock.calls[0][0]).toHaveProperty('max_tokens', 100)
expect(mockCreate.mock.calls[0][0]).not.toHaveProperty('max_completion_tokens')
})
// Case 2: no limit of either kind is present on the request.
it('sends neither token param when both are undefined', async () => {
// Clear both limits so the request carries no token cap.
const requestWithNoTokens = {
...mockRequest,
maxCompletionTokens: undefined,
maxTokens: undefined,
}
// Minimal response stub: a single choice carrying the message content.
const mockResponse = {
choices: [
{
message: {
content: 'No token limit response',
},
},
],
}
mockCreate.mockResolvedValueOnce(mockResponse)
const result = await simpleInference(requestWithNoTokens)
expect(result).toBe('No token limit response')
expect(mockCreate).toHaveBeenCalledTimes(1)
// Neither key may be present on the outgoing create-call parameters.
const params = mockCreate.mock.calls[0][0]
expect(params).not.toHaveProperty('max_tokens')
expect(params).not.toHaveProperty('max_completion_tokens')
})
})
}) })

View File

@@ -168,6 +168,7 @@ describe('main.ts', () => {
], ],
modelName: 'gpt-4', modelName: 'gpt-4',
maxTokens: 100, maxTokens: 100,
maxCompletionTokens: undefined,
endpoint: 'https://api.test.com', endpoint: 'https://api.test.com',
token: 'fake-token', token: 'fake-token',
responseFormat: undefined, responseFormat: undefined,
@@ -259,6 +260,7 @@ describe('main.ts', () => {
], ],
modelName: 'gpt-4', modelName: 'gpt-4',
maxTokens: 100, maxTokens: 100,
maxCompletionTokens: undefined,
endpoint: 'https://api.test.com', endpoint: 'https://api.test.com',
token: 'fake-token', token: 'fake-token',
responseFormat: undefined, responseFormat: undefined,

View File

@@ -43,9 +43,13 @@ inputs:
required: false required: false
default: '' default: ''
max-tokens: max-tokens:
description: The maximum number of tokens to generate description: The maximum number of tokens to generate (deprecated)
required: false required: false
default: '200' default: '200'
max-completion-tokens:
description: The maximum number of tokens to generate
required: false
default: ''
temperature: temperature:
description: The sampling temperature to use (0-1) description: The sampling temperature to use (0-1)
required: false required: false

34
dist/index.js generated vendored
View File

@@ -58300,6 +58300,19 @@ OpenAI.Responses = Responses;
OpenAI.Evals = Evals; OpenAI.Evals = Evals;
OpenAI.Containers = Containers; OpenAI.Containers = Containers;
/**
 * Select the token-limit field to spread into a chat completion request.
 *
 * Precedence:
 *   1. `request.maxCompletionTokens`, when it is neither null nor undefined,
 *      yields `{ max_completion_tokens }`.
 *   2. Otherwise `request.maxTokens` (deprecated), when it is neither null
 *      nor undefined, yields `{ max_tokens }`.
 *   3. Otherwise an empty object, so no limit key is added at all.
 *
 * A value of 0 counts as "set"; only null/undefined are treated as absent.
 * At most one of the two keys is ever present in the result.
 *
 * @param {object} request - Inference request carrying the optional limits.
 * @returns {{max_tokens?: number, max_completion_tokens?: number}}
 */
function buildMaxTokensParam(request) {
    const completionLimit = request.maxCompletionTokens;
    if (completionLimit !== null && completionLimit !== undefined) {
        return { max_completion_tokens: completionLimit };
    }
    const legacyLimit = request.maxTokens;
    if (legacyLimit === null || legacyLimit === undefined) {
        return {};
    }
    return { max_tokens: legacyLimit };
}
/** /**
* Simple one-shot inference without tools * Simple one-shot inference without tools
*/ */
@@ -58312,10 +58325,10 @@ async function simpleInference(request) {
}); });
const chatCompletionRequest = { const chatCompletionRequest = {
messages: request.messages, messages: request.messages,
max_completion_tokens: request.maxTokens,
model: request.modelName, model: request.modelName,
temperature: request.temperature, temperature: request.temperature,
top_p: request.topP, top_p: request.topP,
...buildMaxTokensParam(request), // Note: solution around models using different underlying max tokens properties
}; };
// Add response format if specified // Add response format if specified
if (request.responseFormat) { if (request.responseFormat) {
@@ -58349,10 +58362,10 @@ async function mcpInference(request, githubMcpClient) {
coreExports.info(`MCP inference iteration ${iterationCount}`); coreExports.info(`MCP inference iteration ${iterationCount}`);
const chatCompletionRequest = { const chatCompletionRequest = {
messages: messages, messages: messages,
max_completion_tokens: request.maxTokens,
model: request.modelName, model: request.modelName,
temperature: request.temperature, temperature: request.temperature,
top_p: request.topP, top_p: request.topP,
...buildMaxTokensParam(request), // Note: solution around models using different underlying max tokens properties
}; };
// Add response format if specified (only on final iteration to avoid conflicts with tool calls) // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
if (finalMessage && request.responseFormat) { if (finalMessage && request.responseFormat) {
@@ -61382,7 +61395,8 @@ function validateAndMaskHeaders(headers) {
/** /**
* Build complete InferenceRequest from prompt config and inputs * Build complete InferenceRequest from prompt config and inputs
*/ */
function buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, temperature, topP, maxTokens, endpoint, token, customHeaders) { function buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, temperature, topP, maxTokens, // Deprecated
maxCompletionTokens, endpoint, token, customHeaders) {
const messages = buildMessages(promptConfig, systemPrompt, prompt); const messages = buildMessages(promptConfig, systemPrompt, prompt);
const responseFormat = buildResponseFormat(promptConfig); const responseFormat = buildResponseFormat(promptConfig);
return { return {
@@ -61390,7 +61404,8 @@ function buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, te
modelName, modelName,
temperature, temperature,
topP, topP,
maxTokens, maxTokens, // Deprecated
maxCompletionTokens,
endpoint, endpoint,
token, token,
responseFormat, responseFormat,
@@ -61536,10 +61551,11 @@ async function run() {
} }
// Get common parameters // Get common parameters
const modelName = promptConfig?.model || coreExports.getInput('model'); const modelName = promptConfig?.model || coreExports.getInput('model');
let maxTokens = promptConfig?.modelParameters?.maxTokens ?? coreExports.getInput('max-tokens'); // Parse token limit inputs
if (typeof maxTokens === 'string') { const maxCompletionTokensInput = promptConfig?.modelParameters?.maxCompletionTokens ?? coreExports.getInput('max-completion-tokens');
maxTokens = parseInt(maxTokens, 10); const maxCompletionTokens = maxCompletionTokensInput ? Number(maxCompletionTokensInput) : undefined;
} const maxTokensInput = promptConfig?.modelParameters?.maxTokens ?? coreExports.getInput('max-tokens');
const maxTokens = maxCompletionTokens != null ? undefined : maxTokensInput ? Number(maxTokensInput) : undefined;
const token = process.env['GITHUB_TOKEN'] || coreExports.getInput('token'); const token = process.env['GITHUB_TOKEN'] || coreExports.getInput('token');
if (token === undefined) { if (token === undefined) {
throw new Error('GITHUB_TOKEN is not set'); throw new Error('GITHUB_TOKEN is not set');
@@ -61557,7 +61573,7 @@ async function run() {
const customHeadersInput = coreExports.getInput('custom-headers'); const customHeadersInput = coreExports.getInput('custom-headers');
const customHeaders = parseCustomHeaders(customHeadersInput); const customHeaders = parseCustomHeaders(customHeadersInput);
// Build the inference request with pre-processed messages and response format // Build the inference request with pre-processed messages and response format
const inferenceRequest = buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, temperature, topP, maxTokens, endpoint, token, customHeaders); const inferenceRequest = buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, temperature, topP, maxTokens, maxCompletionTokens, endpoint, token, customHeaders);
const enableMcp = coreExports.getBooleanInput('enable-github-mcp') || false; const enableMcp = coreExports.getBooleanInput('enable-github-mcp') || false;
let modelResponse = null; let modelResponse = null;
if (enableMcp) { if (enableMcp) {

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@@ -162,7 +162,8 @@ export function buildInferenceRequest(
modelName: string, modelName: string,
temperature: number | undefined, temperature: number | undefined,
topP: number | undefined, topP: number | undefined,
maxTokens: number, maxTokens: number | undefined, // Deprecated
maxCompletionTokens: number | undefined,
endpoint: string, endpoint: string,
token: string, token: string,
customHeaders?: Record<string, string>, customHeaders?: Record<string, string>,
@@ -175,7 +176,8 @@ export function buildInferenceRequest(
modelName, modelName,
temperature, temperature,
topP, topP,
maxTokens, maxTokens, // Deprecated
maxCompletionTokens,
endpoint, endpoint,
token, token,
responseFormat, responseFormat,

View File

@@ -12,7 +12,8 @@ interface ChatMessage {
export interface InferenceRequest { export interface InferenceRequest {
messages: Array<{role: 'system' | 'user' | 'assistant' | 'tool'; content: string}> messages: Array<{role: 'system' | 'user' | 'assistant' | 'tool'; content: string}>
modelName: string modelName: string
maxTokens: number maxTokens?: number // Deprecated
maxCompletionTokens?: number
endpoint: string endpoint: string
token: string token: string
temperature?: number temperature?: number
@@ -33,6 +34,20 @@ export interface InferenceResponse {
}> }>
} }
/**
 * Select the token-limit field to spread into a chat completion request.
 *
 * `maxCompletionTokens` wins whenever it is neither null nor undefined and is
 * emitted as `max_completion_tokens`. Otherwise the deprecated `maxTokens` is
 * emitted as `max_tokens` when it is neither null nor undefined. When neither
 * is set, an empty object is returned so no limit key is added at all.
 *
 * A value of 0 counts as "set"; at most one of the two keys is ever present.
 */
function buildMaxTokensParam(request: InferenceRequest): {max_tokens?: number; max_completion_tokens?: number} {
  const completionLimit = request.maxCompletionTokens
  if (completionLimit !== null && completionLimit !== undefined) {
    return {max_completion_tokens: completionLimit}
  }
  const legacyLimit = request.maxTokens
  if (legacyLimit === null || legacyLimit === undefined) {
    return {}
  }
  return {max_tokens: legacyLimit}
}
/** /**
* Simple one-shot inference without tools * Simple one-shot inference without tools
*/ */
@@ -47,10 +62,10 @@ export async function simpleInference(request: InferenceRequest): Promise<string
const chatCompletionRequest: OpenAI.Chat.Completions.ChatCompletionCreateParams = { const chatCompletionRequest: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
messages: request.messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[], messages: request.messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
max_completion_tokens: request.maxTokens,
model: request.modelName, model: request.modelName,
temperature: request.temperature, temperature: request.temperature,
top_p: request.topP, top_p: request.topP,
...buildMaxTokensParam(request), // Note: solution around models using different underlying max tokens properties
} }
// Add response format if specified // Add response format if specified
@@ -95,10 +110,10 @@ export async function mcpInference(
const chatCompletionRequest: OpenAI.Chat.Completions.ChatCompletionCreateParams = { const chatCompletionRequest: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[], messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
max_completion_tokens: request.maxTokens,
model: request.modelName, model: request.modelName,
temperature: request.temperature, temperature: request.temperature,
top_p: request.topP, top_p: request.topP,
...buildMaxTokensParam(request), // Note: solution around models using different underlying max tokens properties
} }
// Add response format if specified (only on final iteration to avoid conflicts with tool calls) // Add response format if specified (only on final iteration to avoid conflicts with tool calls)

View File

@@ -48,11 +48,14 @@ export async function run(): Promise<void> {
// Get common parameters // Get common parameters
const modelName = promptConfig?.model || core.getInput('model') const modelName = promptConfig?.model || core.getInput('model')
let maxTokens = promptConfig?.modelParameters?.maxTokens ?? core.getInput('max-tokens')
if (typeof maxTokens === 'string') { // Parse token limit inputs
maxTokens = parseInt(maxTokens, 10) const maxCompletionTokensInput =
} promptConfig?.modelParameters?.maxCompletionTokens ?? core.getInput('max-completion-tokens')
const maxCompletionTokens = maxCompletionTokensInput ? Number(maxCompletionTokensInput) : undefined
const maxTokensInput = promptConfig?.modelParameters?.maxTokens ?? core.getInput('max-tokens')
const maxTokens = maxCompletionTokens != null ? undefined : maxTokensInput ? Number(maxTokensInput) : undefined
const token = process.env['GITHUB_TOKEN'] || core.getInput('token') const token = process.env['GITHUB_TOKEN'] || core.getInput('token')
if (token === undefined) { if (token === undefined) {
@@ -85,6 +88,7 @@ export async function run(): Promise<void> {
temperature, temperature,
topP, topP,
maxTokens, maxTokens,
maxCompletionTokens,
endpoint, endpoint,
token, token,
customHeaders, customHeaders,

View File

@@ -8,7 +8,8 @@ export interface PromptMessage {
} }
export interface ModelParameters { export interface ModelParameters {
maxTokens?: number maxTokens?: number // Deprecated
maxCompletionTokens?: number
temperature?: number temperature?: number
topP?: number topP?: number
} }