diff --git a/README.md b/README.md index 4203278..fe64a21 100644 --- a/README.md +++ b/README.md @@ -123,6 +123,33 @@ supplied via the `input` parameter in YAML format. Additionally, you can provide file-based variables via `file_input`, where each key maps to a file path. +### Prompt.yml with model parameters + +You can specify model parameters directly in your `.prompt.yml` files using the +`modelParameters` key: + +```yaml +messages: + - role: system + content: Be as concise as possible + - role: user + content: 'Compare {{a}} and {{b}}, please' +model: openai/gpt-4o +modelParameters: + maxCompletionTokens: 500 + temperature: 0.7 +``` + +| Key | Type | Description | +| --------------------- | ------ | -------------------------------------------------------------- | +| `maxCompletionTokens` | number | The maximum number of tokens to generate | +| `maxTokens` | number | The maximum number of tokens to generate (deprecated) | +| `temperature` | number | The sampling temperature to use (0-1) | +| `topP` | number | The nucleus sampling parameter to use (0-1) | + +> [!NOTE] +> Parameters set in `modelParameters` take precedence over the corresponding action inputs. + ### Using a system prompt file In addition to the regular prompt, you can provide a system prompt file instead @@ -287,7 +314,8 @@ the action: | `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` | | `model` | The model to use for inference. Must be available in the [GitHub Models](https://github.com/marketplace?type=models) catalog | `openai/gpt-4o` | | `endpoint` | The endpoint to use for inference. 
If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` | -| `max-tokens` | The max number of tokens to generate | 200 | +| `max-tokens` | The maximum number of tokens to generate (deprecated, use `max-completion-tokens` instead) | 200 | +| `max-completion-tokens` | The maximum number of tokens to generate | `""` | | `temperature` | The sampling temperature to use (0-1) | `""` | | `top-p` | The nucleus sampling parameter to use (0-1) | `""` | | `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` | diff --git a/src/inference.ts b/src/inference.ts index df9d8f7..4a1ce52 100644 --- a/src/inference.ts +++ b/src/inference.ts @@ -1,6 +1,6 @@ import * as core from '@actions/core' import OpenAI from 'openai' -import {GitHubMCPClient, executeToolCalls, ToolCall} from './mcp.js' +import { GitHubMCPClient, executeToolCalls, ToolCall } from './mcp.js' interface ChatMessage { role: 'system' | 'user' | 'assistant' | 'tool' @@ -10,7 +10,7 @@ interface ChatMessage { } export interface InferenceRequest { - messages: Array<{role: 'system' | 'user' | 'assistant' | 'tool'; content: string}> + messages: Array<{ role: 'system' | 'user' | 'assistant' | 'tool'; content: string }> modelName: string maxTokens?: number // Deprecated maxCompletionTokens?: number @@ -18,7 +18,7 @@ export interface InferenceRequest { token: string temperature?: number topP?: number - responseFormat?: {type: 'json_schema'; json_schema: unknown} // Processed response format for the API + responseFormat?: { type: 'json_schema'; json_schema: unknown } // Processed response format for the API customHeaders?: Record<string, string> // Custom HTTP headers to include in API requests } @@ -34,18 +34,17 @@ export interface InferenceResponse { }> } -// Note: solution around models using different underlying max tokens properties /** * Build according to what input was passed, default to max_tokens. 
* Only one of max_tokens or max_completion_tokens will be set. */ -function buildMaxTokensParam(request: InferenceRequest): {max_tokens?: number; max_completion_tokens?: number} { +function buildMaxTokensParam(request: InferenceRequest): { max_tokens?: number; max_completion_tokens?: number } { if (request.maxCompletionTokens != null) { - return {max_completion_tokens: request.maxCompletionTokens} + return { max_completion_tokens: request.maxCompletionTokens } } if (request.maxTokens != null) { - return {max_tokens: request.maxTokens} + return { max_tokens: request.maxTokens } } return {} } @@ -115,7 +114,7 @@ export async function mcpInference( model: request.modelName, temperature: request.temperature, top_p: request.topP, - ...buildMaxTokensParam(request), + ...buildMaxTokensParam(request), // Note: workaround for models that use different underlying max-token properties } // Add response format if specified (only on final iteration to avoid conflicts with tool calls) @@ -138,7 +137,7 @@ export async function mcpInference( messages.push({ role: 'assistant', content: modelResponse || '', - ...(toolCalls && {tool_calls: toolCalls as ToolCall[]}), + ...(toolCalls && { tool_calls: toolCalls as ToolCall[] }), }) if (!toolCalls || toolCalls.length === 0) {