updated docs for missing prompt.yml model parameters
This commit is contained in:
30
README.md
30
README.md
@@ -123,6 +123,33 @@ supplied via the `input` parameter in YAML format. Additionally, you can
|
||||
provide file-based variables via `file_input`, where each key maps to a file
|
||||
path.
|
||||
|
||||
### Prompt.yml with model parameters
|
||||
|
||||
You can specify model parameters directly in your `.prompt.yml` files using the
|
||||
`modelParameters` key:
|
||||
|
||||
```yaml
|
||||
messages:
|
||||
- role: system
|
||||
content: Be as concise as possible
|
||||
- role: user
|
||||
content: 'Compare {{a}} and {{b}}, please'
|
||||
model: openai/gpt-4o
|
||||
modelParameters:
|
||||
maxCompletionTokens: 500
|
||||
temperature: 0.7
|
||||
```
|
||||
|
||||
| Key | Type | Description |
|
||||
| --------------------- | ------ | -------------------------------------------------------------- |
|
||||
| `maxCompletionTokens` | number | The maximum number of tokens to generate |
|
||||
| `maxTokens` | number | The maximum number of tokens to generate (deprecated, use `maxCompletionTokens` instead) |
|
||||
| `temperature` | number | The sampling temperature to use (0-1) |
|
||||
| `topP` | number | The nucleus sampling parameter to use (0-1) |
|
||||
|
||||
> [!NOTE]
|
||||
> Parameters set in `modelParameters` take precedence over the corresponding action inputs.
|
||||
|
||||
### Using a system prompt file
|
||||
|
||||
In addition to the regular prompt, you can provide a system prompt file instead
|
||||
@@ -287,7 +314,8 @@ the action:
|
||||
| `system-prompt-file` | Path to a file containing the system prompt. If both `system-prompt` and `system-prompt-file` are provided, `system-prompt-file` takes precedence | `""` |
|
||||
| `model` | The model to use for inference. Must be available in the [GitHub Models](https://github.com/marketplace?type=models) catalog | `openai/gpt-4o` |
|
||||
| `endpoint` | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint | `https://models.github.ai/inference` |
|
||||
| `max-tokens` | The max number of tokens to generate | 200 |
|
||||
| `max-tokens` | The maximum number of tokens to generate (deprecated, use `max-completion-tokens` instead) | 200 |
|
||||
| `max-completion-tokens` | The maximum number of tokens to generate | `""` |
|
||||
| `temperature` | The sampling temperature to use (0-1) | `""` |
|
||||
| `top-p` | The nucleus sampling parameter to use (0-1) | `""` |
|
||||
| `enable-github-mcp` | Enable Model Context Protocol integration with GitHub tools | `false` |
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import * as core from '@actions/core'
|
||||
import OpenAI from 'openai'
|
||||
import {GitHubMCPClient, executeToolCalls, ToolCall} from './mcp.js'
|
||||
import { GitHubMCPClient, executeToolCalls, ToolCall } from './mcp.js'
|
||||
|
||||
interface ChatMessage {
|
||||
role: 'system' | 'user' | 'assistant' | 'tool'
|
||||
@@ -10,7 +10,7 @@ interface ChatMessage {
|
||||
}
|
||||
|
||||
export interface InferenceRequest {
|
||||
messages: Array<{role: 'system' | 'user' | 'assistant' | 'tool'; content: string}>
|
||||
messages: Array<{ role: 'system' | 'user' | 'assistant' | 'tool'; content: string }>
|
||||
modelName: string
|
||||
maxTokens?: number // Deprecated
|
||||
maxCompletionTokens?: number
|
||||
@@ -18,7 +18,7 @@ export interface InferenceRequest {
|
||||
token: string
|
||||
temperature?: number
|
||||
topP?: number
|
||||
responseFormat?: {type: 'json_schema'; json_schema: unknown} // Processed response format for the API
|
||||
responseFormat?: { type: 'json_schema'; json_schema: unknown } // Processed response format for the API
|
||||
customHeaders?: Record<string, string> // Custom HTTP headers to include in API requests
|
||||
}
|
||||
|
||||
@@ -34,18 +34,17 @@ export interface InferenceResponse {
|
||||
}>
|
||||
}
|
||||
|
||||
// Note: solution around models using different underlying max tokens properties
|
||||
|
||||
/**
|
||||
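* Build the token-limit parameter from whichever input was passed: prefer max_completion_tokens, fall back to the deprecated max_tokens, and return an empty object when neither is set.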
|
||||
* Only one of max_tokens or max_completion_tokens will be set.
|
||||
*/
|
||||
function buildMaxTokensParam(request: InferenceRequest): {max_tokens?: number; max_completion_tokens?: number} {
|
||||
function buildMaxTokensParam(request: InferenceRequest): { max_tokens?: number; max_completion_tokens?: number } {
|
||||
if (request.maxCompletionTokens != null) {
|
||||
return {max_completion_tokens: request.maxCompletionTokens}
|
||||
return { max_completion_tokens: request.maxCompletionTokens }
|
||||
}
|
||||
if (request.maxTokens != null) {
|
||||
return {max_tokens: request.maxTokens}
|
||||
return { max_tokens: request.maxTokens }
|
||||
}
|
||||
return {}
|
||||
}
|
||||
@@ -115,7 +114,7 @@ export async function mcpInference(
|
||||
model: request.modelName,
|
||||
temperature: request.temperature,
|
||||
top_p: request.topP,
|
||||
...buildMaxTokensParam(request),
|
||||
...buildMaxTokensParam(request), // Note: solution around models using different underlying max tokens properties
|
||||
}
|
||||
|
||||
// Add response format if specified (only on final iteration to avoid conflicts with tool calls)
|
||||
@@ -138,7 +137,7 @@ export async function mcpInference(
|
||||
messages.push({
|
||||
role: 'assistant',
|
||||
content: modelResponse || '',
|
||||
...(toolCalls && {tool_calls: toolCalls as ToolCall[]}),
|
||||
...(toolCalls && { tool_calls: toolCalls as ToolCall[] }),
|
||||
})
|
||||
|
||||
if (!toolCalls || toolCalls.length === 0) {
|
||||
|
||||
Reference in New Issue
Block a user