diff --git a/action.yml b/action.yml index 4fc7863..2e27cf6 100644 --- a/action.yml +++ b/action.yml @@ -43,11 +43,11 @@ inputs: required: false default: '' max-tokens: - description: Maximum tokens to generate (deprecated) + description: The maximum tokens to generate (deprecated) required: false default: '200' max-completion-tokens: - description: Maximum tokens to generate + description: The maximum tokens to generate required: false default: '' temperature: diff --git a/src/inference.ts b/src/inference.ts index e8d5136..5a40130 100644 --- a/src/inference.ts +++ b/src/inference.ts @@ -35,7 +35,7 @@ export interface InferenceResponse { } /** - * Build the token limit params for a chat completion request. + * Build the token limit params according to which input was passed, defaulting to max_tokens. * Only one of max_tokens or max_completion_tokens will be set. */ function buildMaxTokensParam(request: InferenceRequest): {max_tokens?: number; max_completion_tokens?: number} { @@ -177,7 +177,9 @@ export async function mcpInference( } /** - * Wrapper around OpenAI chat.completions.create with response validation. + * Wrapper around OpenAI chat.completions.create with defensive handling for cases where + * the SDK returns a raw string (e.g., unexpected content-type or streaming body) instead of + * a parsed object. Ensures an object with a 'choices' array is returned or throws a descriptive error. */ async function chatCompletion( client: OpenAI,