Merge pull request #150 from actions/sgoedecke/mock-inference-in-ci

Mock inference in CI
fix: keep response-file temp file for downstream steps
2025-11-28 08:17:33 +11:00 · 2025-11-27 21:06:42 +00:00 · 2025-11-27 20:59:41 +00:00 · 2025-11-24 11:27:47 +11:00 · 2025-11-23 16:19:48 -08:00 · 2025-11-23 16:19:12 -08:00
17 changed files with 313 additions and 144 deletions
--- a/.github/workflows/check-dist.yml
+++ b/.github/workflows/check-dist.yml
@@ -28,7 +28,7 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Setup Node.js
        id: setup-node
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Setup Node.js
        id: setup-node
@@ -54,22 +54,53 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: .node-version
+
+      - name: Start Mock Inference Server
+        id: mock-server
+        run: |
+          node script/mock-inference-server.mjs &
+          echo "pid=$!" >> "$GITHUB_OUTPUT"
+          # Poll /health about once a second; fail the step after 10 misses
+          attempts=0
+          until curl -sf http://localhost:3456/health > /dev/null; do
+            attempts=$((attempts + 1))
+            if [[ $attempts -ge 10 ]]; then echo "Error: mock server did not become ready" >&2; exit 1; fi
+            sleep 1
+          done
+          echo "Mock server is ready"

      - name: Test Local Action
        id: test-action
-        continue-on-error: true
        uses: ./
        with:
          prompt: hello
+          endpoint: http://localhost:3456
        env:
          GITHUB_TOKEN: ${{ github.token }}

      - name: Print Output
        id: output
-        continue-on-error: true
        run: echo "${{ steps.test-action.outputs.response }}"

+      - name: Verify Output
+        env:
+          # Pass the output through the environment instead of expanding it
+          # inside the script, so quotes in the response cannot alter the shell code
+          RESPONSE: ${{ steps.test-action.outputs.response }}
+        run: |
+          [[ -n "$RESPONSE" ]] || { echo "Error: No response received"; exit 1; }
+          echo "Response received: $RESPONSE"
+
+      - name: Stop Mock Server
+        if: always()
+        run: kill ${{ steps.mock-server.outputs.pid }} || true
+
  test-action-prompt-file:
    name: GitHub Actions Test with Prompt File
    runs-on: ubuntu-latest
@@ -77,7 +108,26 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: .node-version
+
+      - name: Start Mock Inference Server
+        id: mock-server
+        run: |
+          node script/mock-inference-server.mjs &
+          echo "pid=$!" >> "$GITHUB_OUTPUT"
+          # Poll /health about once a second; fail the step after 10 misses
+          attempts=0
+          until curl -sf http://localhost:3456/health > /dev/null; do
+            attempts=$((attempts + 1))
+            if [[ $attempts -ge 10 ]]; then echo "Error: mock server did not become ready" >&2; exit 1; fi
+            sleep 1
+          done
+          echo "Mock server is ready"

      - name: Create Prompt File
        run: echo "hello" > prompt.txt
@@ -87,16 +137,33 @@ jobs:

      - name: Test Local Action with Prompt File
        id: test-action-prompt-file
-        continue-on-error: true
        uses: ./
        with:
          prompt-file: prompt.txt
          system-prompt-file: system-prompt.txt
+          endpoint: http://localhost:3456
        env:
          GITHUB_TOKEN: ${{ github.token }}

      - name: Print Output
-        continue-on-error: true
        run: |
          echo "Response saved to: ${{ steps.test-action-prompt-file.outputs.response-file }}"
          cat "${{ steps.test-action-prompt-file.outputs.response-file }}"
+
+      - name: Verify Output
+        env:
+          # Pass the path via the environment instead of expanding it inside the script
+          RESPONSE_FILE: ${{ steps.test-action-prompt-file.outputs.response-file }}
+        run: |
+          if [[ ! -f "$RESPONSE_FILE" ]]; then
+            echo "Error: Response file not found"
+            exit 1
+          fi
+          content=$(cat "$RESPONSE_FILE")
+          # Command substitution drops trailing newlines, so a file holding only newlines counts as empty
+          [[ -n "$content" ]] || { echo "Error: Response file is empty"; exit 1; }
+          echo "Response file content: $content"
+
+      - name: Stop Mock Server
+        if: always()
+        run: kill ${{ steps.mock-server.outputs.pid }} || true
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -30,7 +30,7 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Initialize CodeQL
        id: initialize
--- a/.github/workflows/licensed.yml
+++ b/.github/workflows/licensed.yml
@@ -27,7 +27,7 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Setup Node.js
        id: setup-node
--- a/.github/workflows/linter.yml
+++ b/.github/workflows/linter.yml
@@ -21,7 +21,7 @@ jobs:
    steps:
      - name: Checkout
        id: checkout
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
        with:
          fetch-depth: 0

--- a/README.md
+++ b/README.md
@@ -162,6 +162,9 @@ This action now supports **read-only** integration with the GitHub-hosted Model
 Context Protocol (MCP) server, which provides access to GitHub tools like
 repository management, issue tracking, and pull request operations.

+> [!NOTE]
+> The GitHub MCP integration requires a Personal Access Token (PAT) and cannot use the built-in `GITHUB_TOKEN`.
+
 ```yaml
 steps:
  - name: AI Inference with GitHub Tools
@@ -209,7 +212,7 @@ the action:
 | `endpoint`           | The endpoint to use for inference. If you're running this as part of an org, you should probably use the org-specific Models endpoint                         | `https://models.github.ai/inference` |
 | `max-tokens`         | The max number of tokens to generate                                                                                                                          | 200                                  |
 | `enable-github-mcp`  | Enable Model Context Protocol integration with GitHub tools                                                                                                   | `false`                              |
-| `github-mcp-token`   | Token to use for GitHub MCP server (defaults to the main token if not specified). Use a separate PAT for tighter security                                     | `""`                                 |
+| `github-mcp-token`   | Token to use for GitHub MCP server (defaults to the main token if not specified). This must be a PAT in order for MCP to work                                 | `""`                                 |

 ## Outputs

--- a/tests/helpers-inference.test.ts
+++ b/tests/helpers-inference.test.ts
@@ -106,6 +106,8 @@ describe('helpers.ts - inference request building', () => {
        undefined,
        undefined,
        'gpt-4',
+        undefined,
+        undefined,
        100,
        'https://api.test.com',
        'test-token',
@@ -117,6 +119,8 @@ describe('helpers.ts - inference request building', () => {
          {role: 'user', content: 'User message'},
        ],
        modelName: 'gpt-4',
+        temperature: undefined,
+        topP: undefined,
        maxTokens: 100,
        endpoint: 'https://api.test.com',
        token: 'test-token',
@@ -136,6 +140,8 @@ describe('helpers.ts - inference request building', () => {
        'System prompt',
        'User prompt',
        'gpt-4',
+        undefined,
+        undefined,
        100,
        'https://api.test.com',
        'test-token',
@@ -147,6 +153,8 @@ describe('helpers.ts - inference request building', () => {
          {role: 'user', content: 'User prompt'},
        ],
        modelName: 'gpt-4',
+        temperature: undefined,
+        topP: undefined,
        maxTokens: 100,
        endpoint: 'https://api.test.com',
        token: 'test-token',
--- a/tests/main.test.ts
+++ b/tests/main.test.ts
@@ -75,17 +75,13 @@ vi.mock('fs', () => ({
  writeFileSync: mockWriteFileSync,
 }))

-// Mocks for tmp module to control temporary file creation and cleanup
-const mockRemoveCallback = vi.fn()
+// Mocks for tmp module to control temporary file creation
 const mockFileSync = vi.fn().mockReturnValue({
  name: '/secure/temp/dir/modelResponse-abc123.txt',
-  removeCallback: mockRemoveCallback,
 })
-const mockSetGracefulCleanup = vi.fn()

 vi.mock('tmp', () => ({
  fileSync: mockFileSync,
-  setGracefulCleanup: mockSetGracefulCleanup,
 }))

 // Mock MCP and inference modules
@@ -283,7 +279,7 @@ describe('main.ts', () => {
    expect(mockProcessExit).toHaveBeenCalledWith(1)
  })

-  it('creates secure temporary files with proper cleanup', async () => {
+  it('creates temporary files that persist for downstream steps', async () => {
    mockInputs({
      prompt: 'Test prompt',
      'system-prompt': 'You are a test assistant.',
@@ -291,34 +287,16 @@ describe('main.ts', () => {

    await run()

-    expect(mockSetGracefulCleanup).toHaveBeenCalledOnce()
-
+    // Verify temp file is created with keep: true so it persists
    expect(mockFileSync).toHaveBeenCalledWith({
      prefix: 'modelResponse-',
      postfix: '.txt',
+      keep: true,
    })

    expect(core.setOutput).toHaveBeenNthCalledWith(2, 'response-file', '/secure/temp/dir/modelResponse-abc123.txt')
    expect(mockWriteFileSync).toHaveBeenCalledWith('/secure/temp/dir/modelResponse-abc123.txt', 'Hello, user!', 'utf-8')
-    expect(mockRemoveCallback).toHaveBeenCalledOnce()

    expect(mockProcessExit).toHaveBeenCalledWith(0)
  })
-
-  it('handles cleanup errors gracefully', async () => {
-    mockRemoveCallback.mockImplementationOnce(() => {
-      throw new Error('Cleanup failed')
-    })
-
-    mockInputs({
-      prompt: 'Test prompt',
-      'system-prompt': 'You are a test assistant.',
-    })
-
-    await run()
-
-    expect(mockRemoveCallback).toHaveBeenCalledOnce()
-    expect(core.warning).toHaveBeenCalledWith('Failed to cleanup temporary file: Error: Cleanup failed')
-    expect(mockProcessExit).toHaveBeenCalledWith(0)
-  })
 })
--- a/action.yml
+++ b/action.yml
@@ -55,7 +55,7 @@ inputs:
    required: false
    default: 'false'
  github-mcp-token:
-    description: The token to use for GitHub MCP server (defaults to GITHUB_TOKEN if not specified)
+    description: The token to use for GitHub MCP server (defaults to the main token if not specified). This must be a PAT for MCP to work.
    required: false
    default: ''

--- a/dist/index.js
+++ b/dist/index.js
@@ -49496,28 +49496,18 @@ async function simpleInference(request) {
        messages: request.messages,
        max_tokens: request.maxTokens,
        model: request.modelName,
+        temperature: request.temperature,
+        top_p: request.topP,
    };
    // Add response format if specified
    if (request.responseFormat) {
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        chatCompletionRequest.response_format = request.responseFormat;
    }
-    try {
-        const response = await client.chat.completions.create(chatCompletionRequest);
-        if ('choices' in response) {
-            const modelResponse = response.choices[0]?.message?.content;
-            coreExports.info(`Model response: ${modelResponse || 'No response content'}`);
-            return modelResponse || null;
-        }
-        else {
-            coreExports.error(`Unexpected response format from API: ${JSON.stringify(response)}`);
-            return null;
-        }
-    }
-    catch (error) {
-        coreExports.error(`API error: ${error}`);
-        throw error;
-    }
+    const response = await chatCompletion(client, chatCompletionRequest, 'simpleInference');
+    const modelResponse = response.choices[0]?.message?.content;
+    coreExports.info(`Model response: ${modelResponse || 'No response content'}`);
+    return modelResponse || null;
 }
 /**
 * GitHub MCP-enabled inference with tool execution loop
@@ -49542,6 +49532,8 @@ async function mcpInference(request, githubMcpClient) {
            messages: messages,
            max_tokens: request.maxTokens,
            model: request.modelName,
+            temperature: request.temperature,
+            top_p: request.topP,
        };
        // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
        if (finalMessage && request.responseFormat) {
@@ -49552,10 +49544,7 @@ async function mcpInference(request, githubMcpClient) {
            chatCompletionRequest.tools = githubMcpClient.tools;
        }
        try {
-            const response = await client.chat.completions.create(chatCompletionRequest);
-            if (!('choices' in response)) {
-                throw new Error(`Unexpected response format from API: ${JSON.stringify(response)}`);
-            }
+            const response = await chatCompletion(client, chatCompletionRequest, `mcpInference iteration ${iterationCount}`);
            const assistantMessage = response.choices[0]?.message;
            const modelResponse = assistantMessage?.content;
            const toolCalls = assistantMessage?.tool_calls;
@@ -49567,17 +49556,13 @@ async function mcpInference(request, githubMcpClient) {
            });
            if (!toolCalls || toolCalls.length === 0) {
                coreExports.info('No tool calls requested, ending GitHub MCP inference loop');
-                // If we have a response format set and we haven't explicitly run one final message iteration,
-                // do another loop with the response format set
                if (request.responseFormat && !finalMessage) {
                    coreExports.info('Making one more MCP loop with the requested response format...');
-                    // Add a user message requesting JSON format and try again
                    messages.push({
                        role: 'user',
                        content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
                    });
                    finalMessage = true;
-                    // Continue the loop to get a properly formatted response
                    continue;
                }
                else {
@@ -49585,9 +49570,7 @@ async function mcpInference(request, githubMcpClient) {
                }
            }
            coreExports.info(`Model requested ${toolCalls.length} tool calls`);
-            // Execute all tool calls via GitHub MCP
            const toolResults = await executeToolCalls(githubMcpClient.client, toolCalls);
-            // Add tool results to the conversation
            messages.push(...toolResults);
            coreExports.info('Tool results added, continuing conversation...');
        }
@@ -49604,6 +49587,38 @@ async function mcpInference(request, githubMcpClient) {
        .find(msg => msg.role === 'assistant');
    return lastAssistantMessage?.content || null;
 }
+/**
+ * Wrapper around OpenAI chat.completions.create with defensive handling for cases where
+ * the SDK returns a raw string (e.g., unexpected content-type or streaming body) instead of
+ * a parsed object. Ensures an object with a 'choices' array is returned or throws a descriptive error.
+ */
+async function chatCompletion(client, params, context) {
+    try {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        let response = await client.chat.completions.create(params);
+        coreExports.debug(`${context}: raw response typeof=${typeof response}`);
+        if (typeof response === 'string') {
+            // Attempt to parse if we unexpectedly received a string
+            try {
+                response = JSON.parse(response);
+            }
+            catch (e) {
+                const preview = response.slice(0, 400);
+                throw new Error(`${context}: Chat completion response was a string and not valid JSON (${e.message}). Preview: ${preview}`);
+            }
+        }
+        if (!response || typeof response !== 'object' || !('choices' in response)) {
+            const preview = JSON.stringify(response)?.slice(0, 800);
+            throw new Error(`${context}: Unexpected response shape (no choices). Preview: ${preview}`);
+        }
+        return response;
+    }
+    catch (err) {
+        // Re-throw after logging for upstream handling
+        coreExports.error(`${context}: chatCompletion failed: ${err}`);
+        throw err;
+    }
+}

 /**
 * Helper function to load content from a file or use fallback input
@@ -49674,12 +49689,14 @@ function buildResponseFormat(promptConfig) {
 /**
 * Build complete InferenceRequest from prompt config and inputs
 */
-function buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, maxTokens, endpoint, token) {
+function buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, temperature, topP, maxTokens, endpoint, token) {
    const messages = buildMessages(promptConfig, systemPrompt, prompt);
    const responseFormat = buildResponseFormat(promptConfig);
    return {
        messages,
        modelName,
+        temperature,
+        topP,
        maxTokens,
        endpoint,
        token,
@@ -52561,10 +52578,8 @@ function loadPromptFile(filePath, templateVariables = {}) {
        throw new Error(`Prompt file not found: ${filePath}`);
    }
    const fileContent = fs.readFileSync(filePath, 'utf-8');
-    // Apply template variable substitution
-    const processedContent = replaceTemplateVariables(fileContent, templateVariables);
    try {
-        const config = load(processedContent);
+        const config = load(fileContent);
        if (!config.messages || !Array.isArray(config.messages)) {
            throw new Error('Prompt file must contain a "messages" array');
        }
@@ -52577,6 +52592,13 @@ function loadPromptFile(filePath, templateVariables = {}) {
                throw new Error(`Invalid message role: ${message.role}`);
            }
        }
+        // Prepare messages by replacing template variables with actual content
+        config.messages = config.messages.map(msg => {
+            return {
+                ...msg,
+                content: replaceTemplateVariables(msg.content, templateVariables),
+            };
+        });
        return config;
    }
    catch (error) {
@@ -52596,9 +52618,6 @@ function isPromptYamlFile(filePath) {
 * @returns Resolves when the action is complete.
 */
 async function run() {
-    let responseFile = null;
-    // Set up graceful cleanup for temporary files on process exit
-    tmpExports.setGracefulCleanup();
    try {
        const promptFilePath = coreExports.getInput('prompt-file');
        const inputVariables = coreExports.getInput('input');
@@ -52624,7 +52643,10 @@ async function run() {
        }
        // Get common parameters
        const modelName = promptConfig?.model || coreExports.getInput('model');
-        const maxTokens = parseInt(coreExports.getInput('max-tokens'), 10);
+        let maxTokens = promptConfig?.modelParameters?.maxTokens ?? coreExports.getInput('max-tokens');
+        if (typeof maxTokens === 'string') {
+            maxTokens = parseInt(maxTokens, 10);
+        }
        const token = process.env['GITHUB_TOKEN'] || coreExports.getInput('token');
        if (token === undefined) {
            throw new Error('GITHUB_TOKEN is not set');
@@ -52633,7 +52655,7 @@ async function run() {
        const githubMcpToken = coreExports.getInput('github-mcp-token') || token;
        const endpoint = coreExports.getInput('endpoint');
        // Build the inference request with pre-processed messages and response format
-        const inferenceRequest = buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, maxTokens, endpoint, token);
+        const inferenceRequest = buildInferenceRequest(promptConfig, systemPrompt, prompt, modelName, promptConfig?.modelParameters?.temperature, promptConfig?.modelParameters?.topP, maxTokens, endpoint, token);
        const enableMcp = coreExports.getBooleanInput('enable-github-mcp') || false;
        let modelResponse = null;
        if (enableMcp) {
@@ -52650,10 +52672,13 @@ async function run() {
            modelResponse = await simpleInference(inferenceRequest);
        }
        coreExports.setOutput('response', modelResponse || '');
-        // Create a secure temporary file instead of using the temp directory directly
-        responseFile = tmpExports.fileSync({
+        // Create a temporary file for the response that persists for downstream steps.
+        // We use keep: true to prevent automatic cleanup - the file will be cleaned up
+        // by the runner when the job completes.
+        const responseFile = tmpExports.fileSync({
            prefix: 'modelResponse-',
            postfix: '.txt',
+            keep: true,
        });
        coreExports.setOutput('response-file', responseFile.name);
        if (modelResponse && modelResponse !== '') {
@@ -52670,18 +52695,6 @@ async function run() {
        // Force exit to prevent hanging on open connections
        process.exit(1);
    }
-    finally {
-        // Explicit cleanup of temporary file if it was created
-        if (responseFile) {
-            try {
-                responseFile.removeCallback();
-            }
-            catch (cleanupError) {
-                // Log cleanup errors but don't fail the action
-                coreExports.warning(`Failed to cleanup temporary file: ${cleanupError}`);
-            }
-        }
-    }
    // Force exit to prevent hanging on open connections
    process.exit(0);
 }
--- a/dist/index.js.map
+++ b/dist/index.js.map
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -19,7 +19,7 @@ const compat = new FlatCompat({

 export default [
  {
-    ignores: ['**/coverage', '**/dist', '**/linter', '**/node_modules'],
+    ignores: ['**/coverage', '**/dist', '**/linter', '**/node_modules', 'script/**'],
  },
  ...compat.extends(
    'eslint:recommended',
--- a/script/mock-inference-server.mjs
+++ b/script/mock-inference-server.mjs
@@ -0,0 +1,71 @@
+#!/usr/bin/env node
+/**
+ * A simple mock OpenAI-compatible inference server for CI testing.
+ * This returns predictable responses without needing real API credentials.
+ */
+
+import http from 'http'
+
+const PORT = process.env.MOCK_SERVER_PORT || 3456
+
+const server = http.createServer((req, res) => {
+  let body = ''
+
+  req.on('data', chunk => {
+    body += chunk.toString()
+  })
+
+  req.on('end', () => {
+    console.log(`[Mock Server] ${req.method} ${req.url}`)
+
+    // Handle chat completions endpoint
+    if (req.url === '/chat/completions' && req.method === 'POST') {
+      const request = JSON.parse(body)
+      const userMessage = request.messages?.find(m => m.role === 'user')?.content || 'No prompt'
+
+      const response = {
+        id: 'mock-completion-id',
+        object: 'chat.completion',
+        created: Date.now(),
+        model: request.model || 'mock-model',
+        choices: [
+          {
+            index: 0,
+            message: {
+              role: 'assistant',
+              content: `Mock response to: "${userMessage.slice(0, 50)}..."`,
+            },
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 10,
+          completion_tokens: 20,
+          total_tokens: 30,
+        },
+      }
+
+      res.writeHead(200, {'Content-Type': 'application/json'})
+      res.end(JSON.stringify(response))
+      return
+    }
+
+    // Health check endpoint
+    if (req.url === '/health' || req.url === '/') {
+      res.writeHead(200, {'Content-Type': 'application/json'})
+      res.end(JSON.stringify({status: 'ok'}))
+      return
+    }
+
+    // 404 for unknown routes
+    res.writeHead(404, {'Content-Type': 'application/json'})
+    res.end(JSON.stringify({error: 'Not found'}))
+  })
+})
+
+server.listen(PORT, () => {
+  console.log(`[Mock Server] Listening on http://localhost:${PORT}`)
+  console.log('[Mock Server] Endpoints:')
+  console.log('  POST /chat/completions - Mock chat completion')
+  console.log('  GET  /health           - Health check')
+})
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -82,6 +82,8 @@ export function buildInferenceRequest(
  systemPrompt: string | undefined,
  prompt: string | undefined,
  modelName: string,
+  temperature: number | undefined,
+  topP: number | undefined,
  maxTokens: number,
  endpoint: string,
  token: string,
@@ -92,6 +94,8 @@ export function buildInferenceRequest(
  return {
    messages,
    modelName,
+    temperature,
+    topP,
    maxTokens,
    endpoint,
    token,
--- a/src/inference.ts
+++ b/src/inference.ts
@@ -15,6 +15,8 @@ export interface InferenceRequest {
  maxTokens: number
  endpoint: string
  token: string
+  temperature?: number
+  topP?: number
  responseFormat?: {type: 'json_schema'; json_schema: unknown} // Processed response format for the API
 }

@@ -45,6 +47,8 @@ export async function simpleInference(request: InferenceRequest): Promise<string
    messages: request.messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
    max_tokens: request.maxTokens,
    model: request.modelName,
+    temperature: request.temperature,
+    top_p: request.topP,
  }

  // Add response format if specified
@@ -53,21 +57,10 @@ export async function simpleInference(request: InferenceRequest): Promise<string
    chatCompletionRequest.response_format = request.responseFormat as any
  }

-  try {
-    const response = await client.chat.completions.create(chatCompletionRequest)
-
-    if ('choices' in response) {
-      const modelResponse = response.choices[0]?.message?.content
-      core.info(`Model response: ${modelResponse || 'No response content'}`)
-      return modelResponse || null
-    } else {
-      core.error(`Unexpected response format from API: ${JSON.stringify(response)}`)
-      return null
-    }
-  } catch (error) {
-    core.error(`API error: ${error}`)
-    throw error
-  }
+  const response = await chatCompletion(client, chatCompletionRequest, 'simpleInference')
+  const modelResponse = response.choices[0]?.message?.content
+  core.info(`Model response: ${modelResponse || 'No response content'}`)
+  return modelResponse || null
 }

 /**
@@ -101,6 +94,8 @@ export async function mcpInference(
      messages: messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[],
      max_tokens: request.maxTokens,
      model: request.modelName,
+      temperature: request.temperature,
+      top_p: request.topP,
    }

    // Add response format if specified (only on final iteration to avoid conflicts with tool calls)
@@ -112,11 +107,7 @@ export async function mcpInference(
    }

    try {
-      const response = await client.chat.completions.create(chatCompletionRequest)
-
-      if (!('choices' in response)) {
-        throw new Error(`Unexpected response format from API: ${JSON.stringify(response)}`)
-      }
+      const response = await chatCompletion(client, chatCompletionRequest, `mcpInference iteration ${iterationCount}`)

      const assistantMessage = response.choices[0]?.message
      const modelResponse = assistantMessage?.content
@@ -133,20 +124,13 @@ export async function mcpInference(
      if (!toolCalls || toolCalls.length === 0) {
        core.info('No tool calls requested, ending GitHub MCP inference loop')

-        // If we have a response format set and we haven't explicitly run one final message iteration,
-        // do another loop with the response format set
        if (request.responseFormat && !finalMessage) {
          core.info('Making one more MCP loop with the requested response format...')
-
-          // Add a user message requesting JSON format and try again
          messages.push({
            role: 'user',
            content: `Please provide your response in the exact ${request.responseFormat.type} format specified.`,
          })
-
          finalMessage = true
-
-          // Continue the loop to get a properly formatted response
          continue
        } else {
          return modelResponse || null
@@ -154,13 +138,8 @@ export async function mcpInference(
      }

      core.info(`Model requested ${toolCalls.length} tool calls`)
-
-      // Execute all tool calls via GitHub MCP
      const toolResults = await executeToolCalls(githubMcpClient.client, toolCalls as ToolCall[])
-
-      // Add tool results to the conversation
      messages.push(...toolResults)
-
      core.info('Tool results added, continuing conversation...')
    } catch (error) {
      core.error(`OpenAI API error: ${error}`)
@@ -178,3 +157,43 @@ export async function mcpInference(

  return lastAssistantMessage?.content || null
 }
+
+/**
+ * Calls chat.completions.create and validates the result. A raw string result (e.g., from an
+ * unexpected content-type or streaming body) is JSON-parsed first; anything that is not an object
+ * with a `choices` array raises an Error. Failures are logged with `context` and re-thrown.
+ */
+async function chatCompletion(
+  client: OpenAI,
+  params: OpenAI.Chat.Completions.ChatCompletionCreateParams,
+  context: string,
+): Promise<OpenAI.Chat.Completions.ChatCompletion> {
+  try {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    let response: any = await client.chat.completions.create(params)
+    core.debug(`${context}: raw response typeof=${typeof response}`)
+
+    if (typeof response === 'string') {
+      // Attempt to parse if we unexpectedly received a string
+      try {
+        response = JSON.parse(response)
+      } catch (e) {
+        const preview = response.slice(0, 400)
+        throw new Error(
+          `${context}: Chat completion response was a string and not valid JSON (${(e as Error).message}). Preview: ${preview}`,
+        )
+      }
+    }
+
+    if (!response || typeof response !== 'object' || !Array.isArray(response.choices)) {
+      const preview = JSON.stringify(response)?.slice(0, 800)
+      throw new Error(`${context}: Unexpected response shape (no choices). Preview: ${preview}`)
+    }
+
+    return response as OpenAI.Chat.Completions.ChatCompletion
+  } catch (err) {
+    // Log with the caller's context, then re-throw so upstream handling still runs
+    core.error(`${context}: chatCompletion failed: ${err}`)
+    throw err
+  }
+}
--- a/src/main.ts
+++ b/src/main.ts
@@ -18,11 +18,6 @@ import {
 * @returns Resolves when the action is complete.
 */
 export async function run(): Promise<void> {
-  let responseFile: tmp.FileResult | null = null
-
-  // Set up graceful cleanup for temporary files on process exit
-  tmp.setGracefulCleanup()
-
  try {
    const promptFilePath = core.getInput('prompt-file')
    const inputVariables = core.getInput('input')
@@ -53,7 +48,11 @@ export async function run(): Promise<void> {

    // Get common parameters
    const modelName = promptConfig?.model || core.getInput('model')
-    const maxTokens = parseInt(core.getInput('max-tokens'), 10)
+    let maxTokens = promptConfig?.modelParameters?.maxTokens ?? core.getInput('max-tokens')
+
+    if (typeof maxTokens === 'string') {
+      maxTokens = parseInt(maxTokens, 10)
+    }

    const token = process.env['GITHUB_TOKEN'] || core.getInput('token')
    if (token === undefined) {
@@ -71,6 +70,8 @@ export async function run(): Promise<void> {
      systemPrompt,
      prompt,
      modelName,
+      promptConfig?.modelParameters?.temperature,
+      promptConfig?.modelParameters?.topP,
      maxTokens,
      endpoint,
      token,
@@ -95,10 +96,13 @@ export async function run(): Promise<void> {

    core.setOutput('response', modelResponse || '')

-    // Create a secure temporary file instead of using the temp directory directly
-    responseFile = tmp.fileSync({
+    // Create a temporary file for the response that persists for downstream steps.
+    // We use keep: true to prevent automatic cleanup - the file will be cleaned up
+    // by the runner when the job completes.
+    const responseFile = tmp.fileSync({
      prefix: 'modelResponse-',
      postfix: '.txt',
+      keep: true,
    })

    core.setOutput('response-file', responseFile.name)
@@ -114,16 +118,6 @@ export async function run(): Promise<void> {
    }
    // Force exit to prevent hanging on open connections
    process.exit(1)
-  } finally {
-    // Explicit cleanup of temporary file if it was created
-    if (responseFile) {
-      try {
-        responseFile.removeCallback()
-      } catch (cleanupError) {
-        // Log cleanup errors but don't fail the action
-        core.warning(`Failed to cleanup temporary file: ${cleanupError}`)
-      }
-    }
  }

  // Force exit to prevent hanging on open connections
--- a/src/prompt.ts
+++ b/src/prompt.ts
@@ -7,9 +7,16 @@ export interface PromptMessage {
  content: string
 }

+export interface ModelParameters {
+  maxTokens?: number
+  temperature?: number
+  topP?: number
+}
+
 export interface PromptConfig {
  messages: PromptMessage[]
  model?: string
+  modelParameters?: ModelParameters
  responseFormat?: 'text' | 'json_schema'
  jsonSchema?: string
 }
@@ -101,11 +108,8 @@ export function loadPromptFile(filePath: string, templateVariables: TemplateVari

  const fileContent = fs.readFileSync(filePath, 'utf-8')

-  // Apply template variable substitution
-  const processedContent = replaceTemplateVariables(fileContent, templateVariables)
-
  try {
-    const config = yaml.load(processedContent) as PromptConfig
+    const config = yaml.load(fileContent) as PromptConfig

    if (!config.messages || !Array.isArray(config.messages)) {
      throw new Error('Prompt file must contain a "messages" array')
@@ -121,6 +125,14 @@ export function loadPromptFile(filePath: string, templateVariables: TemplateVari
      }
    }

+    // Prepare messages by replacing template variables with actual content
+    config.messages = config.messages.map(msg => {
+      return {
+        ...msg,
+        content: replaceTemplateVariables(msg.content, templateVariables),
+      }
+    })
+
    return config
  } catch (error) {
    throw new Error(`Failed to parse prompt file: ${error instanceof Error ? error.message : 'Unknown error'}`)
Author	SHA1	Message	Date
Sean Goedecke	02c6cc30ae	Merge pull request #150 from actions/sgoedecke/mock-inference-in-ci Mock inference in CI	2025-11-28 08:17:33 +11:00
Sean Goedecke	18d468666d	fix: keep response-file temp file for downstream steps The temporary file created for response-file was being cleaned up before downstream steps could access it. Now using keep: true to ensure the file persists until the job completes. Also added script/ to eslint ignores for the mock server.	2025-11-27 21:06:42 +00:00
Sean Goedecke	fd73d0264c	Mock inference in CI	2025-11-27 20:59:41 +00:00
Sean Goedecke	5022b33bc1	Merge pull request #148 from dsanders11/feat/prompt-yaml-model-parameters feat: support modelParameters in prompt.yaml files	2025-11-24 11:27:47 +11:00
David Sanders	c9e14713bc	chore: update dist	2025-11-23 16:19:48 -08:00
David Sanders	39308142df	chore: apply code review comment Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2025-11-23 16:19:12 -08:00
David Sanders	48f0edec4d	feat: support modelParameters in prompt.yaml files	2025-11-23 16:07:11 -08:00
Sean Goedecke	36ea1371dc	Merge pull request #136 from dsanders11/fix/template-substition fix: do template substition after parsing prompt YAML	2025-11-24 10:22:42 +11:00
Sean Goedecke	de16a30c20	Merge branch 'main' into fix/template-substition	2025-11-24 10:21:49 +11:00
Sean Goedecke	dd3dff10ba	Merge pull request #147 from srt32/patch-1 Clarify PAT requirement for github-mcp-token	2025-11-24 10:18:50 +11:00
Simon Taranto	4bb01ee5ee	Clarify PAT requirement for github-mcp-token I mistakenly read the description of the mcp-token field to mean I needed a "PAT for MCP" as if there were a PAT permission for MCP. This change clarifies the language.	2025-11-21 13:36:45 -05:00
David Sanders	af1c1c29a3	fix: do template substition after parsing prompt YAML	2025-10-20 21:32:06 -07:00
Sean Goedecke	83bb5ca3e8	Merge pull request #93 from FidelusAleksander/main docs: update documentation on mcp usage	2025-08-26 18:13:39 +10:00
Aleksander Fidelus	4d2337d006	Merge branch 'actions:main' into main	2025-08-25 11:08:41 +02:00
Yuzuki	7ba7530ad4	Merge pull request #94 from actions/dependabot/github_actions/actions/checkout-5 chore(deps): bump actions/checkout from 4 to 5	2025-08-25 14:00:39 +10:00
Yuzuki	4d7d83c494	Merge branch 'main' into dependabot/github_actions/actions/checkout-5	2025-08-25 13:55:57 +10:00
Sean Goedecke	a1c1182922	Merge pull request #97 from actions/sgoedecke/defensive-parsing Parse inference response format defensively	2025-08-25 08:47:18 +10:00
Sean Goedecke	dfaa426c29	Parse inference response format defensively	2025-08-22 22:34:18 +00:00
FidelusAleksander	7fa0024f13	docs: run prettier	2025-08-18 14:42:29 +02:00
dependabot[bot]	fc6f9a0800	chore(deps): bump actions/checkout from 4 to 5 Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com>	2025-08-18 03:58:02 +00:00
FidelusAleksander	a1d07305b7	docs: update github-mcp-token description	2025-08-15 08:22:55 +02:00
FidelusAleksander	6e0d8949d8	docs: update documentation on mcp usage	2025-08-15 07:52:22 +02:00