diff --git a/action.yml b/action.yml
index 4fc7863..2e27cf6 100644
--- a/action.yml
+++ b/action.yml
@@ -43,11 +43,11 @@ inputs:
     required: false
     default: ''
   max-tokens:
-    description: Maximum tokens to generate (deprecated)
+    description: The maximum number of tokens to generate (deprecated)
     required: false
     default: '200'
   max-completion-tokens:
-    description: Maximum tokens to generate
+    description: The maximum number of tokens to generate
     required: false
     default: ''
   temperature:
diff --git a/src/inference.ts b/src/inference.ts
index e8d5136..5a40130 100644
--- a/src/inference.ts
+++ b/src/inference.ts
@@ -35,7 +35,7 @@ export interface InferenceResponse {
 }
 
 /**
- * Build the token limit params for a chat completion request.
+ * Build the token limit param according to which input was passed, defaulting to max_tokens.
  * Only one of max_tokens or max_completion_tokens will be set.
  */
 function buildMaxTokensParam(request: InferenceRequest): {max_tokens?: number; max_completion_tokens?: number} {
@@ -177,7 +177,9 @@ export async function mcpInference(
 }
 
 /**
- * Wrapper around OpenAI chat.completions.create with response validation.
+ * Wrapper around OpenAI chat.completions.create with defensive handling for cases where
+ * the SDK returns a raw string (e.g., unexpected content-type or streaming body) instead of
+ * a parsed object. Ensures an object with a 'choices' array is returned or throws a descriptive error.
  */
 async function chatCompletion(
   client: OpenAI,