fix(xai-responses): strip enum values containing '/' from tool schemas
xAI's /v1/responses and /v1/chat/completions endpoints reject tool schemas
whose enum values contain a forward slash with a generic HTTP 400 'Invalid
arguments passed to the model.' before any token is emitted — the schema
compiler trips on the '/' character regardless of where it appears.
Most commonly hit by MCP-derived tools whose enum lists HuggingFace model
IDs ('Qwen/Qwen3.5-0.8B', 'openai/gpt-oss-20b') or owner/name environment
identifiers.
Mirrors the existing strip_pattern_and_format sanitizer (PR for #27197).
The new strip_slash_enum walks tool parameters and drops the entire enum
keyword when any value contains '/' — keeping it partial would still 400
since xAI's failure is all-or-nothing on the enum. The field description
still reaches the model so the prompting hint is preserved.
Wired in at both code paths for parity:
- agent/chat_completion_helpers.py (main agent xAI Responses path)
- agent/auxiliary_client.py (aux client xAI Responses path, matching
the same parity guarantee 2fae8fba9 established for pattern/format)
Salvaged from #28021 by @Slimydog21 — contributor's branch was severely
stale (would have reverted ~5000 LOC across azure/kanban/i18n); fix
re-applied surgically on current main with their sanitizer + 9 tests
preserved verbatim. Author noreply email used (original was a Mac
hostname leak).
This commit is contained in:
@@ -711,8 +711,12 @@ class _CodexCompletionsAdapter:
|
|||||||
# keywords (HTTP 400). Strip them here to match the parity guarantee that
|
# keywords (HTTP 400). Strip them here to match the parity guarantee that
|
||||||
# chat_completion_helpers.py provides for the main-agent xAI path.
|
# chat_completion_helpers.py provides for the main-agent xAI path.
|
||||||
try:
|
try:
|
||||||
from tools.schema_sanitizer import strip_pattern_and_format
|
from tools.schema_sanitizer import (
|
||||||
|
strip_pattern_and_format,
|
||||||
|
strip_slash_enum,
|
||||||
|
)
|
||||||
tools, _ = strip_pattern_and_format(list(tools))
|
tools, _ = strip_pattern_and_format(list(tools))
|
||||||
|
tools, _ = strip_slash_enum(tools)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Auxiliary client: failed to sanitize tool schemas for "
|
"Auxiliary client: failed to sanitize tool schemas for "
|
||||||
|
|||||||
@@ -291,10 +291,17 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
|
|||||||
# in tool schemas (HTTP 400 "Invalid arguments passed to the model").
|
# in tool schemas (HTTP 400 "Invalid arguments passed to the model").
|
||||||
# Most commonly hit when MCP-derived tools carry JSON Schema validation
|
# Most commonly hit when MCP-derived tools carry JSON Schema validation
|
||||||
# keywords through. Strip them before building kwargs. See #27197.
|
# keywords through. Strip them before building kwargs. See #27197.
|
||||||
|
# It also rejects ``enum`` values containing ``/`` (HuggingFace IDs
|
||||||
|
# like ``Qwen/Qwen3.5-0.8B`` shipped by MCP servers) — same 400 with
|
||||||
|
# the same opaque message; strip those enums too.
|
||||||
if is_xai_responses:
|
if is_xai_responses:
|
||||||
try:
|
try:
|
||||||
from tools.schema_sanitizer import strip_pattern_and_format
|
from tools.schema_sanitizer import (
|
||||||
|
strip_pattern_and_format,
|
||||||
|
strip_slash_enum,
|
||||||
|
)
|
||||||
tools_for_api, _ = strip_pattern_and_format(tools_for_api)
|
tools_for_api, _ = strip_pattern_and_format(tools_for_api)
|
||||||
|
tools_for_api, _ = strip_slash_enum(tools_for_api)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"%s⚠️ Failed to sanitize tool schemas for xAI: %s",
|
"%s⚠️ Failed to sanitize tool schemas for xAI: %s",
|
||||||
|
|||||||
@@ -9,7 +9,11 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
from tools.schema_sanitizer import sanitize_tool_schemas, strip_pattern_and_format
|
from tools.schema_sanitizer import (
|
||||||
|
sanitize_tool_schemas,
|
||||||
|
strip_pattern_and_format,
|
||||||
|
strip_slash_enum,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _tool(name: str, parameters: dict) -> dict:
|
def _tool(name: str, parameters: dict) -> dict:
|
||||||
@@ -491,3 +495,142 @@ def test_strip_responses_mixed_formats():
|
|||||||
# Verify structure preserved
|
# Verify structure preserved
|
||||||
assert result[0]["function"]["parameters"]["type"] == "object"
|
assert result[0]["function"]["parameters"]["type"] == "object"
|
||||||
assert result[1]["parameters"]["type"] == "object"
|
assert result[1]["parameters"]["type"] == "object"
|
||||||
|
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
# strip_slash_enum — reactive recovery when xAI's /v1/responses (and
|
||||||
|
# /v1/chat/completions) grammar-compiler rejects enum values containing
|
||||||
|
# a forward slash. Symptom: HTTP 400 "Invalid arguments passed to the
|
||||||
|
# model" before any token is emitted. Most commonly hit by MCP-derived
|
||||||
|
# tools whose enum lists HuggingFace IDs like "Qwen/Qwen3.5-0.8B".
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_removes_huggingface_id_enum():
|
||||||
|
"""enum containing HF-style 'owner/name' IDs → stripped."""
|
||||||
|
tools = [_tool("train", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"model": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["Qwen/Qwen3.5-0.8B", "openai/gpt-oss-20b"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 1
|
||||||
|
prop = tools[0]["function"]["parameters"]["properties"]["model"]
|
||||||
|
assert "enum" not in prop
|
||||||
|
# Type + description survive so the model still gets the prompting hint.
|
||||||
|
assert prop["type"] == "string"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_preserves_slashless_enum():
|
||||||
|
"""enum without any '/' → preserved."""
|
||||||
|
tools = [_tool("pick", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"mode": {"type": "string", "enum": ["fast", "slow"]},
|
||||||
|
},
|
||||||
|
})]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 0
|
||||||
|
assert tools[0]["function"]["parameters"]["properties"]["mode"]["enum"] == ["fast", "slow"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_partial_match_strips_whole_enum():
|
||||||
|
"""Any single value containing '/' triggers removal of the entire enum.
|
||||||
|
|
||||||
|
Rationale: if we kept the slashless values, the model could still pick
|
||||||
|
them, but xAI's grammar-compile failure is all-or-nothing on the enum
|
||||||
|
keyword — keeping a mixed-content enum would still 400. Drop it whole.
|
||||||
|
"""
|
||||||
|
tools = [_tool("pick", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"target": {"type": "string", "enum": ["local", "hf://Qwen/Qwen3"]},
|
||||||
|
},
|
||||||
|
})]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 1
|
||||||
|
assert "enum" not in tools[0]["function"]["parameters"]["properties"]["target"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_responses_format():
|
||||||
|
"""Responses-format tools (no `function` wrapper) are also handled."""
|
||||||
|
tools = [{
|
||||||
|
"type": "function",
|
||||||
|
"name": "mcp_prime_lab_train_model",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"model": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["Qwen/Qwen3.5-0.8B", "meta-llama/Llama-3.2-1B-Instruct"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 1
|
||||||
|
assert "enum" not in tools[0]["parameters"]["properties"]["model"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_recurses_into_anyof():
|
||||||
|
"""enum-with-slash inside an anyOf variant is also stripped."""
|
||||||
|
tools = [_tool("t", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"value": {
|
||||||
|
"anyOf": [
|
||||||
|
{"type": "string", "enum": ["owner/repo"]},
|
||||||
|
{"type": "null"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 1
|
||||||
|
variants = tools[0]["function"]["parameters"]["properties"]["value"]["anyOf"]
|
||||||
|
assert "enum" not in variants[0]
|
||||||
|
assert variants[0]["type"] == "string"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_is_idempotent():
|
||||||
|
"""Second call on already-stripped tools is a no-op."""
|
||||||
|
tools = [_tool("t", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"m": {"type": "string", "enum": ["a/b"]}},
|
||||||
|
})]
|
||||||
|
_, first = strip_slash_enum(tools)
|
||||||
|
_, second = strip_slash_enum(tools)
|
||||||
|
assert first == 1
|
||||||
|
assert second == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_empty_returns_zero():
|
||||||
|
tools, stripped = strip_slash_enum([])
|
||||||
|
assert tools == []
|
||||||
|
assert stripped == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_none_returns_zero():
|
||||||
|
tools, stripped = strip_slash_enum(None)
|
||||||
|
assert tools is None
|
||||||
|
assert stripped == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_slash_enum_ignores_non_string_enum_values():
|
||||||
|
"""Integer/boolean enum values can't contain '/' — leave them alone."""
|
||||||
|
tools = [_tool("t", {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"level": {"type": "integer", "enum": [1, 2, 3]},
|
||||||
|
"flag": {"type": "boolean", "enum": [True, False]},
|
||||||
|
},
|
||||||
|
})]
|
||||||
|
_, stripped = strip_slash_enum(tools)
|
||||||
|
assert stripped == 0
|
||||||
|
props = tools[0]["function"]["parameters"]["properties"]
|
||||||
|
assert props["level"]["enum"] == [1, 2, 3]
|
||||||
|
assert props["flag"]["enum"] == [True, False]
|
||||||
|
|||||||
@@ -380,3 +380,66 @@ def strip_pattern_and_format(tools: list[dict]) -> tuple[list[dict], int]:
|
|||||||
stripped,
|
stripped,
|
||||||
)
|
)
|
||||||
return tools, stripped
|
return tools, stripped
|
||||||
|
|
||||||
|
|
||||||
|
def strip_slash_enum(tools: list[dict]) -> tuple[list[dict], int]:
|
||||||
|
"""Strip ``enum`` keywords whose string values contain a forward slash.
|
||||||
|
|
||||||
|
xAI's ``/v1/responses`` and ``/v1/chat/completions`` endpoints compile
|
||||||
|
tool schemas to a grammar that rejects ``enum`` values containing ``/``
|
||||||
|
(the request fails with HTTP 400 "Invalid arguments passed to the
|
||||||
|
model" before any token is emitted). Most commonly hit by MCP-derived
|
||||||
|
tools whose enum lists HuggingFace model IDs (``Qwen/Qwen3.5-0.8B``,
|
||||||
|
``openai/gpt-oss-20b``) or owner/name environment IDs. The constraint
|
||||||
|
is purely a prompting hint; dropping it lets the model still see the
|
||||||
|
field description and pick a value, without xAI tripping on the slash.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
tools: OpenAI-format or Responses-format tool list, mutated in
|
||||||
|
place. Callers that need to preserve the original should
|
||||||
|
deep-copy first.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
``(tools, stripped_count)`` — same list reference plus a count of
|
||||||
|
how many ``enum`` keywords were removed.
|
||||||
|
"""
|
||||||
|
if not tools:
|
||||||
|
return tools, 0
|
||||||
|
|
||||||
|
stripped = 0
|
||||||
|
|
||||||
|
def _walk(node: Any) -> None:
|
||||||
|
nonlocal stripped
|
||||||
|
if isinstance(node, dict):
|
||||||
|
enum_val = node.get("enum")
|
||||||
|
if isinstance(enum_val, list) and any(
|
||||||
|
isinstance(v, str) and "/" in v for v in enum_val
|
||||||
|
):
|
||||||
|
node.pop("enum", None)
|
||||||
|
stripped += 1
|
||||||
|
for v in node.values():
|
||||||
|
_walk(v)
|
||||||
|
elif isinstance(node, list):
|
||||||
|
for item in node:
|
||||||
|
_walk(item)
|
||||||
|
|
||||||
|
for tool in tools:
|
||||||
|
if not isinstance(tool, dict):
|
||||||
|
continue
|
||||||
|
fn = tool.get("function")
|
||||||
|
if isinstance(fn, dict):
|
||||||
|
params = fn.get("parameters")
|
||||||
|
if isinstance(params, dict):
|
||||||
|
_walk(params)
|
||||||
|
continue
|
||||||
|
params = tool.get("parameters")
|
||||||
|
if isinstance(params, dict):
|
||||||
|
_walk(params)
|
||||||
|
|
||||||
|
if stripped:
|
||||||
|
logger.info(
|
||||||
|
"schema_sanitizer: stripped %d enum keyword(s) containing '/' "
|
||||||
|
"from tool schemas (xAI Responses grammar-compile recovery)",
|
||||||
|
stripped,
|
||||||
|
)
|
||||||
|
return tools, stripped
|
||||||
|
|||||||
Reference in New Issue
Block a user