* chore: remove Atropos RL environments, tools, tests, skill, and tinker-atropos submodule Delete: - environments/ (43 files — base env, agent loop, tool call parsers, benchmarks) - rl_cli.py (standalone RL training CLI) - tools/rl_training_tool.py (all 10 rl_* tools) - tests: test_rl_training_tool, test_tool_call_parsers, test_managed_server_tool_support, test_agent_loop, test_agent_loop_vllm, test_agent_loop_tool_calling, test_terminalbench2_env_security - optional-skills/mlops/hermes-atropos-environments/ - tinker-atropos git submodule + .gitmodules * chore: remove RL/Atropos references from Python source - toolsets.py: remove rl toolset block + update comment - model_tools.py: remove rl_tools group + update async bridging comment - hermes_cli/tools_config.py: remove RL display entry, _DEFAULT_OFF_TOOLSETS, setup block, and rl_training post-setup handler - tools/budget_config.py: remove RL environment reference in docstring - tests/test_model_tools.py: remove rl_tools from expected groups - tests/run_agent/test_streaming_tool_call_repair.py: fix stale cross-reference * chore: remove rl/yc-bench extras and tinker-atropos refs from pyproject.toml - Remove rl extra (atroposlib, tinker, fastapi, uvicorn, wandb) - Remove yc-bench extra - Remove rl_cli from py-modules - Remove [tool.ty.src] exclude for tinker-atropos - Remove [tool.ruff] exclude for tinker-atropos - Regenerate uv.lock * chore: remove tinker-atropos from install/setup scripts - setup-hermes.sh: remove entire tinker-atropos submodule install block - scripts/install.sh: remove both tinker-atropos blocks (Termux + standard) - scripts/install.ps1: remove tinker-atropos block - nix/hermes-agent.nix: remove tinker-atropos pip install line * chore: remove RL references from cli-config.yaml.example * docs: remove Atropos/RL references from README, CONTRIBUTING, AGENTS.md * docs: remove RL/Atropos references from website - Delete: environments.md, rl-training.md, mlops-hermes-atropos-environments.md - 
sidebars.ts: remove rl-training and environments sidebar entries - optional-skills-catalog.md: remove hermes-atropos-environments row - tools-reference.md: remove entire rl toolset section - toolsets-reference.md: remove rl row + update example - integrations/index.md: remove RL Training bullet - architecture.md: remove environments/ from tree + RL section - contributing.md: remove tinker-atropos setup - updating.md: remove tinker-atropos install + stale submodule update * chore: remove remaining RL/Atropos stragglers - hermes_cli/config.py: remove TINKER_API_KEY + WANDB_API_KEY env var defs - hermes_cli/doctor.py: remove Submodules check section (tinker-atropos) - hermes_cli/setup.py: remove RL Training status check - hermes_cli/status.py: remove Tinker + WandB from API key status display - agent/display.py: remove both rl_* tool preview/activity blocks - website/docs: remove RL references from providers.md + env-variables.md - tests: remove TINKER_API_KEY from conftest, set_config_value, setup_script * chore: remove RL training section from .env.example
116 lines
4.6 KiB
Python
116 lines
4.6 KiB
Python
"""Tests for tool call argument repair in the streaming assembly path.

The streaming path (run_agent._call_chat_completions) assembles tool call
deltas into full arguments. When a model truncates or malforms the JSON
(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
straight through — setting has_truncated_tool_args but NOT repairing it.
That triggered the truncation handler to kill the session with /new required.

The fix: repair arguments in the streaming assembly path using
_repair_tool_call_arguments() so repairable malformations (trailing commas,
unclosed brackets, Python None) don't kill the session.
"""
|
|
|
|
import json
|
|
import pytest
|
|
|
|
from run_agent import _repair_tool_call_arguments
|
|
|
|
|
|
class TestStreamingAssemblyRepair:
    """Exercise ``_repair_tool_call_arguments`` on streaming-style payloads.

    Every case hands the repair helper a raw argument string of the kind
    described as arising while tool-call deltas are assembled, then checks
    the text it returns. Only the repair function itself is called here;
    nothing in this class drives the streaming path end to end.
    """

    # ---- Truncated payloads (most common streaming failure) ----

    def test_truncated_object_no_close_brace(self):
        """Object cut off before its closing brace keeps both fields."""
        repaired = _repair_tool_call_arguments(
            '{"command": "ls -la", "timeout": 30', "terminal"
        )
        decoded = json.loads(repaired)
        assert decoded["command"] == "ls -la"
        assert decoded["timeout"] == 30

    def test_truncated_nested_object(self):
        """Object with two string fields cut off before the closing brace.

        NOTE(review): despite the test name, the payload used here is a
        flat object, and only the ``path`` field is checked.
        """
        broken = '{"path": "/tmp/foo", "content": "hello"'
        decoded = json.loads(_repair_tool_call_arguments(broken, "write_file"))
        assert decoded["path"] == "/tmp/foo"

    def test_truncated_mid_value(self):
        """Payload cut off in the middle of a string value."""
        broken = '{"command": "git clone ht'
        repaired = _repair_tool_call_arguments(broken, "terminal")
        # The only requirement is parseable JSON; json.loads raises
        # otherwise. The command value itself is not checked.
        json.loads(repaired)

    # ---- Trailing commas (Ollama/GLM common) ----

    def test_trailing_comma_before_close_brace(self):
        """A comma directly before the closing brace is tolerated."""
        expected = {"path": "/tmp", "content": "x"}
        repaired = _repair_tool_call_arguments(
            '{"path": "/tmp", "content": "x",}', "write_file"
        )
        assert json.loads(repaired) == expected

    def test_trailing_comma_in_list(self):
        """A comma directly before a closing square bracket is tolerated."""
        expected = {"items": [1, 2, 3]}
        repaired = _repair_tool_call_arguments('{"items": [1, 2, 3,]}', "test")
        assert json.loads(repaired) == expected

    # ---- Python ``None`` emitted in place of JSON ----

    def test_python_none_literal(self):
        """The bare text ``None`` comes back as an empty JSON object."""
        repaired = _repair_tool_call_arguments("None", "test")
        assert repaired == "{}"

    # ---- Empty arguments (some models emit an empty string) ----

    def test_empty_string(self):
        """An empty string comes back as an empty JSON object."""
        repaired = _repair_tool_call_arguments("", "test")
        assert repaired == "{}"

    def test_whitespace_only(self):
        """Whitespace-only input comes back as an empty JSON object."""
        repaired = _repair_tool_call_arguments(" \n ", "test")
        assert repaired == "{}"

    # ---- Already-valid JSON ----

    def test_valid_json_passthrough(self):
        """Well-formed input still decodes to the same mapping."""
        well_formed = '{"path": "/tmp/foo", "content": "hello"}'
        repaired = _repair_tool_call_arguments(well_formed, "write_file")
        decoded = json.loads(repaired)
        assert decoded == {"path": "/tmp/foo", "content": "hello"}

    # ---- Surplus closing brackets (rare but happens) ----

    def test_extra_closing_brace(self):
        """One closing brace too many is dropped."""
        repaired = _repair_tool_call_arguments('{"key": "value"}}', "test")
        decoded = json.loads(repaired)
        assert decoded == {"key": "value"}

    # ---- Real-world GLM-5.1 truncation patterns ----

    def test_glm_truncation_pattern(self):
        """Truncation that stops right after a key's colon.

        The payload ends on ``"background":`` with no value following it.
        The expected outcome is the empty-object fallback, i.e. the result
        must decode to ``{}``. The repairable variants of this pattern
        (trailing comma, unclosed brace without a hanging colon) are
        covered by the other tests in this class.
        """
        broken = '{"command": "ls -la /tmp", "timeout": 30, "background":'
        repaired = _repair_tool_call_arguments(broken, "terminal")
        # Expected to be unrepairable: the result decodes to an empty object.
        assert json.loads(repaired) == {}

    def test_glm_truncation_repairable(self):
        """Same GLM-style payload, cut off after a complete value."""
        broken = '{"command": "ls -la /tmp", "timeout": 30'
        decoded = json.loads(_repair_tool_call_arguments(broken, "terminal"))
        assert decoded["command"] == "ls -la /tmp"
        assert decoded["timeout"] == 30