fix(tui): show correct context length

2026-04-27 11:59:32 -04:00
parent fa2bee1215
commit 01ad0aacaf
3 changed files with 29 additions and 12 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -625,8 +625,6 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -1016,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
--- a/run_agent.py
+++ b/run_agent.py
@@ -2145,9 +2145,25 @@ class AIAgent:
            if config_context_length is None:
                config_context_length = getattr(self, "_config_context_length", None)
            target_ctx = max(config_context_length or 0, MINIMUM_CONTEXT_LENGTH)
-            ensure_lmstudio_model_loaded(
+            loaded_ctx = ensure_lmstudio_model_loaded(
                self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
            )
+            if loaded_ctx:
+                self._lmstudio_loaded_context = loaded_ctx
+                # Push into the live compressor so the status bar reflects the
+                # real loaded ctx the moment the load resolves, instead of
+                # holding the previous model's value (or "ctx --") through the
+                # next render tick.
+                cc = getattr(self, "context_compressor", None)
+                if cc is not None:
+                    cc.update_model(
+                        model=self.model,
+                        context_length=loaded_ctx,
+                        base_url=self.base_url,
+                        api_key=getattr(self, "api_key", ""),
+                        provider=self.provider,
+                        api_mode=self.api_mode,
+                    )
        except Exception as err:
            logger.debug("LM Studio preload skipped: %s", err)

--- a/tests/agent/test_model_metadata_local_ctx.py
+++ b/tests/agent/test_model_metadata_local_ctx.py
@@ -274,13 +274,15 @@ class TestQueryLocalContextLengthLmStudio:
        return client_mock

    def test_lmstudio_exact_key_match(self):
-        """Reads max_context_length when key matches exactly."""
+        """Resolves loaded ctx when key matches exactly."""
        from agent.model_metadata import _query_local_context_length

        native_resp = self._make_resp(200, {
            "models": [
-                {"key": "nvidia/nvidia-nemotron-super-49b-v1", "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 131072},
+                {"key": "nvidia/nvidia-nemotron-super-49b-v1",
+                 "id": "nvidia/nvidia-nemotron-super-49b-v1",
+                 "max_context_length": 1_048_576,
+                 "loaded_instances": [{"config": {"context_length": 131072}}]},
            ]
        })
        client_mock = self._make_client(
@@ -310,7 +312,8 @@ class TestQueryLocalContextLengthLmStudio:
            "models": [
                {"key": "nvidia/nvidia-nemotron-super-49b-v1",
                 "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 131072},
+                 "max_context_length": 1_048_576,
+                 "loaded_instances": [{"config": {"context_length": 131072}}]},
            ]
        })
        client_mock = self._make_client(
@@ -463,7 +466,10 @@ class TestFetchEndpointModelMetadataLmStudio:
                    {
                        "key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
                        "id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
-                        "max_context_length": 131072,
+                        "max_context_length": 1_048_576,
+                        "loaded_instances": [
+                            {"config": {"context_length": 131072}}
+                        ],
                    }
                ]
            }