fix(tui): show correct context length

This commit is contained in:
Rugved Somwanshi
2026-04-27 11:59:32 -04:00
committed by kshitij
parent fa2bee1215
commit 01ad0aacaf
3 changed files with 29 additions and 12 deletions

View File

@@ -625,8 +625,6 @@ def fetch_endpoint_model_metadata(
if isinstance(ctx, int) and ctx > 0:
context_length = ctx
break
if context_length is None:
context_length = _extract_context_length(model)
if context_length is not None:
entry["context_length"] = context_length
@@ -1016,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
ctx = cfg.get("context_length")
if ctx and isinstance(ctx, (int, float)):
return int(ctx)
# Fall back to max_context_length (theoretical model max)
ctx = m.get("max_context_length") or m.get("context_length")
if ctx and isinstance(ctx, (int, float)):
return int(ctx)
break
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
resp = client.get(f"{server_url}/v1/models/{model}")

View File

@@ -2145,9 +2145,25 @@ class AIAgent:
if config_context_length is None:
config_context_length = getattr(self, "_config_context_length", None)
target_ctx = max(config_context_length or 0, MINIMUM_CONTEXT_LENGTH)
ensure_lmstudio_model_loaded(
loaded_ctx = ensure_lmstudio_model_loaded(
self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
)
if loaded_ctx:
self._lmstudio_loaded_context = loaded_ctx
# Push into the live compressor so the status bar reflects the
# real loaded ctx the moment the load resolves, instead of
# holding the previous model's value (or "ctx --") through the
# next render tick.
cc = getattr(self, "context_compressor", None)
if cc is not None:
cc.update_model(
model=self.model,
context_length=loaded_ctx,
base_url=self.base_url,
api_key=getattr(self, "api_key", ""),
provider=self.provider,
api_mode=self.api_mode,
)
except Exception as err:
logger.debug("LM Studio preload skipped: %s", err)

View File

@@ -274,13 +274,15 @@ class TestQueryLocalContextLengthLmStudio:
return client_mock
def test_lmstudio_exact_key_match(self):
"""Reads max_context_length when key matches exactly."""
"""Resolves loaded ctx when key matches exactly."""
from agent.model_metadata import _query_local_context_length
native_resp = self._make_resp(200, {
"models": [
{"key": "nvidia/nvidia-nemotron-super-49b-v1", "id": "nvidia/nvidia-nemotron-super-49b-v1",
"max_context_length": 131072},
{"key": "nvidia/nvidia-nemotron-super-49b-v1",
"id": "nvidia/nvidia-nemotron-super-49b-v1",
"max_context_length": 1_048_576,
"loaded_instances": [{"config": {"context_length": 131072}}]},
]
})
client_mock = self._make_client(
@@ -310,7 +312,8 @@ class TestQueryLocalContextLengthLmStudio:
"models": [
{"key": "nvidia/nvidia-nemotron-super-49b-v1",
"id": "nvidia/nvidia-nemotron-super-49b-v1",
"max_context_length": 131072},
"max_context_length": 1_048_576,
"loaded_instances": [{"config": {"context_length": 131072}}]},
]
})
client_mock = self._make_client(
@@ -463,7 +466,10 @@ class TestFetchEndpointModelMetadataLmStudio:
{
"key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
"id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
"max_context_length": 131072,
"max_context_length": 1_048_576,
"loaded_instances": [
{"config": {"context_length": 131072}}
],
}
]
}