fix(tui): show the loaded context length instead of the model's theoretical maximum
This commit is contained in:
committed by
kshitij
parent
fa2bee1215
commit
01ad0aacaf
@@ -625,8 +625,6 @@ def fetch_endpoint_model_metadata(
|
||||
if isinstance(ctx, int) and ctx > 0:
|
||||
context_length = ctx
|
||||
break
|
||||
if context_length is None:
|
||||
context_length = _extract_context_length(model)
|
||||
if context_length is not None:
|
||||
entry["context_length"] = context_length
|
||||
|
||||
@@ -1016,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
|
||||
ctx = cfg.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
# Fall back to max_context_length (theoretical model max)
|
||||
ctx = m.get("max_context_length") or m.get("context_length")
|
||||
if ctx and isinstance(ctx, (int, float)):
|
||||
return int(ctx)
|
||||
break
|
||||
|
||||
# LM Studio / vLLM / llama.cpp: try /v1/models/{model}
|
||||
resp = client.get(f"{server_url}/v1/models/{model}")
|
||||
|
||||
18
run_agent.py
18
run_agent.py
@@ -2145,9 +2145,25 @@ class AIAgent:
|
||||
if config_context_length is None:
|
||||
config_context_length = getattr(self, "_config_context_length", None)
|
||||
target_ctx = max(config_context_length or 0, MINIMUM_CONTEXT_LENGTH)
|
||||
ensure_lmstudio_model_loaded(
|
||||
loaded_ctx = ensure_lmstudio_model_loaded(
|
||||
self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
|
||||
)
|
||||
if loaded_ctx:
|
||||
self._lmstudio_loaded_context = loaded_ctx
|
||||
# Push into the live compressor so the status bar reflects the
|
||||
# real loaded ctx the moment the load resolves, instead of
|
||||
# holding the previous model's value (or "ctx --") through the
|
||||
# next render tick.
|
||||
cc = getattr(self, "context_compressor", None)
|
||||
if cc is not None:
|
||||
cc.update_model(
|
||||
model=self.model,
|
||||
context_length=loaded_ctx,
|
||||
base_url=self.base_url,
|
||||
api_key=getattr(self, "api_key", ""),
|
||||
provider=self.provider,
|
||||
api_mode=self.api_mode,
|
||||
)
|
||||
except Exception as err:
|
||||
logger.debug("LM Studio preload skipped: %s", err)
|
||||
|
||||
|
||||
@@ -274,13 +274,15 @@ class TestQueryLocalContextLengthLmStudio:
|
||||
return client_mock
|
||||
|
||||
def test_lmstudio_exact_key_match(self):
|
||||
"""Reads max_context_length when key matches exactly."""
|
||||
"""Resolves loaded ctx when key matches exactly."""
|
||||
from agent.model_metadata import _query_local_context_length
|
||||
|
||||
native_resp = self._make_resp(200, {
|
||||
"models": [
|
||||
{"key": "nvidia/nvidia-nemotron-super-49b-v1", "id": "nvidia/nvidia-nemotron-super-49b-v1",
|
||||
"max_context_length": 131072},
|
||||
{"key": "nvidia/nvidia-nemotron-super-49b-v1",
|
||||
"id": "nvidia/nvidia-nemotron-super-49b-v1",
|
||||
"max_context_length": 1_048_576,
|
||||
"loaded_instances": [{"config": {"context_length": 131072}}]},
|
||||
]
|
||||
})
|
||||
client_mock = self._make_client(
|
||||
@@ -310,7 +312,8 @@ class TestQueryLocalContextLengthLmStudio:
|
||||
"models": [
|
||||
{"key": "nvidia/nvidia-nemotron-super-49b-v1",
|
||||
"id": "nvidia/nvidia-nemotron-super-49b-v1",
|
||||
"max_context_length": 131072},
|
||||
"max_context_length": 1_048_576,
|
||||
"loaded_instances": [{"config": {"context_length": 131072}}]},
|
||||
]
|
||||
})
|
||||
client_mock = self._make_client(
|
||||
@@ -463,7 +466,10 @@ class TestFetchEndpointModelMetadataLmStudio:
|
||||
{
|
||||
"key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
|
||||
"id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
|
||||
"max_context_length": 131072,
|
||||
"max_context_length": 1_048_576,
|
||||
"loaded_instances": [
|
||||
{"config": {"context_length": 131072}}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user