diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index d883263e6..afd8bee19 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -625,8 +625,6 @@ def fetch_endpoint_model_metadata(
                         if isinstance(ctx, int) and ctx > 0:
                             context_length = ctx
                             break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                     if context_length is not None:
                         entry["context_length"] = context_length
 
@@ -1016,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                 ctx = cfg.get("context_length")
                                 if ctx and isinstance(ctx, (int, float)):
                                     return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break
 
             # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
             resp = client.get(f"{server_url}/v1/models/{model}")
diff --git a/run_agent.py b/run_agent.py
index 65be5add9..6668cd543 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -2145,9 +2145,25 @@ class AIAgent:
             if config_context_length is None:
                 config_context_length = getattr(self, "_config_context_length", None)
             target_ctx = max(config_context_length or 0, MINIMUM_CONTEXT_LENGTH)
-            ensure_lmstudio_model_loaded(
+            loaded_ctx = ensure_lmstudio_model_loaded(
                 self.model, self.base_url, getattr(self, "api_key", ""), target_ctx,
             )
+            if loaded_ctx:
+                self._lmstudio_loaded_context = loaded_ctx
+                # Push into the live compressor so the status bar reflects the
+                # real loaded ctx the moment the load resolves, instead of
+                # holding the previous model's value (or "ctx --") through the
+                # next render tick.
+                cc = getattr(self, "context_compressor", None)
+                if cc is not None:
+                    cc.update_model(
+                        model=self.model,
+                        context_length=loaded_ctx,
+                        base_url=self.base_url,
+                        api_key=getattr(self, "api_key", ""),
+                        provider=self.provider,
+                        api_mode=self.api_mode,
+                    )
         except Exception as err:
             logger.debug("LM Studio preload skipped: %s", err)
 
diff --git a/tests/agent/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py
index 5da1ed703..f449255c0 100644
--- a/tests/agent/test_model_metadata_local_ctx.py
+++ b/tests/agent/test_model_metadata_local_ctx.py
@@ -274,13 +274,15 @@ class TestQueryLocalContextLengthLmStudio:
         return client_mock
 
     def test_lmstudio_exact_key_match(self):
-        """Reads max_context_length when key matches exactly."""
+        """Resolve the loaded instance's context_length when the key matches exactly."""
         from agent.model_metadata import _query_local_context_length
 
         native_resp = self._make_resp(200, {
             "models": [
-                {"key": "nvidia/nvidia-nemotron-super-49b-v1", "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 131072},
+                {"key": "nvidia/nvidia-nemotron-super-49b-v1",
+                 "id": "nvidia/nvidia-nemotron-super-49b-v1",
+                 "max_context_length": 1_048_576,
+                 "loaded_instances": [{"config": {"context_length": 131072}}]},
             ]
         })
         client_mock = self._make_client(
@@ -310,7 +312,8 @@ class TestQueryLocalContextLengthLmStudio:
             "models": [
                 {"key": "nvidia/nvidia-nemotron-super-49b-v1",
                  "id": "nvidia/nvidia-nemotron-super-49b-v1",
-                 "max_context_length": 131072},
+                 "max_context_length": 1_048_576,
+                 "loaded_instances": [{"config": {"context_length": 131072}}]},
             ]
         })
         client_mock = self._make_client(
@@ -463,7 +466,10 @@ class TestFetchEndpointModelMetadataLmStudio:
                     {
                         "key": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
                         "id": "lmstudio-community/Qwen3.5-27B-GGUF/Qwen3.5-27B-Q8_0.gguf",
-                        "max_context_length": 131072,
+                        "max_context_length": 1_048_576,
+                        "loaded_instances": [
+                            {"config": {"context_length": 131072}}
+                        ],
                     }
                 ]
             }