feat(gateway/signal): add support for multiple images sending

Adds a new `send_multiple_images` method to `BasePlatformAdapter` that implements the default one-image-per-message loop and can be overridden per platform. Implements such an override for the Signal adapter, which batches images and tries (best-effort) to work around rate limits for large batches using a dedicated scheduler. Also implements batching and rate-limit handling in the `send_message` tool. Adds tests for the Signal adapter, its rate-limit scheduler, and the `send_message` tool.
2026-04-30 12:11:07 +02:00
parent 411f586c67
commit 04ea895ffb
9 changed files with 2010 additions and 84 deletions
--- a/gateway/platforms/base.py
+++ b/gateway/platforms/base.py
@@ -1505,7 +1505,64 @@ class BasePlatformAdapter(ABC):
        Default is a no-op for platforms with one-shot typing indicators.
        """
        pass
-    
+
+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images.
+
+        Each item is a ``(url, alt_text)`` tuple whose URL is an
+        ``http(s)://`` or ``file://`` URI.
+
+        Default implementation sends each item individually,
+        routing animated GIFs through ``send_animation`` and local
+        files through ``send_image_file``.
+
+        Override in subclasses to bundle into a single native API call
+        (e.g. Signal's multi-attachment RPC)
+        """
+        from urllib.parse import unquote as _unquote
+
+        for image_url, alt_text in images:
+            if human_delay > 0:
+                await asyncio.sleep(human_delay)
+            try:
+                logger.info(
+                    "[%s] Sending image: %s (alt=%s)",
+                    self.name,
+                    safe_url_for_log(image_url),
+                    alt_text[:30] if alt_text else "",
+                )
+                if image_url.startswith("file://"):
+                    img_result = await self.send_image_file(
+                        chat_id=chat_id,
+                        image_path=_unquote(image_url[7:]),
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                elif self._is_animation_url(image_url):
+                    img_result = await self.send_animation(
+                        chat_id=chat_id,
+                        animation_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                else:
+                    img_result = await self.send_image(
+                        chat_id=chat_id,
+                        image_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                if not img_result.success:
+                    logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
+            except Exception as img_err:
+                logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+
    async def send_image(
        self,
        chat_id: str,
@@ -2587,41 +2644,52 @@ class BasePlatformAdapter(ABC):
                # Send extracted images as native attachments
                if images:
                    logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
-                for image_url, alt_text in images:
-                    if human_delay > 0:
-                        await asyncio.sleep(human_delay)
                    try:
-                        logger.info(
-                            "[%s] Sending image: %s (alt=%s)",
-                            self.name,
-                            safe_url_for_log(image_url),
-                            alt_text[:30] if alt_text else "",
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=images,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
                        )
-                        # Route animated GIFs through send_animation for proper playback
-                        if self._is_animation_url(image_url):
-                            img_result = await self.send_animation(
-                                chat_id=event.source.chat_id,
-                                animation_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        else:
-                            img_result = await self.send_image(
-                                chat_id=event.source.chat_id,
-                                image_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        if not img_result.success:
-                            logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
-                    except Exception as img_err:
-                        logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+

                # Send extracted media files — route by file type
                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
                _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

+                # Partition images out of media_files + local_files so they
+                # can be sent as a single batch (Signal RPC)
+                from urllib.parse import quote as _quote
+                _image_paths: list = []
+                _non_image_media: list = []
                for media_path, is_voice in media_files:
+                    _ext = Path(media_path).suffix.lower()
+                    if _ext in _IMAGE_EXTS and not is_voice:
+                        _image_paths.append(media_path)
+                    else:
+                        _non_image_media.append((media_path, is_voice))
+                _non_image_local: list = []
+                for file_path in local_files:
+                    if Path(file_path).suffix.lower() in _IMAGE_EXTS:
+                        _image_paths.append(file_path)
+                    else:
+                        _non_image_local.append(file_path)
+
+                if _image_paths:
+                    try:
+                        _batch = [(f"file://{_quote(p)}", "") for p in _image_paths]
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=_batch,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
+                        )
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+
+                for media_path, is_voice in _non_image_media:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
@@ -2638,12 +2706,6 @@ class BasePlatformAdapter(ABC):
                                video_path=media_path,
                                metadata=_thread_metadata,
                            )
-                        elif ext in _IMAGE_EXTS:
-                            media_result = await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=media_path,
-                                metadata=_thread_metadata,
-                            )
                        else:
                            media_result = await self.send_document(
                                chat_id=event.source.chat_id,
@@ -2656,19 +2718,13 @@ class BasePlatformAdapter(ABC):
                    except Exception as media_err:
                        logger.warning("[%s] Error sending media: %s", self.name, media_err)

-                # Send auto-detected local files as native attachments
-                for file_path in local_files:
+                # Send auto-detected local non-image files as native attachments
+                for file_path in _non_image_local:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(file_path).suffix.lower()
-                        if ext in _IMAGE_EXTS:
-                            await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=file_path,
-                                metadata=_thread_metadata,
-                            )
-                        elif ext in _VIDEO_EXTS:
+                        if ext in _VIDEO_EXTS:
                            await self.send_video(
                                chat_id=event.source.chat_id,
                                video_path=file_path,
--- a/gateway/platforms/signal.py
+++ b/gateway/platforms/signal.py
@@ -21,7 +21,7 @@ import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Optional, Any
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import quote, unquote

 import httpx
@@ -39,6 +39,17 @@ from gateway.platforms.base import (
    cache_image_from_url,
 )
 from gateway.platforms.helpers import redact_phone
+from gateway.platforms.signal_rate_limit import (
+    SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
+    SIGNAL_MAX_ATTACHMENTS_PER_MSG,
+    SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+    SignalRateLimitError,
+    _extract_retry_after_seconds,
+    _format_wait,
+    _is_signal_rate_limit_error,
+    _signal_send_timeout,
+    get_scheduler,
+)

 logger = logging.getLogger(__name__)

@@ -53,6 +64,7 @@ SSE_RETRY_DELAY_MAX = 60.0
 HEALTH_CHECK_INTERVAL = 30.0  # seconds between health checks
 HEALTH_CHECK_STALE_THRESHOLD = 120.0  # seconds without SSE activity before concern

+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -674,6 +686,8 @@ class SignalAdapter(BasePlatformAdapter):
        rpc_id: str = None,
        *,
        log_failures: bool = True,
+        raise_on_rate_limit: bool = False,
+        timeout: float = 30.0,
    ) -> Any:
        """Send a JSON-RPC 2.0 request to signal-cli daemon.

@@ -682,6 +696,11 @@ class SignalAdapter(BasePlatformAdapter):
        repeated NETWORK_FAILURE spam for unreachable recipients while
        still preserving visibility for the first occurrence and for
        unrelated RPCs.
+
+        When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
+        ``RateLimitException`` response raises ``SignalRateLimitError``
+        instead of being swallowed — lets callers (multi-attachment send)
+        opt into backoff-retry without changing default behaviour.
        """
        if not self.client:
            logger.warning("Signal: RPC called but client not connected")
@@ -701,20 +720,28 @@ class SignalAdapter(BasePlatformAdapter):
            resp = await self.client.post(
                f"{self.http_url}/api/v1/rpc",
                json=payload,
-                timeout=30.0,
+                timeout=timeout,
            )
            resp.raise_for_status()
            data = resp.json()

            if "error" in data:
+                err = data["error"]
+                if raise_on_rate_limit:
+                    if _is_signal_rate_limit_error(err):
+                        err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
+                        retry_after = _extract_retry_after_seconds(err)
+                        raise SignalRateLimitError(err_msg, retry_after=retry_after)
                if log_failures:
-                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                    logger.warning("Signal RPC error (%s): %s", method, err)
                else:
-                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
+                    logger.debug("Signal RPC error (%s): %s", method, err)
                return None

            return data.get("result")

+        except SignalRateLimitError:
+            raise
        except Exception as e:
            if log_failures:
                logger.warning("Signal RPC %s failed: %s", method, e)
@@ -978,6 +1005,178 @@ class SignalAdapter(BasePlatformAdapter):
            self._typing_failures.pop(chat_id, None)
            self._typing_skip_until.pop(chat_id, None)

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images via chunked Signal RPC calls.
+
+        Per-image alt texts are dropped — Signal's send RPC only carries
+        one shared message body. Bad images (download failure, missing
+        file, oversize) are skipped with a warning so one bad URL
+        doesn't lose the rest of the batch. ``human_delay`` is ignored:
+        the rate-limit scheduler handles inter-batch pacing.
+        """
+        if not images:
+            return
+
+        scheduler = get_scheduler()
+        logger.info(
+            "Signal send_multiple_images: received %d image(s) for %s — "
+            "scheduler state: %s",
+            len(images), chat_id[:30], scheduler.state(),
+        )
+
+        await self._stop_typing_indicator(chat_id)
+
+        attachments: List[str] = []
+        skipped_download = 0
+        skipped_missing = 0
+        skipped_oversize = 0
+        for image_url, _alt_text in images:
+            if image_url.startswith("file://"):
+                file_path = unquote(image_url[7:])
+            else:
+                try:
+                    file_path = await cache_image_from_url(image_url)
+                except Exception as e:
+                    logger.warning("Signal: failed to download image %s: %s", image_url, e)
+                    skipped_download += 1
+                    continue
+
+            if not file_path or not Path(file_path).exists():
+                logger.warning("Signal: image file not found for %s", image_url)
+                skipped_missing += 1
+                continue
+
+            file_size = Path(file_path).stat().st_size
+            if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+                logger.warning(
+                    "Signal: image too large (%d bytes), skipping %s", file_size, image_url
+                )
+                skipped_oversize += 1
+                continue
+
+            attachments.append(file_path)
+
+        if not attachments:
+            logger.error(
+                "Signal: no valid images in batch of %d "
+                "(download=%d missing=%d oversize=%d)",
+                len(images), skipped_download, skipped_missing, skipped_oversize,
+            )
+            return
+
+        logger.info(
+            "Signal send_multiple_images: %d/%d images valid, sending in chunks",
+            len(attachments), len(images),
+        )
+
+        base_params: Dict[str, Any] = {
+            "account": self.account,
+            "message": "",
+        }
+        if chat_id.startswith("group:"):
+            base_params["groupId"] = chat_id[6:]
+        else:
+            base_params["recipient"] = [await self._resolve_recipient(chat_id)]
+
+        att_batches = [
+            attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
+            for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
+        ]
+
+        for idx, att_batch in enumerate(att_batches):
+            n = len(att_batch)
+            estimated = scheduler.estimate_wait(n)
+            logger.debug(
+                "Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
+                idx + 1, len(att_batches), n, estimated,
+            )
+            if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
+                await self._notify_batch_pacing(
+                    chat_id, idx + 1, len(att_batches), estimated
+                )
+
+            params = dict(base_params, attachments=att_batch)
+            send_timeout = _signal_send_timeout(n)
+
+            for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
+                await scheduler.acquire(n)
+                try:
+                    _rpc_t0 = time.monotonic()
+                    result = await self._rpc(
+                        "send", params, raise_on_rate_limit=True, timeout=send_timeout,
+                    )
+                    _rpc_duration = time.monotonic() - _rpc_t0
+                    if result is not None:
+                        self._track_sent_timestamp(result)
+                        await scheduler.report_rpc_duration(_rpc_duration, n)
+                        logger.info(
+                            "Signal batch %d/%d: %d attachments sent in %.1fs "
+                            "(attempt %d/%d)",
+                            idx + 1, len(att_batches), n, _rpc_duration,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        )
+                    else:
+                        # Assume the server didn't accept the batch, so don't deduct tokens
+                        logger.error(
+                            "Signal: RPC send failed for batch %d/%d (%d attachments, "
+                            "attempt %d/%d, rpc_duration=%.1fs)",
+                            idx + 1, len(att_batches), n,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                            _rpc_duration,
+                        )
+                        # Retry transient (non-rate-limit) failures once
+                        if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                            backoff = 2.0 ** attempt
+                            logger.info(
+                                "Signal: retrying batch %d/%d after %.1fs backoff",
+                                idx + 1, len(att_batches), backoff,
+                            )
+                            await asyncio.sleep(backoff)
+                            continue
+                    break
+                except SignalRateLimitError as e:
+                    scheduler.feedback(e.retry_after, n)
+                    if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                        logger.error(
+                            "Signal: rate-limit retries exhausted on batch %d/%d "
+                            "(%d attachments lost, server retry_after=%s)",
+                            idx + 1, len(att_batches), n,
+                            f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                        )
+                        break
+                    logger.warning(
+                        "Signal: rate-limited on batch %d/%d "
+                        "(attempt %d/%d, server retry_after=%s); "
+                        "scheduler will pace the retry",
+                        idx + 1, len(att_batches),
+                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                    )
+
+    async def _notify_batch_pacing(
+        self,
+        chat_id: str,
+        next_batch_idx: int,
+        total_batches: int,
+        wait_s: float,
+    ) -> None:
+        """Inform the user when an inter-batch pacing wait crosses the
+        notice threshold. Best-effort; logs and continues on failure."""
+        try:
+            await self.send(
+                chat_id,
+                f"(More images coming — pausing ~{_format_wait(wait_s)} "
+                f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)",
+            )
+        except Exception as e:
+            logger.warning("Signal: failed to send pacing notice: %s", e)
+
    async def send_image(
        self,
        chat_id: str,
--- a/gateway/platforms/signal_rate_limit.py
+++ b/gateway/platforms/signal_rate_limit.py
@@ -0,0 +1,369 @@
+"""
+Signal attachment rate-limit scheduler.
+
+Process-wide token-bucket simulator that mirrors the per-account
+attachment rate limit signal-cli/Signal-Server enforce. Producers
+(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
+Signal path) call ``acquire(n)`` before an attachment send; on a 429
+they call ``feedback(retry_after, n)`` so the model recalibrates from
+the server's authoritative hint.
+
+The scheduler serializes concurrent calls through an ``asyncio.Lock``,
+giving FIFO fairness across agent sessions sharing one signal-cli
+daemon.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+import time
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32  # per-message attachment cap (source: Signal-{Android,Desktop} source code)
+SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50  # server-side token-bucket capacity for attachment rate limiting
+SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4  # fallback seconds per refilled token, used until the server supplies a Retry-After
+SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2  # initial attempt + 1 retry
+SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0  # seconds; notify the user when the estimated wait is >= this value
+SIGNAL_RPC_ERROR_RATELIMIT = -5  # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
+
+
+# ---------------------------------------------------------------------------
+# Errors
+# ---------------------------------------------------------------------------
+
+class SignalRateLimitError(Exception):
+    """
+    Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
+    caller has opted in via ``raise_on_rate_limit=True``.
+
+    ``retry_after`` carries the per-token Retry-After hint in seconds
+    (structured field on signal-cli ≥ v0.14.3, or parsed from the error
+    message); it is None when the error exposes neither.
+    """
+
+    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class SignalSchedulerError(Exception):
+    pass
+
+# ---------------------------------------------------------------------------
+# Detection helpers — used to fish a 429 out of signal-cli's various error
+# shapes (typed code, [429] substring, libsignal-net RetryLaterException
+# leaked through AttachmentInvalidException).
+# ---------------------------------------------------------------------------
+
+# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
+# RetryLaterException string form, surfaced when 429s hit during
+# attachment upload (signal-cli wraps these as AttachmentInvalidException
+# rather than RateLimitException, so the typed path doesn't fire).
+_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
+
+
+def _extract_retry_after_seconds(err: Any) -> Optional[float]:
+    """Pull the per-token Retry-After window from a signal-cli rate-limit error.
+
+    Tries two sources, in order:
+    1. ``error.data.response.results[*].retryAfterSeconds`` — the
+       structured field signal-cli ≥ v0.14.3 surfaces for plain
+       RateLimitException (the largest numeric value wins).
+    2. ``"Retry after N seconds"`` parsed out of the message — covers
+       libsignal-net's RetryLaterException that gets wrapped as
+       AttachmentInvalidException during attachment upload, where the
+       structured field stays null.
+
+    JSON-RPC allows ``error.data`` to be any JSON value, so each level is
+    type-checked. Returns None when neither source yields a value.
+    """
+    if not isinstance(err, dict):
+        match = _RETRY_AFTER_RE.search(str(err))
+        return float(match.group(1)) if match else None
+    data = err.get("data")
+    response = data.get("response") if isinstance(data, dict) else None
+    results = response.get("results") if isinstance(response, dict) else None
+    candidates = [
+        r["retryAfterSeconds"] for r in (results if isinstance(results, list) else [])
+        if isinstance(r, dict) and isinstance(r.get("retryAfterSeconds"), (int, float))
+        and r["retryAfterSeconds"]
+    ]
+    if candidates:
+        return float(max(candidates))
+    match = _RETRY_AFTER_RE.search(str(err.get("message", "")))
+    return float(match.group(1)) if match else None
+
+
+def _is_signal_rate_limit_error(err: Any) -> bool:
+    """Report whether a signal-cli RPC error is a rate-limit failure.
+
+    Three shapes are recognised:
+    - the typed ``SIGNAL_RPC_ERROR_RATELIMIT`` error code (signal-cli
+      ≥ v0.14.3, plain RateLimitException)
+    - the legacy ``[429]`` / ``RateLimitException`` message substrings
+    - libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
+      text that leaks through ``AttachmentInvalidException`` when the
+      limit is hit during attachment upload; signal-cli never re-tags
+      those as RateLimitException, so the substring is the only signal.
+
+    Non-dict errors are stringified and checked by substring only.
+    """
+    is_mapping = isinstance(err, dict)
+    if is_mapping and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
+        return True
+
+    text = str(err.get("message", "")) if is_mapping else str(err)
+    if "[429]" in text:
+        # Checked on the raw message, before lower-casing.
+        return True
+    lowered = text.lower()
+    return any(
+        marker in lowered
+        for marker in ("ratelimit", "retrylaterexception", "retry after")
+    )
+
+
+# ---------------------------------------------------------------------------
+# Misc helpers
+# ---------------------------------------------------------------------------
+
+def _format_wait(seconds: float) -> str:
+    """Render a wait as whole seconds below 90s, otherwise as whole minutes."""
+    clamped = max(0.0, seconds)
+    in_minutes = clamped >= 90
+    amount = max(1, int(round(clamped / 60))) if in_minutes else int(round(clamped))
+    return f"{amount} min" if in_minutes else f"{amount}s"
+
+
+def _signal_send_timeout(num_attachments: int) -> float:
+    """Pick the HTTP timeout (seconds) for a Signal ``send`` RPC.
+
+    Text-only sends (no attachments) keep the 30s default. Attachment
+    sends get 5s per attachment with a 60s floor: signal-cli uploads
+    attachments serially inside the call, so a fixed 30s would cut large
+    batches off mid-upload and log a phantom failure even though
+    signal-cli finishes the send a few seconds later.
+    """
+    has_attachments = num_attachments > 0
+    scaled = 5.0 * num_attachments
+    return (scaled if scaled > 60.0 else 60.0) if has_attachments else 30.0
+
+
+# ---------------------------------------------------------------------------
+# Scheduler
+# ---------------------------------------------------------------------------
+
+class SignalAttachmentScheduler:
+    """Process-wide token-bucket simulator for Signal attachment sends.
+
+    The bucket holds up to ``capacity`` tokens (default 50, matching
+    Signal's server-side rate-limit bucket size). Each attachment consumes one
+    token. Tokens refill at ``refill_rate`` tokens/second, calibrated
+    from the per-token Retry-After hint we get from the server when a
+    429 fires. Until we've observed one, we use the documented default
+    (1 token / 4 seconds).
+
+    Concurrent ``acquire(n)`` calls serialize through an
+    ``asyncio.Lock`` — natural FIFO across agent sessions hitting the
+    same daemon.
+    """
+
+    def __init__(
+        self,
+        capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
+        default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
+    ) -> None:
+        self.capacity = float(capacity)
+        self.tokens = float(capacity)
+        self.refill_rate = 1.0 / float(default_retry_after)
+        self.last_refill = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    def _refill(self) -> None:
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        if elapsed > 0 and self.tokens < self.capacity:
+            self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
+        self.last_refill = now
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def estimate_wait(self, n: int) -> float:
+        """Best-effort estimate of the seconds until ``n`` tokens would
+        be available. Used to decide whether to emit a user-facing
+        pacing notice *before* committing to an ``acquire`` that may
+        block silently. Lock-free; small races vs. concurrent acquires
+        are benign for an informational notice.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        deficit = n - projected
+        if deficit <= 0:
+            return 0.0
+        return deficit / self.refill_rate
+
+    async def acquire(self, n: int) -> float:
+        """Block until at least ``n`` tokens are available, return the
+        seconds slept.
+
+        Does **not** deduct tokens — the bucket is a read-only model of
+        server-side capacity.  Call ``report_rpc_duration()`` after the
+        RPC to synchronise the model with the server timeline.
+
+        Not perfect in case lots of coroutines try to acquire for big
+        uploads (``report_rpc_duration`` will take a long time to get hit)
+        but this is just a simulation. Signal server is ground truth and
+        will raise rate-limit exceptions triggering requeues.
+
+        The lock is released during ``asyncio.sleep`` so other callers
+        can interleave.  A retry loop re-checks after each sleep in
+        case the deadline was pessimistic.
+        """
+        if n <= 0:
+            return 0.0
+        if n > self.capacity:
+            raise SignalSchedulerError(
+                f"Signal scheduler was called requesting {n} tokens "
+                f"(max is {self.capacity})",
+            )
+
+        total_slept = 0.0
+        first_pass = True
+        while True:
+            async with self._lock:
+                self._refill()
+                if self.tokens >= n:
+                    if not first_pass or total_slept > 0:
+                        logger.debug(
+                            "Signal scheduler: tokens sufficient for %d "
+                            "(remaining=%.1f, total_slept=%.1fs)",
+                            n, self.tokens, total_slept,
+                        )
+                    return total_slept
+                deficit = n - self.tokens
+            wait = deficit / self.refill_rate
+            if first_pass:
+                logger.info(
+                    "Signal scheduler: pausing %.1fs for %d tokens "
+                    "(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
+                    wait, n, self.tokens, deficit,
+                    self.refill_rate, 1.0 / self.refill_rate,
+                )
+                first_pass = False
+            await asyncio.sleep(wait)
+            total_slept += wait
+
+    async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
+        """Record an attachment-send RPC that just completed.
+
+        Deducts ``n_attachments`` tokens without crediting refill during
+        the upload window. Signal's server checks the bucket at RPC start
+        and does *not* refill during request processing — refill resumes
+        after the response. Crediting upload-time refill causes cumulative
+        drift that eventually triggers 429s.
+
+        Advances ``last_refill`` so the next ``acquire`` / ``_refill``
+        starts counting from this point.
+        """
+        if n_attachments <= 0:
+            return
+
+        async with self._lock:
+            now = time.monotonic()
+            token_before = self.tokens
+            self.tokens = max(0.0, token_before - float(n_attachments))
+            self.last_refill = now
+        logger.log(
+            logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
+            "Signal scheduler: RPC for %d att took %.1fs — "
+            "tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
+            n_attachments, rpc_duration,
+            token_before, self.tokens,
+            n_attachments, self.refill_rate,
+        )
+
+    def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
+        """Apply server feedback after a 429.
+
+        ``retry_after`` is the per-*token* refill window the server
+        reports (None when signal-cli is older than v0.14.3 and didn't
+        surface it).
+
+        When present we calibrate ``refill_rate`` from it:
+        the server is authoritative.
+        """
+        if retry_after and retry_after > 0:
+            new_rate = 1.0 / float(retry_after)
+            if new_rate != self.refill_rate:
+                logger.info(
+                    "Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
+                    "(server retry_after=%.1fs per token)",
+                    new_rate, retry_after,
+                )
+                self.refill_rate = new_rate
+        self.tokens = 0.0
+        self.last_refill = time.monotonic()
+
+    def state(self) -> dict:
+        """Return current scheduler state for diagnostic logging (read-only).
+
+        Does not advance ``last_refill`` — safe to call from logging paths
+        without perturbing the bucket.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        return {
+            "tokens": round(projected, 1),
+            "capacity": int(self.capacity),
+            "refill_rate": round(self.refill_rate, 4),
+            "refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Process-wide singleton
+# ---------------------------------------------------------------------------
+
+_scheduler: Optional[SignalAttachmentScheduler] = None
+
+
+def get_scheduler() -> SignalAttachmentScheduler:
+    """Return the process-wide scheduler, creating it on first access."""
+    global _scheduler
+    if _scheduler is None:
+        _scheduler = SignalAttachmentScheduler()
+        logger.info(
+            "Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
+            int(_scheduler.capacity),
+            _scheduler.refill_rate,
+            1.0 / _scheduler.refill_rate,
+        )
+    return _scheduler
+
+
+def _reset_scheduler() -> None:
+    """Drop the cached scheduler so the next ``get_scheduler`` call
+    builds a fresh one. Test-only — never call from production paths."""
+    global _scheduler
+    _scheduler = None
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -7186,6 +7186,7 @@ class GatewayRunner:
        that the normal _process_message_background path would have caught.
        """
        from pathlib import Path
+        from urllib.parse import quote as _quote

        try:
            media_files, _ = adapter.extract_media(response)
@@ -7199,7 +7200,36 @@ class GatewayRunner:
            _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
            _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

+            # Partition out images so they can be sent as a single batch
+            # (e.g. Signal's multi-attachment RPC)
+            image_paths: list = []
+            non_image_media: list = []
            for media_path, is_voice in media_files:
+                ext = Path(media_path).suffix.lower()
+                if ext in _IMAGE_EXTS and not is_voice:
+                    image_paths.append(media_path)
+                else:
+                    non_image_media.append((media_path, is_voice))
+
+            non_image_local: list = []
+            for file_path in local_files:
+                if Path(file_path).suffix.lower() in _IMAGE_EXTS:
+                    image_paths.append(file_path)
+                else:
+                    non_image_local.append(file_path)
+
+            if image_paths:
+                try:
+                    images = [(f"file://{_quote(p)}", "") for p in image_paths]
+                    await adapter.send_multiple_images(
+                        chat_id=event.source.chat_id,
+                        images=images,
+                        metadata=_thread_meta,
+                    )
+                except Exception as e:
+                    logger.warning("[%s] Post-stream image batch delivery failed: %s", adapter.name, e)
+
+            for media_path, is_voice in non_image_media:
                try:
                    ext = Path(media_path).suffix.lower()
                    if should_send_media_as_audio(event.source.platform, ext, is_voice=is_voice):
@@ -7214,12 +7244,6 @@ class GatewayRunner:
                            video_path=media_path,
                            metadata=_thread_meta,
                        )
-                    elif ext in _IMAGE_EXTS:
-                        await adapter.send_image_file(
-                            chat_id=event.source.chat_id,
-                            image_path=media_path,
-                            metadata=_thread_meta,
-                        )
                    else:
                        await adapter.send_document(
                            chat_id=event.source.chat_id,
@@ -7229,13 +7253,13 @@ class GatewayRunner:
                except Exception as e:
                    logger.warning("[%s] Post-stream media delivery failed: %s", adapter.name, e)

-            for file_path in local_files:
+            for file_path in non_image_local:
                try:
                    ext = Path(file_path).suffix.lower()
-                    if ext in _IMAGE_EXTS:
-                        await adapter.send_image_file(
+                    if ext in _VIDEO_EXTS:
+                        await adapter.send_video(
                            chat_id=event.source.chat_id,
-                            image_path=file_path,
+                            video_path=file_path,
                            metadata=_thread_meta,
                        )
                    else: