fix(weixin): use Tencent SILK encoding for voice replies

2026-04-14 04:12:04 +00:00
parent 53da34a4fc
commit 678b69ec1b
2 changed files with 128 additions and 1 deletions
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -25,6 +25,7 @@ import struct
 import tempfile
 import time
 import uuid
+import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
@@ -66,6 +67,14 @@ from gateway.platforms.base import (
 from hermes_constants import get_hermes_home
 from utils import atomic_json_write

+try:
+    import pilk
+
+    PILK_AVAILABLE = True
+except ImportError:  # pragma: no cover - optional dependency
+    pilk = None  # type: ignore[assignment]
+    PILK_AVAILABLE = False
+
 ILINK_BASE_URL = "https://ilinkai.weixin.qq.com"
 WEIXIN_CDN_BASE_URL = "https://novac2c.cdn.weixin.qq.com/c2c"
 ILINK_APP_ID = "bot"
@@ -1590,7 +1599,74 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
+        if not self._session or not self._token:
+            return SendResult(success=False, error="Not connected")
+
+        temp_paths: List[str] = []
+        try:
+            voice_path = self._prepare_voice_payload(audio_path)
+            if voice_path != audio_path:
+                temp_paths.append(voice_path)
+            message_id = await self._send_file(chat_id, voice_path, caption or "")
+            return SendResult(success=True, message_id=message_id)
+        except Exception as exc:
+            logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
+            return SendResult(success=False, error=str(exc))
+        finally:
+            for path in temp_paths:
+                try:
+                    os.unlink(path)
+                except OSError:
+                    pass
+
+    def _prepare_voice_payload(self, audio_path: str) -> str:
+        """Return the path of a SILK file to upload for ``audio_path``.
+
+        A path that already ends in ``.silk`` is returned unchanged.  Any other
+        input is transcoded to a temporary WAV file and then encoded with
+        ``pilk`` (``tencent=True``) into a new temporary ``.silk`` file.  When
+        the returned path differs from ``audio_path`` it is a temporary file
+        that the caller is responsible for deleting.
+
+        Raises:
+            RuntimeError: if ``pilk`` is not installed, or if the encoder
+                leaves a missing or empty output file.  Errors raised by the
+                WAV transcode or by ``pilk.encode`` propagate unchanged.
+        """
+        path = str(audio_path)
+        if path.endswith(".silk"):
+            return path
+        if not PILK_AVAILABLE:
+            raise RuntimeError(
+                "Weixin native voice requires SILK encoding, but pilk is not installed"
+            )
+
+        wav_path = self._transcode_audio_to_wav(path)
+        silk_path: Optional[str] = None
+        try:
+            fd, silk_path = tempfile.mkstemp(suffix='.silk')
+            os.close(fd)
+            pilk.encode(wav_path, silk_path, tencent=True)
+            if not os.path.exists(silk_path) or os.path.getsize(silk_path) <= 0:
+                raise RuntimeError("Generated SILK voice file is empty")
+            return silk_path
+        except Exception:
+            # mkstemp already created the output file; without this a failed
+            # or empty encode would leave it behind in the temp directory.
+            if silk_path is not None:
+                try:
+                    os.unlink(silk_path)
+                except OSError:
+                    pass
+            raise
+        finally:
+            # The intermediate WAV is never needed once encoding has finished.
+            try:
+                os.unlink(wav_path)
+            except OSError:
+                pass
+
+    def _transcode_audio_to_wav(self, input_path: str) -> str:
+        """Convert ``input_path`` to a temporary 24 kHz mono WAV via ``ffmpeg``.
+
+        Returns the path of the new ``.wav`` file; the caller must delete it.
+        On any failure the temporary file is removed before the error
+        propagates.
+
+        Raises:
+            RuntimeError: if ``ffmpeg`` exits non-zero (the message carries up
+                to 400 characters of its stderr) or writes an empty file.
+            Exception: anything raised by ``subprocess.run`` itself, such as a
+                missing ``ffmpeg`` binary or the 60 second timeout, is
+                re-raised unchanged.
+        """
+        fd, wav_path = tempfile.mkstemp(suffix='.wav')
+        os.close(fd)
+        try:
+            result = subprocess.run(
+                [
+                    # -y: overwrite the placeholder file mkstemp just created.
+                    'ffmpeg', '-y', '-i', input_path,
+                    '-ar', '24000', '-ac', '1', '-f', 'wav', wav_path,
+                ],
+                # ffmpeg reads interactive commands from stdin by default; do
+                # not let it inherit (and consume) the gateway's stdin.
+                stdin=subprocess.DEVNULL,
+                capture_output=True,
+                timeout=60,
+                check=False,
+            )
+            if result.returncode != 0:
+                stderr = result.stderr.decode('utf-8', errors='ignore')[:400]
+                raise RuntimeError(f"ffmpeg voice conversion failed: {stderr}")
+            if not os.path.exists(wav_path) or os.path.getsize(wav_path) <= 0:
+                raise RuntimeError("ffmpeg produced empty wav for Weixin voice")
+            return wav_path
+        except Exception:
+            try:
+                os.unlink(wav_path)
+            except OSError:
+                pass
+            raise

    async def _download_remote_media(self, url: str) -> str:
        from tools.url_safety import is_safe_url
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@@ -3,6 +3,7 @@
 import asyncio
 import json
 import os
+from pathlib import Path
 from unittest.mock import AsyncMock, patch

 from gateway.config import PlatformConfig
@@ -580,3 +581,53 @@ class TestWeixinSendImageFileParameterName:
            caption="",
            metadata=None,
        )
+
+
+class TestWeixinVoiceSending:
+    """Voice replies are converted to SILK and sent through ``_send_file``."""
+
+    def _connected_adapter(self) -> WeixinAdapter:
+        """Build an adapter that passes ``send_voice``'s connection check."""
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._token = "test-token"
+        adapter._base_url = "https://weixin.example.com"
+        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
+        return adapter
+
+    @patch.object(WeixinAdapter, "_send_file", new_callable=AsyncMock)
+    @patch.object(WeixinAdapter, "_prepare_voice_payload")
+    def test_send_voice_uses_silk_payload(self, prepare_mock, send_file_mock, tmp_path):
+        adapter = self._connected_adapter()
+        source = tmp_path / "voice.ogg"
+        silk = tmp_path / "voice.silk"
+        source.write_bytes(b"ogg")
+        silk.write_bytes(b"silk")
+        prepare_mock.return_value = str(silk)
+        send_file_mock.return_value = "msg-1"
+
+        result = asyncio.run(adapter.send_voice("wxid_test123", str(source)))
+
+        assert result.success is True
+        prepare_mock.assert_called_once_with(str(source))
+        send_file_mock.assert_awaited_once_with("wxid_test123", str(silk), "")
+        # The converted payload differs from the source path, so send_voice
+        # treats it as a temporary file and removes it; the source is kept.
+        assert not silk.exists()
+        assert source.exists()
+
+    # Patch the module attribute rather than ``pilk.encode`` so the test also
+    # runs where the optional pilk package is not installed (the attribute is
+    # then None and PILK_AVAILABLE is False).
+    @patch("gateway.platforms.weixin.PILK_AVAILABLE", True)
+    @patch("gateway.platforms.weixin.pilk")
+    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
+    def test_prepare_voice_payload_transcodes_to_silk(self, transcode_mock, pilk_mock, tmp_path):
+        adapter = _make_adapter()
+        src = tmp_path / "voice.ogg"
+        src.write_bytes(b"ogg")
+        wav = tmp_path / "voice.wav"
+        wav.write_bytes(b"wav")
+        transcode_mock.return_value = str(wav)
+
+        def _fake_encode(infile, outfile, **kwargs):
+            Path(outfile).write_bytes(b"silk-bytes")
+
+        pilk_mock.encode.side_effect = _fake_encode
+
+        silk_path = adapter._prepare_voice_payload(str(src))
+        try:
+            assert silk_path.endswith('.silk')
+            assert Path(silk_path).read_bytes() == b"silk-bytes"
+            pilk_mock.encode.assert_called_once_with(str(wav), silk_path, tencent=True)
+            assert not wav.exists()
+        finally:
+            # Remove the real temp file even when an assertion above fails.
+            os.unlink(silk_path)