diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py
index 921d822aa..2af648505 100644
--- a/gateway/platforms/weixin.py
+++ b/gateway/platforms/weixin.py
@@ -25,6 +25,7 @@ import struct
 import tempfile
 import time
 import uuid
+import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
@@ -66,6 +67,14 @@ from gateway.platforms.base import (
 from hermes_constants import get_hermes_home
 from utils import atomic_json_write
 
+try:
+    import pilk
+
+    PILK_AVAILABLE = True
+except ImportError:  # pragma: no cover - optional dependency
+    pilk = None  # type: ignore[assignment]
+    PILK_AVAILABLE = False
+
 ILINK_BASE_URL = "https://ilinkai.weixin.qq.com"
 WEIXIN_CDN_BASE_URL = "https://novac2c.cdn.weixin.qq.com/c2c"
 ILINK_APP_ID = "bot"
@@ -1590,7 +1599,119 @@ class WeixinAdapter(BasePlatformAdapter):
         reply_to: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
     ) -> SendResult:
-        return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
+        """Send ``audio_path`` to ``chat_id`` as a SILK voice payload.
+
+        Input that is not already ``.silk`` is transcoded first, and any
+        temporary file created for that is deleted before returning. Errors
+        are logged and reported via ``SendResult`` instead of being raised.
+        """
+        import asyncio  # function-scope import keeps this change self-contained
+
+        if not self._session or not self._token:
+            return SendResult(success=False, error="Not connected")
+
+        source_path = str(audio_path)
+        voice_path: Optional[str] = None
+        try:
+            # ffmpeg and SILK encoding block; run them off the event loop.
+            loop = asyncio.get_running_loop()
+            voice_path = await loop.run_in_executor(
+                None, self._prepare_voice_payload, audio_path
+            )
+            message_id = await self._send_file(chat_id, voice_path, caption or "")
+            return SendResult(success=True, message_id=message_id)
+        except Exception as exc:
+            logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
+            return SendResult(success=False, error=str(exc))
+        finally:
+            # Delete only what was created here, never the caller's own file.
+            if voice_path is not None and voice_path != source_path:
+                try:
+                    os.unlink(voice_path)
+                except OSError:
+                    pass
+
+    def _prepare_voice_payload(self, audio_path: str) -> str:
+        """Return the path of a SILK file to send for ``audio_path``.
+
+        A path ending in ``.silk`` is returned as-is. Anything else is
+        converted to WAV with ffmpeg and SILK-encoded with ``pilk`` into a new
+        temporary file, which the caller is responsible for deleting.
+
+        Raises:
+            RuntimeError: if ``pilk`` is not installed, ffmpeg fails, or the
+                encoder leaves an empty output file.
+        """
+        path = str(audio_path)
+        if path.lower().endswith(".silk"):
+            return path
+        if not PILK_AVAILABLE:
+            raise RuntimeError(
+                "Weixin native voice requires SILK encoding, but pilk is not installed"
+            )
+
+        wav_path = self._transcode_audio_to_wav(path)
+        silk_path: Optional[str] = None
+        try:
+            fd, silk_path = tempfile.mkstemp(suffix=".silk")
+            os.close(fd)
+            # NOTE(review): pilk documents its input as raw PCM; confirm that a
+            # WAV file (header included) is acceptable to the encoder.
+            pilk.encode(wav_path, silk_path, tencent=True)
+            if not os.path.exists(silk_path) or os.path.getsize(silk_path) <= 0:
+                raise RuntimeError("Generated SILK voice file is empty")
+            return silk_path
+        except Exception:
+            # Encoding failed: do not leave the SILK temp file behind.
+            if silk_path is not None:
+                try:
+                    os.unlink(silk_path)
+                except OSError:
+                    pass
+            raise
+        finally:
+            try:
+                os.unlink(wav_path)
+            except OSError:
+                pass
+
+    def _transcode_audio_to_wav(self, input_path: str) -> str:
+        """Convert ``input_path`` to a 24 kHz mono WAV temp file with ffmpeg.
+
+        Returns the new file's path; the caller must delete it. The temp file
+        is removed here when conversion fails.
+
+        Raises:
+            RuntimeError: if ffmpeg exits non-zero or produces an empty file.
+            subprocess.TimeoutExpired: if ffmpeg runs longer than 60 seconds.
+            OSError: if the ffmpeg executable cannot be started.
+        """
+        fd, wav_path = tempfile.mkstemp(suffix=".wav")
+        os.close(fd)
+        try:
+            result = subprocess.run(
+                [
+                    "ffmpeg", "-nostdin", "-hide_banner", "-loglevel", "error",
+                    "-y", "-i", input_path,
+                    "-ar", "24000", "-ac", "1", "-f", "wav", wav_path,
+                ],
+                capture_output=True,
+                timeout=60,
+                check=False,
+            )
+            if result.returncode != 0:
+                # Keep the tail of stderr: ffmpeg prints the failure reason last.
+                stderr = result.stderr.decode("utf-8", errors="ignore")[-400:]
+                raise RuntimeError(f"ffmpeg voice conversion failed: {stderr}")
+            if not os.path.exists(wav_path) or os.path.getsize(wav_path) <= 0:
+                raise RuntimeError("ffmpeg produced empty wav for Weixin voice")
+            return wav_path
+        except Exception:
+            try:
+                os.unlink(wav_path)
+            except OSError:
+                pass
+            raise
 
     async def _download_remote_media(self, url: str) -> str:
         from tools.url_safety import is_safe_url
diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py
index 45d5c4a87..03aeda60b 100644
--- a/tests/gateway/test_weixin.py
+++ b/tests/gateway/test_weixin.py
@@ -3,6 +3,7 @@
 import asyncio
 import json
 import os
+from pathlib import Path
 from unittest.mock import AsyncMock, patch
 
 from gateway.config import PlatformConfig
@@ -580,3 +581,88 @@ class TestWeixinSendImageFileParameterName:
             caption="",
             metadata=None,
         )
+
+
+class TestWeixinVoiceSending:
+    """Voice sending: SILK payload preparation and temp-file cleanup."""
+
+    def _connected_adapter(self) -> WeixinAdapter:
+        adapter = _make_adapter()
+        adapter._session = object()
+        adapter._token = "test-token"
+        adapter._base_url = "https://weixin.example.com"
+        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
+        return adapter
+
+    @patch.object(WeixinAdapter, "_send_file", new_callable=AsyncMock)
+    @patch.object(WeixinAdapter, "_prepare_voice_payload")
+    def test_send_voice_uses_silk_payload(self, prepare_mock, send_file_mock, tmp_path):
+        adapter = self._connected_adapter()
+        source = tmp_path / "voice.ogg"
+        silk = tmp_path / "voice.silk"
+        source.write_bytes(b"ogg")
+        silk.write_bytes(b"silk")
+        prepare_mock.return_value = str(silk)
+        send_file_mock.return_value = "msg-1"
+
+        result = asyncio.run(adapter.send_voice("wxid_test123", str(source)))
+
+        assert result.success is True
+        prepare_mock.assert_called_once_with(str(source))
+        send_file_mock.assert_awaited_once_with("wxid_test123", str(silk), "")
+        # The generated payload is temporary; the caller's file must survive.
+        assert not silk.exists()
+        assert source.exists()
+
+    # Patch the module attributes rather than ``pilk.encode`` so the tests also
+    # run when the optional ``pilk`` dependency is not installed.
+    @patch("gateway.platforms.weixin.PILK_AVAILABLE", True)
+    @patch("gateway.platforms.weixin.pilk")
+    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
+    def test_prepare_voice_payload_transcodes_to_silk(self, transcode_mock, pilk_mock, tmp_path):
+        adapter = _make_adapter()
+        src = tmp_path / "voice.ogg"
+        src.write_bytes(b"ogg")
+        wav = tmp_path / "voice.wav"
+        wav.write_bytes(b"wav")
+        transcode_mock.return_value = str(wav)
+
+        def _fake_encode(infile, outfile, **kwargs):
+            Path(outfile).write_bytes(b"silk-bytes")
+
+        pilk_mock.encode.side_effect = _fake_encode
+
+        silk_path = adapter._prepare_voice_payload(str(src))
+
+        assert silk_path.endswith(".silk")
+        assert Path(silk_path).read_bytes() == b"silk-bytes"
+        pilk_mock.encode.assert_called_once_with(str(wav), silk_path, tencent=True)
+        assert not wav.exists()
+        os.unlink(silk_path)
+
+    @patch("gateway.platforms.weixin.PILK_AVAILABLE", True)
+    @patch("gateway.platforms.weixin.pilk")
+    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
+    def test_prepare_voice_payload_cleans_up_when_encode_fails(self, transcode_mock, pilk_mock, tmp_path):
+        adapter = _make_adapter()
+        wav = tmp_path / "voice.wav"
+        wav.write_bytes(b"wav")
+        transcode_mock.return_value = str(wav)
+        attempted = []
+
+        def _failing_encode(infile, outfile, **kwargs):
+            attempted.append(outfile)
+            raise RuntimeError("encode failed")
+
+        pilk_mock.encode.side_effect = _failing_encode
+
+        try:
+            adapter._prepare_voice_payload(str(tmp_path / "voice.ogg"))
+        except RuntimeError:
+            pass
+        else:
+            raise AssertionError("expected RuntimeError")
+
+        assert len(attempted) == 1
+        assert not os.path.exists(attempted[0])
+        assert not wav.exists()