fix(weixin): use Tencent SILK encoding for voice replies

This commit is contained in:
Patrick Wang
2026-04-14 04:12:04 +00:00
committed by Teknium
parent 53da34a4fc
commit 678b69ec1b
2 changed files with 128 additions and 1 deletions

View File

@@ -25,6 +25,7 @@ import struct
import tempfile
import time
import uuid
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -66,6 +67,14 @@ from gateway.platforms.base import (
from hermes_constants import get_hermes_home
from utils import atomic_json_write
try:
import pilk
PILK_AVAILABLE = True
except ImportError: # pragma: no cover - optional dependency
pilk = None # type: ignore[assignment]
PILK_AVAILABLE = False
# Service endpoints and identifiers for the Weixin adapter.
# NOTE(review): how each value is consumed is not visible in this hunk —
# confirm against the callers before changing any of them.
# Presumably the base URL of the iLink bot API host.
ILINK_BASE_URL = "https://ilinkai.weixin.qq.com"
# Presumably the root of the CDN used for media transfer.
WEIXIN_CDN_BASE_URL = "https://novac2c.cdn.weixin.qq.com/c2c"
# Presumably the application identifier sent with iLink requests.
ILINK_APP_ID = "bot"
@@ -1590,7 +1599,74 @@ class WeixinAdapter(BasePlatformAdapter):
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
if not self._session or not self._token:
return SendResult(success=False, error="Not connected")
temp_paths: List[str] = []
try:
voice_path = self._prepare_voice_payload(audio_path)
if voice_path != audio_path:
temp_paths.append(voice_path)
message_id = await self._send_file(chat_id, voice_path, caption or "")
return SendResult(success=True, message_id=message_id)
except Exception as exc:
logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
return SendResult(success=False, error=str(exc))
finally:
for path in temp_paths:
try:
os.unlink(path)
except OSError:
pass
def _prepare_voice_payload(self, audio_path: str) -> str:
path = str(audio_path)
if path.endswith(".silk"):
return path
if not PILK_AVAILABLE:
raise RuntimeError(
"Weixin native voice requires SILK encoding, but pilk is not installed"
)
wav_path = self._transcode_audio_to_wav(path)
try:
fd, silk_path = tempfile.mkstemp(suffix='.silk')
os.close(fd)
pilk.encode(wav_path, silk_path, tencent=True)
if not os.path.exists(silk_path) or os.path.getsize(silk_path) <= 0:
raise RuntimeError("Generated SILK voice file is empty")
return silk_path
finally:
try:
os.unlink(wav_path)
except OSError:
pass
def _transcode_audio_to_wav(self, input_path: str) -> str:
fd, wav_path = tempfile.mkstemp(suffix='.wav')
os.close(fd)
try:
result = subprocess.run(
[
'ffmpeg', '-y', '-i', input_path,
'-ar', '24000', '-ac', '1', '-f', 'wav', wav_path,
],
capture_output=True,
timeout=60,
check=False,
)
if result.returncode != 0:
stderr = result.stderr.decode('utf-8', errors='ignore')[:400]
raise RuntimeError(f"ffmpeg voice conversion failed: {stderr}")
if not os.path.exists(wav_path) or os.path.getsize(wav_path) <= 0:
raise RuntimeError("ffmpeg produced empty wav for Weixin voice")
return wav_path
except Exception:
try:
os.unlink(wav_path)
except OSError:
pass
raise
async def _download_remote_media(self, url: str) -> str:
from tools.url_safety import is_safe_url

View File

@@ -3,6 +3,7 @@
import asyncio
import json
import os
from pathlib import Path
from unittest.mock import AsyncMock, patch
from gateway.config import PlatformConfig
@@ -580,3 +581,53 @@ class TestWeixinSendImageFileParameterName:
caption="",
metadata=None,
)
class TestWeixinVoiceSending:
    """Tests for SILK voice sending in ``WeixinAdapter``."""

    def _connected_adapter(self) -> WeixinAdapter:
        """Return an adapter with a stand-in session and token set.

        ``send_voice`` returns "Not connected" unless both ``_session`` and
        ``_token`` are truthy, so they are filled with placeholder values.
        """
        adapter = _make_adapter()
        adapter._session = object()
        adapter._token = "test-token"
        adapter._base_url = "https://weixin.example.com"
        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
        return adapter

    @patch.object(WeixinAdapter, "_send_file", new_callable=AsyncMock)
    @patch.object(WeixinAdapter, "_prepare_voice_payload")
    def test_send_voice_uses_silk_payload(self, prepare_mock, send_file_mock, tmp_path):
        """``send_voice`` sends the converted payload, not the source file."""
        adapter = self._connected_adapter()
        source = tmp_path / "voice.ogg"
        silk = tmp_path / "voice.silk"
        source.write_bytes(b"ogg")
        silk.write_bytes(b"silk")
        prepare_mock.return_value = str(silk)
        send_file_mock.return_value = "msg-1"

        result = asyncio.run(adapter.send_voice("wxid_test123", str(source)))

        assert result.success is True
        prepare_mock.assert_called_once_with(str(source))
        send_file_mock.assert_awaited_once_with("wxid_test123", str(silk), "")
        # A payload that differs from the input is a temp file and must be
        # removed after sending; the caller's own file must be left alone.
        assert not silk.exists()
        assert source.exists()

    # Patch the module-level ``pilk`` object and availability flag rather
    # than ``pilk.encode`` so the test also runs when the optional ``pilk``
    # dependency is not installed (the module attribute is ``None`` then).
    @patch("gateway.platforms.weixin.PILK_AVAILABLE", True)
    @patch("gateway.platforms.weixin.pilk")
    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
    def test_prepare_voice_payload_transcodes_to_silk(self, transcode_mock, pilk_mock, tmp_path):
        """Non-SILK input is transcoded to WAV and then encoded to SILK."""
        adapter = _make_adapter()
        src = tmp_path / "voice.ogg"
        src.write_bytes(b"ogg")
        wav = tmp_path / "voice.wav"
        wav.write_bytes(b"wav")
        transcode_mock.return_value = str(wav)

        def _fake_encode(infile, outfile, **kwargs):
            Path(outfile).write_bytes(b"silk-bytes")

        pilk_mock.encode.side_effect = _fake_encode

        silk_path = adapter._prepare_voice_payload(str(src))
        try:
            assert silk_path.endswith('.silk')
            assert Path(silk_path).read_bytes() == b"silk-bytes"
            pilk_mock.encode.assert_called_once_with(str(wav), silk_path, tencent=True)
            # The intermediate WAV must be cleaned up by the adapter.
            assert not wav.exists()
        finally:
            # Remove the generated temp file even when an assertion fails.
            os.unlink(silk_path)

    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
    def test_prepare_voice_payload_passes_through_silk(self, transcode_mock, tmp_path):
        """A file that is already ``.silk`` is returned without conversion."""
        adapter = _make_adapter()
        silk = tmp_path / "voice.silk"
        silk.write_bytes(b"silk")

        assert adapter._prepare_voice_payload(str(silk)) == str(silk)
        transcode_mock.assert_not_called()
        assert silk.exists()