fix(weixin): use Tencent SILK encoding for voice replies
This commit is contained in:
@@ -25,6 +25,7 @@ import struct
|
||||
import tempfile
|
||||
import time
|
||||
import uuid
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
@@ -66,6 +67,14 @@ from gateway.platforms.base import (
|
||||
from hermes_constants import get_hermes_home
|
||||
from utils import atomic_json_write
|
||||
|
||||
try:
|
||||
import pilk
|
||||
|
||||
PILK_AVAILABLE = True
|
||||
except ImportError: # pragma: no cover - optional dependency
|
||||
pilk = None # type: ignore[assignment]
|
||||
PILK_AVAILABLE = False
|
||||
|
||||
ILINK_BASE_URL = "https://ilinkai.weixin.qq.com"
|
||||
WEIXIN_CDN_BASE_URL = "https://novac2c.cdn.weixin.qq.com/c2c"
|
||||
ILINK_APP_ID = "bot"
|
||||
@@ -1590,7 +1599,74 @@ class WeixinAdapter(BasePlatformAdapter):
|
||||
reply_to: Optional[str] = None,
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
) -> SendResult:
|
||||
return await self.send_document(chat_id, audio_path, caption=caption or "", metadata=metadata)
|
||||
if not self._session or not self._token:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
|
||||
temp_paths: List[str] = []
|
||||
try:
|
||||
voice_path = self._prepare_voice_payload(audio_path)
|
||||
if voice_path != audio_path:
|
||||
temp_paths.append(voice_path)
|
||||
message_id = await self._send_file(chat_id, voice_path, caption or "")
|
||||
return SendResult(success=True, message_id=message_id)
|
||||
except Exception as exc:
|
||||
logger.error("[%s] send_voice failed to=%s: %s", self.name, _safe_id(chat_id), exc)
|
||||
return SendResult(success=False, error=str(exc))
|
||||
finally:
|
||||
for path in temp_paths:
|
||||
try:
|
||||
os.unlink(path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _prepare_voice_payload(self, audio_path: str) -> str:
    """Return the path of a Tencent-SILK file to send as a voice payload.

    A path that already ends in ``.silk`` is returned unchanged.  Any other
    input is converted to WAV with ``_transcode_audio_to_wav`` and then
    encoded by ``pilk.encode(..., tencent=True)`` into a new temporary
    ``.silk`` file.  The intermediate WAV is always deleted.  On success
    the temporary SILK file is left on disk for the caller to delete; on
    failure it is removed here so nothing is leaked.

    Args:
        audio_path: Path of the source audio file.

    Returns:
        ``audio_path`` (as ``str``) when it already ends in ``.silk``,
        otherwise the path of the newly created temporary ``.silk`` file.

    Raises:
        RuntimeError: If ``pilk`` is not installed, or if the encoder left
            a missing or empty output file.  Errors raised by the WAV
            conversion or by ``pilk.encode`` propagate unchanged.
    """
    path = str(audio_path)
    if path.endswith(".silk"):
        return path
    if not PILK_AVAILABLE:
        raise RuntimeError(
            "Weixin native voice requires SILK encoding, but pilk is not installed"
        )

    wav_path = self._transcode_audio_to_wav(path)
    silk_path = None
    try:
        fd, silk_path = tempfile.mkstemp(suffix=".silk")
        os.close(fd)
        pilk.encode(wav_path, silk_path, tencent=True)
        if not os.path.exists(silk_path) or os.path.getsize(silk_path) <= 0:
            raise RuntimeError("Generated SILK voice file is empty")
    except BaseException:
        # The caller never learns this path on failure, so the temp file
        # has to be removed here or it would stay behind forever.
        if silk_path is not None:
            try:
                os.unlink(silk_path)
            except OSError:
                pass
        raise
    finally:
        # The WAV is only an intermediate artifact; drop it on every path.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
    return silk_path
|
||||
|
||||
def _transcode_audio_to_wav(self, input_path: str) -> str:
    """Convert ``input_path`` to a 24 kHz mono WAV temp file via ffmpeg.

    Runs the ``ffmpeg`` executable found on ``PATH`` with a 60 second
    timeout and writes the result to a new temporary ``.wav`` file.  The
    temporary file is deleted again if anything goes wrong.

    Args:
        input_path: Path of the audio file handed to ffmpeg as input.

    Returns:
        Path of the generated temporary WAV file; the caller is responsible
        for deleting it.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status or produces a
            missing/empty output file.
        Exception: Anything raised by ``subprocess.run`` itself (for
            example a timeout or a missing ffmpeg binary) propagates
            unchanged.
    """
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        result = subprocess.run(
            [
                # -hide_banner / -loglevel error keep stderr limited to the
                # actual failure reason; -nostdin stops ffmpeg from reading
                # the parent process's stdin.
                "ffmpeg", "-hide_banner", "-loglevel", "error", "-nostdin",
                "-y", "-i", input_path,
                "-ar", "24000", "-ac", "1", "-f", "wav", wav_path,
            ],
            capture_output=True,
            timeout=60,
            check=False,
        )
        if result.returncode != 0:
            # ffmpeg prints the decisive error last, so keep the tail of
            # stderr rather than its beginning.
            stderr = result.stderr.decode("utf-8", errors="ignore").strip()[-400:]
            raise RuntimeError(f"ffmpeg voice conversion failed: {stderr}")
        if not os.path.exists(wav_path) or os.path.getsize(wav_path) <= 0:
            raise RuntimeError("ffmpeg produced empty wav for Weixin voice")
        return wav_path
    except BaseException:
        # Do not leave the temp file behind on any failure path.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise
|
||||
|
||||
async def _download_remote_media(self, url: str) -> str:
|
||||
from tools.url_safety import is_safe_url
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
from gateway.config import PlatformConfig
|
||||
@@ -580,3 +581,53 @@ class TestWeixinSendImageFileParameterName:
|
||||
caption="",
|
||||
metadata=None,
|
||||
)
|
||||
|
||||
|
||||
class TestWeixinVoiceSending:
    """Tests for the SILK voice path of ``WeixinAdapter``."""

    def _connected_adapter(self) -> WeixinAdapter:
        """Return an adapter with the session/token state set so it looks connected."""
        adapter = _make_adapter()
        adapter._session = object()
        adapter._token = "test-token"
        adapter._base_url = "https://weixin.example.com"
        adapter._token_store.get = lambda account_id, chat_id: "ctx-token"
        return adapter

    @patch.object(WeixinAdapter, "_send_file", new_callable=AsyncMock)
    @patch.object(WeixinAdapter, "_prepare_voice_payload")
    def test_send_voice_uses_silk_payload(self, prepare_mock, send_file_mock, tmp_path):
        """``send_voice`` uploads the path returned by ``_prepare_voice_payload``."""
        adapter = self._connected_adapter()
        source = tmp_path / "voice.ogg"
        silk = tmp_path / "voice.silk"
        source.write_bytes(b"ogg")
        silk.write_bytes(b"silk")
        prepare_mock.return_value = str(silk)
        send_file_mock.return_value = "msg-1"

        result = asyncio.run(adapter.send_voice("wxid_test123", str(source)))

        assert result.success is True
        prepare_mock.assert_called_once_with(str(source))
        send_file_mock.assert_awaited_once_with("wxid_test123", str(silk), "")

    # Patch the module-level ``pilk`` object and availability flag rather
    # than ``pilk.encode``: ``pilk`` is an optional dependency and is ``None``
    # when it is not installed, so patching an attribute on it would fail.
    @patch("gateway.platforms.weixin.PILK_AVAILABLE", True)
    @patch("gateway.platforms.weixin.pilk")
    @patch.object(WeixinAdapter, "_transcode_audio_to_wav")
    def test_prepare_voice_payload_transcodes_to_silk(self, transcode_mock, pilk_mock, tmp_path):
        """Non-SILK input is transcoded to WAV, SILK-encoded, and the WAV removed."""
        adapter = _make_adapter()
        src = tmp_path / "voice.ogg"
        src.write_bytes(b"ogg")
        wav = tmp_path / "voice.wav"
        wav.write_bytes(b"wav")
        transcode_mock.return_value = str(wav)

        def _fake_encode(infile, outfile, **kwargs):
            Path(outfile).write_bytes(b"silk-bytes")

        pilk_mock.encode.side_effect = _fake_encode

        silk_path = adapter._prepare_voice_payload(str(src))
        try:
            assert silk_path.endswith(".silk")
            assert Path(silk_path).read_bytes() == b"silk-bytes"
            pilk_mock.encode.assert_called_once_with(str(wav), silk_path, tencent=True)
            assert not wav.exists()
        finally:
            # Remove the generated temp file even when an assertion fails.
            if os.path.exists(silk_path):
                os.unlink(silk_path)

    def test_prepare_voice_payload_passes_through_silk(self, tmp_path):
        """A path that already ends in ``.silk`` is returned unchanged."""
        adapter = _make_adapter()
        silk = tmp_path / "voice.silk"
        silk.write_bytes(b"silk")

        assert adapter._prepare_voice_payload(str(silk)) == str(silk)
        assert silk.exists()
|
||||
|
||||
Reference in New Issue
Block a user