feat(gateway): centralize audio routing + FLAC support + Telegram doc fallback (#17833)
Extracted from PR #17211 (@versun) so it can land independently of the local_command TTS provider redesign.

- Add should_send_media_as_audio(platform, ext, is_voice) in gateway/platforms/base.py; single source of truth for audio routing.
- Add .flac to recognized audio extensions (MEDIA regex, weixin audio set, send_message audio set).
- Telegram send_voice() now falls back to send_document for formats Telegram's Bot API can't play natively (.wav, .flac, ...) instead of raising; MP3/M4A still go to sendAudio, Opus/OGG still go to sendVoice.
- Route _send_telegram() in send_message_tool through a narrower _TELEGRAM_SEND_AUDIO_EXTS = {.mp3, .m4a} set.
- cron.scheduler._send_media_via_adapter now delegates the audio decision to should_send_media_as_audio so it matches the gateway.
- Update the cron live-adapter ogg test to flag [[audio_as_voice]] so it still routes to sendVoice under the new Telegram-specific policy.
- Tests: unit coverage for should_send_media_as_audio across platforms, end-to-end MEDIA routing via _process_message_background and GatewayRunner._deliver_media_from_response, TelegramAdapter.send_voice fallback for FLAC/WAV.

Co-authored-by: Versun <me+github7604@versun.org>
This commit is contained in:
@@ -551,14 +551,14 @@ class TestDeliverResultWrapping:
|
||||
patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
|
||||
_deliver_result(
|
||||
job,
|
||||
"MEDIA:/tmp/voice.ogg",
|
||||
"[[audio_as_voice]]\nMEDIA:/tmp/voice.ogg",
|
||||
adapters={Platform.TELEGRAM: adapter},
|
||||
loop=loop,
|
||||
)
|
||||
|
||||
# Text send should NOT be called (no text after stripping MEDIA tag)
|
||||
adapter.send.assert_not_called()
|
||||
# Audio should still be delivered
|
||||
# Audio should still be delivered as a voice bubble
|
||||
adapter.send_voice.assert_called_once()
|
||||
|
||||
def test_live_adapter_sends_cleaned_text_not_raw(self):
|
||||
|
||||
@@ -323,6 +323,55 @@ class TestExtractMedia:
|
||||
assert "Here" in cleaned
|
||||
assert "After" in cleaned
|
||||
|
||||
def test_media_tag_supports_unquoted_flac_paths_with_spaces(self):
    """An unquoted MEDIA tag whose .flac path contains spaces is captured whole."""
    flac_path = "/tmp/Jane Doe/speech.flac"

    extracted, leftover = BasePlatformAdapter.extract_media(f"MEDIA:{flac_path}")

    # The full path (spaces included) comes back as a single entry paired
    # with a False flag, and no text remains once the tag is stripped.
    assert extracted == [(flac_path, False)]
    assert leftover == ""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# should_send_media_as_audio
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestShouldSendMediaAsAudio:
    """Pin the audio-routing policy shared by gateway + scheduler + send_message."""

    @staticmethod
    def _decide(platform, ext, **flags):
        """Invoke should_send_media_as_audio, importing it lazily at call time."""
        from gateway.platforms.base import should_send_media_as_audio

        return should_send_media_as_audio(platform, ext, **flags)

    def test_unknown_extension_returns_false(self):
        """Extensions that are not audio are never routed as audio."""
        for platform, ext in ((None, ".png"), ("telegram", ".pdf")):
            assert self._decide(platform, ext) is False

    def test_non_telegram_platforms_route_all_audio(self):
        """Discord and Slack route every recognized audio extension as audio."""
        for ext in (".mp3", ".m4a", ".wav", ".flac", ".ogg", ".opus"):
            for platform in ("discord", "slack"):
                assert self._decide(platform, ext) is True

    def test_telegram_mp3_and_m4a_route_to_audio(self):
        """On Telegram, MP3 and M4A are routed as audio."""
        for ext in (".mp3", ".m4a"):
            assert self._decide("telegram", ext) is True

    def test_telegram_wav_and_flac_fall_through_to_document(self):
        """On Telegram, WAV and FLAC are not routed as audio."""
        for ext in (".wav", ".flac"):
            assert self._decide("telegram", ext) is False

    def test_telegram_ogg_opus_only_when_voice_flagged(self):
        """On Telegram, OGG/Opus count as audio only with is_voice=True."""
        for ext in (".ogg", ".opus"):
            assert self._decide("telegram", ext, is_voice=True) is True
        for ext in (".ogg", ".opus"):
            assert self._decide("telegram", ext) is False

    def test_accepts_platform_enum(self):
        """Platform enum members are accepted in place of platform strings."""
        from gateway.config import Platform

        expectations = (
            (Platform.TELEGRAM, ".mp3", True),
            (Platform.TELEGRAM, ".flac", False),
            (Platform.DISCORD, ".flac", True),
        )
        for platform, ext, expected in expectations:
            assert self._decide(platform, ext) is expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# truncate_message
|
||||
|
||||
@@ -453,6 +453,87 @@ class TestMediaGroups:
|
||||
adapter.handle_message.assert_not_awaited()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSendVoice — outbound audio delivery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSendVoice:
    """Tests for TelegramAdapter.send_voice() routing across audio formats."""

    @pytest.fixture()
    def connected_adapter(self, adapter):
        """Adapter with a mock bot attached."""
        adapter._bot = AsyncMock()
        return adapter

    @staticmethod
    def _stub_bot_senders(adapter, expected_sender, message_id):
        """Replace the bot's send_voice/send_audio/send_document with fresh mocks.

        Only ``expected_sender`` is given a return value: a mock message whose
        ``message_id`` is ``message_id``.  The other two are plain AsyncMocks so
        the tests can assert they were never awaited.
        """
        sent_message = MagicMock()
        sent_message.message_id = message_id
        for sender in ("send_voice", "send_audio", "send_document"):
            if sender == expected_sender:
                stub = AsyncMock(return_value=sent_message)
            else:
                stub = AsyncMock()
            setattr(adapter._bot, sender, stub)

    @pytest.mark.asyncio
    async def test_flac_falls_back_to_document(self, connected_adapter, tmp_path):
        """Telegram sendAudio does not accept FLAC — must fall back to sendDocument."""
        audio_file = tmp_path / "clip.flac"
        audio_file.write_bytes(b"fLaC" + b"\x00" * 32)
        self._stub_bot_senders(connected_adapter, "send_document", 101)

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
            caption="Audio",
        )

        assert result.success is True
        assert result.message_id == "101"
        bot = connected_adapter._bot
        bot.send_document.assert_awaited_once()
        bot.send_audio.assert_not_awaited()
        bot.send_voice.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_wav_falls_back_to_document(self, connected_adapter, tmp_path):
        """Telegram sendAudio does not accept WAV — must fall back to sendDocument."""
        audio_file = tmp_path / "clip.wav"
        audio_file.write_bytes(b"RIFF" + b"\x00" * 32)
        self._stub_bot_senders(connected_adapter, "send_document", 102)

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
        )

        assert result.success is True
        # Same fallback path as FLAC, so the same guarantees are pinned here:
        # the document's message id is surfaced and no other sender is used.
        assert result.message_id == "102"
        bot = connected_adapter._bot
        bot.send_document.assert_awaited_once()
        bot.send_audio.assert_not_awaited()
        bot.send_voice.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_mp3_routes_to_send_audio(self, connected_adapter, tmp_path):
        """MP3 is Telegram-sendAudio-compatible."""
        audio_file = tmp_path / "clip.mp3"
        audio_file.write_bytes(b"ID3" + b"\x00" * 32)
        self._stub_bot_senders(connected_adapter, "send_audio", 103)

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
        )

        assert result.success is True
        bot = connected_adapter._bot
        bot.send_audio.assert_awaited_once()
        bot.send_document.assert_not_awaited()
        # MP3 must not additionally be sent as a voice message.
        bot.send_voice.assert_not_awaited()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TestSendDocument — outbound file attachment delivery
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
195
tests/gateway/test_tts_media_routing.py
Normal file
195
tests/gateway/test_tts_media_routing.py
Normal file
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
Tests for cross-platform audio/voice media routing.

These tests pin the expected delivery path for audio media files on
Telegram, where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus
only renders as a voice bubble when explicitly flagged.  Both delivery
paths are covered: ``BasePlatformAdapter._process_message_background``
and ``GatewayRunner._deliver_media_from_response``.
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import Platform, PlatformConfig
|
||||
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource, build_session_key
|
||||
|
||||
|
||||
class _MediaRoutingAdapter(BasePlatformAdapter):
    """Minimal BasePlatformAdapter stub registered as Platform.TELEGRAM."""

    def __init__(self):
        stub_config = PlatformConfig(enabled=True, token="test")
        super().__init__(stub_config, Platform.TELEGRAM)

    async def connect(self):
        """Report a successful connection without doing any work."""
        return True

    async def disconnect(self):
        """Nothing to tear down."""
        return None

    async def send(self, chat_id, content=None, **kwargs):
        """Report a successful text send with the fixed message id "text"."""
        return SendResult(message_id="text", success=True)

    async def get_chat_info(self, chat_id):
        """Describe any chat as a DM carrying the requested id."""
        return dict(id=chat_id, type="dm")
|
||||
|
||||
|
||||
def _event(thread_id=None):
    """Build a Telegram DM text event for chat "chat-1", optionally in a thread."""
    return MessageEvent(
        text="make speech",
        message_type=MessageType.TEXT,
        source=SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="chat-1",
            chat_type="dm",
            thread_id=thread_id,
        ),
        message_id="msg-1",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender():
    """A bare FLAC MEDIA tag is delivered via send_document, never send_voice."""
    adapter = _MediaRoutingAdapter()
    adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.flac")
    voice_sender = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    document_sender = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    event = _event()
    session_key = build_session_key(event.source)
    await adapter._process_message_background(event, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.flac",
        "metadata": None,
    }
    document_sender.assert_awaited_once_with(**expected_call)
    voice_sender.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """An OGG MEDIA tag without the voice flag is delivered via send_document."""
    adapter = _MediaRoutingAdapter()
    adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.ogg")
    voice_sender = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    document_sender = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    event = _event()
    session_key = build_session_key(event.source)
    await adapter._process_message_background(event, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.ogg",
        "metadata": None,
    }
    document_sender.assert_awaited_once_with(**expected_call)
    voice_sender.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender():
    """An OGG MEDIA tag preceded by [[audio_as_voice]] is delivered via send_voice."""
    adapter = _MediaRoutingAdapter()
    handler_reply = "[[audio_as_voice]]\nMEDIA:/tmp/speech.ogg"
    adapter._message_handler = AsyncMock(return_value=handler_reply)
    voice_sender = AsyncMock(return_value=SendResult(success=True, message_id="voice"))
    document_sender = AsyncMock(return_value=SendResult(success=True, message_id="doc"))
    adapter.send_voice = voice_sender
    adapter.send_document = document_sender

    event = _event()
    session_key = build_session_key(event.source)
    await adapter._process_message_background(event, session_key)

    expected_call = {
        "chat_id": "chat-1",
        "audio_path": "/tmp/speech.ogg",
        "metadata": None,
    }
    voice_sender.assert_awaited_once_with(**expected_call)
    document_sender.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
    """_deliver_media_from_response sends a FLAC MEDIA tag via send_document."""

    def _sender(message_id):
        # Each sender reports success with a distinct message id.
        return AsyncMock(return_value=SendResult(success=True, message_id=message_id))

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_sender("voice"),
        send_document=_sender("doc"),
        send_image_file=_sender("image"),
        send_video=_sender("video"),
    )
    event = _event(thread_id="topic-1")

    # The method is called unbound; a bare object() is passed as the runner.
    await GatewayRunner._deliver_media_from_response(
        object(), "MEDIA:/tmp/speech.flac", event, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.flac",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """_deliver_media_from_response sends an unflagged OGG MEDIA tag via send_document."""

    def _sender(message_id):
        # Each sender reports success with a distinct message id.
        return AsyncMock(return_value=SendResult(success=True, message_id=message_id))

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_sender("voice"),
        send_document=_sender("doc"),
        send_image_file=_sender("image"),
        send_video=_sender("video"),
    )
    event = _event(thread_id="topic-1")

    # The method is called unbound; a bare object() is passed as the runner.
    await GatewayRunner._deliver_media_from_response(
        object(), "MEDIA:/tmp/speech.ogg", event, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "file_path": "/tmp/speech.ogg",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_document.assert_awaited_once_with(**expected_call)
    adapter.send_voice.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender():
    """MP3 on Telegram is handed to send_voice, not send_document.

    send_voice is the adapter entry point that routes MP3 to sendAudio
    internally; Telegram accepts MP3 for its audio player.
    """

    def _sender(message_id):
        # Each sender reports success with a distinct message id.
        return AsyncMock(return_value=SendResult(success=True, message_id=message_id))

    adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_sender("voice"),
        send_document=_sender("doc"),
        send_image_file=_sender("image"),
        send_video=_sender("video"),
    )
    event = _event(thread_id="topic-1")

    # The method is called unbound; a bare object() is passed as the runner.
    await GatewayRunner._deliver_media_from_response(
        object(), "MEDIA:/tmp/speech.mp3", event, adapter
    )

    expected_call = {
        "chat_id": "chat-1",
        "audio_path": "/tmp/speech.mp3",
        "metadata": {"thread_id": "topic-1"},
    }
    adapter.send_voice.assert_awaited_once_with(**expected_call)
    adapter.send_document.assert_not_awaited()
|
||||
Reference in New Issue
Block a user