feat(gateway): centralize audio routing + FLAC support + Telegram doc fallback (#17833)

Extracted from PR #17211 (@versun) so it can land independently of the
local_command TTS provider redesign.

- Add should_send_media_as_audio(platform, ext, is_voice) in
  gateway/platforms/base.py; single source of truth for audio routing.
- Add .flac to recognized audio extensions (MEDIA regex, weixin audio
  set, send_message audio set).
- Telegram send_voice() now falls back to send_document for formats
  Telegram's Bot API can't play natively (.wav, .flac, ...) instead of
  raising; MP3/M4A still go to sendAudio, Opus/OGG still go to sendVoice.
- Route _send_telegram() in send_message_tool through a narrower
  _TELEGRAM_SEND_AUDIO_EXTS = {.mp3, .m4a} set.
- cron.scheduler._send_media_via_adapter now delegates the audio
  decision to should_send_media_as_audio so it matches the gateway.
- Update the cron live-adapter ogg test to flag [[audio_as_voice]] so
  it still routes to sendVoice under the new Telegram-specific policy.
- Tests: unit coverage for should_send_media_as_audio across platforms,
  end-to-end MEDIA routing via _process_message_background and
  GatewayRunner._deliver_media_from_response, TelegramAdapter.send_voice
  fallback for FLAC/WAV.

Co-authored-by: Versun <me+github7604@versun.org>
This commit is contained in:
Teknium
2026-04-30 01:32:31 -07:00
committed by GitHub
parent 26787ce638
commit aa7bf329bc
10 changed files with 417 additions and 19 deletions

View File

@@ -551,14 +551,14 @@ class TestDeliverResultWrapping:
patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
_deliver_result(
job,
"MEDIA:/tmp/voice.ogg",
"[[audio_as_voice]]\nMEDIA:/tmp/voice.ogg",
adapters={Platform.TELEGRAM: adapter},
loop=loop,
)
# Text send should NOT be called (no text after stripping MEDIA tag)
adapter.send.assert_not_called()
# Audio should still be delivered
# Audio should still be delivered as a voice bubble
adapter.send_voice.assert_called_once()
def test_live_adapter_sends_cleaned_text_not_raw(self):

View File

@@ -323,6 +323,55 @@ class TestExtractMedia:
assert "Here" in cleaned
assert "After" in cleaned
def test_media_tag_supports_unquoted_flac_paths_with_spaces(self):
    """An unquoted MEDIA path that contains a space and ends in .flac is extracted whole."""
    raw_reply = "MEDIA:/tmp/Jane Doe/speech.flac"

    extracted, leftover_text = BasePlatformAdapter.extract_media(raw_reply)

    # The whole path (including the space) must survive as a single entry.
    # NOTE(review): the second tuple element is presumably the voice flag — confirm
    # against extract_media.
    assert extracted == [("/tmp/Jane Doe/speech.flac", False)]
    # Nothing but the MEDIA tag was in the input, so no text remains.
    assert leftover_text == ""
# ---------------------------------------------------------------------------
# should_send_media_as_audio
# ---------------------------------------------------------------------------
class TestShouldSendMediaAsAudio:
    """Audio-routing policy shared by gateway + scheduler + send_message."""

    def test_unknown_extension_returns_false(self):
        """Non-audio extensions are never routed as audio, with or without a platform."""
        from gateway.platforms.base import should_send_media_as_audio

        for platform, ext in ((None, ".png"), ("telegram", ".pdf")):
            assert should_send_media_as_audio(platform, ext) is False

    def test_non_telegram_platforms_route_all_audio(self):
        """Discord and Slack treat every recognized audio extension as audio."""
        from gateway.platforms.base import should_send_media_as_audio

        audio_exts = (".mp3", ".m4a", ".wav", ".flac", ".ogg", ".opus")
        for ext in audio_exts:
            for platform in ("discord", "slack"):
                assert should_send_media_as_audio(platform, ext) is True

    def test_telegram_mp3_and_m4a_route_to_audio(self):
        """On Telegram, .mp3 and .m4a are routed as audio."""
        from gateway.platforms.base import should_send_media_as_audio

        for ext in (".mp3", ".m4a"):
            assert should_send_media_as_audio("telegram", ext) is True

    def test_telegram_wav_and_flac_fall_through_to_document(self):
        """On Telegram, .wav and .flac are not routed as audio."""
        from gateway.platforms.base import should_send_media_as_audio

        for ext in (".wav", ".flac"):
            assert should_send_media_as_audio("telegram", ext) is False

    def test_telegram_ogg_opus_only_when_voice_flagged(self):
        """On Telegram, .ogg/.opus count as audio only when is_voice=True is passed."""
        from gateway.platforms.base import should_send_media_as_audio

        voice_exts = (".ogg", ".opus")
        # Explicitly flagged as voice: routed as audio.
        for ext in voice_exts:
            assert should_send_media_as_audio("telegram", ext, is_voice=True) is True
        # Same extensions without the flag (default is_voice): not routed as audio.
        for ext in voice_exts:
            assert should_send_media_as_audio("telegram", ext) is False

    def test_accepts_platform_enum(self):
        """Platform enum members are accepted in place of platform name strings."""
        from gateway.config import Platform
        from gateway.platforms.base import should_send_media_as_audio

        expectations = (
            (Platform.TELEGRAM, ".mp3", True),
            (Platform.TELEGRAM, ".flac", False),
            (Platform.DISCORD, ".flac", True),
        )
        for platform, ext, expected in expectations:
            assert should_send_media_as_audio(platform, ext) is expected
# ---------------------------------------------------------------------------
# truncate_message

View File

@@ -453,6 +453,87 @@ class TestMediaGroups:
adapter.handle_message.assert_not_awaited()
# ---------------------------------------------------------------------------
# TestSendVoice — outbound audio delivery
# ---------------------------------------------------------------------------
class TestSendVoice:
    """Tests for TelegramAdapter.send_voice() routing across audio formats.

    Every test stubs all three bot senders (``send_voice``, ``send_audio``,
    ``send_document``) and asserts that only the expected one is awaited, so
    a regression that delivers the same file twice is caught.
    """

    @pytest.fixture()
    def connected_adapter(self, adapter):
        """Adapter with a mock bot attached."""
        bot = AsyncMock()
        adapter._bot = bot
        return adapter

    @pytest.mark.asyncio
    async def test_flac_falls_back_to_document(self, connected_adapter, tmp_path):
        """Telegram sendAudio does not accept FLAC — must fall back to sendDocument."""
        audio_file = tmp_path / "clip.flac"
        audio_file.write_bytes(b"fLaC" + b"\x00" * 32)

        mock_msg = MagicMock()
        mock_msg.message_id = 101
        connected_adapter._bot.send_voice = AsyncMock()
        connected_adapter._bot.send_audio = AsyncMock()
        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
            caption="Audio",
        )

        assert result.success is True
        assert result.message_id == "101"
        connected_adapter._bot.send_document.assert_awaited_once()
        connected_adapter._bot.send_audio.assert_not_awaited()
        connected_adapter._bot.send_voice.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_wav_falls_back_to_document(self, connected_adapter, tmp_path):
        """Telegram sendAudio does not accept WAV — must fall back to sendDocument."""
        audio_file = tmp_path / "clip.wav"
        audio_file.write_bytes(b"RIFF" + b"\x00" * 32)

        mock_msg = MagicMock()
        mock_msg.message_id = 102
        connected_adapter._bot.send_voice = AsyncMock()
        connected_adapter._bot.send_audio = AsyncMock()
        connected_adapter._bot.send_document = AsyncMock(return_value=mock_msg)

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
        )

        assert result.success is True
        # Same document-fallback path as FLAC: the id of the sent document
        # message is surfaced as a string.
        assert result.message_id == "102"
        connected_adapter._bot.send_document.assert_awaited_once()
        connected_adapter._bot.send_audio.assert_not_awaited()
        # The file must not additionally go out as a voice bubble.
        connected_adapter._bot.send_voice.assert_not_awaited()

    @pytest.mark.asyncio
    async def test_mp3_routes_to_send_audio(self, connected_adapter, tmp_path):
        """MP3 is Telegram-sendAudio-compatible."""
        audio_file = tmp_path / "clip.mp3"
        audio_file.write_bytes(b"ID3" + b"\x00" * 32)

        mock_msg = MagicMock()
        mock_msg.message_id = 103
        connected_adapter._bot.send_voice = AsyncMock()
        connected_adapter._bot.send_audio = AsyncMock(return_value=mock_msg)
        connected_adapter._bot.send_document = AsyncMock()

        result = await connected_adapter.send_voice(
            chat_id="12345",
            audio_path=str(audio_file),
        )

        assert result.success is True
        connected_adapter._bot.send_audio.assert_awaited_once()
        connected_adapter._bot.send_document.assert_not_awaited()
        # MP3 goes to sendAudio only; it must not also be sent as a voice bubble.
        connected_adapter._bot.send_voice.assert_not_awaited()
# ---------------------------------------------------------------------------
# TestSendDocument — outbound file attachment delivery
# ---------------------------------------------------------------------------

View File

@@ -0,0 +1,195 @@
"""
Tests for cross-platform audio/voice media routing.
These tests pin the expected delivery path for audio media files on
Telegram (where Bot-API sendAudio only accepts MP3/M4A and .ogg/.opus
only renders as a voice bubble when explicitly flagged), both through
``BasePlatformAdapter._process_message_background`` and through
``GatewayRunner._deliver_media_from_response``.
"""
from types import SimpleNamespace
from unittest.mock import AsyncMock
import pytest
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType, SendResult
from gateway.run import GatewayRunner
from gateway.session import SessionSource, build_session_key
class _MediaRoutingAdapter(BasePlatformAdapter):
    """Minimal concrete adapter registered as Telegram, used to exercise media routing.

    Implements only ``connect``, ``disconnect``, ``send`` and ``get_chat_info``
    with canned results; tests replace the media senders with mocks.
    """

    def __init__(self):
        telegram_config = PlatformConfig(enabled=True, token="test")
        super().__init__(telegram_config, Platform.TELEGRAM)

    async def connect(self):
        """Report a successful connection without doing any I/O."""
        return True

    async def disconnect(self):
        """No-op: there is nothing to tear down."""
        return None

    async def send(self, chat_id, content=None, **kwargs):
        """Report success for any text send with the fixed message id "text"."""
        return SendResult(success=True, message_id="text")

    async def get_chat_info(self, chat_id):
        """Describe every chat as a DM with the given id."""
        return {"id": chat_id, "type": "dm"}
def _event(thread_id=None):
    """Build a Telegram DM text event for chat "chat-1", optionally inside a thread.

    Args:
        thread_id: Thread/topic id to record on the session source, or None.

    Returns:
        A MessageEvent of type TEXT with message id "msg-1".
    """
    return MessageEvent(
        text="make speech",
        message_type=MessageType.TEXT,
        source=SessionSource(
            platform=Platform.TELEGRAM,
            chat_id="chat-1",
            chat_type="dm",
            thread_id=thread_id,
        ),
        message_id="msg-1",
    )
@pytest.mark.asyncio
async def test_base_adapter_routes_telegram_flac_media_tag_to_document_sender():
    """A FLAC MEDIA tag handled by the base adapter goes to send_document, not send_voice."""
    routing_adapter = _MediaRoutingAdapter()
    incoming = _event()

    # The message handler replies with nothing but a FLAC media tag.
    routing_adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.flac")
    voice_ok = SendResult(success=True, message_id="voice")
    doc_ok = SendResult(success=True, message_id="doc")
    routing_adapter.send_voice = AsyncMock(return_value=voice_ok)
    routing_adapter.send_document = AsyncMock(return_value=doc_ok)

    session_key = build_session_key(incoming.source)
    await routing_adapter._process_message_background(incoming, session_key)

    routing_adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path="/tmp/speech.flac",
        metadata=None,
    )
    routing_adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """An OGG MEDIA tag without the voice directive goes to send_document, not send_voice."""
    routing_adapter = _MediaRoutingAdapter()
    incoming = _event()

    # Note: no [[audio_as_voice]] directive precedes the tag.
    routing_adapter._message_handler = AsyncMock(return_value="MEDIA:/tmp/speech.ogg")
    for sender_name, result_id in (("send_voice", "voice"), ("send_document", "doc")):
        stub = AsyncMock(return_value=SendResult(success=True, message_id=result_id))
        setattr(routing_adapter, sender_name, stub)

    session_key = build_session_key(incoming.source)
    await routing_adapter._process_message_background(incoming, session_key)

    routing_adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path="/tmp/speech.ogg",
        metadata=None,
    )
    routing_adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_base_adapter_routes_voice_tagged_telegram_ogg_media_tag_to_voice_sender():
    """An OGG MEDIA tag preceded by [[audio_as_voice]] goes to send_voice, not send_document."""
    routing_adapter = _MediaRoutingAdapter()
    incoming = _event()

    # The handler's reply carries the voice directive on the line before the tag.
    voice_tagged_reply = "[[audio_as_voice]]\nMEDIA:/tmp/speech.ogg"
    routing_adapter._message_handler = AsyncMock(return_value=voice_tagged_reply)
    voice_ok = SendResult(success=True, message_id="voice")
    doc_ok = SendResult(success=True, message_id="doc")
    routing_adapter.send_voice = AsyncMock(return_value=voice_ok)
    routing_adapter.send_document = AsyncMock(return_value=doc_ok)

    session_key = build_session_key(incoming.source)
    await routing_adapter._process_message_background(incoming, session_key)

    routing_adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1",
        audio_path="/tmp/speech.ogg",
        metadata=None,
    )
    routing_adapter.send_document.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_flac_media_tag_to_document_sender():
    """_deliver_media_from_response sends a FLAC MEDIA tag via send_document with thread metadata."""

    def _ok_sender(result_id):
        # Async stub that always reports success with the given message id.
        return AsyncMock(return_value=SendResult(success=True, message_id=result_id))

    threaded_event = _event(thread_id="topic-1")
    # Stand-in adapter: real extractors from the base class, mocked senders.
    stub_adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok_sender("voice"),
        send_document=_ok_sender("doc"),
        send_image_file=_ok_sender("image"),
        send_video=_ok_sender("video"),
    )

    # Called unbound with a plain object() in place of the runner instance.
    await GatewayRunner._deliver_media_from_response(
        object(),
        "MEDIA:/tmp/speech.flac",
        threaded_event,
        stub_adapter,
    )

    stub_adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path="/tmp/speech.flac",
        metadata={"thread_id": "topic-1"},
    )
    stub_adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_non_voice_telegram_ogg_media_tag_to_document_sender():
    """_deliver_media_from_response sends an unflagged OGG MEDIA tag via send_document."""
    threaded_event = _event(thread_id="topic-1")

    # Mocked senders, each reporting success with a distinguishing message id.
    sender_stubs = {
        "send_voice": AsyncMock(return_value=SendResult(success=True, message_id="voice")),
        "send_document": AsyncMock(return_value=SendResult(success=True, message_id="doc")),
        "send_image_file": AsyncMock(return_value=SendResult(success=True, message_id="image")),
        "send_video": AsyncMock(return_value=SendResult(success=True, message_id="video")),
    }
    # Stand-in adapter: real extractors from the base class plus the stubs above.
    stub_adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        **sender_stubs,
    )

    # Called unbound with a plain object() in place of the runner instance;
    # the response has no [[audio_as_voice]] directive.
    await GatewayRunner._deliver_media_from_response(
        object(),
        "MEDIA:/tmp/speech.ogg",
        threaded_event,
        stub_adapter,
    )

    stub_adapter.send_document.assert_awaited_once_with(
        chat_id="chat-1",
        file_path="/tmp/speech.ogg",
        metadata={"thread_id": "topic-1"},
    )
    stub_adapter.send_voice.assert_not_awaited()
@pytest.mark.asyncio
async def test_streaming_delivery_routes_telegram_mp3_media_tag_to_voice_sender():
    """MP3 audio on Telegram must go through send_voice (which routes to
    sendAudio internally); Telegram accepts MP3 for the audio player."""

    def _ok_sender(result_id):
        # Async stub that always reports success with the given message id.
        return AsyncMock(return_value=SendResult(success=True, message_id=result_id))

    threaded_event = _event(thread_id="topic-1")
    # Stand-in adapter: real extractors from the base class, mocked senders.
    stub_adapter = SimpleNamespace(
        name="test",
        extract_media=BasePlatformAdapter.extract_media,
        extract_images=BasePlatformAdapter.extract_images,
        extract_local_files=BasePlatformAdapter.extract_local_files,
        send_voice=_ok_sender("voice"),
        send_document=_ok_sender("doc"),
        send_image_file=_ok_sender("image"),
        send_video=_ok_sender("video"),
    )

    # Called unbound with a plain object() in place of the runner instance.
    await GatewayRunner._deliver_media_from_response(
        object(),
        "MEDIA:/tmp/speech.mp3",
        threaded_event,
        stub_adapter,
    )

    stub_adapter.send_voice.assert_awaited_once_with(
        chat_id="chat-1",
        audio_path="/tmp/speech.mp3",
        metadata={"thread_id": "topic-1"},
    )
    stub_adapter.send_document.assert_not_awaited()