From a3b9343f0819ec68224f4902b0af90d3f8020c98 Mon Sep 17 00:00:00 2001 From: LeonSGP43 <154585401+LeonSGP43@users.noreply.github.com> Date: Thu, 23 Apr 2026 14:48:03 +0800 Subject: [PATCH] feat(telegram): render markdown tables as row groups --- gateway/platforms/telegram.py | 58 ++++++++++++++++++++------ tests/gateway/test_telegram_format.py | 60 +++++++++++++++------------ 2 files changed, 79 insertions(+), 39 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index ce536f515..b08971469 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -123,12 +123,12 @@ def _strip_mdv2(text: str) -> str: # --------------------------------------------------------------------------- -# Markdown table → code block conversion +# Markdown table → Telegram-friendly row groups # --------------------------------------------------------------------------- # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal, # so pipe tables render as noisy backslash-pipe text with no alignment. -# Wrapping the table in a fenced code block makes Telegram render it as -# monospace preformatted text with columns intact. +# Reformatting each row into a bold heading plus bullet list keeps the content +# readable on mobile clients while preserving the source data. 
# Matches a GFM table delimiter row: optional outer pipes, cells containing # only dashes (with optional leading/trailing colons for alignment) separated @@ -145,13 +145,49 @@ def _is_table_row(line: str) -> bool: return bool(stripped) and '|' in stripped +def _split_markdown_table_row(line: str) -> list[str]: + """Split a simple GFM table row into stripped cell values.""" + stripped = line.strip() + if stripped.startswith("|"): + stripped = stripped[1:] + if stripped.endswith("|"): + stripped = stripped[:-1] + return [cell.strip() for cell in stripped.split("|")] + + +def _render_table_block_for_telegram(table_block: list[str]) -> str: + """Render a detected GFM table as Telegram-friendly row groups.""" + if len(table_block) < 3: + return "\n".join(table_block) + + headers = _split_markdown_table_row(table_block[0]) + if len(headers) < 2: + return "\n".join(table_block) + + rendered_rows: list[str] = [] + for index, row in enumerate(table_block[2:], start=1): + cells = _split_markdown_table_row(row) + if len(cells) < len(headers): + cells.extend([""] * (len(headers) - len(cells))) + elif len(cells) > len(headers): + cells = cells[: len(headers)] + + heading = next((cell for cell in cells if cell), f"Row {index}") + rendered_rows.append(f"**{heading}**") + rendered_rows.extend( + f"• {header}: {value}" for header, value in zip(headers, cells) + ) + + return "\n\n".join(rendered_rows) + + def _wrap_markdown_tables(text: str) -> str: - """Wrap GFM-style pipe tables in ``` fences so Telegram renders them. + """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups. Detected by a row containing '|' immediately followed by a delimiter row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing - non-blank lines are consumed as the table body and included in the - wrapped block. Tables inside existing fenced code blocks are left + non-blank lines are consumed as the table body and rewritten as + per-row bullet groups. 
Tables inside existing fenced code blocks are left alone. """ if '|' not in text or '-' not in text: @@ -188,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str: while j < len(lines) and _is_table_row(lines[j]): table_block.append(lines[j]) j += 1 - out.append('```') - out.extend(table_block) - out.append('```') + out.append(_render_table_block_for_telegram(table_block)) i = j continue @@ -2081,10 +2115,8 @@ class TelegramAdapter(BasePlatformAdapter): text = content - # 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't - # render tables natively, but fenced code blocks render as - # monospace preformatted text with columns intact. The wrapped - # tables then flow through step (1) below as protected regions. + # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups + # before the normal MarkdownV2 conversions run. text = _wrap_markdown_tables(text) # 1) Protect fenced code blocks (``` ... ```) diff --git a/tests/gateway/test_telegram_format.py b/tests/gateway/test_telegram_format.py index ce7e02a47..594e0bd01 100644 --- a/tests/gateway/test_telegram_format.py +++ b/tests/gateway/test_telegram_format.py @@ -546,11 +546,10 @@ class TestStripMdv2: class TestWrapMarkdownTables: - """_wrap_markdown_tables wraps GFM pipe tables in ``` fences so - Telegram renders them as monospace preformatted text instead of the - noisy backslash-pipe mess MarkdownV2 produces.""" + """_wrap_markdown_tables rewrites GFM pipe tables into Telegram-friendly + row groups instead of leaving noisy pipe syntax in the final message.""" - def test_basic_table_wrapped(self): + def test_basic_table_rewritten_as_row_groups(self): text = ( "Scores:\n\n" "| Player | Score |\n" @@ -560,20 +559,23 @@ class TestWrapMarkdownTables: "\nEnd." 
) out = _wrap_markdown_tables(text) - # Table is now wrapped in a fence - assert "```\n| Player | Score |" in out - assert "| Bob | 120 |\n```" in out + assert "**Alice**" in out + assert "• Player: Alice" in out + assert "• Score: 150" in out + assert "**Bob**" in out + assert "• Score: 120" in out # Surrounding prose is preserved assert out.startswith("Scores:") assert out.endswith("End.") - def test_bare_pipe_table_wrapped(self): + def test_bare_pipe_table_rewritten(self): """Tables without outer pipes (GFM allows this) are still detected.""" text = "head1 | head2\n--- | ---\na | b\nc | d" out = _wrap_markdown_tables(text) - assert out.startswith("```\n") - assert out.rstrip().endswith("```") - assert "head1 | head2" in out + assert out.startswith("**a**") + assert "• head1: a" in out + assert "• head2: b" in out + assert "**c**" in out def test_alignment_separators(self): """Separator rows with :--- / ---: / :---: alignment markers match.""" @@ -583,9 +585,11 @@ class TestWrapMarkdownTables: "| Ada | 30 | NYC |" ) out = _wrap_markdown_tables(text) - assert out.count("```") == 2 + assert "**Ada**" in out + assert "• Age: 30" in out + assert "• City: NYC" in out - def test_two_consecutive_tables_wrapped_separately(self): + def test_two_consecutive_tables_rewritten_separately(self): text = ( "| A | B |\n" "|---|---|\n" @@ -596,8 +600,10 @@ class TestWrapMarkdownTables: "| 9 | 8 |" ) out = _wrap_markdown_tables(text) - # Four fences total — one opening + closing per table - assert out.count("```") == 4 + assert out.count("**1**") == 1 + assert out.count("**9**") == 1 + assert "• A: 1" in out + assert "• X: 9" in out def test_plain_text_with_pipes_not_wrapped(self): """A bare pipe in prose must NOT trigger wrapping.""" @@ -637,11 +643,10 @@ class TestWrapMarkdownTables: class TestFormatMessageTables: - """End-to-end: a pipe table passes through format_message with its - pipes and dashes left alone inside the fence, not mangled by MarkdownV2 - escaping.""" + 
"""End-to-end: pipe tables become readable Telegram-native text instead + of escaped pipe syntax or fenced code blocks.""" - def test_table_rendered_as_code_block(self, adapter): + def test_table_rendered_as_bullets(self, adapter): text = ( "Data:\n\n" "| Col1 | Col2 |\n" @@ -649,11 +654,11 @@ class TestFormatMessageTables: "| A | B |\n" ) out = adapter.format_message(text) - # Pipes inside the fenced block are NOT escaped - assert "```\n| Col1 | Col2 |" in out - assert "\\|" not in out.split("```")[1] - # Dashes in separator not escaped inside fence - assert "\\-" not in out.split("```")[1] + assert "*A*" in out + assert "• Col1: A" in out + assert "• Col2: B" in out + assert "```" not in out + assert "\\|" not in out def test_text_after_table_still_formatted(self, adapter): text = ( @@ -668,6 +673,8 @@ class TestFormatMessageTables: assert "*work*" in out # Exclamation outside fence is escaped assert "\\!" in out + assert "*1*" in out + assert "• A: 1" in out def test_multiple_tables_in_single_message(self, adapter): text = ( @@ -682,8 +689,9 @@ class TestFormatMessageTables: "| 9 | 8 |\n" ) out = adapter.format_message(text) - # Two separate fenced blocks in the output - assert out.count("```") == 4 + assert out.count("*1*") == 1 + assert out.count("*9*") == 1 + assert "• X: 9" in out @pytest.mark.asyncio