feat(telegram): render markdown tables as row groups

This commit is contained in:
LeonSGP43
2026-04-23 14:48:03 +08:00
committed by Teknium
parent d8c5573ffe
commit a3b9343f08
2 changed files with 79 additions and 39 deletions

View File

@@ -123,12 +123,12 @@ def _strip_mdv2(text: str) -> str:
# ---------------------------------------------------------------------------
# Markdown table → code block conversion
# Markdown table → Telegram-friendly row groups
# ---------------------------------------------------------------------------
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
# so pipe tables render as noisy backslash-pipe text with no alignment.
# Wrapping the table in a fenced code block makes Telegram render it as
# monospace preformatted text with columns intact.
# Reformatting each row into a bold heading plus bullet list keeps the content
# readable on mobile clients while preserving the source data.
# Matches a GFM table delimiter row: optional outer pipes, cells containing
# only dashes (with optional leading/trailing colons for alignment) separated
@@ -145,13 +145,49 @@ def _is_table_row(line: str) -> bool:
return bool(stripped) and '|' in stripped
def _split_markdown_table_row(line: str) -> list[str]:
"""Split a simple GFM table row into stripped cell values."""
stripped = line.strip()
if stripped.startswith("|"):
stripped = stripped[1:]
if stripped.endswith("|"):
stripped = stripped[:-1]
return [cell.strip() for cell in stripped.split("|")]
def _render_table_block_for_telegram(table_block: list[str]) -> str:
"""Render a detected GFM table as Telegram-friendly row groups."""
if len(table_block) < 3:
return "\n".join(table_block)
headers = _split_markdown_table_row(table_block[0])
if len(headers) < 2:
return "\n".join(table_block)
rendered_rows: list[str] = []
for index, row in enumerate(table_block[2:], start=1):
cells = _split_markdown_table_row(row)
if len(cells) < len(headers):
cells.extend([""] * (len(headers) - len(cells)))
elif len(cells) > len(headers):
cells = cells[: len(headers)]
heading = next((cell for cell in cells if cell), f"Row {index}")
rendered_rows.append(f"**{heading}**")
rendered_rows.extend(
f"{header}: {value}" for header, value in zip(headers, cells)
)
return "\n\n".join(rendered_rows)
def _wrap_markdown_tables(text: str) -> str:
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
"""Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
Detected by a row containing '|' immediately followed by a delimiter
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
non-blank lines are consumed as the table body and included in the
wrapped block. Tables inside existing fenced code blocks are left
non-blank lines are consumed as the table body and rewritten as
per-row bullet groups. Tables inside existing fenced code blocks are left
alone.
"""
if '|' not in text or '-' not in text:
@@ -188,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
while j < len(lines) and _is_table_row(lines[j]):
table_block.append(lines[j])
j += 1
out.append('```')
out.extend(table_block)
out.append('```')
out.append(_render_table_block_for_telegram(table_block))
i = j
continue
@@ -2081,10 +2115,8 @@ class TelegramAdapter(BasePlatformAdapter):
text = content
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
# render tables natively, but fenced code blocks render as
# monospace preformatted text with columns intact. The wrapped
# tables then flow through step (1) below as protected regions.
# 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
# before the normal MarkdownV2 conversions run.
text = _wrap_markdown_tables(text)
# 1) Protect fenced code blocks (``` ... ```)

View File

@@ -546,11 +546,10 @@ class TestStripMdv2:
class TestWrapMarkdownTables:
"""_wrap_markdown_tables wraps GFM pipe tables in ``` fences so
Telegram renders them as monospace preformatted text instead of the
noisy backslash-pipe mess MarkdownV2 produces."""
"""_wrap_markdown_tables rewrites GFM pipe tables into Telegram-friendly
row groups instead of leaving noisy pipe syntax in the final message."""
def test_basic_table_wrapped(self):
def test_basic_table_rewritten_as_row_groups(self):
text = (
"Scores:\n\n"
"| Player | Score |\n"
@@ -560,20 +559,23 @@ class TestWrapMarkdownTables:
"\nEnd."
)
out = _wrap_markdown_tables(text)
# Table is now wrapped in a fence
assert "```\n| Player | Score |" in out
assert "| Bob | 120 |\n```" in out
assert "**Alice**" in out
assert "• Player: Alice" in out
assert "• Score: 150" in out
assert "**Bob**" in out
assert "• Score: 120" in out
# Surrounding prose is preserved
assert out.startswith("Scores:")
assert out.endswith("End.")
def test_bare_pipe_table_wrapped(self):
def test_bare_pipe_table_rewritten(self):
"""Tables without outer pipes (GFM allows this) are still detected."""
text = "head1 | head2\n--- | ---\na | b\nc | d"
out = _wrap_markdown_tables(text)
assert out.startswith("```\n")
assert out.rstrip().endswith("```")
assert "head1 | head2" in out
assert out.startswith("**a**")
assert "• head1: a" in out
assert " head2: b" in out
assert "**c**" in out
def test_alignment_separators(self):
"""Separator rows with :--- / ---: / :---: alignment markers match."""
@@ -583,9 +585,11 @@ class TestWrapMarkdownTables:
"| Ada | 30 | NYC |"
)
out = _wrap_markdown_tables(text)
assert out.count("```") == 2
assert "**Ada**" in out
assert "• Age: 30" in out
assert "• City: NYC" in out
def test_two_consecutive_tables_wrapped_separately(self):
def test_two_consecutive_tables_rewritten_separately(self):
text = (
"| A | B |\n"
"|---|---|\n"
@@ -596,8 +600,10 @@ class TestWrapMarkdownTables:
"| 9 | 8 |"
)
out = _wrap_markdown_tables(text)
# Four fences total — one opening + closing per table
assert out.count("```") == 4
assert out.count("**1**") == 1
assert out.count("**9**") == 1
assert "• A: 1" in out
assert "• X: 9" in out
def test_plain_text_with_pipes_not_wrapped(self):
"""A bare pipe in prose must NOT trigger wrapping."""
@@ -637,11 +643,10 @@ class TestWrapMarkdownTables:
class TestFormatMessageTables:
"""End-to-end: a pipe table passes through format_message with its
pipes and dashes left alone inside the fence, not mangled by MarkdownV2
escaping."""
"""End-to-end: pipe tables become readable Telegram-native text instead
of escaped pipe syntax or fenced code blocks."""
def test_table_rendered_as_code_block(self, adapter):
def test_table_rendered_as_bullets(self, adapter):
text = (
"Data:\n\n"
"| Col1 | Col2 |\n"
@@ -649,11 +654,11 @@ class TestFormatMessageTables:
"| A | B |\n"
)
out = adapter.format_message(text)
# Pipes inside the fenced block are NOT escaped
assert "```\n| Col1 | Col2 |" in out
assert "\\|" not in out.split("```")[1]
# Dashes in separator not escaped inside fence
assert "\\-" not in out.split("```")[1]
assert "*A*" in out
assert "• Col1: A" in out
assert "• Col2: B" in out
assert "```" not in out
assert "\\|" not in out
def test_text_after_table_still_formatted(self, adapter):
text = (
@@ -668,6 +673,8 @@ class TestFormatMessageTables:
assert "*work*" in out
# Exclamation outside fence is escaped
assert "\\!" in out
assert "*1*" in out
assert "• A: 1" in out
def test_multiple_tables_in_single_message(self, adapter):
text = (
@@ -682,8 +689,9 @@ class TestFormatMessageTables:
"| 9 | 8 |\n"
)
out = adapter.format_message(text)
# Two separate fenced blocks in the output
assert out.count("```") == 4
assert out.count("*1*") == 1
assert out.count("*9*") == 1
assert "• X: 9" in out
@pytest.mark.asyncio