feat(telegram): render markdown tables as row groups
This commit is contained in:
@@ -123,12 +123,12 @@ def _strip_mdv2(text: str) -> str:
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Markdown table → code block conversion
|
||||
# Markdown table → Telegram-friendly row groups
|
||||
# ---------------------------------------------------------------------------
|
||||
# Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
|
||||
# so pipe tables render as noisy backslash-pipe text with no alignment.
|
||||
# Wrapping the table in a fenced code block makes Telegram render it as
|
||||
# monospace preformatted text with columns intact.
|
||||
# Reformatting each row into a bold heading plus bullet list keeps the content
|
||||
# readable on mobile clients while preserving the source data.
|
||||
|
||||
# Matches a GFM table delimiter row: optional outer pipes, cells containing
|
||||
# only dashes (with optional leading/trailing colons for alignment) separated
|
||||
@@ -145,13 +145,49 @@ def _is_table_row(line: str) -> bool:
|
||||
return bool(stripped) and '|' in stripped
|
||||
|
||||
|
||||
def _split_markdown_table_row(line: str) -> list[str]:
|
||||
"""Split a simple GFM table row into stripped cell values."""
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("|"):
|
||||
stripped = stripped[1:]
|
||||
if stripped.endswith("|"):
|
||||
stripped = stripped[:-1]
|
||||
return [cell.strip() for cell in stripped.split("|")]
|
||||
|
||||
|
||||
def _render_table_block_for_telegram(table_block: list[str]) -> str:
    """Turn the lines of a detected GFM table into heading-plus-bullet text.

    ``table_block[0]`` is read as the header row and ``table_block[1]`` is
    skipped (it is the delimiter row); every later line is a body row. Each
    body row is padded with empty cells or truncated to the header width,
    then emitted as a ``**heading**`` line followed by one
    ``• header: value`` line per column. The heading is the row's first
    non-empty cell, or ``Row N`` (1-based) when every cell is empty.

    Args:
        table_block: Raw table lines: header, delimiter, then body rows.

    Returns:
        All emitted lines joined with a blank line between each of them.
        When the block has fewer than three lines, or the header has fewer
        than two cells, the original lines are returned joined by newlines.
    """
    untouched = "\n".join(table_block)
    if len(table_block) < 3:
        return untouched

    column_names = _split_markdown_table_row(table_block[0])
    width = len(column_names)
    if width < 2:
        return untouched

    pieces: list[str] = []
    for row_number, raw_row in enumerate(table_block[2:], start=1):
        # Normalize the row to exactly one value per header column.
        values = _split_markdown_table_row(raw_row)[:width]
        values += [""] * (width - len(values))

        # The first non-empty cell names the group; fall back to a counter.
        title = f"Row {row_number}"
        for value in values:
            if value:
                title = value
                break

        pieces.append(f"**{title}**")
        for name, value in zip(column_names, values):
            pieces.append(f"• {name}: {value}")

    return "\n\n".join(pieces)
|
||||
|
||||
|
||||
def _wrap_markdown_tables(text: str) -> str:
|
||||
"""Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
|
||||
"""Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
|
||||
|
||||
Detected by a row containing '|' immediately followed by a delimiter
|
||||
row matching :data:`_TABLE_SEPARATOR_RE`. Subsequent pipe-containing
|
||||
non-blank lines are consumed as the table body and included in the
|
||||
wrapped block. Tables inside existing fenced code blocks are left
|
||||
non-blank lines are consumed as the table body and rewritten as
|
||||
per-row bullet groups. Tables inside existing fenced code blocks are left
|
||||
alone.
|
||||
"""
|
||||
if '|' not in text or '-' not in text:
|
||||
@@ -188,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
|
||||
while j < len(lines) and _is_table_row(lines[j]):
|
||||
table_block.append(lines[j])
|
||||
j += 1
|
||||
out.append('```')
|
||||
out.extend(table_block)
|
||||
out.append('```')
|
||||
out.append(_render_table_block_for_telegram(table_block))
|
||||
i = j
|
||||
continue
|
||||
|
||||
@@ -2081,10 +2115,8 @@ class TelegramAdapter(BasePlatformAdapter):
|
||||
|
||||
text = content
|
||||
|
||||
# 0) Pre-wrap GFM-style pipe tables in ``` fences. Telegram can't
|
||||
# render tables natively, but fenced code blocks render as
|
||||
# monospace preformatted text with columns intact. The wrapped
|
||||
# tables then flow through step (1) below as protected regions.
|
||||
# 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
|
||||
# before the normal MarkdownV2 conversions run.
|
||||
text = _wrap_markdown_tables(text)
|
||||
|
||||
# 1) Protect fenced code blocks (``` ... ```)
|
||||
|
||||
@@ -546,11 +546,10 @@ class TestStripMdv2:
|
||||
|
||||
|
||||
class TestWrapMarkdownTables:
|
||||
"""_wrap_markdown_tables wraps GFM pipe tables in ``` fences so
|
||||
Telegram renders them as monospace preformatted text instead of the
|
||||
noisy backslash-pipe mess MarkdownV2 produces."""
|
||||
"""_wrap_markdown_tables rewrites GFM pipe tables into Telegram-friendly
|
||||
row groups instead of leaving noisy pipe syntax in the final message."""
|
||||
|
||||
def test_basic_table_wrapped(self):
|
||||
def test_basic_table_rewritten_as_row_groups(self):
|
||||
text = (
|
||||
"Scores:\n\n"
|
||||
"| Player | Score |\n"
|
||||
@@ -560,20 +559,23 @@ class TestWrapMarkdownTables:
|
||||
"\nEnd."
|
||||
)
|
||||
out = _wrap_markdown_tables(text)
|
||||
# Table is now wrapped in a fence
|
||||
assert "```\n| Player | Score |" in out
|
||||
assert "| Bob | 120 |\n```" in out
|
||||
assert "**Alice**" in out
|
||||
assert "• Player: Alice" in out
|
||||
assert "• Score: 150" in out
|
||||
assert "**Bob**" in out
|
||||
assert "• Score: 120" in out
|
||||
# Surrounding prose is preserved
|
||||
assert out.startswith("Scores:")
|
||||
assert out.endswith("End.")
|
||||
|
||||
def test_bare_pipe_table_wrapped(self):
|
||||
def test_bare_pipe_table_rewritten(self):
|
||||
"""Tables without outer pipes (GFM allows this) are still detected."""
|
||||
text = "head1 | head2\n--- | ---\na | b\nc | d"
|
||||
out = _wrap_markdown_tables(text)
|
||||
assert out.startswith("```\n")
|
||||
assert out.rstrip().endswith("```")
|
||||
assert "head1 | head2" in out
|
||||
assert out.startswith("**a**")
|
||||
assert "• head1: a" in out
|
||||
assert "• head2: b" in out
|
||||
assert "**c**" in out
|
||||
|
||||
def test_alignment_separators(self):
|
||||
"""Separator rows with :--- / ---: / :---: alignment markers match."""
|
||||
@@ -583,9 +585,11 @@ class TestWrapMarkdownTables:
|
||||
"| Ada | 30 | NYC |"
|
||||
)
|
||||
out = _wrap_markdown_tables(text)
|
||||
assert out.count("```") == 2
|
||||
assert "**Ada**" in out
|
||||
assert "• Age: 30" in out
|
||||
assert "• City: NYC" in out
|
||||
|
||||
def test_two_consecutive_tables_wrapped_separately(self):
|
||||
def test_two_consecutive_tables_rewritten_separately(self):
|
||||
text = (
|
||||
"| A | B |\n"
|
||||
"|---|---|\n"
|
||||
@@ -596,8 +600,10 @@ class TestWrapMarkdownTables:
|
||||
"| 9 | 8 |"
|
||||
)
|
||||
out = _wrap_markdown_tables(text)
|
||||
# Four fences total — one opening + closing per table
|
||||
assert out.count("```") == 4
|
||||
assert out.count("**1**") == 1
|
||||
assert out.count("**9**") == 1
|
||||
assert "• A: 1" in out
|
||||
assert "• X: 9" in out
|
||||
|
||||
def test_plain_text_with_pipes_not_wrapped(self):
|
||||
"""A bare pipe in prose must NOT trigger wrapping."""
|
||||
@@ -637,11 +643,10 @@ class TestWrapMarkdownTables:
|
||||
|
||||
|
||||
class TestFormatMessageTables:
|
||||
"""End-to-end: a pipe table passes through format_message with its
|
||||
pipes and dashes left alone inside the fence, not mangled by MarkdownV2
|
||||
escaping."""
|
||||
"""End-to-end: pipe tables become readable Telegram-native text instead
|
||||
of escaped pipe syntax or fenced code blocks."""
|
||||
|
||||
def test_table_rendered_as_code_block(self, adapter):
|
||||
def test_table_rendered_as_bullets(self, adapter):
|
||||
text = (
|
||||
"Data:\n\n"
|
||||
"| Col1 | Col2 |\n"
|
||||
@@ -649,11 +654,11 @@ class TestFormatMessageTables:
|
||||
"| A | B |\n"
|
||||
)
|
||||
out = adapter.format_message(text)
|
||||
# Pipes inside the fenced block are NOT escaped
|
||||
assert "```\n| Col1 | Col2 |" in out
|
||||
assert "\\|" not in out.split("```")[1]
|
||||
# Dashes in separator not escaped inside fence
|
||||
assert "\\-" not in out.split("```")[1]
|
||||
assert "*A*" in out
|
||||
assert "• Col1: A" in out
|
||||
assert "• Col2: B" in out
|
||||
assert "```" not in out
|
||||
assert "\\|" not in out
|
||||
|
||||
def test_text_after_table_still_formatted(self, adapter):
|
||||
text = (
|
||||
@@ -668,6 +673,8 @@ class TestFormatMessageTables:
|
||||
assert "*work*" in out
|
||||
# Exclamation outside fence is escaped
|
||||
assert "\\!" in out
|
||||
assert "*1*" in out
|
||||
assert "• A: 1" in out
|
||||
|
||||
def test_multiple_tables_in_single_message(self, adapter):
|
||||
text = (
|
||||
@@ -682,8 +689,9 @@ class TestFormatMessageTables:
|
||||
"| 9 | 8 |\n"
|
||||
)
|
||||
out = adapter.format_message(text)
|
||||
# Two separate fenced blocks in the output
|
||||
assert out.count("```") == 4
|
||||
assert out.count("*1*") == 1
|
||||
assert out.count("*9*") == 1
|
||||
assert "• X: 9" in out
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
Reference in New Issue
Block a user