fix(slack): extract rich_text quotes/lists and link unfurl previews

Slack's modern composer sends messages with a 'blocks' array that
contains rich_text elements. When a user forwards or quotes another
message, the quoted content shows up in the rich_text_quote children
of that array — and is NOT included in the plain 'text' field. The
agent saw only the lossy plain text and was blind to forwarded /
quoted content. Same story for link unfurl previews (Notion, docs,
GitHub, etc.) which Slack puts in the 'attachments' array.

Two fixes in the inbound handler:

1. _extract_text_from_slack_blocks walks rich_text / rich_text_quote /
   rich_text_list / rich_text_preformatted trees and renders readable
   text ('> quoted', '• bullet', code fences), dedupes against the
   plain text field, and appends the extracted content so the agent
   sees everything.

2. Link unfurl / attachment preview extraction reads title, url,
   body, and footer from the 'attachments' array and appends a
   '📎 [title](url)\n   body\n   _footer_' section per preview.
   Skips is_msg_unfurl to avoid echoing our own Slack replies back.

Routing is careful not to trust augmented text: mention gating
(is_mentioned) and slash-command detection both run against the
original 'text' field, so forwarded content containing '<@bot>' or
'/deploy' in a quote can't trick the bot into responding in a
channel it shouldn't or classifying a normal message as a command.

Adjustment from original PR: dropped _serialize_slack_blocks_for_agent,
which inlined a redacted JSON dump of non-rich_text blocks (section,
accessory, actions, etc.) — the agent would see the raw Block Kit
structure for UI-heavy alerts. It added up to 6000 characters to the
prompt context on every qualifying message with no opt-out. The
rich_text extraction and attachment unfurls cover the common bug-fix
case (quoted/forwarded content + link previews) without the prefill
tax. If a user needs block inspection later, it can return as a
config opt-in.

Also updates the Slack platform notes in session.py to accurately
describe what the gateway inlines.
This commit is contained in:
Wang-tianhao
2026-04-26 13:02:27 -07:00
committed by Teknium
parent 4921b26945
commit 6087e04043
4 changed files with 429 additions and 7 deletions

View File

@@ -63,6 +63,160 @@ def check_slack_requirements() -> bool:
return SLACK_AVAILABLE
def _extract_text_from_slack_blocks(blocks: list) -> str:
    """Extract readable text from Slack Block Kit blocks, including quoted/forwarded content.

    Slack's modern WYSIWYG composer sends messages with a ``blocks`` array
    containing ``rich_text`` elements. When a user forwards or quotes another
    message, the quoted content appears as nested ``rich_text_quote`` elements
    that are *not* included in the plain ``text`` field of the event.

    This helper walks the rich-text tree recursively and returns readable
    lines, preserving quotes, list items, and preformatted blocks so the agent
    can see forwarded/quoted content instead of only the lossy plain-text
    field.

    Rendering rules:
      * ``rich_text_section``      -> one line of inline text
      * ``rich_text_quote``        -> every line prefixed with ``>`` (one per
        nesting level)
      * ``rich_text_list``         -> ``• item`` for bullet lists, ``N. item``
        for any other style
      * ``rich_text_preformatted`` -> a fenced code block
      * consecutive inline elements sitting directly inside a container are
        joined into a single line

    Args:
        blocks: The ``blocks`` array of a Slack message event. Blocks whose
            type is not ``rich_text`` and entries that are not dicts are
            ignored.

    Returns:
        The rendered lines joined with newlines, or ``""`` when there is
        nothing to render.
    """
    if not blocks:
        return ""

    # Element types that hold other elements; anything else is rendered inline.
    container_types = {
        "rich_text_section",
        "rich_text_quote",
        "rich_text_list",
        "rich_text_preformatted",
    }
    parts: list[str] = []

    def _render_inline_elements(elements: list) -> str:
        """Render inline elements (text, link, channel, user, emoji, etc.)."""
        pieces: list[str] = []
        for el in elements or []:
            if not isinstance(el, dict):
                continue
            el_type = el.get("type", "")
            if el_type == "text":
                pieces.append(el.get("text") or "")
            elif el_type == "link":
                url = el.get("url") or ""
                label = el.get("text") or ""
                # Only show "label (url)" when the label adds information;
                # a bare or self-labelled link is rendered once.
                if label and url and label != url:
                    pieces.append(f"{label} ({url})")
                else:
                    pieces.append(label or url)
            elif el_type == "channel":
                pieces.append(f"<#{el.get('channel_id', '')}>")
            elif el_type == "user":
                pieces.append(f"<@{el.get('user_id', '')}>")
            elif el_type == "usergroup":
                pieces.append(f"<!subteam^{el.get('usergroup_id', '')}>")
            elif el_type == "emoji":
                pieces.append(f":{el.get('name', '')}:")
            elif el_type == "broadcast":
                pieces.append(f"<!{el.get('range', 'here')}>")
            elif el_type == "date":
                pieces.append(el.get("fallback") or "")
        return "".join(pieces)

    def _append_line(text: str, quote_depth: int = 0, bullet: str = "") -> None:
        """Append ``text`` to the output, decorated with quote/bullet markers."""
        if not text or not text.strip():
            return
        prefix = ((">" * quote_depth) + " ") if quote_depth else ""
        text = text.rstrip()
        if not prefix and not bullet:
            # Undecorated text is kept verbatim so it can still be compared
            # against the event's plain ``text`` field by the caller.
            parts.append(text)
            return
        # Decorate every physical line: a multi-line quote must carry the
        # quote marker on each line, otherwise the continuation reads as the
        # sender's own words. Bullet continuations are aligned under the text.
        first, *rest = text.split("\n")
        continuation = prefix + (" " * len(bullet))
        rendered = [f"{prefix}{bullet}{first}".rstrip()]
        rendered.extend(f"{continuation}{line}".rstrip() for line in rest)
        parts.append("\n".join(rendered))

    def _render_preformatted(children: list) -> str:
        """Render the children of a ``rich_text_preformatted`` element."""
        code_lines: list[str] = []
        inline_run: list[dict] = []
        for child in children or []:
            if not isinstance(child, dict):
                continue
            if child.get("type", "") == "rich_text_section":
                if inline_run:
                    code_lines.append(_render_inline_elements(inline_run))
                    inline_run = []
                code_lines.append(_render_inline_elements(child.get("elements") or []))
            else:
                # Adjacent inline pieces belong to the same run of code text.
                inline_run.append(child)
        if inline_run:
            code_lines.append(_render_inline_elements(inline_run))
        return "\n".join(line for line in code_lines if line)

    def _walk_elements(elements: list, quote_depth: int = 0, bullet: str = "") -> None:
        """Recursively render rich-text container elements into ``parts``."""
        inline_run: list[dict] = []

        def _flush_inline() -> None:
            if inline_run:
                _append_line(
                    _render_inline_elements(inline_run),
                    quote_depth=quote_depth,
                    bullet=bullet,
                )
                inline_run.clear()

        for elem in elements or []:
            if not isinstance(elem, dict):
                continue
            elem_type = elem.get("type", "")
            if elem_type not in container_types:
                # Inline element placed directly in a container (as in a
                # quote): collect neighbours so they render as one line.
                inline_run.append(elem)
                continue
            _flush_inline()
            if elem_type == "rich_text_section":
                _append_line(
                    _render_inline_elements(elem.get("elements") or []),
                    quote_depth=quote_depth,
                    bullet=bullet,
                )
            elif elem_type == "rich_text_quote":
                _walk_elements(elem.get("elements") or [], quote_depth=quote_depth + 1)
            elif elem_type == "rich_text_list":
                list_style = elem.get("style")
                for idx, item in enumerate(elem.get("elements") or []):
                    item_bullet = "• " if list_style == "bullet" else f"{idx + 1}. "
                    _walk_elements([item], quote_depth=quote_depth, bullet=item_bullet)
            else:  # rich_text_preformatted
                code_text = _render_preformatted(elem.get("elements") or [])
                if code_text:
                    lang = elem.get("language") or ""
                    _append_line(
                        f"```{lang}\n{code_text}\n```",
                        quote_depth=quote_depth,
                        bullet=bullet,
                    )
        _flush_inline()

    for block in blocks:
        if isinstance(block, dict) and block.get("type") == "rich_text":
            _walk_elements(block.get("elements") or [])
    return "\n".join(parts)
def _serialize_slack_blocks_for_agent(blocks: list, max_chars: int = 6000) -> str:
    """Render a compact, redacted JSON view of a message's Block Kit payload.

    Only allowlisted keys survive: a fixed set of scalar keys is copied as-is,
    and a fixed set of structural keys is descended into and filtered
    recursively. Everything else is dropped, as are values that end up empty.

    Args:
        blocks: The ``blocks`` array of the Slack message event.
        max_chars: Length above which the JSON text is cut and suffixed with
            a truncation marker.

    Returns:
        A labelled fenced ``json`` snippet, or ``""`` when ``blocks`` is empty
        or every block is of type ``rich_text``.
    """
    if not blocks:
        return ""
    # Pure rich_text payloads yield nothing here.
    if not any((entry or {}).get("type") != "rich_text" for entry in blocks):
        return ""

    # Keys whose values are copied through untouched.
    passthrough_keys = frozenset(
        (
            "type",
            "block_id",
            "action_id",
            "style",
            "dispatch_action",
            "optional",
            "multiple",
            "emoji",
        )
    )
    # Keys whose values are filtered recursively before being kept.
    nested_keys = frozenset(
        (
            "text",
            "title",
            "description",
            "label",
            "placeholder",
            "accessory",
            "fields",
            "elements",
            "options",
            "option_groups",
            "confirm",
            "submit",
            "close",
            "hint",
        )
    )
    empties = (None, {}, [], "")

    def _scrub(node):
        """Return ``node`` reduced to allowlisted, non-empty content."""
        if isinstance(node, dict):
            kept = {}
            for name, raw in node.items():
                if name in passthrough_keys:
                    kept[name] = raw
                    continue
                if name not in nested_keys:
                    continue
                inner = _scrub(raw)
                if inner not in empties:
                    kept[name] = inner
            return kept
        if isinstance(node, list):
            survivors = []
            for child in node:
                reduced = _scrub(child)
                if reduced not in empties:
                    survivors.append(reduced)
            return survivors
        if node is None or isinstance(node, (str, int, float, bool)):
            return node
        # Unknown object types are represented by their repr.
        return repr(node)

    try:
        rendered = json.dumps(_scrub(blocks), ensure_ascii=False, indent=2)
    except Exception:
        # Best effort: fall back to the raw repr if serialization fails.
        rendered = repr(blocks)

    if len(rendered) > max_chars:
        rendered = rendered[: max_chars - 18].rstrip() + "\n... [truncated]"

    return f"[Slack Block Kit payload for this message]\n```json\n{rendered}\n```"
class SlackAdapter(BasePlatformAdapter):
"""
Slack bot adapter using Socket Mode.
@@ -1133,7 +1287,98 @@ class SlackAdapter(BasePlatformAdapter):
if subtype in ("message_changed", "message_deleted"):
return
text = event.get("text", "")
original_text = event.get("text", "")
text = original_text
# Extract quoted/forwarded content from Slack blocks.
# Slack's modern composer embeds forwarded messages in the ``blocks``
# array as ``rich_text_quote`` elements, which are NOT reflected in
# the plain ``text`` field. Merge block text so the agent sees the
# full message content.
blocks = event.get("blocks")
if blocks:
blocks_text = _extract_text_from_slack_blocks(blocks)
if blocks_text:
# Only append if the blocks contain text not already present
# in the plain text field (avoids duplication).
stripped_blocks = blocks_text.strip()
if stripped_blocks and stripped_blocks not in text.strip():
logger.debug(
"Slack: extracted additional text from blocks "
"(likely quoted/forwarded content): %s",
stripped_blocks[:300],
)
text = (text.strip() + "\n" + stripped_blocks).strip()
blocks_payload = _serialize_slack_blocks_for_agent(blocks)
if blocks_payload:
text = (text.strip() + "\n\n" + blocks_payload).strip()
# Extract link unfurls / rich attachments (e.g. Notion previews).
# Slack places unfurled link previews in the ``attachments`` array with
# fields like title, title_link/from_url, text, footer, and fallback.
# Without reading these, the agent never sees shared link previews.
slack_attachments = event.get("attachments") or []
if slack_attachments:
att_parts: list[str] = []
for att in slack_attachments:
att_title = att.get("title", "")
att_url = att.get("title_link", "") or att.get("from_url", "")
att_text = att.get("text", "")
att_footer = att.get("footer", "")
att_fallback = att.get("fallback", "")
# Skip message-type attachments (e.g. Slack bot messages with
# is_msg_unfurl) to avoid echoing our own content.
if att.get("is_msg_unfurl"):
continue
# Build a readable representation.
if att_title and att_url:
header = f"📎 [{att_title}]({att_url})"
elif att_title:
header = f"📎 {att_title}"
elif att_url:
header = f"📎 {att_url}"
else:
header = None
# Prefer preview text, fall back to fallback description.
body = att_text or att_fallback or ""
if body:
body = body.strip()
if len(body) > 500:
body = body[:497] + "..."
if header and body:
section = f"{header}\n {body}"
elif header:
section = header
elif body:
section = f"📎 {body}"
else:
continue
# Deduplicate only when the fully rendered section is already
# present. The shared URL often already appears in the user's
# message text, and skipping on URL/title alone would hide the
# preview body we actually want the agent to see.
if section in text:
continue
if att_footer:
section = f"{section}\n _{att_footer}_"
att_parts.append(section)
if att_parts:
attachment_text = "\n\n".join(att_parts)
text = (text.strip() + "\n\n" + attachment_text).strip()
logger.debug(
"Slack: appended %d link unfurl(s) to message text",
len(att_parts),
)
channel_id = event.get("channel", "")
ts = event.get("ts", "")
assistant_meta = self._lookup_assistant_thread_metadata(
@@ -1182,7 +1427,8 @@ class SlackAdapter(BasePlatformAdapter):
# 3. The message is in a thread where the bot was previously @mentioned, OR
# 4. There's an existing session for this thread (survives restarts)
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id)
is_mentioned = bot_uid and f"<@{bot_uid}>" in text
routing_text = original_text or ""
is_mentioned = bot_uid and f"<@{bot_uid}>" in routing_text
event_thread_ts = event.get("thread_ts")
is_thread_reply = bool(event_thread_ts and event_thread_ts != ts)
@@ -1244,7 +1490,7 @@ class SlackAdapter(BasePlatformAdapter):
# Determine message type
msg_type = MessageType.TEXT
if text.startswith("/"):
if (original_text or "").startswith("/"):
msg_type = MessageType.COMMAND
# Handle file attachments

View File

@@ -310,8 +310,9 @@ def build_session_context_prompt(
"**Platform notes:** You are running inside Slack. "
"You do NOT have access to Slack-specific APIs — you cannot search "
"channel history, pin/unpin messages, manage channels, or list users. "
"Do not promise to perform these actions. If the user asks, explain "
"that you can only read messages sent directly to you and respond."
"Do not promise to perform these actions. The gateway may inline the "
"current message's Slack block/attachment payload when available, but "
"you still cannot call Slack APIs yourself."
)
elif context.source.platform == Platform.DISCORD:
# Inject the Discord IDs block only when the agent actually has

View File

@@ -245,6 +245,7 @@ class TestBuildSessionContextPrompt:
assert "Slack" in prompt
assert "cannot search" in prompt.lower()
assert "pin" in prompt.lower()
assert "current message's slack block/attachment payload" in prompt.lower()
def test_discord_prompt_with_channel_topic(self):
"""Channel topic should appear in the session context prompt."""

View File

@@ -355,15 +355,17 @@ class TestSendVideo:
# ---------------------------------------------------------------------------
class TestIncomingDocumentHandling:
def _make_event(self, files=None, text="hello", channel_type="im"):
def _make_event(self, files=None, text="hello", channel_type="im", blocks=None, attachments=None):
"""Build a mock Slack message event with file attachments."""
return {
"text": text,
"user": "U_USER",
"channel": "C123",
"channel": "D123",
"channel_type": channel_type,
"ts": "1234567890.000001",
"files": files or [],
"blocks": blocks or [],
"attachments": attachments or [],
}
@pytest.mark.asyncio
@@ -540,6 +542,178 @@ class TestIncomingDocumentHandling:
assert "403" in msg_event.text
assert "what's in this?" in msg_event.text
@pytest.mark.asyncio
async def test_rich_text_blocks_do_not_duplicate_plain_text(self, adapter):
    """A composer block that mirrors the plain text field exactly must not
    be appended a second time; the delivered text stays untouched."""
    plain = "hello world"
    composer_block = {
        "type": "rich_text",
        "elements": [
            {
                "type": "rich_text_section",
                "elements": [{"type": "text", "text": plain}],
            },
        ],
    }
    event = self._make_event(text=plain, blocks=[composer_block])

    await adapter._handle_slack_message(event)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == plain
@pytest.mark.asyncio
async def test_rich_text_quotes_and_lists_are_extracted(self, adapter):
    """Quote and bullet-list content nested in rich_text blocks is surfaced
    in the delivered text alongside the user's own message."""

    def section(content):
        # Smallest rich_text_section wrapping a single text element.
        return {
            "type": "rich_text_section",
            "elements": [{"type": "text", "text": content}],
        }

    quote = {"type": "rich_text_quote", "elements": [section("Quoted line")]}
    bullets = {
        "type": "rich_text_list",
        "style": "bullet",
        "elements": [section("First bullet"), section("Second bullet")],
    }
    event = self._make_event(
        text="Can you summarize this?",
        blocks=[{"type": "rich_text", "elements": [quote, bullets]}],
    )

    await adapter._handle_slack_message(event)

    delivered = adapter.handle_message.call_args[0][0]
    for expected in (
        "Can you summarize this?",
        "> Quoted line",
        "• First bullet",
        "• Second bullet",
    ):
        assert expected in delivered.text
@pytest.mark.asyncio
async def test_attachments_unfurl_text_is_appended_even_when_url_is_in_message(self, adapter):
    """An unfurl preview is still appended when the user's message already
    contains the shared URL."""
    preview = {
        "title": "Spec",
        "from_url": "https://example.com/spec",
        "text": "The latest product spec preview",
        "footer": "Notion",
    }
    event = self._make_event(
        text="Look at this doc https://example.com/spec",
        attachments=[preview],
    )

    await adapter._handle_slack_message(event)

    delivered = adapter.handle_message.call_args[0][0]
    for expected in (
        "Look at this doc https://example.com/spec",
        "📎 [Spec](https://example.com/spec)",
        "The latest product spec preview",
        "_Notion_",
    ):
        assert expected in delivered.text
@pytest.mark.asyncio
async def test_message_unfurl_attachments_are_skipped(self, adapter):
    """Attachments flagged ``is_msg_unfurl`` contribute nothing to the
    delivered text."""
    shared_link = "https://example.com/thread"
    message_copy = {
        "is_msg_unfurl": True,
        "title": "Thread copy",
        "text": "This should not be appended",
    }
    event = self._make_event(text=shared_link, attachments=[message_copy])

    await adapter._handle_slack_message(event)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.text == shared_link
@pytest.mark.asyncio
async def test_channel_routing_ignores_bot_mentions_inside_block_text(self, adapter):
    """A bot mention that exists only inside quoted block text must not
    satisfy mention gating in a channel: routing looks at the original user
    text, so the message is never handed to the handler."""
    quoted_mention = {
        "type": "rich_text_quote",
        "elements": [
            {
                "type": "rich_text_section",
                "elements": [
                    {"type": "text", "text": "Contains <@U_BOT> in quoted text"},
                ],
            },
        ],
    }
    event = self._make_event(
        text="please review",
        channel_type="channel",
        blocks=[{"type": "rich_text", "elements": [quoted_mention]}],
    )

    await adapter._handle_slack_message(event)

    adapter.handle_message.assert_not_called()
@pytest.mark.asyncio
async def test_quoted_slash_command_text_does_not_change_message_type(self, adapter):
    """Slash-like text that appears only inside a quote leaves the message
    classified as plain text, while the quoted line is still delivered."""
    quoted_command = {
        "type": "rich_text_quote",
        "elements": [
            {
                "type": "rich_text_section",
                "elements": [{"type": "text", "text": "/deploy now"}],
            },
        ],
    }
    event = self._make_event(
        text="",
        blocks=[{"type": "rich_text", "elements": [quoted_command]}],
    )

    await adapter._handle_slack_message(event)

    delivered = adapter.handle_message.call_args[0][0]
    assert delivered.message_type == MessageType.TEXT
    assert "> /deploy now" in delivered.text
# ---------------------------------------------------------------------------
# TestMessageRouting