From 6209e85e7d10492d668f9fa50467e8c94a23cfac Mon Sep 17 00:00:00 2001 From: huangke Date: Sun, 12 Apr 2026 16:25:22 +0800 Subject: [PATCH] feat: support document/archive extensions in MEDIA: tag extraction Add epub, pdf, zip, rar, 7z, docx, xlsx, pptx, txt, csv, apk, ipa to the MEDIA: path regex in extract_media(). These file types were already routed to send_document() in the delivery loop (base.py:1705), but the extraction regex only matched media extensions (audio/video/image), causing document paths to fall through to the generic \S+ branch which could fail silently in some cases. This explicit list ensures reliable matching and delivery for all common document formats. --- gateway/platforms/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 56bb3c5cb..ad6e89b77 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1343,7 +1343,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip()