fix(whatsapp_identity): pin identifier regex to ASCII, clarify it's defense-in-depth
Follow-up on top of #16243. Two small tweaks:
- Compile the regex once as `_SAFE_IDENTIFIER_RE` and pin it to `[A-Za-z0-9@.+\-]`. The previous `\w` accepts Unicode word chars (full-width digits, accented letters) which aren't valid WhatsApp identifiers and shouldn't reach the mapping-file lookup.
- Add a comment clarifying this is defense-in-depth, not a live traversal. The hardcoded `lid-mapping-{current}{suffix}.json` prefix already prevents escape via pathlib's component split — with `current='../secrets'`, the first path component under `session/` is the literal directory name `lid-mapping-..`, which the attacker cannot create.
E2E verified: legit mapping chains still resolve, all probed attack shapes (`../`, absolute paths, shell metacharacters, Unicode digit tricks) are rejected before any file access.
This commit is contained in:
@@ -37,6 +37,11 @@ from typing import Set
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
|
||||||
|
# ``@``, ``.`` and ``-`` separators. The class is spelled out in ASCII so
|
||||||
|
# full-width digits / Unicode word chars can't sneak through.
|
||||||
|
_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
|
||||||
|
|
||||||
from hermes_constants import get_hermes_home
|
from hermes_constants import get_hermes_home
|
||||||
|
|
||||||
|
|
||||||
@@ -85,7 +90,15 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
|
|||||||
current = queue.pop(0)
|
current = queue.pop(0)
|
||||||
if not current or current in resolved:
|
if not current or current in resolved:
|
||||||
continue
|
continue
|
||||||
if not re.match(r'^[\w@.+-]+$', current):
|
# Defense-in-depth: reject identifiers that could sneak path
|
||||||
|
# separators / traversal segments into the ``lid-mapping-{current}``
|
||||||
|
# filename below. The hardcoded ``lid-mapping-`` prefix already
|
||||||
|
# prevents escape via pathlib's component split (an attacker can't
|
||||||
|
# create ``lid-mapping-..`` as a real directory in session_dir), but
|
||||||
|
# this keeps the identifier space to the characters WhatsApp JIDs
|
||||||
|
# actually use and avoids depending on that filesystem-layout
|
||||||
|
# invariant.
|
||||||
|
if not _SAFE_IDENTIFIER_RE.match(current):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
resolved.add(current)
|
resolved.add(current)
|
||||||
|
|||||||
Reference in New Issue
Block a user