fix(security): normalize input before dangerous command detection (#3260)

detect_dangerous_command() ran regex patterns against raw command strings
without normalization, allowing bypass via Unicode fullwidth chars,
ANSI escape codes, null bytes, and 8-bit C1 controls.

Adds _normalize_command_for_detection() that:
- Strips ANSI escapes using the full ECMA-48 strip_ansi() from
  tools/ansi_strip (CSI, OSC, DCS, 8-bit C1, nF sequences)
- Removes null bytes
- Normalizes Unicode via NFKC (fullwidth Latin → ASCII, etc.)

Includes 12 regression tests covering fullwidth, ANSI, C1, null byte,
and combined obfuscation bypasses.

Salvaged from PR #3089 by thakoreh — improved ANSI stripping to use
existing comprehensive strip_ansi() instead of a weaker hand-rolled
regex, and added test coverage.

Co-authored-by: Hiren <hiren.thakore58@gmail.com>
This commit is contained in:
Teknium
2026-03-26 14:33:18 -07:00
committed by GitHub
parent a8e02c7d49
commit 76ed15dd4d
2 changed files with 90 additions and 1 deletions

View File

@@ -512,3 +512,73 @@ class TestGatewayProtection:
dangerous, key, desc = detect_dangerous_command(cmd)
assert dangerous is False
class TestNormalizationBypass:
    """Obfuscation techniques must not bypass dangerous command detection.

    Every test passes one command string to detect_dangerous_command() and
    checks the first element of the 3-tuple it returns. The dangerous
    keyword is disguised with fullwidth Unicode letters, ANSI / 8-bit C1
    escape sequences, or embedded null bytes; two control tests confirm
    that harmless commands are still reported as safe.
    """

    def test_fullwidth_unicode_rm(self):
        """Fullwidth Unicode 'rm -rf /' must be caught after NFKC normalization."""
        # U+FF52 U+FF4D is fullwidth "rm"; U+FF52 U+FF46 is fullwidth "rf".
        # The hyphen, spaces and slash are plain ASCII.
        cmd = "\uff52\uff4d -\uff52\uff46 /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Fullwidth 'rm -rf /' was not detected: {cmd!r}"

    def test_fullwidth_unicode_dd(self):
        """Fullwidth 'dd if=/dev/zero' must be caught."""
        # U+FF44 U+FF44 is fullwidth "dd"; the arguments are plain ASCII.
        cmd = "\uff44\uff44 if=/dev/zero of=/dev/sda"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Fullwidth 'dd' was not detected: {cmd!r}"

    def test_fullwidth_unicode_chmod(self):
        """Fullwidth 'chmod 777' must be caught."""
        # U+FF43 U+FF48 U+FF4D U+FF4F U+FF44 is fullwidth "chmod".
        cmd = "\uff43\uff48\uff4d\uff4f\uff44 777 /tmp/test"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Fullwidth 'chmod 777' was not detected: {cmd!r}"

    def test_ansi_csi_wrapped_rm(self):
        """ANSI CSI color codes wrapping 'rm' must be stripped and caught."""
        # ESC[31m ... ESC[0m: 7-bit CSI sequences placed around the keyword.
        cmd = "\x1b[31mrm\x1b[0m -rf /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"ANSI-wrapped 'rm -rf /' was not detected: {cmd!r}"

    def test_ansi_osc_embedded_rm(self):
        """ANSI OSC sequences embedded in command must be stripped."""
        # ESC ] 0 ; title BEL: an OSC sequence placed directly before "rm".
        cmd = "\x1b]0;title\x07rm -rf /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"OSC-prefixed 'rm -rf /' was not detected: {cmd!r}"

    def test_ansi_8bit_c1_wrapped_rm(self):
        """8-bit C1 CSI (0x9b) wrapping 'rm' must be stripped and caught."""
        # 0x9b is the single-character C1 form of CSI (instead of ESC "[").
        cmd = "\x9b31mrm\x9b0m -rf /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"8-bit C1 CSI bypass was not caught: {cmd!r}"

    def test_null_byte_in_rm(self):
        """Null bytes injected into 'rm' must be stripped and caught."""
        cmd = "r\x00m -rf /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Null-byte 'rm' was not detected: {cmd!r}"

    def test_null_byte_in_dd(self):
        """Null bytes in 'dd' must be stripped."""
        cmd = "d\x00d if=/dev/sda"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Null-byte 'dd' was not detected: {cmd!r}"

    def test_mixed_fullwidth_and_ansi(self):
        """Combined fullwidth + ANSI obfuscation must still be caught."""
        # Fullwidth "rm" (U+FF52 U+FF4D) wrapped in ESC[1m ... ESC[0m.
        cmd = "\x1b[1m\uff52\uff4d\x1b[0m -rf /"
        dangerous, key, desc = detect_dangerous_command(cmd)
        assert dangerous is True, f"Fullwidth + ANSI 'rm -rf /' was not detected: {cmd!r}"

    def test_safe_command_after_normalization(self):
        """Normal safe commands must not be flagged after normalization."""
        cmd = "ls -la /tmp"
        dangerous, key, desc = detect_dangerous_command(cmd)
        # Report what was matched so a false positive is easy to diagnose.
        assert dangerous is False, (
            f"Safe command was flagged: {cmd!r} (key={key!r}, desc={desc!r})"
        )

    def test_fullwidth_safe_command_not_flagged(self):
        """Fullwidth 'ls -la' is safe and must not be flagged."""
        # U+FF4C U+FF53 is fullwidth "ls"; U+FF4C U+FF41 is fullwidth "la".
        cmd = "\uff4c\uff53 -\uff4c\uff41 /tmp"
        dangerous, key, desc = detect_dangerous_command(cmd)
        # Report what was matched so a false positive is easy to diagnose.
        assert dangerous is False, (
            f"Fullwidth safe command was flagged: {cmd!r} (key={key!r}, desc={desc!r})"
        )