#!/usr/bin/env python3 """Secret scanner for Claude Code and Cursor hooks (single-file). Provides pre/post hook scanning with minimal dependencies. Designed to be portable (copy one file) while readable and maintainable. """ from __future__ import annotations import argparse import json import os import re import sys from bisect import bisect_right __all__ = [ "__version__", "main", "console_main", "console_main_claude", "console_main_cursor", ] __version__ = "0.1.14" # ----------------------------------------------------------------------------- # Configuration and Patterns # ----------------------------------------------------------------------------- MAX_SCAN_BYTES = 5 * 1024 * 1024 # 5MB cap per file SAMPLE_BYTES = 4096 # used for binary sniffing # (strict schema access; direct .get only) PATTERNS = { # AWS "AWS Access Key ID": re.compile(r"\b(?:A3T[A-Z0-9]|ABIA|ACCA|AKIA|ASIA)[A-Z0-9]{16}\b"), "AWS Secret Access Key": re.compile(r"(?i)(?:aws_?secret_?access_?key|secret_?access_?key)\s*[:=]\s*['\"]?([A-Za-z0-9/+=]{40})['\"]?"), # GitHub / GitLab "GitHub Token": re.compile(r"\b(ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9_]{36}\b"), "GitHub Fine-Grained PAT": re.compile(r"\bgithub_pat_[A-Za-z0-9_]{20,255}\b"), "GitLab Tokens": re.compile(r"\b(?:glpat|gldt|glft|glsoat|glrt)-[A-Za-z0-9_\-]{20,50}(?!\w)\b|\bGR1348941[A-Za-z0-9_\-]{20,50}(?!\w)\b|\bglcbt-(?:[0-9a-fA-F]{2}_)?[A-Za-z0-9_\-]{20,50}(?!\w)\b|\bglimt-[A-Za-z0-9_\-]{25}(?!\w)\b|\bglptt-[A-Za-z0-9_\-]{40}(?!\w)\b|\bglagent-[A-Za-z0-9_\-]{50,1024}(?!\w)\b|\bgloas-[A-Za-z0-9_\-]{64}(?!\w)\b"), # Slack / Discord / Telegram "Slack Token": re.compile(r"xox(?:a|b|p|o|s|r)-(?:\d+-)+[a-z0-9]+", re.IGNORECASE), "Slack Webhook": re.compile(r"https://hooks\.slack\.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+", re.IGNORECASE), "Discord Bot Token": re.compile(r"\b[MNO][A-Za-z0-9_-]{23,25}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27}\b"), "Discord Webhook": re.compile(r"https://(?:canary\.|ptb\.)?discord(?:app)?\.com/api/webhooks/\d{5,30}/[A-Za-z0-9_-]{30,}"), "Telegram Bot Token": re.compile(r"\b\d{8,10}:[0-9A-Za-z_-]{35}\b"), # Stripe / Twilio / SendGrid "Stripe Secret Key": re.compile(r"\b(?:r|s)k_(?:live|test)_[0-9A-Za-z]{24,}\b"), "Stripe Publishable Key": re.compile(r"\bpk_(?:live|test)_[A-Za-z0-9]{20,}\b"), "Twilio Account SID": re.compile(r"\bAC[0-9a-fA-F]{32}\b"), "Twilio API Key SID": re.compile(r"\bSK[0-9a-fA-F]{32}\b"), "Twilio Auth Token": re.compile(r"(?i)\b(?:twilio_)?auth_?token['\"]?\s*[:=]\s*['\"]?([0-9a-f]{32})['\"]?"), "SendGrid API Key": re.compile(r"\bSG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}\b"), # Package registries "NPM Token": re.compile(r"\bnpm_[A-Za-z0-9]{30,}\b"), "NPM .npmrc Auth Token": re.compile(r"\/\/[^\n]+\/:_authToken=\s*((npm_.+)|([A-Fa-f0-9-]{36}))"), "PyPI Token": re.compile(r"\bpypi-(?:AgEIcHlwaS5vcmc|AgENdGVzdC5weXBpLm9yZw)[A-Za-z0-9-_]{70,}\b"), # Cloud providers & services "Azure Storage Connection String": re.compile(r"DefaultEndpointsProtocol=(?:http|https);AccountName=[A-Za-z0-9\-]+;AccountKey=([A-Za-z0-9+/=]{40,});EndpointSuffix=core\.windows\.net"), "Azure Storage Account Key": re.compile(r"AccountKey=[A-Za-z0-9+/=]{88}"), "Azure SAS Token": re.compile(r"[\?&]sv=\d{4}-\d{2}-\d{2}[^ \n]*?&sig=[A-Za-z0-9%+/=]{16,}"), "Artifactory Credentials": re.compile(r"(?:\s|=|:|\"|^)AKC[a-zA-Z0-9]{10,}(?:\s|\"|$)"), "Artifactory Encrypted Password": re.compile(r"(?:\s|=|:|\"|^)AP[\dABCDEF][a-zA-Z0-9]{8,}(?:\s|\"|$)"), "Cloudant URL Credential": re.compile(r"https?://[\w\-]+:([0-9a-f]{64}|[a-z]{24})@[\w\-]+\.cloudant\.com", re.IGNORECASE), "SoftLayer API Token": re.compile(r"https?://api\.softlayer\.com/soap/(?:v3|v3\.1)/([a-z0-9]{64})", re.IGNORECASE), # JWT and keys "JWT Token": re.compile(r"\beyJ[A-Za-z0-9\-_=]+\.[A-Za-z0-9\-_=]+\.?[A-Za-z0-9\-_.+/=]*?\b"), "Private Key (PEM)": re.compile(r"-----BEGIN (?:RSA |EC |DSA |ENCRYPTED )?PRIVATE KEY-----\s*\n(?:(?:[A-Za-z0-9\-]+:[^\n]*\n)*\s*)?(?:[A-Za-z0-9+/=]{40,}\s*\n)+-----END (?:RSA |EC |DSA |ENCRYPTED )?PRIVATE KEY-----"), "OpenSSH Private Key": re.compile(r"-----BEGIN OPENSSH PRIVATE KEY-----\s*\n(?:[A-Za-z0-9+/=]{40,}\s*\n)+-----END OPENSSH PRIVATE KEY-----"), "PGP Private Key": re.compile(r"-----BEGIN PGP PRIVATE KEY BLOCK-----\s*\n(?:(?:[A-Za-z0-9\-]+:[^\n]*\n)*\s*)?(?:[A-Za-z0-9+/=]{40,}\s*\n)+-----END PGP PRIVATE KEY BLOCK-----"), "SSH2 Encrypted Private Key": re.compile(r"-----BEGIN SSH2 ENCRYPTED PRIVATE KEY-----\s*\n(?:[A-Za-z0-9+/=]{40,}\s*\n)+-----END SSH2 ENCRYPTED PRIVATE KEY-----"), "PuTTY Private Key": re.compile(r"(?:^|\n)PuTTY-User-Key-File-\d+:\s*\S+"), # Other common tokens "Google API Key": re.compile(r"\bAIza[0-9A-Za-z\-_\\]{32,40}\b"), "Google OAuth Token": re.compile(r"\bya29\.[0-9A-Za-z\-_]{20,}\b"), "Anthropic API Key": re.compile(r"\bsk-ant-api\d+-[A-Za-z0-9_-]{90,}\b"), "OpenAI API Key": re.compile(r"\bsk-[A-Za-z0-9-_]*[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}\b"), "Password Assignment": re.compile(r"(?i)\b(pass(word)?|pwd)\s*[:=]\s*['\"][^'\"\n]{8,}['\"]"), "Mailchimp API Key": re.compile(r"\b[0-9a-z]{32}-us[0-9]{1,2}\b"), # From detect-secrets basic_auth plugin, limits username/password chars to avoid false positives "Basic Auth Credentials": re.compile(r"://[^:/?#\[\]@!$&'()*+,;=\s]+:([^:/?#\[\]@!$&'()*+,;=\s]+)@"), "Databricks PAT": re.compile(r"\bdapi[A-Za-z0-9]{32}\b"), "Firebase FCM Server Key": re.compile(r"\bAAAA[A-Za-z0-9_-]{7,}:[A-Za-z0-9_-]{140,}\b"), "Shopify Token": re.compile(r"\bshp(?:at|pa|ss)_[0-9a-f]{32}\b"), "Notion Integration Token": re.compile(r"\bsecret_[A-Za-z0-9]{32,}\b"), "Linear API Key": re.compile(r"\blin_api_[A-Za-z0-9]{40,}\b"), "Mapbox Access Token": re.compile(r"\b[ps]k\.[A-Za-z0-9\-_.]{30,}\b"), "Dropbox Access Token": re.compile(r"\bsl\.[A-Za-z0-9_-]{120,}\b"), "DigitalOcean Personal Access Token": re.compile(r"\bdop_v1_[a-f0-9]{64}\b"), "Square Access Token": re.compile(r"\bEAAA[A-Za-z0-9]{60}\b"), "Square OAuth Secret": re.compile(r"\bsq0csp-[0-9A-Za-z_\-]{43}\b"), "Airtable Personal Access Token": re.compile(r"\bpat[A-Za-z0-9]{14}\.[a-f0-9]{64}\b"), "Facebook Access Token": re.compile(r"\bEAA[A-Za-z0-9]{30,}\b"), } # ----------------------------------------------------------------------------- # Scanning utilities # ----------------------------------------------------------------------------- def is_probably_binary(block: bytes) -> bool: if b"\x00" in block: return True textchars = bytes(range(32, 127)) + b"\n\r\t\b" nontext = block.translate(None, textchars) return len(nontext) / max(1, len(block)) > 0.30 def should_scan_file(path: str) -> bool: try: with open(path, "rb") as sample: head = sample.read(SAMPLE_BYTES) except OSError: return False if not head: return True return not is_probably_binary(head) def scan_text(text: str, path: str): findings = [] line_starts = [0] for idx, ch in enumerate(text): if ch == "\n": line_starts.append(idx + 1) for pname, rx in PATTERNS.items(): for m in rx.finditer(text): line_no = bisect_right(line_starts, m.start()) findings.append({"file": path, "line": line_no, "type": pname, "match": m.group(0)}) return findings def scan_file(path: str): if not os.path.exists(path): raise FileNotFoundError(f"File does not exist: {path}") if not should_scan_file(path): return [] size = os.path.getsize(path) if size > MAX_SCAN_BYTES: raise RuntimeError(f"File size {size} bytes exceeds scan limit of {MAX_SCAN_BYTES} bytes") with open(path, "rb") as f: blob = f.read() if is_probably_binary(blob): return [] return scan_text(blob.decode("utf-8", "ignore"), path) def build_findings_message(findings, heading: str, limit: int = 5) -> str: if not findings: return heading grouped = {} for it in findings: grouped.setdefault(it.get("file") or "[unknown]", []).append(it) lines = [] for label, entries in grouped.items(): types = sorted({e["type"] for e in entries}) nums = ", ".join(str(e["line"]) for e in entries[:limit]) s = f"{label}: {', '.join(types[:3])}" if nums: s += f" (lines {nums})" if len(entries) > limit: s += f" (+{len(entries) - limit} more)" lines.append(s) msg = "\n".join(f" - {ln}" for ln in lines[:limit]) out = f"{heading}\n{msg}" total = len(findings) if total > limit: out += f"\nShowing first {limit} of {total} findings." return out # ----------------------------------------------------------------------------- # Client adapters (Cursor / Claude) # ----------------------------------------------------------------------------- def detect_hook_type(hook_input): if not isinstance(hook_input, dict): return "claude_code" ev = hook_input.get("hook_event_name") if isinstance(ev, str): ev = ev.strip() claude_events = { "PreToolUse", "PostToolUse", "UserPromptSubmit", "Notification", "Stop", "SubagentStop", "PreCompact", "SessionStart", "SessionEnd", } cursor_events = { "beforeReadFile", "afterFileEdit", "beforeSubmitPrompt", "beforeShellExecution", "afterShellExecution", "beforeMCPExecution", "afterMCPExecution", "stop", } if ev in claude_events: return "claude_code" if ev in cursor_events: return "cursor" return "claude_code" # Removed legacy helpers; using direct documented fields only def _detect_tool_name(tool_input) -> str: if isinstance(tool_input, str) and tool_input.strip(): return tool_input if isinstance(tool_input, dict): value = tool_input.get("tool_name") if isinstance(value, str) and value.strip(): return value if isinstance(tool_input.get("command"), str): return "command" return "tool" def collect_cursor_post_payloads(hook_input, event_name: str | None): """Extract Cursor post-event outputs from documented fields.""" evt = (event_name or "").strip() payloads = [] if evt == "afterShellExecution": stdout = hook_input.get("stdout") stderr = hook_input.get("stderr") if isinstance(stdout, str) and stdout.strip(): payloads.append(("[shell stdout]", stdout)) if isinstance(stderr, str) and stderr.strip(): payloads.append(("[shell stderr]", stderr)) return payloads if evt == "afterMCPExecution": for key, label in ( ("stdout", "[mcp stdout]"), ("stderr", "[mcp stderr]"), ("text", "[mcp output]"), ("message", "[mcp output]"), ): val = hook_input.get(key) if isinstance(val, str) and val.strip(): payloads.append((label, val)) return payloads return payloads def collect_claude_post_payloads(hook_input): """Extract Claude post-event outputs from documented fields.""" tool_input = hook_input.get("tool_input") or {} tool_result = hook_input.get("tool_response") or {} tool_name = (hook_input.get("tool_name") or _detect_tool_name(tool_input) or "").strip() payloads = [] if tool_name.lower() == "bash": stdout = tool_result.get("stdout") if isinstance(tool_result, dict) else None stderr = tool_result.get("stderr") if isinstance(tool_result, dict) else None if isinstance(stdout, str) and stdout.strip(): payloads.append(("[bash stdout]", stdout)) if isinstance(stderr, str) and stderr.strip(): payloads.append(("[bash stderr]", stderr)) return payloads # Read tool or similar may return content directly if isinstance(tool_result, dict): content = tool_result.get("content") if isinstance(content, str) and content.strip(): label = tool_input.get("file_path") if isinstance(tool_input, dict) else None payloads.append((label or "[tool output]", content)) elif isinstance(tool_result, str) and tool_result.strip(): label = tool_input.get("file_path") if isinstance(tool_input, dict) else None payloads.append((label or "[tool output]", tool_result)) return payloads def format_cursor_response(action: str, message: str | None, event_name: str | None): permission_map = {"allow": "allow", "block": "deny", "ask": "ask"} event = (event_name or "").strip() if event == "beforeSubmitPrompt": payload = {"continue": action != "block"} if message: payload["userMessage"] = message return payload if event in {"beforeReadFile", "beforeShellExecution", "beforeMCPExecution"}: payload = {"permission": permission_map.get(action, "allow")} if message: payload["userMessage"] = message return payload if event in {"afterFileEdit", "afterShellExecution", "afterMCPExecution", "stop"}: payload = {} if message: payload["message"] = message return payload payload = {} if action in permission_map: payload["permission"] = permission_map[action] elif action == "block": payload["permission"] = "deny" if message: payload["userMessage"] = message if not payload: payload["continue"] = action != "block" return payload def format_claude_response(action: str, message: str | None, hook_event: str): msg = message.rstrip() if isinstance(message, str) else None if hook_event == "PreToolUse": decision = "deny" if action == "block" else "allow" out = {"hookSpecificOutput": {"hookEventName": "PreToolUse", "permissionDecision": decision}} if msg: out["hookSpecificOutput"]["permissionDecisionReason"] = msg return out if hook_event == "PostToolUse": out = {"hookSpecificOutput": {"hookEventName": "PostToolUse"}} if action == "block" and msg: out["decision"] = "block" out["reason"] = msg elif msg: out["hookSpecificOutput"]["additionalContext"] = msg return out if hook_event == "UserPromptSubmit": out = {"hookSpecificOutput": {"hookEventName": "UserPromptSubmit"}} if action == "block": out["decision"] = "block" if msg: out["reason"] = msg elif msg: out["hookSpecificOutput"]["additionalContext"] = msg return out out = {"action": action} if msg: out["message"] = msg return out # ----------------------------------------------------------------------------- # CLI # ----------------------------------------------------------------------------- def _emit(hook_type: str, hook_event: str, action: str, message: str | None, event_name: str | None = None, *, allow_code=0, block_code=2, warn_code=1): if hook_type == "cursor": print(json.dumps(format_cursor_response(action, message, event_name))) return payload = format_claude_response(action, message, hook_event) text = json.dumps(payload) if action == "block": sys.stderr.write(text + "\n") sys.stderr.flush() sys.exit(block_code) else: sys.stdout.write(text + "\n") sys.stdout.flush() sys.exit(allow_code) def run_pre_hook(client_override: str | None = None): hook_type = "claude_code" event_name = None try: hook_input = json.load(sys.stdin) hook_type = client_override or detect_hook_type(hook_input) event_name = hook_input.get("hook_event_name") hook_event = event_name or ("PreToolUse" if hook_type == "claude_code" else "beforeReadFile") findings = [] if hook_type == "cursor": # Cursor: extract directly per docs, avoid walking arbitrary JSON evt = (event_name or "").strip() if evt == "beforeReadFile": file_path = hook_input.get("file_path") or "" content = hook_input.get("content") if isinstance(content, str) and content.strip(): findings.extend(scan_text(content, file_path or "[file content]")) elif file_path: try: findings.extend(scan_file(file_path)) except Exception as exc: _emit(hook_type, hook_event, "block", f"Secret scan error: {exc}", event_name) return elif evt == "beforeSubmitPrompt": prompt = hook_input.get("prompt") if isinstance(prompt, str) and prompt.strip(): findings.extend(scan_text(prompt, "[prompt]")) elif evt == "beforeShellExecution": cmd = hook_input.get("command") if isinstance(cmd, str) and cmd.strip(): findings.extend(scan_text(cmd, "[shell command]")) elif evt == "beforeMCPExecution": cmd = hook_input.get("command") if isinstance(cmd, str) and cmd.strip(): findings.extend(scan_text(cmd, "[mcp command]")) # No other Cursor pre-events are scanned else: ev = hook_input.get("hook_event_name") or "" ev = ev.strip() if ev == "PreToolUse": tool_input = hook_input.get("tool_input") or {} tool_name = (hook_input.get("tool_name") or _detect_tool_name(tool_input) or "").strip() if isinstance(tool_input, dict): if tool_name in {"Write", "Edit", "MultiEdit", "Read"}: content = tool_input.get("content") if isinstance(content, str) and content.strip(): findings.extend(scan_text(content, tool_input.get("file_path") or "[content]")) else: fp = tool_input.get("file_path") if isinstance(fp, str) and fp.strip(): try: findings.extend(scan_file(fp)) except Exception as exc: _emit(hook_type, hook_event, "block", f"Secret scan error: {exc}", event_name) return elif tool_name == "Bash": cmd = tool_input.get("command") if isinstance(cmd, str) and cmd.strip(): findings.extend(scan_text(cmd, "[bash command]")) else: content = tool_input.get("content") if isinstance(content, str) and content.strip(): findings.extend(scan_text(content, "[tool content]")) elif ev == "UserPromptSubmit": prompt = hook_input.get("prompt") if isinstance(prompt, str) and prompt.strip(): findings.extend(scan_text(prompt, "[prompt]")) # No other Claude pre-events are scanned if findings: _emit(hook_type, hook_event, "block", build_findings_message(findings, "SECRET DETECTED (submission blocked)"), event_name) else: _emit(hook_type, hook_event, "allow", None, event_name) except Exception as exc: _emit(hook_type, "UserPromptSubmit", "block", f"Secret scan error: {exc}", event_name) def run_post_hook(client_override: str | None = None): hook_type = "claude_code" event_name = None try: hook_input = json.load(sys.stdin) hook_type = client_override or detect_hook_type(hook_input) event_name = hook_input.get("hook_event_name") if hook_type == "cursor" else None payloads = collect_cursor_post_payloads(hook_input, event_name) if hook_type == "cursor" else collect_claude_post_payloads(hook_input) if not payloads: _emit(hook_type, "PostToolUse", "allow", None, event_name) return findings = [] for label, text in payloads: findings.extend(scan_text(text, label)) if findings: msg = build_findings_message(findings, "SECRET DETECTED in recent output") + "\nBe careful with this sensitive data!" _emit(hook_type, "PostToolUse", "block", msg, event_name) else: _emit(hook_type, "PostToolUse", "allow", None, event_name) except Exception as exc: if hook_type == "claude_code": sys.stderr.write(json.dumps(format_claude_response("allow", f"Post-read secret scan error: {exc}", "PostToolUse")) + "\n") sys.stderr.flush() sys.exit(1) else: print(json.dumps(format_cursor_response("allow", f"Post-read secret scan error: {exc}", event_name))) def _build_cli_parser(): p = argparse.ArgumentParser(description=f"Secret scanner hooks v{__version__}") p.add_argument("--mode", choices=["pre", "post"], required=True) p.add_argument("--client", choices=["claude_code", "cursor"], default=None) return p def main(argv=None, *, default_client=None): args = _build_cli_parser().parse_args(argv) if argv is not None else _build_cli_parser().parse_args() if default_client and args.client is None: args.client = default_client if args.mode == "pre": run_pre_hook(args.client) else: run_post_hook(args.client) def console_main(): main() def console_main_claude(): main(default_client="claude_code") def console_main_cursor(): main(default_client="cursor") if __name__ == "__main__": main()