Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:20:28 +08:00
commit b727790a9e
65 changed files with 16412 additions and 0 deletions

View File

@@ -0,0 +1,533 @@
#!/usr/bin/env bash
# ============================================================================
# File Scanner - Detect dangerous files and sensitive configurations
# ============================================================================
# Purpose: Identify files that should not be committed to version control
# Version: 1.0.0
# Usage: ./file-scanner.sh <path> <patterns> <include_hidden> <check_gitignore>
# Returns: 0=no dangerous files, 1=dangerous files found, 2=error
# ============================================================================
set -euo pipefail
# Source shared validation library (optional; silently skipped when absent)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
if [[ -f "${PLUGIN_ROOT}/scripts/validate-lib.sh" ]]; then
source "${PLUGIN_ROOT}/scripts/validate-lib.sh"
fi
# ============================================================================
# Configuration (positional CLI arguments with defaults)
# ============================================================================
# $1 - directory to scan (default: current directory)
PATH_TO_SCAN="${1:-.}"
# $2 - pattern categories to check: "all" or a keyword (env/credentials/keys)
PATTERNS="${2:-all}"
# $3 - "true" to include dotfiles in the scan
INCLUDE_HIDDEN="${3:-true}"
# $4 - "true" to cross-check findings against the scan root's .gitignore
CHECK_GITIGNORE="${4:-true}"
# Running total of matched files and the detail records behind it
DANGEROUS_FILES_FOUND=0
declare -a FINDINGS=()
# ============================================================================
# Dangerous File Pattern Definitions
# ============================================================================
# Each array holds glob patterns matched against file basenames. Callers also
# derive a regex from each pattern by replacing '*' with '.*', so every entry
# is effectively matched both as a glob and as an unanchored regex.
# Environment Files (CRITICAL)
declare -a ENV_PATTERNS=(
".env"
".env.local"
".env.production"
".env.development"
".env.staging"
".env.test"
"env.sh"
"setenv.sh"
".envrc"
)
# Credential Files (CRITICAL)
declare -a CREDENTIAL_PATTERNS=(
"*credentials*"
"*secrets*"
"*password*"
".aws/credentials"
".azure/credentials"
".gcp/credentials.json"
"gcloud/credentials"
"service-account*.json"
)
# Private Keys (CRITICAL)
declare -a KEY_PATTERNS=(
"id_rsa"
"id_dsa"
"id_ed25519"
"id_ecdsa"
"*.pem"
"*.key"
"*.p12"
"*.pfx"
"*.jks"
"*.keystore"
".gnupg/*"
".ssh/id_*"
)
# Database Files (HIGH)
declare -a DATABASE_PATTERNS=(
"*.db"
"*.sqlite"
"*.sqlite3"
"dump.sql"
"*backup*.sql"
"*.mdb"
"*.accdb"
)
# Configuration Files (MEDIUM)
declare -a CONFIG_PATTERNS=(
"config/database.yml"
"appsettings.json"
"wp-config.php"
"settings.py"
".htpasswd"
)
# Backup Files (MEDIUM) — editor/backup artifacts that may hold old secrets
declare -a BACKUP_PATTERNS=(
"*.bak"
"*.backup"
"*.old"
"*.orig"
"*.copy"
"*~"
"*.swp"
"*.swo"
)
# Log Files (LOW)
declare -a LOG_PATTERNS=(
"*.log"
"debug.log"
"error.log"
)
# ============================================================================
# Severity Classification
# ============================================================================
get_file_severity() {
  # Classify a file name into a severity bucket based on which dangerous
  # pattern group it matches first. Echoes: critical|high|medium|low|unknown.
  # $1 - bare file name (not a path)
  # FIX: the pattern loops referenced the mangled token "$(unknown)" (which
  # would try to run a command named "unknown"); restored to "${filename}".
  local filename="$1" pattern
  # CRITICAL: environment files, credential stores, private keys
  for pattern in "${ENV_PATTERNS[@]}" "${CREDENTIAL_PATTERNS[@]}" "${KEY_PATTERNS[@]}"; do
    # Glob match first; fall back to a regex built by turning '*' into '.*'
    # (NOTE: the regex is unanchored, so it can over-match — e.g. "catalog"
    # contains a ".log"-shaped substring).
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "critical"
      return
    fi
  done
  # HIGH: database files
  for pattern in "${DATABASE_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "high"
      return
    fi
  done
  # MEDIUM: configuration and backup files
  for pattern in "${CONFIG_PATTERNS[@]}" "${BACKUP_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "medium"
      return
    fi
  done
  # LOW: log files
  for pattern in "${LOG_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "low"
      return
    fi
  done
  echo "unknown"
}
get_file_type() {
  # Echo a human-readable type label for a file name, checking pattern groups
  # in the same precedence order as get_file_severity.
  # $1 - bare file name (not a path)
  # FIX: restored the mangled "$(unknown)" token to "${filename}".
  local filename="$1" pattern
  for pattern in "${ENV_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Environment File"
      return
    fi
  done
  for pattern in "${CREDENTIAL_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Credential File"
      return
    fi
  done
  for pattern in "${KEY_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Private Key"
      return
    fi
  done
  for pattern in "${DATABASE_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Database File"
      return
    fi
  done
  for pattern in "${CONFIG_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Configuration File"
      return
    fi
  done
  for pattern in "${BACKUP_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Backup File"
      return
    fi
  done
  for pattern in "${LOG_PATTERNS[@]}"; do
    if [[ "${filename}" == ${pattern} ]] || [[ "${filename}" =~ ${pattern//\*/.*} ]]; then
      echo "Log File"
      return
    fi
  done
  echo "Unknown"
}
get_risk_description() {
  # Echo a one-line risk summary for a file-type label produced by
  # get_file_type. Unknown labels fall through to a generic message.
  # $1 - file type label
  local kind="$1"
  if [[ "${kind}" == "Environment File" ]]; then
    echo "Contains secrets, API keys, and configuration"
  elif [[ "${kind}" == "Credential File" ]]; then
    echo "Direct access credentials"
  elif [[ "${kind}" == "Private Key" ]]; then
    echo "Authentication keys"
  elif [[ "${kind}" == "Database File" ]]; then
    echo "May contain sensitive user data"
  elif [[ "${kind}" == "Configuration File" ]]; then
    echo "May contain hardcoded secrets"
  elif [[ "${kind}" == "Backup File" ]]; then
    echo "May contain previous versions with secrets"
  elif [[ "${kind}" == "Log File" ]]; then
    echo "May contain leaked sensitive information"
  else
    echo "Unknown risk"
  fi
}
get_remediation() {
  # Echo the recommended remediation for a finding.
  # $1 - file type label (accepted for interface parity; not consulted here)
  # $2 - "true"/"false": whether the file is already covered by .gitignore
  local file_type="$1"
  local in_gitignore="$2"
  case "${in_gitignore}" in
    false)
      echo "Add to .gitignore, remove from git history, rotate credentials"
      ;;
    *)
      echo "Verify .gitignore is working, review if file should exist"
      ;;
  esac
}
# ============================================================================
# .gitignore Checking
# ============================================================================
is_in_gitignore() {
  # Best-effort check whether a file is covered by the scan root's .gitignore.
  # Echoes "true" or "false". NOTE: this is a heuristic, not full gitignore
  # semantics — no negation ('!'), no directory-anchored patterns.
  # $1 - file path under PATH_TO_SCAN
  # Uses global: PATH_TO_SCAN
  # FIX: removed the dead 'dirname' local that was computed but never used.
  local file="$1"
  local gitignore="${PATH_TO_SCAN}/.gitignore"
  if [[ ! -f "${gitignore}" ]]; then
    echo "false"
    return
  fi
  local basename
  basename=$(basename "${file}")
  # Fast path: the basename (or full path) appears verbatim somewhere in the
  # file. grep -F is a substring match, so this can over-report coverage.
  if grep -qF "${basename}" "${gitignore}" 2>/dev/null; then
    echo "true"
    return
  fi
  if grep -qF "${file}" "${gitignore}" 2>/dev/null; then
    echo "true"
    return
  fi
  # Slow path: treat each non-comment, non-empty line as a glob against the
  # basename (not the full gitignore spec).
  local pattern
  while IFS= read -r pattern; do
    [[ "${pattern}" =~ ^#.*$ || -z "${pattern}" ]] && continue
    if [[ "${basename}" == ${pattern} ]]; then
      echo "true"
      return
    fi
  done < "${gitignore}"
  echo "false"
}
# ============================================================================
# File Scanning
# ============================================================================
should_check_pattern() {
  # Decide whether a file name falls inside the user-requested pattern
  # category (global PATTERNS). Returns 0 = check it, 1 = skip it.
  # $1 - bare file name
  # FIX: restored the mangled "$(unknown)" token to "${filename}".
  local filename="$1" pattern
  # "all" disables category filtering entirely
  if [[ "${PATTERNS}" == "all" ]]; then
    return 0
  fi
  case "${PATTERNS}" in
    *env*)
      for pattern in "${ENV_PATTERNS[@]}"; do
        [[ "${filename}" == ${pattern} ]] && return 0
      done
      ;;
    *credentials*)
      for pattern in "${CREDENTIAL_PATTERNS[@]}"; do
        [[ "${filename}" == ${pattern} ]] && return 0
      done
      ;;
    *keys*)
      for pattern in "${KEY_PATTERNS[@]}"; do
        [[ "${filename}" == ${pattern} ]] && return 0
      done
      ;;
  esac
  return 1
}
scan_file() {
  # Inspect one file; when it matches a dangerous pattern, append a record
  # "severity|path|type|size|in_gitignore|risk|remediation" to FINDINGS and
  # bump DANGEROUS_FILES_FOUND.
  # $1 - path of the file to inspect
  # FIXES: restored the mangled "$(unknown)" tokens to "${filename}", and
  # replaced ((VAR++)) with VAR=$((VAR + 1)) — the post-increment form returns
  # exit status 1 when the old value is 0, which aborts the whole script
  # under 'set -e'.
  local filepath="$1"
  local filename
  filename=$(basename "${filepath}")
  # Honor the hidden-file switch
  if [[ "${filename}" =~ ^\. && "${INCLUDE_HIDDEN}" != "true" ]]; then
    return
  fi
  # Never scan VCS metadata or dependency/build output
  if [[ "${filepath}" =~ (\.git|node_modules|vendor|dist|build)/ ]]; then
    return
  fi
  local severity
  severity=$(get_file_severity "${filename}")
  if [[ "${severity}" == "unknown" ]]; then
    return
  fi
  if ! should_check_pattern "${filename}"; then
    return
  fi
  local file_type
  file_type=$(get_file_type "${filename}")
  # BSD stat first, then GNU stat; fall back to 0 when both fail
  local size
  size=$(stat -f%z "${filepath}" 2>/dev/null || stat -c%s "${filepath}" 2>/dev/null || echo "0")
  local in_gitignore="false"
  if [[ "${CHECK_GITIGNORE}" == "true" ]]; then
    in_gitignore=$(is_in_gitignore "${filepath}")
  fi
  local risk
  risk=$(get_risk_description "${file_type}")
  local remediation
  remediation=$(get_remediation "${file_type}" "${in_gitignore}")
  FINDINGS+=("${severity}|${filepath}|${file_type}|${size}|${in_gitignore}|${risk}|${remediation}")
  DANGEROUS_FILES_FOUND=$((DANGEROUS_FILES_FOUND + 1))
}
# ============================================================================
# Main Execution
# ============================================================================
# ----------------------------------------------------------------------------
# human_size BYTES - render a byte count as a rough human-readable size.
# Extracted to remove the duplicated MB/KB/bytes logic in the report loops.
# ----------------------------------------------------------------------------
human_size() {
  local size="$1"
  if [[ ${size} -ge 1048576 ]]; then
    echo "$(( size / 1048576 )) MB"
  elif [[ ${size} -ge 1024 ]]; then
    echo "$(( size / 1024 )) KB"
  else
    echo "${size} bytes"
  fi
}
# ----------------------------------------------------------------------------
# main - entry point. Scans PATH_TO_SCAN, prints a severity-grouped report.
# Exits 0 when clean, 1 when dangerous files were found, 2 on bad arguments.
# FIX: all counters use x=$((x + 1)) instead of ((x++)) — the latter returns
# exit status 1 when the pre-increment value is 0, aborting under 'set -e'.
# ----------------------------------------------------------------------------
main() {
  if [[ ! -d "${PATH_TO_SCAN}" ]]; then
    echo "ERROR: Path is not a directory: ${PATH_TO_SCAN}" >&2
    exit 2
  fi
  echo "Dangerous Files Scan Results"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "Path: ${PATH_TO_SCAN}"
  echo "Include Hidden: ${INCLUDE_HIDDEN}"
  echo "Check .gitignore: ${CHECK_GITIGNORE}"
  echo ""
  # NUL-delimited find so paths with spaces/newlines survive
  local files_scanned=0
  if [[ "${INCLUDE_HIDDEN}" == "true" ]]; then
    while IFS= read -r -d '' file; do
      scan_file "${file}"
      files_scanned=$((files_scanned + 1))
    done < <(find "${PATH_TO_SCAN}" -type f -print0 2>/dev/null)
  else
    while IFS= read -r -d '' file; do
      scan_file "${file}"
      files_scanned=$((files_scanned + 1))
    done < <(find "${PATH_TO_SCAN}" -type f -not -path '*/.*' -print0 2>/dev/null)
  fi
  echo "Files Scanned: ${files_scanned}"
  echo ""
  if [[ ${DANGEROUS_FILES_FOUND} -eq 0 ]]; then
    echo "✅ SUCCESS: No dangerous files detected"
    echo "All files safe"
    exit 0
  fi
  echo "⚠️ DANGEROUS FILES DETECTED: ${DANGEROUS_FILES_FOUND}"
  echo ""
  if [[ "${CHECK_GITIGNORE}" == "true" && ! -f "${PATH_TO_SCAN}/.gitignore" ]]; then
    echo "⚠️ WARNING: No .gitignore file found"
    echo " Recommendation: Create .gitignore to prevent committing sensitive files"
    echo ""
  fi
  # Tally findings by severity and .gitignore coverage
  local critical_count=0 high_count=0 medium_count=0 low_count=0
  local not_in_gitignore=0
  local finding severity filepath file_type size in_gitignore risk remediation
  for finding in "${FINDINGS[@]}"; do
    IFS='|' read -r severity filepath file_type size in_gitignore risk remediation <<< "${finding}"
    case "${severity}" in
      critical) critical_count=$((critical_count + 1)) ;;
      high) high_count=$((high_count + 1)) ;;
      medium) medium_count=$((medium_count + 1)) ;;
      low) low_count=$((low_count + 1)) ;;
    esac
    [[ "${in_gitignore}" == "false" ]] && not_in_gitignore=$((not_in_gitignore + 1))
  done
  if [[ ${critical_count} -gt 0 ]]; then
    echo "CRITICAL Files (${critical_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity filepath file_type size in_gitignore risk remediation <<< "${finding}"
      if [[ "${severity}" == "critical" ]]; then
        echo "${filepath} ($(human_size "${size}"))"
        echo " Type: ${file_type}"
        echo " Risk: ${risk}"
        if [[ "${CHECK_GITIGNORE}" == "true" ]]; then
          if [[ "${in_gitignore}" == "true" ]]; then
            echo " Status: In .gitignore ✓"
          else
            echo " Status: NOT in .gitignore ⚠️"
          fi
        fi
        echo " Remediation: ${remediation}"
        echo ""
      fi
    done
  fi
  if [[ ${high_count} -gt 0 ]]; then
    echo "HIGH Files (${high_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity filepath file_type size in_gitignore risk remediation <<< "${finding}"
      if [[ "${severity}" == "high" ]]; then
        echo " ⚠️ ${filepath} ($(human_size "${size}"))"
        echo " Type: ${file_type}"
        if [[ "${CHECK_GITIGNORE}" == "true" ]]; then
          echo " Status: $([ "${in_gitignore}" == "true" ] && echo "In .gitignore ✓" || echo "NOT in .gitignore ⚠️")"
        fi
        echo ""
      fi
    done
  fi
  if [[ ${medium_count} -gt 0 ]]; then
    echo "MEDIUM Files (${medium_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity filepath file_type size in_gitignore risk remediation <<< "${finding}"
      if [[ "${severity}" == "medium" ]]; then
        echo " 💡 ${filepath}"
        echo " Type: ${file_type}"
        echo ""
      fi
    done
  fi
  echo "Summary:"
  echo " Critical: ${critical_count}"
  echo " High: ${high_count}"
  echo " Medium: ${medium_count}"
  echo " Low: ${low_count}"
  if [[ "${CHECK_GITIGNORE}" == "true" ]]; then
    echo " Not in .gitignore: ${not_in_gitignore}"
  fi
  echo ""
  echo "Action Required: $([ ${critical_count} -gt 0 ] || [ ${not_in_gitignore} -gt 0 ] && echo "YES" || echo "REVIEW")"
  exit 1
}
main "$@"

View File

@@ -0,0 +1,407 @@
#!/usr/bin/env bash
# ============================================================================
# Permission Checker - Audit file permissions for security issues
# ============================================================================
# Purpose: Detect world-writable files, overly permissive scripts, and permission issues
# Version: 1.0.0
# Usage: ./permission-checker.sh <path> <strict> <check_executables> <report_all>
# Returns: 0=all permissions correct, 1=issues found, 2=error
# ============================================================================
set -euo pipefail
# Source shared validation library (optional; silently skipped when absent)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
if [[ -f "${PLUGIN_ROOT}/scripts/validate-lib.sh" ]]; then
source "${PLUGIN_ROOT}/scripts/validate-lib.sh"
fi
# ============================================================================
# Configuration (positional CLI arguments with defaults)
# ============================================================================
# $1 - file or directory to audit (default: current directory)
PATH_TO_SCAN="${1:-.}"
# $2 - "true" to also flag minor deviations from expected permissions
STRICT="${2:-false}"
# $3 - "true" to validate shebangs on executable scripts
CHECK_EXECUTABLES="${3:-true}"
# $4 - "true" to additionally record correctly-permissioned files as "info"
REPORT_ALL="${4:-false}"
# Running issue count and the detail records behind it
ISSUES_FOUND=0
declare -a FINDINGS=()
# ============================================================================
# Permission Classification
# ============================================================================
get_permission_octal() {
local file="$1"
stat -f "%Op" "${file}" 2>/dev/null | sed 's/.*\([0-7][0-7][0-7][0-7]\)$/\1/' || \
stat -c "%a" "${file}" 2>/dev/null || echo "0644"
}
get_permission_symbolic() {
# Echo a symbolic permission string for a file, e.g. "rw-r--r--".
# NOTE(review): 'tail -c 10' keeps the last 10 bytes of the ls mode field
# *including* the trailing newline, so the leading file-type character is
# dropped and on SELinux/ACL systems ('.'/'+' suffix) the output shifts —
# confirm the intended field width.
local file="$1"
ls -ld "${file}" 2>/dev/null | awk '{print $1}' | tail -c 10
}
is_world_writable() {
  # True when the final ("other") octal digit carries the write bit (2,3,6,7).
  # $1 - octal permission string (e.g. "644" or "0644")
  local mode="$1"
  case "${mode: -1}" in
    2|3|6|7) return 0 ;;
    *) return 1 ;;
  esac
}
is_world_readable() {
  # True when the final ("other") octal digit carries the read bit (4,5,6,7).
  # $1 - octal permission string (e.g. "644" or "0644")
  local mode="$1"
  case "${mode: -1}" in
    4|5|6|7) return 0 ;;
    *) return 1 ;;
  esac
}
is_executable() {
  # True when any octal digit carries an execute bit (an odd digit: 1,3,5,7).
  # Matches execute permission in ANY position (user, group, or other).
  # $1 - octal permission string
  local mode="$1"
  [[ "${mode}" == *[1357]* ]]
}
# ============================================================================
# Severity Classification
# ============================================================================
get_issue_severity() {
  # Map an issue identifier to its severity label (critical/high/medium/low).
  # $1 - issue type identifier
  # $2 - octal permissions (accepted for interface parity; not consulted)
  local issue_type="$1"
  local perms="$2"
  case "${issue_type}" in
    world_writable_executable|world_writable)
      echo "critical"
      ;;
    missing_shebang|overly_permissive_sensitive)
      echo "high"
      ;;
    wrong_directory_perms|non_executable_script)
      echo "medium"
      ;;
    *)
      # includes inconsistent_perms and anything unrecognized
      echo "low"
      ;;
  esac
}
# ============================================================================
# Shebang Validation
# ============================================================================
has_shebang() {
  # True when the first line of an existing regular file starts with "#!".
  # $1 - path to the file
  local script="$1"
  [[ -f "${script}" ]] || return 1
  local header
  header=$(head -n 1 "${script}" 2>/dev/null || echo "")
  [[ "${header}" == '#!'* ]]
}
get_expected_shebang() {
  # Echo the conventional shebang for a script based on its file extension,
  # or an empty string when the extension is not recognized.
  # $1 - path (only the basename is examined)
  local name
  name=$(basename "$1")
  if [[ "${name}" == *.sh || "${name}" == *.bash ]]; then
    echo "#!/usr/bin/env bash"
  elif [[ "${name}" == *.py ]]; then
    echo "#!/usr/bin/env python3"
  elif [[ "${name}" == *.js ]]; then
    echo "#!/usr/bin/env node"
  elif [[ "${name}" == *.rb ]]; then
    echo "#!/usr/bin/env ruby"
  else
    echo ""
  fi
}
# ============================================================================
# Expected Permissions
# ============================================================================
get_expected_permissions() {
  # Echo the recommended octal permissions for a path, derived from its name,
  # its location (SSH/GnuPG trees), whether it is a directory, and whether it
  # is currently executable or carries a shebang.
  # $1 - path to inspect
  local target="$1"
  local name
  name=$(basename "${target}")
  local currently_exec="false"
  [[ -x "${target}" ]] && currently_exec="true"
  # Secret-bearing files: owner read/write only
  if [[ "${name}" =~ ^\.env || "${name}" =~ credentials || "${name}" =~ secrets ]]; then
    echo "600"
    return
  fi
  # SSH / GnuPG material: public halves are shareable, everything else is not
  if [[ "${target}" =~ \.ssh/id_ || "${target}" =~ \.gnupg/ ]]; then
    if [[ "${name}" =~ \.pub$ ]]; then
      echo "644"
    else
      echo "600"
    fi
    return
  fi
  # Script sources: executable when already executable or shebanged
  if [[ "${name}" =~ \.(sh|bash|py|js|rb)$ ]]; then
    if [[ "${currently_exec}" == "true" ]] || has_shebang "${target}"; then
      echo "755"
    else
      echo "644"
    fi
    return
  fi
  # Directories: private for key material, world-traversable otherwise
  if [[ -d "${target}" ]]; then
    if [[ "${name}" =~ ^\.ssh$ || "${name}" =~ ^\.gnupg$ ]]; then
      echo "700"
    else
      echo "755"
    fi
    return
  fi
  # Plain data file
  echo "644"
}
# ============================================================================
# Permission Checking
# ============================================================================
check_file_permissions() {
  # Audit one path against expected permissions, appending findings to
  # FINDINGS as "severity|file|perms|symbolic|expected|issue|risk|fix" and
  # bumping ISSUES_FOUND for each problem found.
  # $1 - path to audit
  # FIX: counters use x=$((x + 1)) — the old ((x++)) form returns exit
  # status 1 when the pre-increment value is 0, aborting under 'set -e'.
  local file="$1"
  local perms
  perms=$(get_permission_octal "${file}")
  local symbolic
  symbolic=$(get_permission_symbolic "${file}")
  local expected
  expected=$(get_expected_permissions "${file}")
  local basename
  basename=$(basename "${file}")
  local severity
  # Never audit VCS metadata or dependency/build output
  if [[ "${file}" =~ (\.git|node_modules|vendor|dist|build)/ ]]; then
    return
  fi
  # CRITICAL: 777 — world-writable and executable
  if [[ "${perms}" == "0777" || "${perms}" == "777" ]]; then
    severity=$(get_issue_severity "world_writable_executable" "${perms}")
    FINDINGS+=("${severity}|${file}|${perms}|${symbolic}|${expected}|World-writable and executable|Anyone can modify and execute|chmod ${expected} \"${file}\"")
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
    return
  fi
  # CRITICAL: 666 — world-writable
  if [[ "${perms}" == "0666" || "${perms}" == "666" ]]; then
    severity=$(get_issue_severity "world_writable" "${perms}")
    FINDINGS+=("${severity}|${file}|${perms}|${symbolic}|${expected}|World-writable file|Anyone can modify content|chmod ${expected} \"${file}\"")
    ISSUES_FOUND=$((ISSUES_FOUND + 1))
    return
  fi
  # HIGH: executable script file with no shebang
  if [[ -f "${file}" && -x "${file}" && "${CHECK_EXECUTABLES}" == "true" ]]; then
    if [[ "${basename}" =~ \.(sh|bash|py|js|rb)$ ]]; then
      if ! has_shebang "${file}"; then
        local expected_shebang
        expected_shebang=$(get_expected_shebang "${file}")
        FINDINGS+=("high|${file}|${perms}|${symbolic}|${perms}|Executable without shebang|May not execute correctly|Add ${expected_shebang} to first line")
        ISSUES_FOUND=$((ISSUES_FOUND + 1))
      fi
    fi
  fi
  # Sensitive files must not be world-readable (and should be 600 in strict mode)
  if [[ "${basename}" =~ ^\.env || "${basename}" =~ credentials || "${basename}" =~ secrets ]]; then
    if is_world_readable "${perms}"; then
      FINDINGS+=("high|${file}|${perms}|${symbolic}|600|Sensitive file world-readable|Secrets visible to all users|chmod 600 \"${file}\"")
      ISSUES_FOUND=$((ISSUES_FOUND + 1))
      return
    fi
    if [[ "${perms}" != "0600" && "${perms}" != "600" && "${STRICT}" == "true" ]]; then
      FINDINGS+=("medium|${file}|${perms}|${symbolic}|600|Sensitive file should be 600|Reduce permissions|chmod 600 \"${file}\"")
      ISSUES_FOUND=$((ISSUES_FOUND + 1))
      return
    fi
  fi
  # Strict mode: flag the two known minor discrepancies
  if [[ "${STRICT}" == "true" ]]; then
    if [[ "${perms}" != "0${expected}" && "${perms}" != "${expected}" ]]; then
      if [[ "${perms}" =~ ^0?775$ && "${expected}" == "755" ]]; then
        FINDINGS+=("medium|${file}|${perms}|${symbolic}|${expected}|Group-writable (strict mode)|Remove group write|chmod ${expected} \"${file}\"")
        ISSUES_FOUND=$((ISSUES_FOUND + 1))
      elif [[ "${perms}" =~ ^0?755$ && "${expected}" == "644" ]]; then
        FINDINGS+=("low|${file}|${perms}|${symbolic}|${expected}|Executable but should not be|Remove executable bit|chmod ${expected} \"${file}\"")
        ISSUES_FOUND=$((ISSUES_FOUND + 1))
      fi
    fi
  fi
  # Report-all mode: also record files whose permissions already match
  if [[ "${REPORT_ALL}" == "true" ]]; then
    if [[ "${perms}" == "0${expected}" || "${perms}" == "${expected}" ]]; then
      FINDINGS+=("info|${file}|${perms}|${symbolic}|${expected}|Permissions correct|N/A|N/A")
    fi
  fi
}
# ============================================================================
# Main Execution
# ============================================================================
# ----------------------------------------------------------------------------
# main - entry point. Audits PATH_TO_SCAN, prints a severity-grouped report.
# Exits 0 when clean, 1 when issues were found, 2 when the path is missing.
# FIX: all counters use x=$((x + 1)) instead of ((x++)) — the latter returns
# exit status 1 when the pre-increment value is 0, aborting under 'set -e'.
# ----------------------------------------------------------------------------
main() {
  if [[ ! -e "${PATH_TO_SCAN}" ]]; then
    echo "ERROR: Path does not exist: ${PATH_TO_SCAN}" >&2
    exit 2
  fi
  echo "File Permission Audit Results"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "Path: ${PATH_TO_SCAN}"
  echo "Strict Mode: ${STRICT}"
  echo "Check Executables: ${CHECK_EXECUTABLES}"
  echo ""
  # Accept either a single file or a whole tree (NUL-delimited find)
  local files_checked=0
  if [[ -f "${PATH_TO_SCAN}" ]]; then
    check_file_permissions "${PATH_TO_SCAN}"
    files_checked=$((files_checked + 1))
  elif [[ -d "${PATH_TO_SCAN}" ]]; then
    while IFS= read -r -d '' file; do
      check_file_permissions "${file}"
      files_checked=$((files_checked + 1))
    done < <(find "${PATH_TO_SCAN}" -print0 2>/dev/null)
  fi
  echo "Files Checked: ${files_checked}"
  echo ""
  if [[ ${ISSUES_FOUND} -eq 0 ]]; then
    echo "✅ SUCCESS: All file permissions correct"
    echo "No permission issues detected"
    exit 0
  fi
  echo "⚠️ PERMISSION ISSUES DETECTED: ${ISSUES_FOUND}"
  echo ""
  # Tally findings by severity ("info" records are intentionally not counted)
  local critical_count=0 high_count=0 medium_count=0 low_count=0
  local finding severity file perms symbolic expected issue risk fix
  for finding in "${FINDINGS[@]}"; do
    IFS='|' read -r severity file perms symbolic expected issue risk fix <<< "${finding}"
    case "${severity}" in
      critical) critical_count=$((critical_count + 1)) ;;
      high) high_count=$((high_count + 1)) ;;
      medium) medium_count=$((medium_count + 1)) ;;
      low) low_count=$((low_count + 1)) ;;
      info) ;;
    esac
  done
  if [[ ${critical_count} -gt 0 ]]; then
    echo "CRITICAL Issues (${critical_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file perms symbolic expected issue risk fix <<< "${finding}"
      if [[ "${severity}" == "critical" ]]; then
        echo "${file} (${perms})"
        echo " Current: ${symbolic} (${perms})"
        echo " Issue: ${issue}"
        echo " Risk: ${risk}"
        echo " Fix: ${fix}"
        echo ""
      fi
    done
  fi
  if [[ ${high_count} -gt 0 ]]; then
    echo "HIGH Issues (${high_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file perms symbolic expected issue risk fix <<< "${finding}"
      if [[ "${severity}" == "high" ]]; then
        echo " ⚠️ ${file} (${perms})"
        echo " Issue: ${issue}"
        echo " Fix: ${fix}"
        echo ""
      fi
    done
  fi
  if [[ ${medium_count} -gt 0 ]]; then
    echo "MEDIUM Issues (${medium_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file perms symbolic expected issue risk fix <<< "${finding}"
      if [[ "${severity}" == "medium" ]]; then
        echo " 💡 ${file} (${perms})"
        echo " Recommendation: ${issue}"
        echo " Fix: ${fix}"
        echo ""
      fi
    done
  fi
  echo "Summary:"
  echo " Critical: ${critical_count}"
  echo " High: ${high_count}"
  echo " Medium: ${medium_count}"
  echo " Low: ${low_count}"
  echo ""
  if [[ ${critical_count} -gt 0 ]]; then
    echo "Action Required: FIX IMMEDIATELY"
  elif [[ ${high_count} -gt 0 ]]; then
    echo "Action Required: YES"
  else
    echo "Action Required: REVIEW"
  fi
  exit 1
}
main "$@"

View File

@@ -0,0 +1,416 @@
#!/usr/bin/env bash
# ============================================================================
# Secret Scanner - Detect exposed secrets with 50+ patterns
# ============================================================================
# Purpose: Comprehensive secret detection for API keys, tokens, credentials
# Version: 1.0.0
# Usage: ./secret-scanner.sh <path> <recursive> <patterns> <exclude> <severity>
# Returns: 0=no secrets, 1=secrets found, 2=error
# ============================================================================
set -euo pipefail
# Source shared validation library (optional; silently skipped when absent)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PLUGIN_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"
if [[ -f "${PLUGIN_ROOT}/scripts/validate-lib.sh" ]]; then
source "${PLUGIN_ROOT}/scripts/validate-lib.sh"
fi
# ============================================================================
# Configuration (positional CLI arguments with defaults)
# ============================================================================
# $1 - file or directory to scan (default: current directory)
PATH_TO_SCAN="${1:-.}"
# $2 - "true" to descend into subdirectories
RECURSIVE="${2:-true}"
# $3 - pattern categories: "all" or keywords (api-keys/private-keys/passwords/cloud)
PATTERNS="${3:-all}"
# $4 - comma-separated regexes of paths to skip
EXCLUDE="${4:-}"
# $5 - minimum severity to report: critical|high|medium|low
MIN_SEVERITY="${5:-medium}"
# Running hit count and the detail records behind it
SECRETS_FOUND=0
declare -a FINDINGS=()
# ============================================================================
# Secret Pattern Definitions (50+ patterns)
# ============================================================================
# API Keys & Service Tokens
# Keys are pattern names (fed to get_pattern_severity); values are ERE regexes
# applied with 'grep -E'.
# NOTE(review): broad entries such as twitter_api_key ([A-Za-z0-9]{25}) and
# azure_storage_key (any 88-char base64 run) will match arbitrary strings of
# the right shape — confirm the false-positive rate is acceptable.
declare -A API_KEY_PATTERNS=(
# Stripe
["stripe_live_key"]='sk_live_[a-zA-Z0-9]{24,}'
["stripe_test_key"]='sk_test_[a-zA-Z0-9]{24,}'
["stripe_publishable_live"]='pk_live_[a-zA-Z0-9]{24,}'
["stripe_publishable_test"]='pk_test_[a-zA-Z0-9]{24,}'
# OpenAI
["openai_api_key"]='sk-[a-zA-Z0-9]{32,}'
# AWS
["aws_access_key_id"]='AKIA[0-9A-Z]{16}'
["aws_secret_access_key"]='aws_secret_access_key.*[=:].*[A-Za-z0-9/+=]{40}'
# Google
["google_api_key"]='AIza[0-9A-Za-z_-]{35}'
["google_oauth_id"]='[0-9]+-[0-9A-Za-z_-]{32}\.apps\.googleusercontent\.com'
# GitHub
["github_personal_token"]='ghp_[a-zA-Z0-9]{36}'
["github_oauth_token"]='gho_[a-zA-Z0-9]{36}'
["github_app_token"]='ghs_[a-zA-Z0-9]{36}'
["github_user_token"]='ghu_[a-zA-Z0-9]{36}'
["github_refresh_token"]='ghr_[a-zA-Z0-9]{36}'
# Slack
["slack_token"]='xox[baprs]-[0-9a-zA-Z]{10,}'
["slack_webhook"]='https://hooks\.slack\.com/services/T[0-9A-Z]{8}/B[0-9A-Z]{8}/[0-9A-Za-z]{24}'
# Twitter
["twitter_access_token"]='[0-9]{15,}-[0-9a-zA-Z]{35,44}'
["twitter_api_key"]='[A-Za-z0-9]{25}'
["twitter_api_secret"]='[A-Za-z0-9]{50}'
# Facebook
["facebook_access_token"]='EAA[0-9A-Za-z]{90,}'
# SendGrid
["sendgrid_api_key"]='SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'
# Mailgun
["mailgun_api_key"]='key-[0-9a-zA-Z]{32}'
# Twilio
["twilio_account_sid"]='AC[a-f0-9]{32}'
["twilio_api_key"]='SK[a-f0-9]{32}'
# Azure
["azure_storage_key"]='[a-zA-Z0-9/+=]{88}'
["azure_connection_string"]='AccountKey=[a-zA-Z0-9/+=]{88}'
# Generic patterns (key/secret/token/password assignments, auth headers)
["generic_api_key"]='api[_-]?key.*[=:].*['\''"][a-zA-Z0-9]{20,}['\''"]'
["generic_secret"]='secret.*[=:].*['\''"][a-zA-Z0-9]{20,}['\''"]'
["generic_token"]='token.*[=:].*['\''"][a-zA-Z0-9]{20,}['\''"]'
["generic_password"]='password.*[=:].*['\''"][^'\''\"]{8,}['\''"]'
["bearer_token"]='Bearer [a-zA-Z0-9_-]{20,}'
["authorization_header"]='Authorization.*Basic [a-zA-Z0-9+/=]{20,}'
)
# Private Keys — PEM header markers, matched as fixed strings (grep -F)
declare -A PRIVATE_KEY_PATTERNS=(
["rsa_private_key"]='-----BEGIN RSA PRIVATE KEY-----'
["openssh_private_key"]='-----BEGIN OPENSSH PRIVATE KEY-----'
["private_key_generic"]='-----BEGIN PRIVATE KEY-----'
["pgp_private_key"]='-----BEGIN PGP PRIVATE KEY BLOCK-----'
["dsa_private_key"]='-----BEGIN DSA PRIVATE KEY-----'
["ec_private_key"]='-----BEGIN EC PRIVATE KEY-----'
["encrypted_private_key"]='-----BEGIN ENCRYPTED PRIVATE KEY-----'
)
# Cloud Provider Credentials
declare -A CLOUD_PATTERNS=(
["aws_credentials_block"]='aws_access_key_id|aws_secret_access_key'
["gcp_service_account"]='type.*service_account'
["azure_client_secret"]='client_secret.*[=:].*[a-zA-Z0-9~._-]{34,}'
["heroku_api_key"]='[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}'
)
# Database Connection Strings — any "scheme://user:password@" URI
declare -A DATABASE_PATTERNS=(
["mongodb_connection"]='mongodb(\+srv)?://[^:]+:[^@]+@'
["postgres_connection"]='postgres(ql)?://[^:]+:[^@]+@'
["mysql_connection"]='mysql://[^:]+:[^@]+@'
["redis_connection"]='redis://[^:]+:[^@]+@'
)
# ============================================================================
# Severity Classification
# ============================================================================
get_pattern_severity() {
  # Echo the severity tier (critical/high/medium/low) for a named secret
  # pattern. Names are matched with globs, mirroring the grouping of the
  # pattern tables.
  # $1 - pattern name (a key from one of the pattern tables)
  local name="$1"
  # Private keys, AWS root credentials, and DB connection strings
  if [[ "${name}" == *_private_key* || "${name}" == aws_access_key_id || \
        "${name}" == aws_secret_access_key || "${name}" == *_connection ]]; then
    echo "critical"
  # Live service keys and OAuth/access tokens
  elif [[ "${name}" == stripe_live_key || "${name}" == openai_api_key || \
          "${name}" == github_*_token || "${name}" == slack_token || \
          "${name}" == *_access_token ]]; then
    echo "high"
  # Passwords, generic secrets, and test keys
  elif [[ "${name}" == *_password || "${name}" == *_secret || \
          "${name}" == stripe_test_key || "${name}" == generic_* ]]; then
    echo "medium"
  else
    echo "low"
  fi
}
# ============================================================================
# Pattern Filtering
# ============================================================================
should_check_pattern() {
  # Decide whether a named pattern should be applied, given the PATTERNS
  # category filter and the MIN_SEVERITY threshold. Returns 0 = scan with it,
  # 1 = skip it.
  # $1 - pattern name
  # Uses globals: PATTERNS, MIN_SEVERITY
  local name="$1"
  local tier
  tier=$(get_pattern_severity "${name}")
  # Category filter. A PATTERNS value that matches none of the known category
  # keywords applies no filter at all (intentional fall-through).
  if [[ "${PATTERNS}" != "all" ]]; then
    case "${PATTERNS}" in
      *api-keys*)
        [[ "${name}" =~ _api_key|_token ]] || return 1
        ;;
      *private-keys*)
        [[ "${name}" =~ private_key ]] || return 1
        ;;
      *passwords*)
        [[ "${name}" =~ password ]] || return 1
        ;;
      *cloud*)
        [[ "${name}" =~ aws_|gcp_|azure_ ]] || return 1
        ;;
    esac
  fi
  # Severity threshold: drop anything below MIN_SEVERITY
  case "${MIN_SEVERITY}" in
    critical)
      [[ "${tier}" == "critical" ]] || return 1
      ;;
    high)
      [[ "${tier}" == "critical" || "${tier}" == "high" ]] || return 1
      ;;
    medium)
      [[ "${tier}" != "low" ]] || return 1
      ;;
    low)
      # report everything
      ;;
  esac
  return 0
}
# ============================================================================
# File Exclusion
# ============================================================================
should_exclude_file() {
  # Return 0 when the file should be skipped: VCS metadata, dependency or
  # build output, or a user-supplied EXCLUDE pattern (comma-separated regexes).
  # $1 - file path
  # Uses global: EXCLUDE
  local file="$1"
  # FIX: the previous regex '\.(git|node_modules|...)/' required a leading dot
  # before EVERY directory name (".node_modules/" etc.), so real
  # node_modules/vendor/dist/build trees were still scanned. Only .git is
  # actually dot-prefixed; this now matches the other two scanners.
  if [[ "${file}" =~ (\.git|node_modules|vendor|dist|build)/ ]]; then
    return 0
  fi
  if [[ -n "${EXCLUDE}" ]]; then
    local -a exclude_patterns
    IFS=',' read -ra exclude_patterns <<< "${EXCLUDE}"
    local pattern
    for pattern in "${exclude_patterns[@]}"; do
      if [[ "${file}" =~ ${pattern} ]]; then
        return 0
      fi
    done
  fi
  return 1
}
# ============================================================================
# Secret Scanning
# ============================================================================
scan_file() {
  # Scan one text file against every pattern table, appending
  # "severity|file|lines|pattern|type" records to FINDINGS and adding the
  # per-file hit count to SECRETS_FOUND.
  # $1 - path of the file to scan
  # FIXES: (a) ((file_findings++)) and ((SECRETS_FOUND += 0)) return exit
  # status 1 when the resulting expression value is 0, aborting under
  # 'set -e'; replaced with plain assignments. (b) each pattern previously
  # ran grep twice (once to test, once to collect line numbers); now a single
  # pass collects the line numbers and an empty result means "no match".
  local file="$1"
  local file_findings=0
  if should_exclude_file "${file}"; then
    return 0
  fi
  # Only scan files that 'file' identifies as text; binaries are skipped
  if ! file "${file}" 2>/dev/null | grep -q "text"; then
    return 0
  fi
  local pattern_name pattern line_numbers severity
  for pattern_name in "${!API_KEY_PATTERNS[@]}"; do
    if should_check_pattern "${pattern_name}"; then
      pattern="${API_KEY_PATTERNS[${pattern_name}]}"
      line_numbers=$(grep -nE "${pattern}" "${file}" 2>/dev/null | cut -d: -f1 | tr '\n' ',' | sed 's/,$//') || true
      if [[ -n "${line_numbers}" ]]; then
        severity=$(get_pattern_severity "${pattern_name}")
        FINDINGS+=("${severity}|${file}|${line_numbers}|${pattern_name}|API Key")
        file_findings=$((file_findings + 1))
      fi
    fi
  done
  for pattern_name in "${!PRIVATE_KEY_PATTERNS[@]}"; do
    if should_check_pattern "${pattern_name}"; then
      pattern="${PRIVATE_KEY_PATTERNS[${pattern_name}]}"
      # Fixed-string grep: PEM markers contain regex metacharacters
      line_numbers=$(grep -nF "${pattern}" "${file}" 2>/dev/null | cut -d: -f1 | tr '\n' ',' | sed 's/,$//') || true
      if [[ -n "${line_numbers}" ]]; then
        # Private key material is always critical regardless of pattern tier
        FINDINGS+=("critical|${file}|${line_numbers}|${pattern_name}|Private Key")
        file_findings=$((file_findings + 1))
      fi
    fi
  done
  for pattern_name in "${!CLOUD_PATTERNS[@]}"; do
    if should_check_pattern "${pattern_name}"; then
      pattern="${CLOUD_PATTERNS[${pattern_name}]}"
      line_numbers=$(grep -nE "${pattern}" "${file}" 2>/dev/null | cut -d: -f1 | tr '\n' ',' | sed 's/,$//') || true
      if [[ -n "${line_numbers}" ]]; then
        severity=$(get_pattern_severity "${pattern_name}")
        FINDINGS+=("${severity}|${file}|${line_numbers}|${pattern_name}|Cloud Credential")
        file_findings=$((file_findings + 1))
      fi
    fi
  done
  for pattern_name in "${!DATABASE_PATTERNS[@]}"; do
    if should_check_pattern "${pattern_name}"; then
      pattern="${DATABASE_PATTERNS[${pattern_name}]}"
      line_numbers=$(grep -nE "${pattern}" "${file}" 2>/dev/null | cut -d: -f1 | tr '\n' ',' | sed 's/,$//') || true
      if [[ -n "${line_numbers}" ]]; then
        # Connection strings embed live credentials — always critical
        FINDINGS+=("critical|${file}|${line_numbers}|${pattern_name}|Database Connection")
        file_findings=$((file_findings + 1))
      fi
    fi
  done
  SECRETS_FOUND=$((SECRETS_FOUND + file_findings))
  return 0
}
# ============================================================================
# Main Execution
# ============================================================================
# ----------------------------------------------------------------------------
# main - entry point. Scans PATH_TO_SCAN, prints a severity-grouped report.
# Exits 0 when clean, 1 when secrets were found, 2 when the path is missing.
# FIXES: counters use x=$((x + 1)) instead of ((x++)) (the latter returns
# status 1 at value 0 and aborts under 'set -e'); the non-recursive branch
# now reads NUL-delimited paths so names with spaces/newlines survive.
# ----------------------------------------------------------------------------
main() {
  if [[ ! -e "${PATH_TO_SCAN}" ]]; then
    echo "ERROR: Path does not exist: ${PATH_TO_SCAN}" >&2
    exit 2
  fi
  echo "Secret Scanner"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo "Path: ${PATH_TO_SCAN}"
  echo "Recursive: ${RECURSIVE}"
  echo "Min Severity: ${MIN_SEVERITY}"
  echo "Patterns: 50+"
  echo ""
  local files_scanned=0
  if [[ -f "${PATH_TO_SCAN}" ]]; then
    # Single file
    scan_file "${PATH_TO_SCAN}"
    files_scanned=$((files_scanned + 1))
  elif [[ -d "${PATH_TO_SCAN}" ]]; then
    if [[ "${RECURSIVE}" == "true" ]]; then
      while IFS= read -r -d '' file; do
        scan_file "${file}"
        files_scanned=$((files_scanned + 1))
      done < <(find "${PATH_TO_SCAN}" -type f -print0 2>/dev/null)
    else
      while IFS= read -r -d '' file; do
        scan_file "${file}"
        files_scanned=$((files_scanned + 1))
      done < <(find "${PATH_TO_SCAN}" -maxdepth 1 -type f -print0 2>/dev/null)
    fi
  fi
  echo "Files Scanned: ${files_scanned}"
  echo ""
  if [[ ${SECRETS_FOUND} -eq 0 ]]; then
    echo "✅ SUCCESS: No secrets detected"
    echo "All files clean"
    exit 0
  fi
  echo "⚠️ SECRETS DETECTED: ${SECRETS_FOUND}"
  echo ""
  # Tally findings by severity
  local critical_count=0 high_count=0 medium_count=0 low_count=0
  local finding severity file lines pattern type
  for finding in "${FINDINGS[@]}"; do
    IFS='|' read -r severity file lines pattern type <<< "${finding}"
    case "${severity}" in
      critical) critical_count=$((critical_count + 1)) ;;
      high) high_count=$((high_count + 1)) ;;
      medium) medium_count=$((medium_count + 1)) ;;
      low) low_count=$((low_count + 1)) ;;
    esac
  done
  if [[ ${critical_count} -gt 0 ]]; then
    echo "CRITICAL Issues (${critical_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file lines pattern type <<< "${finding}"
      if [[ "${severity}" == "critical" ]]; then
        echo "${file}:${lines}"
        echo " Type: ${type}"
        echo " Pattern: ${pattern}"
        echo " Remediation: Remove and rotate immediately"
        echo ""
      fi
    done
  fi
  if [[ ${high_count} -gt 0 ]]; then
    echo "HIGH Issues (${high_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file lines pattern type <<< "${finding}"
      if [[ "${severity}" == "high" ]]; then
        echo " ⚠️ ${file}:${lines}"
        echo " Type: ${type}"
        echo " Pattern: ${pattern}"
        echo ""
      fi
    done
  fi
  if [[ ${medium_count} -gt 0 ]]; then
    echo "MEDIUM Issues (${medium_count}):"
    for finding in "${FINDINGS[@]}"; do
      IFS='|' read -r severity file lines pattern type <<< "${finding}"
      if [[ "${severity}" == "medium" ]]; then
        echo " 💡 ${file}:${lines}"
        echo " Type: ${type}"
        echo ""
      fi
    done
  fi
  echo "Summary:"
  echo " Critical: ${critical_count}"
  echo " High: ${high_count}"
  echo " Medium: ${medium_count}"
  echo " Low: ${low_count}"
  echo ""
  echo "Action Required: YES"
  exit 1
}
main "$@"

View File

@@ -0,0 +1,386 @@
#!/usr/bin/env python3
"""
URL Validator - Check URL safety and detect malicious patterns
"""
import sys
import os
import re
import json
from pathlib import Path
from urllib.parse import urlparse
from typing import List, Dict, Tuple, Set
# ============================================================================
# Configuration
# ============================================================================
class Config:
    """Static allow/deny lists consulted by the URL safety checks."""

    # Free-of-charge TLDs that are disproportionately used for abuse.
    SUSPICIOUS_TLDS = {'.tk', '.ml', '.ga', '.cf', '.gq'}

    # Shortener hosts: the true destination cannot be verified from the URL.
    URL_SHORTENERS = {'bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly'}

    # Well-known package/source hosts considered trustworthy by default.
    TRUSTED_REGISTRIES = {
        'github.com',
        'gitlab.com',
        'pypi.org',
        'registry.hub.docker.com',
        'registry.npmjs.org',
    }
# ============================================================================
# URL Pattern Definitions
# ============================================================================
# Matches http(s)/ftp/file URLs (or bare www./ftp. hosts): optional userinfo,
# then either a dotted IPv4 address or a domain name with a 2+ letter TLD,
# an optional port, and an optional path/query/fragment tail.
URL_PATTERN = re.compile(
    r'(?:(?:https?|ftp|file)://|www\.|ftp\.)'  # scheme, or bare www./ftp. prefix
    r'(?:\S+(?::\S*)?@)?'  # optional user[:password]@ userinfo
    r'(?:'
    r'(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])'  # IPv4 first octet (1-223, skips 0.* and 224+ multicast)
    r'(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}'  # two middle octets (0-255)
    r'(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))'  # last octet (1-254, excludes .255 broadcast)
    r'|'
    r'(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)'  # first domain label (unicode letters allowed)
    r'(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*'  # additional dotted labels
    r'(?:\.(?:[a-z\u00a1-\uffff]{2,}))'  # TLD: 2+ letters
    r')'
    r'(?::\d{2,5})?'  # optional port
    r'(?:[/?#]\S*)?',  # optional path / query / fragment
    re.IGNORECASE
)
# Dangerous code execution patterns — each fetches remote content and hands
# it straight to an interpreter, so a compromised host or transport yields
# arbitrary code execution. Keys become the pattern_name in findings.
DANGEROUS_PATTERNS = {
    'curl_pipe_sh': re.compile(r'curl\s+[^|]+\|\s*(sh|bash)', re.IGNORECASE),  # curl URL | sh
    'wget_pipe_sh': re.compile(r'wget\s+[^|]+\|\s*(sh|bash)', re.IGNORECASE),  # wget URL | bash
    'curl_silent_pipe': re.compile(r'curl\s+-[a-zA-Z]*s[a-zA-Z]*\s+[^|]+\|\s*(sh|bash)', re.IGNORECASE),  # curl with -s in its flag cluster, piped to a shell
    'bash_redirect': re.compile(r'bash\s+<\s*\(\s*curl', re.IGNORECASE),  # bash <(curl ...) process substitution
    'eval_fetch': re.compile(r'eval.*fetch\s*\(', re.IGNORECASE),  # eval combined with fetch(...)
    'eval_curl': re.compile(r'eval.*curl', re.IGNORECASE),  # eval "$(curl ...)"-style execution
    'exec_wget': re.compile(r'exec\s*\(.*wget', re.IGNORECASE),  # exec(... wget ...)
    'rm_rf_url': re.compile(r'rm\s+-rf.*https?://', re.IGNORECASE),  # destructive rm -rf on the same line as a URL
}
# Obfuscation patterns — URL material hidden behind base64 / hex / unicode
# escapes, a common way to smuggle endpoints past casual code review.
OBFUSCATION_PATTERNS = {
    'base64_url': re.compile(r'(?:atob|base64|Buffer\.from)\s*\([^)]*https?:', re.IGNORECASE),  # decode call whose argument mentions a URL
    'hex_encoded': re.compile(r'\\x[0-9a-f]{2}.*https?:', re.IGNORECASE),  # \xNN escape sequences on a line with a URL
    'unicode_escape': re.compile(r'\\u[0-9a-f]{4}.*https?:', re.IGNORECASE),  # \uNNNN escape sequences on a line with a URL
}
# ============================================================================
# Severity Classification
# ============================================================================
class Severity:
    """String severity labels, listed from most to least severe."""
    CRITICAL, HIGH, MEDIUM, LOW = 'critical', 'high', 'medium', 'low'
# ============================================================================
# Finding Class
# ============================================================================
class Finding:
    """One URL-security finding anchored to a file and line number."""

    # Attribute names in declaration order; also fixes to_dict() key order.
    _FIELDS = ('file', 'line', 'url', 'issue', 'severity', 'risk', 'remediation')

    def __init__(self, file_path: str, line_num: int, url: str, issue: str,
                 severity: str, risk: str, remediation: str):
        values = (file_path, line_num, url, issue, severity, risk, remediation)
        for attr, value in zip(self._FIELDS, values):
            setattr(self, attr, value)

    def to_dict(self) -> Dict:
        """Serialize the finding as a plain dict (e.g. for JSON output)."""
        return {attr: getattr(self, attr) for attr in self._FIELDS}
# ============================================================================
# URL Validator
# ============================================================================
class URLValidator:
    """Scan a file or directory tree for URLs and risky code patterns.

    Usage: construct, call scan(), then report(). report() prints a
    human-readable summary and returns the process exit code
    (0 = clean, 1 = findings present).

    Attributes:
        findings: accumulated Finding objects.
        urls_checked: number of URL matches examined.
        files_scanned: number of text files actually read.
    """

    # Directory names skipped entirely: vendored/generated trees would
    # drown real findings in noise. Class-level so the set is built once.
    _EXCLUDE_DIRS = {'.git', 'node_modules', 'vendor', 'dist', 'build', '__pycache__'}

    def __init__(self, path: str, https_only: bool = False,
                 allow_localhost: bool = True, check_code_patterns: bool = True):
        """
        Args:
            path: File or directory to scan.
            https_only: If True, flag every plain-HTTP URL regardless of
                context; otherwise only production-context URLs are flagged.
            allow_localhost: Permit http:// for localhost / 127.0.0.1 / 0.0.0.0.
            check_code_patterns: Also run the remote-execution and
                obfuscation pattern checks.
        """
        self.path = Path(path)
        self.https_only = https_only
        self.allow_localhost = allow_localhost
        self.check_code_patterns = check_code_patterns
        self.findings: List[Finding] = []
        self.urls_checked = 0
        self.files_scanned = 0

    def is_text_file(self, file_path: Path) -> bool:
        """Heuristic text check: a NUL byte in the first 512 bytes means binary.

        Unreadable files are reported as non-text so they get skipped.
        """
        try:
            with open(file_path, 'rb') as f:
                chunk = f.read(512)
                if b'\0' in chunk:
                    return False
                return True
        except Exception:
            return False

    def should_exclude(self, file_path: Path) -> bool:
        """True if any path component is an excluded directory name."""
        return any(part in self._EXCLUDE_DIRS for part in file_path.parts)

    def get_context(self, file_path: Path, line_num: int) -> str:
        """Classify where a URL appears.

        Returns one of 'documentation', 'test', 'example', 'production',
        or 'unknown' (file unreadable / line number out of range). Some
        findings are downgraded outside of production context.
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
            if 0 <= line_num - 1 < len(lines):
                line = lines[line_num - 1].strip()
                # Shell/Python comments, C++-style comments, doc-block lines.
                if line.startswith('#') or line.startswith('//') or line.startswith('*'):
                    return 'documentation'
                lowered = str(file_path).lower()
                if 'test' in lowered or 'spec' in lowered:
                    return 'test'
                if 'example' in lowered or 'mock' in lowered:
                    return 'example'
                return 'production'
        except Exception:
            pass
        return 'unknown'

    def check_url_safety(self, url: str, file_path: Path, line_num: int) -> None:
        """Append findings for a single URL: protocol, host, and TLD checks."""
        try:
            parsed = urlparse(url)
        except Exception:
            return  # unparseable: nothing we can assert about it
        context = self.get_context(file_path, line_num)

        # --- Protocol checks (http/ftp/telnet branches return early) ---
        if parsed.scheme == 'http':
            # Plain HTTP to loopback is routine in development setups.
            if self.allow_localhost and parsed.hostname in ('localhost', '127.0.0.1', '0.0.0.0'):
                return
            if self.https_only or context == 'production':
                severity = Severity.HIGH if context == 'production' else Severity.MEDIUM
                self.findings.append(Finding(
                    str(file_path), line_num, url,
                    'Non-HTTPS URL',
                    severity,
                    'Man-in-the-middle attacks, data interception',
                    'Change to HTTPS: ' + url.replace('http://', 'https://')
                ))
            return
        if parsed.scheme in ('ftp', 'telnet'):
            self.findings.append(Finding(
                str(file_path), line_num, url,
                'Insecure protocol',
                Severity.HIGH,
                'Unencrypted data transmission',
                'Use secure alternatives (HTTPS, SFTP, SSH)'
            ))
            return
        if parsed.scheme == 'file':
            # Deliberately falls through to the host checks below.
            self.findings.append(Finding(
                str(file_path), line_num, url,
                'File protocol detected',
                Severity.MEDIUM,
                'Potential security risk, path disclosure',
                'Review necessity of file:// protocol'
            ))

        # --- Host checks (independent; a URL may trigger several) ---
        if parsed.hostname and re.match(r'^\d+\.\d+\.\d+\.\d+$', parsed.hostname):
            self.findings.append(Finding(
                str(file_path), line_num, url,
                'IP address instead of domain',
                Severity.LOW,
                'Harder to verify legitimacy, no certificate validation',
                'Use domain name instead of IP address'
            ))
        if parsed.hostname:
            for tld in Config.SUSPICIOUS_TLDS:
                if parsed.hostname.endswith(tld):
                    self.findings.append(Finding(
                        str(file_path), line_num, url,
                        'Suspicious TLD',
                        Severity.MEDIUM,
                        'Often used for malicious purposes',
                        'Verify domain legitimacy before use'
                    ))
                    break  # one TLD finding per URL is enough
        if parsed.hostname in Config.URL_SHORTENERS:
            self.findings.append(Finding(
                str(file_path), line_num, url,
                'Shortened URL',
                Severity.LOW,
                'Cannot verify destination',
                'Expand URL and use full destination'
            ))

    def check_dangerous_patterns(self, content: str, file_path: Path) -> None:
        """Flag remote-code-execution and URL-obfuscation patterns in content."""
        if not self.check_code_patterns:
            return
        for pattern_name, pattern in DANGEROUS_PATTERNS.items():
            for match in pattern.finditer(content):
                # Translate the match offset into a 1-based line number.
                line_num = content[:match.start()].count('\n') + 1
                self.findings.append(Finding(
                    str(file_path), line_num, match.group(0),
                    'Remote code execution pattern',
                    Severity.CRITICAL,
                    f'Executes arbitrary code from remote source ({pattern_name})',
                    'Download, verify checksum, review code, then execute'
                ))
        for pattern_name, pattern in OBFUSCATION_PATTERNS.items():
            for match in pattern.finditer(content):
                line_num = content[:match.start()].count('\n') + 1
                # Truncate the matched text so the report stays readable.
                self.findings.append(Finding(
                    str(file_path), line_num, match.group(0)[:50] + '...',
                    'Obfuscated URL',
                    Severity.HIGH,
                    f'URL obfuscation detected ({pattern_name})',
                    'Review obfuscated content for malicious intent'
                ))

    def scan_file(self, file_path: Path) -> None:
        """Scan one file: dangerous patterns first, then every URL per line."""
        if self.should_exclude(file_path) or not self.is_text_file(file_path):
            return
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            self.files_scanned += 1
            self.check_dangerous_patterns(content, file_path)
            lines = content.split('\n')
            for line_num, line in enumerate(lines, 1):
                for match in URL_PATTERN.finditer(line):
                    url = match.group(0)
                    self.urls_checked += 1
                    self.check_url_safety(url, file_path, line_num)
        except Exception as e:
            # Best-effort scanner: report the failure but keep scanning.
            print(f"Warning: Could not scan {file_path}: {e}", file=sys.stderr)

    def scan(self) -> None:
        """Scan the configured path: single file, or recursive directory walk."""
        if self.path.is_file():
            self.scan_file(self.path)
        elif self.path.is_dir():
            for file_path in self.path.rglob('*'):
                if file_path.is_file():
                    self.scan_file(file_path)

    def report(self) -> int:
        """Print the findings report; return 0 if clean, 1 otherwise."""
        print("URL Safety Scan Results")
        print("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
        print(f"Path: {self.path}")
        print(f"Files Scanned: {self.files_scanned}")
        print(f"URLs Checked: {self.urls_checked}")
        print()
        if not self.findings:
            print("✅ SUCCESS: All URLs safe")
            print("No unsafe URLs or malicious patterns detected")
            return 0
        # Bucket findings by severity for grouped output.
        critical = [f for f in self.findings if f.severity == Severity.CRITICAL]
        high = [f for f in self.findings if f.severity == Severity.HIGH]
        medium = [f for f in self.findings if f.severity == Severity.MEDIUM]
        low = [f for f in self.findings if f.severity == Severity.LOW]
        print(f"⚠️ UNSAFE URLS DETECTED: {len(self.findings)}")
        print()
        if critical:
            print(f"CRITICAL Issues ({len(critical)}):")
            for finding in critical:
                print(f"{finding.file}:{finding.line}")
                print(f"  Pattern: {finding.url}")
                print(f"  Risk: {finding.risk}")
                print(f"  Remediation: {finding.remediation}")
                print()
        if high:
            print(f"HIGH Issues ({len(high)}):")
            for finding in high:
                print(f"  ⚠️ {finding.file}:{finding.line}")
                print(f"    URL: {finding.url}")
                print(f"    Issue: {finding.issue}")
                print(f"    Remediation: {finding.remediation}")
                print()
        if medium:
            print(f"MEDIUM Issues ({len(medium)}):")
            for finding in medium:
                print(f"  💡 {finding.file}:{finding.line}")
                print(f"    Issue: {finding.issue}")
                print()
        print("Summary:")
        print(f"  Critical: {len(critical)}")
        print(f"  High: {len(high)}")
        print(f"  Medium: {len(medium)}")
        print(f"  Low: {len(low)}")
        print()
        # LOW/MEDIUM-only findings still exit non-zero, but the banner softens.
        print("Action Required: YES" if (critical or high) else "Review Recommended")
        return 1
# ============================================================================
# Main
# ============================================================================
def main():
    """CLI entry point.

    Positional args: <path> [https_only] [allow_localhost] [check_code_patterns]
    Exit codes: 0 = clean, 1 = findings, 2 = usage or path error.
    """
    argv = sys.argv
    if len(argv) < 2:
        print("Usage: url-validator.py <path> [https_only] [allow_localhost] [check_code_patterns]")
        sys.exit(2)

    def bool_arg(position: int, default: bool) -> bool:
        # Optional flags arrive as the literal strings 'true' / 'false'.
        if len(argv) > position:
            return argv[position].lower() == 'true'
        return default

    path = argv[1]
    https_only = bool_arg(2, False)
    allow_localhost = bool_arg(3, True)
    check_code_patterns = bool_arg(4, True)

    if not os.path.exists(path):
        print(f"ERROR: Path does not exist: {path}", file=sys.stderr)
        sys.exit(2)

    validator = URLValidator(path, https_only, allow_localhost, check_code_patterns)
    validator.scan()
    sys.exit(validator.report())


if __name__ == '__main__':
    main()