Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 17:51:02 +08:00
commit ff1f4bd119
252 changed files with 72682 additions and 0 deletions

View File

@@ -0,0 +1,141 @@
# GitHub Actions - Semgrep Security Scanning
# Save as .github/workflows/semgrep.yml
#
# Runs Semgrep through the semgrep-action, publishes the findings to GitHub
# code scanning as SARIF, and keeps the SARIF file as a build artifact.
name: Semgrep Security Scan

on:
  # Scan on push to main/master
  push:
    branches:
      - main
      - master
  # Scan pull requests
  pull_request:
    branches:
      - main
      - master
  # Manual trigger
  workflow_dispatch:
  # Schedule daily scans
  schedule:
    - cron: '0 0 * * *'  # Run at midnight UTC

jobs:
  semgrep:
    name: SAST Security Scan
    runs-on: ubuntu-latest
    # Required for uploading results to GitHub Security
    permissions:
      security-events: write
      actions: read
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run Semgrep
        uses: semgrep/semgrep-action@v1
        with:
          # Rulesets to use (folded into one space-separated value)
          config: >-
            p/security-audit
            p/owasp-top-ten
            p/cwe-top-25
          # Generate SARIF for GitHub Security. Without this input the action
          # does not write semgrep.sarif and the two upload steps below have
          # nothing to upload.
          generateSarif: "1"
          publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
          publishDeployment: ${{ secrets.SEMGREP_DEPLOYMENT_ID }}
          # Fail on HIGH/ERROR severity
          # auditOn: push

      # always(): publish whatever was produced even when the scan step failed
      - name: Upload SARIF to GitHub Security
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: semgrep.sarif

      - name: Upload scan results as artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: semgrep-results
          path: semgrep.sarif
# Alternative: Simpler configuration without Semgrep Cloud
---
# Installs Semgrep from PyPI, scans the checkout, and uploads the SARIF report
# to GitHub code scanning.
name: Semgrep Security Scan (Simple)

on:
  pull_request:
    branches: [main, master]
  push:
    branches: [main, master]

jobs:
  semgrep:
    runs-on: ubuntu-latest
    # upload-sarif needs write access to security events; granting it
    # explicitly keeps the job working when the repository's default
    # GITHUB_TOKEN permissions are read-only.
    permissions:
      contents: read
      security-events: write
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install Semgrep
        run: pip install semgrep

      # Report only ERROR and WARNING findings, written as SARIF
      - name: Run Semgrep Scan
        run: |
          semgrep --config="p/security-audit" \
            --config="p/owasp-top-ten" \
            --sarif \
            --output=semgrep-results.sarif \
            --severity=ERROR \
            --severity=WARNING

      - name: Upload SARIF results
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: semgrep-results.sarif
# PR-specific: Only scan changed files
---
# Runs Semgrep against the pull request with the PR base commit as baseline,
# then fails the job when the JSON report contains any result.
name: Semgrep PR Scan

on:
  pull_request:

jobs:
  semgrep-diff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch full history for diff

      - name: Install Semgrep
        run: pip install semgrep

      - name: Scan changed files only
        # Hand the base SHA to the shell through the environment rather than
        # expanding the expression inside the script text.
        env:
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
        run: |
          semgrep --config="p/security-audit" \
            --baseline-commit="$BASE_SHA" \
            --json \
            --output=results.json

      - name: Check for findings
        run: |
          FINDINGS=$(jq '.results | length' results.json)
          echo "Found $FINDINGS security issues"
          if [ "$FINDINGS" -gt 0 ]; then
            echo "❌ Security issues detected!"
            # -r prints each finding as plain text instead of a JSON-quoted string
            jq -r '.results[] | "[\(.extra.severity)] \(.check_id) - \(.path):\(.start.line)"' results.json
            exit 1
          else
            echo "✅ No security issues found"
          fi

View File

@@ -0,0 +1,106 @@
# GitLab CI - Semgrep Security Scanning
# Add to .gitlab-ci.yml
# Pipeline stages, in execution order
stages: [test, security]
# Basic Semgrep scan
# Produces a GitLab SAST report and keeps it as a downloadable artifact.
semgrep-scan:
  image: semgrep/semgrep:latest
  stage: security
  rules:
    - if: '$CI_MERGE_REQUEST_ID'  # Run on MRs
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'  # Run on default branch
  script:
    # Folded scalar: the lines below are joined into a single command line
    - >-
      semgrep --config="p/security-audit"
      --config="p/owasp-top-ten"
      --gitlab-sast
      --output=gl-sast-report.json
  artifacts:
    expire_in: 1 week
    paths:
      - gl-sast-report.json
    reports:
      sast: gl-sast-report.json
# Advanced: Fail on HIGH severity findings
# Scans with ERROR-severity rules only, counts the ERROR results in the JSON
# report, and exits non-zero when at least one is present.
semgrep-strict:
  stage: security
  image: python:3.11-slim
  before_script:
    # The slim Python image does not include jq, which the script below uses
    # to count and print findings.
    - apt-get update && apt-get install -y --no-install-recommends jq
    - pip install semgrep
  script:
    - |
      semgrep --config="p/security-audit" \
        --severity=ERROR \
        --json \
        --output=results.json
      CRITICAL=$(jq '[.results[] | select(.extra.severity == "ERROR")] | length' results.json)
      echo "Found $CRITICAL critical findings"
      if [ "$CRITICAL" -gt 0 ]; then
        echo "❌ Critical security issues detected!"
        jq '.results[] | select(.extra.severity == "ERROR")' results.json
        exit 1
      fi
  artifacts:
    paths:
      - results.json
    expire_in: 1 week
    # Keep the report even when the job fails on findings
    when: always
  allow_failure: false
# Differential scanning - only new findings in MR
# Uses the merge request's target branch as the Semgrep baseline.
semgrep-diff:
  image: semgrep/semgrep:latest
  stage: security
  # Merge request pipelines only
  rules:
    - if: '$CI_MERGE_REQUEST_ID'
  script:
    # Fetch the target branch so it can be referenced as origin/<branch>
    - git fetch origin $CI_MERGE_REQUEST_TARGET_BRANCH_NAME
    - |
      semgrep --config="p/security-audit" \
        --baseline-commit="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" \
        --gitlab-sast \
        --output=gl-sast-report.json
  artifacts:
    reports:
      sast: gl-sast-report.json
# Scheduled full scan (daily)
# Runs all three rulesets and stores the JSON report for 30 days.
semgrep-scheduled:
  image: semgrep/semgrep:latest
  stage: security
  # Only pipelines started by a schedule
  rules:
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
  script:
    - |
      semgrep --config="p/security-audit" \
        --config="p/owasp-top-ten" \
        --config="p/cwe-top-25" \
        --json \
        --output=full-scan-results.json
  artifacts:
    expire_in: 30 days
    paths:
      - full-scan-results.json
# Custom rules integration
# Combines a registry ruleset with the repository's own rule file.
semgrep-custom:
  image: semgrep/semgrep:latest
  stage: security
  # Merge request pipelines, and only when the custom rule file is present
  rules:
    - if: '$CI_MERGE_REQUEST_ID'
      exists:
        - custom-rules/security.yaml
  script:
    - |
      semgrep --config="p/owasp-top-ten" \
        --config="custom-rules/security.yaml" \
        --gitlab-sast \
        --output=gl-sast-report.json
  artifacts:
    reports:
      sast: gl-sast-report.json

View File

@@ -0,0 +1,190 @@
// Jenkinsfile - Semgrep Security Scanning
// Basic pipeline with Semgrep security gate:
//   1. check out the sources,
//   2. run a pinned Semgrep version and write a JSON report,
//   3. fail the build when the report contains ERROR-severity findings.
pipeline {
    agent any

    environment {
        SEMGREP_VERSION = '1.50.0' // Pin to specific version
    }

    stages {
        stage('Checkout') {
            steps {
                checkout scm
            }
        }

        stage('Security Scan') {
            steps {
                script {
                    // Install Semgrep (single quotes: the shell, not Groovy,
                    // expands SEMGREP_VERSION from the environment)
                    sh 'pip3 install semgrep==${SEMGREP_VERSION}'

                    // Run Semgrep scan
                    sh '''
                        semgrep --config="p/security-audit" \
                            --config="p/owasp-top-ten" \
                            --json \
                            --output=semgrep-results.json \
                            --severity=ERROR \
                            --severity=WARNING
                    '''
                }
            }
        }

        stage('Process Results') {
            steps {
                script {
                    // Parse results
                    def results = readJSON file: 'semgrep-results.json'
                    def findings = results.results.size()
                    def critical = results.results.findAll {
                        it.extra.severity == 'ERROR'
                    }.size()

                    echo "Total findings: ${findings}"
                    echo "Critical findings: ${critical}"

                    // Fail build if critical findings
                    if (critical > 0) {
                        error("❌ Critical security vulnerabilities detected!")
                    }
                }
            }
        }
    }

    post {
        always {
            // Archive scan results. allowEmptyArchive keeps this step from
            // raising a second error when an earlier stage failed before the
            // report was written.
            archiveArtifacts artifacts: 'semgrep-results.json',
                             fingerprint: true,
                             allowEmptyArchive: true

            // Publish results (if using warnings-ng plugin)
            // recordIssues(
            //     tools: [semgrep(pattern: 'semgrep-results.json')],
            //     qualityGates: [[threshold: 1, type: 'TOTAL', unstable: false]]
            // )
        }
        failure {
            echo '❌ Security scan failed - review findings'
        }
        success {
            echo '✅ No critical security issues detected'
        }
    }
}
// Advanced: Differential scanning for PRs
// Change requests are scanned against their target branch and fail on any new
// finding; builds of 'main' run the full set of rulesets instead.
pipeline {
    agent any

    environment {
        // Written as a double-quoted string because declarative `environment`
        // values are expected to be quoted strings or function calls.
        // Falls back to 'main' when CHANGE_TARGET is not set.
        TARGET_BRANCH = "${env.CHANGE_TARGET ?: 'main'}"
    }

    stages {
        stage('Checkout') {
            steps {
                checkout scm
                script {
                    // Fetch target branch for comparison
                    sh """
                        git fetch origin ${TARGET_BRANCH}:${TARGET_BRANCH}
                    """
                }
            }
        }

        stage('Differential Scan') {
            when {
                changeRequest() // Only for pull requests
            }
            steps {
                sh """
                    pip3 install semgrep
                    semgrep --config="p/security-audit" \
                        --baseline-commit="${TARGET_BRANCH}" \
                        --json \
                        --output=semgrep-diff.json
                """
                script {
                    def results = readJSON file: 'semgrep-diff.json'
                    def newFindings = results.results.size()
                    if (newFindings > 0) {
                        echo "❌ ${newFindings} new security issues introduced"
                        error("Fix security issues before merging")
                    } else {
                        echo "✅ No new security issues"
                    }
                }
            }
        }

        stage('Full Scan') {
            when {
                branch 'main' // Full scan on main branch
            }
            steps {
                // Install here as well: the 'Differential Scan' stage, which
                // holds the only other install, is skipped on branch builds.
                sh """
                    pip3 install semgrep
                    semgrep --config="p/security-audit" \
                        --config="p/owasp-top-ten" \
                        --config="p/cwe-top-25" \
                        --json \
                        --output=semgrep-full.json
                """
            }
        }
    }

    post {
        always {
            // Either report may be missing depending on which stage ran
            archiveArtifacts artifacts: 'semgrep-*.json',
                             allowEmptyArchive: true
        }
    }
}
// With custom rules
// Scans with a registry ruleset plus the repository's custom-rules/ directory,
// then prints a per-severity summary of the JSON report.
pipeline {
    agent any

    stages {
        stage('Security Scan with Custom Rules') {
            steps {
                sh """
                    pip3 install semgrep
                    # Run with both official and custom rules
                    semgrep --config="p/owasp-top-ten" \
                        --config="custom-rules/" \
                        --json \
                        --output=results.json
                """
                script {
                    // Print a text summary of the findings by severity.
                    // The summary must read the same file the scan wrote
                    // (results.json). The Python source is deliberately kept
                    // at column 0: the string is passed to `python3 -c`
                    // verbatim, and leading indentation would be a syntax
                    // error.
                    sh """
python3 -c "
import json
with open('results.json') as f:
    results = json.load(f)
findings = results['results']
print(f'Security Scan Complete:')
print(f' Total Findings: {len(findings)}')
for severity in ['ERROR', 'WARNING', 'INFO']:
    count = len([f for f in findings if f.get('extra', {}).get('severity') == severity])
    print(f' {severity}: {count}')
"
                    """
                }
            }
        }
    }
}

View File

@@ -0,0 +1,120 @@
rules:
# Skeleton for a new rule: copy it, rename the id, and fill in each field.
- id: custom-rule-template

  # Pattern matching - choose one or combine multiple
  pattern: dangerous_function($ARG)
  # OR use pattern combinations:
  # patterns:
  # - pattern: execute($QUERY)
  # - pattern-inside: |
  #     $QUERY = $USER_INPUT + ...
  # - pattern-not: execute("SAFE_QUERY")

  # Message shown when rule matches
  message: |
    Potential security vulnerability detected.
    Explain the risk and provide remediation guidance.

  # Severity level: ERROR, WARNING, or INFO
  severity: ERROR

  # Supported languages: python, javascript, java, go, etc.
  languages:
  - python

  # Metadata for categorization and tracking
  metadata:
    category: security
    technology:
    - web-app
    cwe:
    - 'CWE-XXX: Vulnerability Name'
    owasp:
    - 'AXX:2021-Category Name'
    confidence: HIGH  # HIGH, MEDIUM, LOW
    likelihood: MEDIUM  # How likely is exploitation
    impact: HIGH  # Potential security impact
    references:
    - https://owasp.org/...
    - https://cwe.mitre.org/data/definitions/XXX.html
    subcategory:
    - vuln-type  # e.g., sqli, xss, command-injection

  # Optional: Autofix suggestion
  # fix: |
  #   safe_function($ARG)

  # Optional: Path filtering
  # paths:
  #   include:
  #   - "src/"
  #   exclude:
  #   - "*/tests/*"
  #   - "*/test_*.py"
# Example: SQL Injection Detection
# Flags cursor.execute() calls whose query is an f-string or a string
# concatenation; the pattern-not clause names the literal-plus-parameters form.
- id: example-sql-injection
  languages:
  - python
  severity: ERROR
  patterns:
  - pattern-either:
    - pattern: 'cursor.execute(f"... {$VAR} ...")'
    - pattern: 'cursor.execute("..." + $VAR + "...")'
  - pattern-not: 'cursor.execute("...", ...)'
  message: |
    SQL injection vulnerability detected. User input is concatenated into SQL query.
    Remediation:
    - Use parameterized queries: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
    - Use ORM methods that automatically parameterize queries
  metadata:
    category: security
    cwe:
    - 'CWE-89: SQL Injection'
    owasp:
    - 'A03:2021-Injection'
    confidence: HIGH
    likelihood: HIGH
    impact: HIGH
    references:
    - https://owasp.org/Top10/A03_2021-Injection/
# Example: Hard-coded Secret Detection
# Regex rule: a secret-like identifier assigned a quoted literal of at least
# eight characters.
- id: example-hardcoded-secret
  # `|-` strips the block scalar's trailing newline. With a plain `|` the
  # newline becomes part of the regex, so a match would additionally require a
  # line break immediately after the closing quote (missing e.g.
  # `password = "...";`).
  pattern-regex: |-
    (password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['"][^'"]{8,}['"]
  message: |
    Potential hard-coded secret detected.
    Remediation:
    - Use environment variables: os.getenv('API_KEY')
    - Use secrets management: AWS Secrets Manager, HashiCorp Vault
    - Never commit secrets to version control
  severity: WARNING
  languages: [python, javascript, java, go]
  metadata:
    category: security
    cwe: ["CWE-798: Use of Hard-coded Credentials"]
    owasp: ["A07:2021-Identification-and-Authentication-Failures"]
    confidence: MEDIUM
# Example: Insecure Deserialization
# Flags every pickle.load / pickle.loads call.
#
# The earlier `pattern-not-inside` clause consisted of a source comment
# followed by a bare `...`; a comment is not matchable content and the
# ellipsis matches any surrounding code, so the clause excluded every finding.
# It has been removed. To exempt reviewed call sites, use an inline
# `# nosemgrep` comment or a concrete `pattern-not-inside` context instead.
- id: example-unsafe-deserialization
  pattern-either:
  - pattern: pickle.loads($DATA)
  - pattern: pickle.load($FILE)
  message: |
    Unsafe deserialization using pickle. Attackers can execute arbitrary code.
    Remediation:
    - Use JSON for serialization: json.loads(data)
    - If pickle is required, validate and sanitize data source
    - Never deserialize data from untrusted sources
  severity: ERROR
  languages: [python]
  metadata:
    category: security
    cwe: ["CWE-502: Deserialization of Untrusted Data"]
    owasp: ["A08:2021-Software-and-Data-Integrity-Failures"]
    confidence: HIGH
    likelihood: HIGH
    # Same HIGH/MEDIUM/LOW scale the rule template documents for `impact`
    impact: HIGH

View File

@@ -0,0 +1,80 @@
# Recommended Semgrep Configuration
# Save as .semgrepconfig or semgrep.yml in your project root
# NOTE(review): confirm which tool consumes this file - Semgrep's own
# documentation describes .semgrepignore and CLI flags for path exclusion and
# scan options, so these keys may need a wrapper script to take effect.

# Rules to run
rules: p/security-audit
# Alternative: Specify multiple rulesets
# rules:
#   - p/owasp-top-ten
#   - p/cwe-top-25
#   - path/to/custom-rules.yaml

# Paths to exclude from scanning
# (globs are quoted because a leading `*` would otherwise be read as a YAML alias)
exclude:
  # Dependency and virtual-environment directories
  - '*/node_modules/*'
  - '*/vendor/*'
  - '*/.venv/*'
  - '*/venv/*'
  # Build output and VCS metadata
  - '*/dist/*'
  - '*/build/*'
  - '*/.git/*'
  # Test code
  - '*/tests/*'
  - '*/test/*'
  - '*_test.go'
  - 'test_*.py'
  - '*.test.js'
  - '*.spec.js'
  # Minified / bundled JavaScript
  - '*.min.js'
  - '*.bundle.js'

# Paths to include (optional - scans all by default)
# include:
#   - "src/"
#   - "app/"
#   - "lib/"
# Maximum file size to scan (in bytes)
max_target_bytes: 1000000  # 1MB

# Timeout for each file (in seconds)
timeout: 30

# Number of jobs for parallel scanning
# jobs: 4

# Metrics and telemetry (disable for privacy)
# Quoted on purpose: a bare `off` is read as the boolean false by YAML 1.1
# parsers, while the setting is meant to be the string "off".
metrics: "off"

# Autofix mode (use with caution)
# autofix: false

# Output format
# Can be: text, json, sarif, gitlab-sast, junit-xml, emacs, vim
# Set via CLI: semgrep --config=<this-file> --json
# output_format: text

# Severity thresholds
# Only report findings at or above this severity
# Can be: ERROR, WARNING, INFO
# min_severity: WARNING

# Scan statistics
# Show timing and performance stats
# time: false
# Show stats after scanning
# verbose: false

# CI/CD specific settings
# These are typically set via CLI or CI environment

# Fail on findings
# Set exit code 1 if findings are detected
# error: true

# Baseline commit for diff scanning
# baseline_commit: origin/main

# SARIF output settings (for GitHub Security, etc.)
# sarif:
#   output: semgrep-results.sarif