Initial commit

2025-11-29 17:51:02 +08:00
commit ff1f4bd119
252 changed files with 72682 additions and 0 deletions
--- a/skills/appsec/sast-semgrep/assets/ci_config_examples/github-actions.yml
+++ b/skills/appsec/sast-semgrep/assets/ci_config_examples/github-actions.yml
@@ -0,0 +1,141 @@
+# GitHub Actions - Semgrep Security Scanning
+# Save as .github/workflows/semgrep.yml
+
+name: Semgrep Security Scan
+
+on:
+  # Scan on push to main/master
+  push:
+    branches:
+      - main
+      - master
+  # Scan pull requests
+  pull_request:
+    branches:
+      - main
+      - master
+  # Manual trigger
+  workflow_dispatch:
+  # Schedule daily scans
+  schedule:
+    - cron: '0 0 * * *'  # Run at midnight UTC
+
+jobs:
+  semgrep:
+    name: SAST Security Scan
+    runs-on: ubuntu-latest
+
+    # security-events: write is required for uploading results to GitHub
+    # Security; everything else is read-only.
+    permissions:
+      security-events: write
+      actions: read
+      contents: read
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Run Semgrep
+        uses: semgrep/semgrep-action@v1
+        with:
+          # Rulesets to use (the folded scalar joins them with spaces)
+          config: >-
+            p/security-audit
+            p/owasp-top-ten
+            p/cwe-top-25
+
+          # Publish findings to Semgrep Cloud
+          publishToken: ${{ secrets.SEMGREP_APP_TOKEN }}
+          publishDeployment: ${{ secrets.SEMGREP_DEPLOYMENT_ID }}
+
+          # Generate SARIF for GitHub Security: writes semgrep.sarif, the
+          # file both upload steps below expect to find.
+          generateSarif: "1"
+
+          # Optional: report findings on push events without failing the job
+          # NOTE(review): confirm auditOn semantics against the action docs
+          # auditOn: push
+
+      - name: Upload SARIF to GitHub Security
+        if: always()
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: semgrep.sarif
+
+      - name: Upload scan results as artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: semgrep-results
+          path: semgrep.sarif
+
+# Alternative: Simpler configuration without Semgrep Cloud.
+# Save each alternative as its own file under .github/workflows/.
+---
+name: Semgrep Security Scan (Simple)
+
+on:
+  pull_request:
+    branches: [main, master]
+  push:
+    branches: [main, master]
+
+jobs:
+  semgrep:
+    runs-on: ubuntu-latest
+
+    # The SARIF upload step needs security-events: write. Granting it here
+    # keeps the job independent of the repository's default token
+    # permissions.
+    permissions:
+      contents: read
+      security-events: write
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install Semgrep
+        run: pip install semgrep
+
+      # Writes semgrep-results.sarif, limited to ERROR and WARNING findings
+      - name: Run Semgrep Scan
+        run: |
+          semgrep --config="p/security-audit" \
+                   --config="p/owasp-top-ten" \
+                   --sarif \
+                   --output=semgrep-results.sarif \
+                   --severity=ERROR \
+                   --severity=WARNING
+
+      # always(): still attempt the upload when an earlier step failed
+      - name: Upload SARIF results
+        if: always()
+        uses: github/codeql-action/upload-sarif@v3
+        with:
+          sarif_file: semgrep-results.sarif
+
+# PR-specific: report only findings that are new relative to the PR base commit.
+# Save as its own file under .github/workflows/.
+---
+name: Semgrep PR Scan
+
+on:
+  pull_request:
+
+jobs:
+  semgrep-diff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Fetch full history for diff
+
+      # Select the interpreter explicitly so pip installs into a known Python
+      # rather than whatever the runner image ships.
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install Semgrep
+        run: pip install semgrep
+
+      # The base SHA is passed through the environment instead of being
+      # expanded into the script text.
+      - name: Scan changed files only
+        env:
+          BASE_SHA: ${{ github.event.pull_request.base.sha }}
+        run: |
+          semgrep --config="p/security-audit" \
+                   --baseline-commit="$BASE_SHA" \
+                   --json \
+                   --output=results.json
+
+      # Fails the job when results.json lists at least one finding
+      - name: Check for findings
+        run: |
+          FINDINGS=$(jq '.results | length' results.json)
+          echo "Found $FINDINGS security issues"
+          if [ "$FINDINGS" -gt 0 ]; then
+            echo "❌ Security issues detected!"
+            # -r prints each summary as plain text instead of a JSON string
+            jq -r '.results[] | "[\(.extra.severity)] \(.check_id) - \(.path):\(.start.line)"' results.json
+            exit 1
+          else
+            echo "✅ No security issues found"
+          fi
--- a/skills/appsec/sast-semgrep/assets/ci_config_examples/gitlab-ci.yml
+++ b/skills/appsec/sast-semgrep/assets/ci_config_examples/gitlab-ci.yml
@@ -0,0 +1,106 @@
+# GitLab CI - Semgrep Security Scanning
+# Add to .gitlab-ci.yml
+
+# Pipeline stages, in execution order
+stages: [test, security]
+
+# Basic Semgrep scan
+semgrep-scan:
+  image: semgrep/semgrep:latest
+  stage: security
+  # Run on merge requests and on the default branch
+  rules:
+    - if: $CI_MERGE_REQUEST_ID
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+  script:
+    # A single command: the folded scalar joins these lines with spaces
+    - >-
+      semgrep --config="p/security-audit"
+      --config="p/owasp-top-ten"
+      --gitlab-sast
+      --output=gl-sast-report.json
+  artifacts:
+    expire_in: 1 week
+    paths:
+      - gl-sast-report.json
+    # Registers the file as the job's SAST report
+    reports:
+      sast: gl-sast-report.json
+
+# Advanced: Fail on HIGH severity findings
+semgrep-strict:
+  stage: security
+  image: python:3.11-slim
+  before_script:
+    # The slim Python image does not include jq, which the script below uses
+    # to count findings.
+    - apt-get update && apt-get install -y --no-install-recommends jq
+    - pip install semgrep
+  script:
+    - |
+      semgrep --config="p/security-audit" \
+               --severity=ERROR \
+               --json \
+               --output=results.json
+
+      # Count ERROR-severity findings in the JSON report
+      CRITICAL=$(jq '[.results[] | select(.extra.severity == "ERROR")] | length' results.json)
+      echo "Found $CRITICAL critical findings"
+
+      # Print the offending findings and fail the job when any exist
+      if [ "$CRITICAL" -gt 0 ]; then
+        echo "❌ Critical security issues detected!"
+        jq '.results[] | select(.extra.severity == "ERROR")' results.json
+        exit 1
+      fi
+  artifacts:
+    paths:
+      - results.json
+    expire_in: 1 week
+    # Keep the report even when the job fails
+    when: always
+  allow_failure: false
+
+# Differential scanning - only new findings in MR
+semgrep-diff:
+  stage: security
+  image: semgrep/semgrep:latest
+  variables:
+    # Disable shallow cloning so the history behind the baseline commit is
+    # available locally for the comparison.
+    GIT_DEPTH: "0"
+  script:
+    # Quoted so the branch name reaches git as a single argument
+    - git fetch origin "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
+    - |
+      semgrep --config="p/security-audit" \
+               --baseline-commit="origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME" \
+               --gitlab-sast \
+               --output=gl-sast-report.json
+  artifacts:
+    reports:
+      sast: gl-sast-report.json
+  # Merge request pipelines only
+  rules:
+    - if: $CI_MERGE_REQUEST_ID
+
+# Scheduled full scan (daily)
+semgrep-scheduled:
+  image: semgrep/semgrep:latest
+  stage: security
+  # Only pipelines whose source is a schedule run this job
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "schedule"
+  # Keep the JSON report for 30 days
+  artifacts:
+    expire_in: 30 days
+    paths:
+      - full-scan-results.json
+  script:
+    - |
+      semgrep --config="p/security-audit" \
+               --config="p/owasp-top-ten" \
+               --config="p/cwe-top-25" \
+               --json \
+               --output=full-scan-results.json
+
+# Custom rules integration
+semgrep-custom:
+  image: semgrep/semgrep:latest
+  stage: security
+  # Merge requests only, and only when the custom rule file is present
+  rules:
+    - if: $CI_MERGE_REQUEST_ID
+      exists:
+        - custom-rules/security.yaml
+  artifacts:
+    reports:
+      sast: gl-sast-report.json
+  script:
+    # Registry ruleset plus the repository's own rule file
+    - |
+      semgrep --config="p/owasp-top-ten" \
+               --config="custom-rules/security.yaml" \
+               --gitlab-sast \
+               --output=gl-sast-report.json
--- a/skills/appsec/sast-semgrep/assets/ci_config_examples/jenkins.groovy
+++ b/skills/appsec/sast-semgrep/assets/ci_config_examples/jenkins.groovy
@@ -0,0 +1,190 @@
+// Jenkinsfile - Semgrep Security Scanning
+// Basic pipeline with Semgrep security gate
+
+pipeline {
+    agent any
+
+    environment {
+        SEMGREP_VERSION = '1.50.0'  // Pin to specific version
+    }
+
+    stages {
+        stage('Checkout') {
+            steps {
+                checkout scm
+            }
+        }
+
+        stage('Security Scan') {
+            steps {
+                script {
+                    // Install the pinned Semgrep release. The Groovy string is
+                    // single-quoted, so the shell expands SEMGREP_VERSION.
+                    sh 'pip3 install semgrep==${SEMGREP_VERSION}'
+
+                    // Run Semgrep and write ERROR/WARNING findings to
+                    // semgrep-results.json
+                    sh '''
+                        semgrep --config="p/security-audit" \
+                                --config="p/owasp-top-ten" \
+                                --json \
+                                --output=semgrep-results.json \
+                                --severity=ERROR \
+                                --severity=WARNING
+                    '''
+                }
+            }
+        }
+
+        stage('Process Results') {
+            steps {
+                script {
+                    // Parse results
+                    def results = readJSON file: 'semgrep-results.json'
+                    def findings = results.results.size()
+                    // Findings with severity ERROR are treated as critical
+                    def critical = results.results.findAll {
+                        it.extra.severity == 'ERROR'
+                    }.size()
+
+                    echo "Total findings: ${findings}"
+                    echo "Critical findings: ${critical}"
+
+                    // Fail build if critical findings
+                    if (critical > 0) {
+                        error("❌ Critical security vulnerabilities detected!")
+                    }
+                }
+            }
+        }
+    }
+
+    post {
+        always {
+            // Archive scan results. allowEmptyArchive keeps this step from
+            // raising its own error when an earlier stage failed before the
+            // report was written.
+            archiveArtifacts artifacts: 'semgrep-results.json',
+                           fingerprint: true,
+                           allowEmptyArchive: true
+
+            // Publish results (if using warnings-ng plugin)
+            // recordIssues(
+            //     tools: [semgrep(pattern: 'semgrep-results.json')],
+            //     qualityGates: [[threshold: 1, type: 'TOTAL', unstable: false]]
+            // )
+        }
+        failure {
+            echo '❌ Security scan failed - review findings'
+        }
+        success {
+            echo '✅ No critical security issues detected'
+        }
+    }
+}
+
+// Advanced: Differential scanning for PRs
+pipeline {
+    agent any
+
+    environment {
+        // Declarative environment values must be quoted strings or function
+        // calls, so the fallback expression is wrapped in a GString.
+        TARGET_BRANCH = "${env.CHANGE_TARGET ?: 'main'}"
+    }
+
+    stages {
+        stage('Checkout') {
+            steps {
+                checkout scm
+
+                script {
+                    // Fetch target branch for comparison
+                    sh """
+                        git fetch origin ${TARGET_BRANCH}:${TARGET_BRANCH}
+                    """
+                }
+            }
+        }
+
+        stage('Differential Scan') {
+            when {
+                changeRequest()  // Only for pull requests
+            }
+            steps {
+                sh """
+                    pip3 install semgrep
+
+                    semgrep --config="p/security-audit" \
+                            --baseline-commit="${TARGET_BRANCH}" \
+                            --json \
+                            --output=semgrep-diff.json
+                """
+
+                script {
+                    // Any entry in the baseline-relative report fails the build
+                    def results = readJSON file: 'semgrep-diff.json'
+                    def newFindings = results.results.size()
+
+                    if (newFindings > 0) {
+                        echo "❌ ${newFindings} new security issues introduced"
+                        error("Fix security issues before merging")
+                    } else {
+                        echo "✅ No new security issues"
+                    }
+                }
+            }
+        }
+
+        stage('Full Scan') {
+            when {
+                branch 'main'  // Full scan on main branch
+            }
+            steps {
+                // Install here as well: the differential stage, which holds
+                // the other install, is skipped on non-PR builds.
+                sh """
+                    pip3 install semgrep
+
+                    semgrep --config="p/security-audit" \
+                            --config="p/owasp-top-ten" \
+                            --config="p/cwe-top-25" \
+                            --json \
+                            --output=semgrep-full.json
+                """
+            }
+        }
+    }
+
+    post {
+        always {
+            // Archive whichever report was produced; neither stage runs on
+            // every build, so an empty match is tolerated.
+            archiveArtifacts artifacts: 'semgrep-*.json',
+                           allowEmptyArchive: true
+        }
+    }
+}
+
+// With custom rules
+pipeline {
+    agent any
+
+    stages {
+        stage('Security Scan with Custom Rules') {
+            steps {
+                sh """
+                    pip3 install semgrep
+
+                    # Run with both official and custom rules
+                    semgrep --config="p/owasp-top-ten" \
+                            --config="custom-rules/" \
+                            --json \
+                            --output=results.json
+                """
+
+                script {
+                    // Print a plain-text summary of the scan: total findings
+                    // and a count per severity. Reads results.json, the file
+                    // the scan above writes.
+                    sh """
+                        python3 -c "
+import json
+with open('results.json') as f:
+    results = json.load(f)
+    findings = results['results']
+    print(f'Security Scan Complete:')
+    print(f'  Total Findings: {len(findings)}')
+    for severity in ['ERROR', 'WARNING', 'INFO']:
+        count = len([f for f in findings if f.get('extra', {}).get('severity') == severity])
+        print(f'  {severity}: {count}')
+"
+                    """
+                }
+            }
+        }
+    }
+}
--- a/skills/appsec/sast-semgrep/assets/rule_template.yaml
+++ b/skills/appsec/sast-semgrep/assets/rule_template.yaml
@@ -0,0 +1,120 @@
+rules:
+  - id: custom-rule-template
+
+    # Languages the rule applies to (python, javascript, java, go, etc.)
+    languages:
+      - python
+
+    # Severity level: ERROR, WARNING, or INFO
+    severity: ERROR
+
+    # Text shown for each match
+    message: |
+      Potential security vulnerability detected.
+      Explain the risk and provide remediation guidance.
+
+    # Matching logic - a single pattern, or a combination of several
+    pattern: dangerous_function($ARG)
+    # Combined form:
+    # patterns:
+    #   - pattern: execute($QUERY)
+    #   - pattern-inside: |
+    #       $QUERY = $USER_INPUT + ...
+    #   - pattern-not: execute("SAFE_QUERY")
+
+    # Metadata for categorization and tracking
+    metadata:
+      category: security
+      subcategory:
+        - vuln-type  # e.g., sqli, xss, command-injection
+      technology:
+        - web-app
+      cwe:
+        - "CWE-XXX: Vulnerability Name"
+      owasp:
+        - "AXX:2021-Category Name"
+      confidence: HIGH  # HIGH, MEDIUM, LOW
+      likelihood: MEDIUM  # How likely is exploitation
+      impact: HIGH  # Potential security impact
+      references:
+        - https://owasp.org/...
+        - https://cwe.mitre.org/data/definitions/XXX.html
+
+    # Optional: Autofix suggestion
+    # fix: |
+    #   safe_function($ARG)
+
+    # Optional: Path filtering
+    # paths:
+    #   include:
+    #     - "src/"
+    #   exclude:
+    #     - "*/tests/*"
+    #     - "*/test_*.py"
+
+# Example: SQL Injection Detection
+  - id: example-sql-injection
+    languages:
+      - python
+    severity: ERROR
+    message: |
+      SQL injection vulnerability detected. User input is concatenated into SQL query.
+
+      Remediation:
+      - Use parameterized queries: cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
+      - Use ORM methods that automatically parameterize queries
+    # Match execute() calls built from an f-string or from concatenation,
+    # excluding calls that pass a constant query plus further arguments.
+    patterns:
+      - pattern-either:
+          - pattern: cursor.execute(f"... {$VAR} ...")
+          - pattern: cursor.execute("..." + $VAR + "...")
+      - pattern-not: cursor.execute("...", ...)
+    metadata:
+      category: security
+      cwe:
+        - "CWE-89: SQL Injection"
+      owasp:
+        - "A03:2021-Injection"
+      confidence: HIGH
+      likelihood: HIGH
+      impact: HIGH
+      references:
+        - https://owasp.org/Top10/A03_2021-Injection/
+
+# Example: Hard-coded Secret Detection
+  - id: example-hardcoded-secret
+    # Matches assignments of a quoted value of 8+ characters to a name such as
+    # password, secret, token or api_key.
+    # The block scalar uses strip chomping (|-) so the regex does not end in
+    # a literal newline, which would require the closing quote to be the last
+    # character on its line.
+    pattern-regex: |-
+      (password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['"][^'"]{8,}['"]
+    message: |
+      Potential hard-coded secret detected.
+
+      Remediation:
+      - Use environment variables: os.getenv('API_KEY')
+      - Use secrets management: AWS Secrets Manager, HashiCorp Vault
+      - Never commit secrets to version control
+    severity: WARNING
+    languages: [python, javascript, java, go]
+    metadata:
+      category: security
+      cwe: ["CWE-798: Use of Hard-coded Credentials"]
+      owasp: ["A07:2021-Identification-and-Authentication-Failures"]
+      confidence: MEDIUM
+      confidence: MEDIUM
+
+# Example: Insecure Deserialization
+  - id: example-unsafe-deserialization
+    # Flags both pickle entry points. The former pattern-not-inside clause
+    # held only a comment and a bare ellipsis, so it carried no real
+    # exclusion; add a concrete pattern-not / pattern-not-inside here if a
+    # specific call shape should be exempt.
+    pattern-either:
+      - pattern: pickle.loads($DATA)
+      - pattern: pickle.load($FILE)
+    message: |
+      Unsafe deserialization using pickle. Attackers can execute arbitrary code.
+
+      Remediation:
+      - Use JSON for serialization: json.loads(data)
+      - If pickle is required, validate and sanitize data source
+      - Never deserialize data from untrusted sources
+    severity: ERROR
+    languages: [python]
+    metadata:
+      category: security
+      cwe: ["CWE-502: Deserialization of Untrusted Data"]
+      owasp: ["A08:2021-Software-and-Data-Integrity-Failures"]
+      confidence: HIGH
+      likelihood: HIGH
+      # Uses the same HIGH / MEDIUM / LOW scale as confidence and likelihood
+      impact: HIGH
--- a/skills/appsec/sast-semgrep/assets/semgrep_config.yaml
+++ b/skills/appsec/sast-semgrep/assets/semgrep_config.yaml
@@ -0,0 +1,80 @@
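+# Recommended Semgrep Configuration
+# Save as .semgrepconfig or semgrep.yml in your project root
+# NOTE(review): confirm that your Semgrep version reads these option keys from
+# a file; path exclusions are normally declared in .semgrepignore and the
+# remaining options passed as CLI flags.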
+
+# Rules to run
+rules: p/security-audit
+
+# Alternative: Specify multiple rulesets
+# rules:
+#   - p/owasp-top-ten
+#   - p/cwe-top-25
+#   - path/to/custom-rules.yaml
+
+# Paths to exclude from scanning
+# (patterns are quoted because a leading * would otherwise be read as a
+# YAML alias)
+exclude:
+  - "*/node_modules/*"
+  - "*/vendor/*"
+  - "*/.venv/*"
+  - "*/venv/*"
+  - "*/dist/*"
+  - "*/build/*"
+  - "*/.git/*"
+  - "*/tests/*"
+  - "*/test/*"
+  - "*_test.go"
+  - "test_*.py"
+  - "*.test.js"
+  - "*.spec.js"
+  - "*.min.js"
+  - "*.bundle.js"
+
+# Paths to include (optional - scans all by default)
+# include:
+#   - "src/"
+#   - "app/"
+#   - "lib/"
+
+# Maximum file size to scan (in bytes)
+max_target_bytes: 1000000  # 1MB
+
+# Timeout for each file (in seconds)
+timeout: 30
+
+# Number of jobs for parallel scanning
+# jobs: 4
+
+# Metrics and telemetry (disable for privacy)
+# Quoted: a bare off is read as boolean false by YAML 1.1 parsers, not as
+# the string "off".
+metrics: "off"
+
+# Autofix mode (use with caution)
+# autofix: false
+
+# Output format
+# Can be: text, json, sarif, gitlab-sast, junit-xml, emacs, vim
+# Set via CLI: semgrep --config=<this-file> --json
+# output_format: text
+
+# Severity thresholds
+# Only report findings at or above this severity
+# Can be: ERROR, WARNING, INFO
+# min_severity: WARNING
+
+# Scan statistics
+# Show timing and performance stats
+# time: false
+# Show stats after scanning
+# verbose: false
+
+# CI/CD specific settings
+# These are typically set via CLI or CI environment
+
+# Fail on findings
+# Set exit code 1 if findings are detected
+# error: true
+
+# Baseline commit for diff scanning
+# baseline_commit: origin/main
+
+# SARIF output settings (for GitHub Security, etc.)
+# sarif:
+#   output: semgrep-results.sarif