Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:20:28 +08:00
commit b727790a9e
65 changed files with 16412 additions and 0 deletions

View File

@@ -0,0 +1,254 @@
#!/usr/bin/env bash
# ============================================================================
# CHANGELOG Validator
# ============================================================================
# Purpose: Validate CHANGELOG.md format compliance (Keep a Changelog)
# Version: 1.0.0
# Usage: ./changelog-validator.sh <changelog-path> [--strict] [--json] [--no-unreleased]
# Returns: 0=pass or warning, 1=fail or CHANGELOG not found; JSON output to stdout if --json
# ============================================================================
set -euo pipefail

# Default values
STRICT_MODE=false        # Set by --strict; no check visible in this script reads it.
JSON_OUTPUT=false
REQUIRE_UNRELEASED=true
CHANGELOG_PATH="CHANGELOG.md"

# Valid change categories per Keep a Changelog
VALID_CATEGORIES=("Added" "Changed" "Deprecated" "Removed" "Fixed" "Security")

#######################################
# Parse command-line arguments into the option globals above.
#
# The first non-option argument is the changelog path. Previously the first
# argument was taken as the path unconditionally, so an invocation such as
# `changelog-validator.sh --json` tried to validate a file named "--json".
#
# Globals:   STRICT_MODE, JSON_OUTPUT, REQUIRE_UNRELEASED, CHANGELOG_PATH (written)
# Arguments: the script's command-line arguments
# Returns:   0 always; unknown options and extra positionals are ignored
#######################################
parse_args() {
  local path_seen=false
  while (( $# > 0 )); do
    case "$1" in
      --strict)
        STRICT_MODE=true
        ;;
      --json)
        JSON_OUTPUT=true
        ;;
      --no-unreleased)
        REQUIRE_UNRELEASED=false
        ;;
      --*)
        # Unknown options are silently ignored, as before.
        ;;
      *)
        if ! $path_seen; then
          path_seen=true
          # An empty path argument keeps the default, as before.
          if [[ -n "$1" ]]; then
            CHANGELOG_PATH="$1"
          fi
        fi
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Result accumulators filled in by the validation passes further down.
issues=()
version_entries=()
categories_used=()
has_title=false
has_unreleased=false
compliance_score=100

# A missing CHANGELOG is reported (as text or JSON) and ends the run with
# exit code 1 before any validation happens.
if ! [[ -f "$CHANGELOG_PATH" ]]; then
  if ! $JSON_OUTPUT; then
    printf '%s\n' "⚠️ WARNING: CHANGELOG not found at $CHANGELOG_PATH"
    printf '%s\n' "CHANGELOG is recommended but not required for initial submission."
  else
    cat <<EOF
{
"error": "CHANGELOG not found",
"path": "$CHANGELOG_PATH",
"present": false,
"score": 0,
"status": "warning",
"issues": ["CHANGELOG.md not found"]
}
EOF
  fi
  exit 1
fi
# Read content
content=$(<"$CHANGELOG_PATH")

# NOTE: every grep below reads the content from a here-string rather than
# `echo "$content" | grep -q ...`. With `set -o pipefail`, grep -q exits on the
# first match; on a large file the writer can then die from SIGPIPE, the
# pipeline reports failure, and a present section would be recorded as missing.
# POSIX [[:space:]] replaces the GNU-only \s so the patterns work with any grep -E.

# Check for title
if grep -qiE '^#[[:space:]]*(changelog|change.?log)' <<< "$content"; then
  has_title=true
else
  issues+=("Missing title 'Changelog' or 'Change Log'")
  ((compliance_score-=10)) || true
fi

# Check for Unreleased section
if grep -qE '^##[[:space:]]*\[Unreleased\]' <<< "$content"; then
  has_unreleased=true
elif $REQUIRE_UNRELEASED; then
  issues+=("Missing [Unreleased] section")
  ((compliance_score-=15)) || true
fi

# Extract version headers.
# A well-formed header is "## [X.Y.Z] - YYYY-MM-DD"; anything else that still
# looks like a "## X.Y.Z" heading is reported as malformed.
valid_header_re='^##[[:space:]]*\[([0-9]+\.[0-9]+\.[0-9]+)\][[:space:]]*-[[:space:]]*([0-9]{4}-[0-9]{2}-[0-9]{2})'
loose_header_re='^##[[:space:]]*\[?([0-9]+\.[0-9]+\.[0-9]+)\]?'
while IFS= read -r line; do
  if [[ $line =~ $valid_header_re ]]; then
    version_entries+=("${BASH_REMATCH[1]}|${BASH_REMATCH[2]}")
  elif [[ $line =~ $loose_header_re ]] && [[ ! $line =~ \[Unreleased\] ]]; then
    # Invalid format detected
    issues+=("Invalid version header format: '$line' (should be '## [X.Y.Z] - YYYY-MM-DD')")
    ((compliance_score-=10)) || true
  fi
done <<< "$content"

# Check for valid change categories
for category in "${VALID_CATEGORIES[@]}"; do
  if grep -qE "^###[[:space:]]*$category" <<< "$content"; then
    categories_used+=("$category")
  fi
done

# Detect invalid categories
while IFS= read -r line; do
  if [[ $line =~ ^###[[:space:]]+(.*) ]]; then
    cat_name="${BASH_REMATCH[1]}"
    # Drop trailing whitespace (including the CR of CRLF files) so that a
    # heading like "### Added " is not reported as non-standard.
    cat_name="${cat_name%"${cat_name##*[![:space:]]}"}"
    is_valid=false
    for valid_cat in "${VALID_CATEGORIES[@]}"; do
      if [[ "$cat_name" == "$valid_cat" ]]; then
        is_valid=true
        break
      fi
    done
    if ! $is_valid; then
      issues+=("Non-standard category: '### $cat_name' (should be one of: ${VALID_CATEGORIES[*]})")
      ((compliance_score-=5)) || true
    fi
  fi
done <<< "$content"

# Check date formats in version headers.
# The ${arr[@]+...} form expands to nothing for an empty array; a plain
# "${arr[@]}" aborts under `set -u` on bash releases older than 4.4.
for entry in ${version_entries[@]+"${version_entries[@]}"}; do
  date_part="${entry#*|}"
  if [[ ! $date_part =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}$ ]]; then
    issues+=("Invalid date format in version entry: $date_part (should be YYYY-MM-DD)")
    ((compliance_score-=5)) || true
  fi
done

# Ensure score doesn't go negative
if ((compliance_score < 0)); then
  compliance_score=0
fi

# Determine status: below 60 fails, below 80 warns, otherwise passes.
status="pass"
if ((compliance_score < 60)); then
  status="fail"
elif ((compliance_score < 80)); then
  status="warning"
fi
# Output results

#######################################
# Escape a string so it can be embedded inside a JSON string literal.
# Backslashes, double quotes, tabs, carriage returns and newlines are escaped;
# previously only double quotes were handled, so a changelog line containing a
# backslash or tab (quoted verbatim in an issue message) produced invalid JSON.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  text=${text//$'\r'/\\r}
  text=${text//$'\n'/\\n}
  printf '%s' "$text"
}

if $JSON_OUTPUT; then
  # Build the array bodies first; each item is one line, joined with commas.
  # ${arr[@]+...} keeps `set -u` from aborting on empty arrays in bash < 4.4.
  version_items=$(
    for entry in ${version_entries[@]+"${version_entries[@]}"}; do
      printf ' {"version": "%s", "date": "%s"}\n' "${entry%|*}" "${entry#*|}"
    done | paste -sd, -
  )
  category_items=$(
    for cat in ${categories_used[@]+"${categories_used[@]}"}; do
      printf ' "%s"\n' "$cat"
    done | paste -sd, -
  )
  issue_items=$(
    for issue in ${issues[@]+"${issues[@]}"}; do
      printf ' "%s"\n' "$(json_escape "$issue")"
    done | paste -sd, -
  )
  cat <<EOF
{
"present": true,
"path": "$(json_escape "$CHANGELOG_PATH")",
"has_title": $has_title,
"has_unreleased": $has_unreleased,
"version_count": ${#version_entries[@]},
"version_entries": [
$version_items
],
"categories_used": [
$category_items
],
"compliance_score": $compliance_score,
"status": "$status",
"issues": [
$issue_items
]
}
EOF
else
  # Human-readable output
  echo ""
  echo "CHANGELOG Validation Results"
  echo "========================================"
  echo "File: $CHANGELOG_PATH"
  echo "Compliance Score: $compliance_score/100"
  echo ""
  if $has_title; then
    echo "✓ Title present"
  else
    echo "✗ Title missing"
  fi
  if $has_unreleased; then
    echo "✓ [Unreleased] section present"
  else
    if $REQUIRE_UNRELEASED; then
      echo "✗ [Unreleased] section missing"
    else
      echo "⚠ [Unreleased] section missing (not required)"
    fi
  fi
  echo ""
  echo "Version Entries: ${#version_entries[@]}"
  for entry in ${version_entries[@]+"${version_entries[@]}"}; do
    version="${entry%|*}"
    date="${entry#*|}"
    echo " • [$version] - $date"
  done
  if [[ ${#categories_used[@]} -gt 0 ]]; then
    echo ""
    echo "Change Categories Used:"
    for cat in "${categories_used[@]}"; do
      printf '%s\n' "$cat"
    done
  fi
  if [[ ${#issues[@]} -gt 0 ]]; then
    echo ""
    echo "Issues Found: ${#issues[@]}"
    for issue in "${issues[@]}"; do
      # printf: issue text quotes changelog lines verbatim.
      printf '%s\n' "$issue"
    done
  fi
  echo ""
  if [[ "$status" == "pass" ]]; then
    echo "Overall: ✓ PASS"
  elif [[ "$status" == "warning" ]]; then
    echo "Overall: ⚠ WARNINGS"
  else
    echo "Overall: ✗ FAIL"
  fi
  echo ""
fi
# Exit code mirrors the verdict: only "fail" is non-zero; "pass" and
# "warning" both exit 0.
case "$status" in
  fail)
    exit 1
    ;;
  *)
    exit 0
    ;;
esac

View File

@@ -0,0 +1,325 @@
#!/usr/bin/env bash
# ============================================================================
# Example Validator
# ============================================================================
# Purpose: Validate example quality and detect placeholder patterns
# Version: 1.0.0
# Usage: ./example-validator.sh <path> [--no-placeholders|--allow-placeholders] [--recursive|--non-recursive] [--extensions LIST] [--json]
# Returns: 0=pass or warning, 1=fail; JSON output to stdout if --json
# ============================================================================
set -euo pipefail

# Default values
NO_PLACEHOLDERS=true     # Toggled by --no-placeholders / --allow-placeholders; not read elsewhere in the visible script.
RECURSIVE=true
JSON_OUTPUT=false
EXTENSIONS="md,txt,json,sh,py,js,ts,yaml,yml"
TARGET_PATH="."

#######################################
# Parse command-line arguments into the option globals above.
#
# The first non-option argument is the target path. Previously the first
# argument was always consumed as the path, so `example-validator.sh --json`
# scanned a path named "--json". `--extensions` without a value used to abort
# with an "unbound variable" error; it now prints a usage message instead.
#
# Globals:   NO_PLACEHOLDERS, RECURSIVE, JSON_OUTPUT, EXTENSIONS, TARGET_PATH (written)
# Arguments: the script's command-line arguments
# Returns:   0 on success, 1 when --extensions has no value
#######################################
parse_args() {
  local path_seen=false
  while (( $# > 0 )); do
    case "$1" in
      --no-placeholders)
        NO_PLACEHOLDERS=true
        ;;
      --allow-placeholders)
        NO_PLACEHOLDERS=false
        ;;
      --recursive)
        RECURSIVE=true
        ;;
      --non-recursive)
        RECURSIVE=false
        ;;
      --json)
        JSON_OUTPUT=true
        ;;
      --extensions)
        if (( $# < 2 )); then
          printf 'Error: --extensions requires a comma-separated list (e.g. md,txt)\n' >&2
          return 1
        fi
        EXTENSIONS="$2"
        shift
        ;;
      --*)
        # Unknown options are silently ignored, as before.
        ;;
      *)
        if ! $path_seen; then
          path_seen=true
          # An empty path argument keeps the default, as before.
          if [[ -n "$1" ]]; then
            TARGET_PATH="$1"
          fi
        fi
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Initialize counters
files_checked=0
example_count=0
placeholder_count=0
todo_count=0
declare -a issues=()
declare -a files_with_issues=()

#######################################
# Print the files under TARGET_PATH whose extension is listed in EXTENSIONS,
# one path per line.
#
# The find command line is assembled as an array and executed directly. The
# previous version interpolated TARGET_PATH and the extensions into a string
# and ran it through `eval`, which broke on paths containing a single quote
# and let crafted arguments execute arbitrary shell code.
#
# Globals:   TARGET_PATH, RECURSIVE, EXTENSIONS (read)
# Outputs:   matching file paths on stdout; find errors are discarded
# Returns:   0 always (a failing find yields an empty list)
#######################################
list_candidate_files() {
  local -a ext_list=()
  local -a find_args=("$TARGET_PATH")
  local ext
  local first=true

  # Without recursion, stay in the top directory only.
  if [[ "$RECURSIVE" != true ]]; then
    find_args+=(-maxdepth 1)
  fi

  IFS=',' read -ra ext_list <<< "$EXTENSIONS"
  # No extensions means nothing can match (the old code ended up here too,
  # via a find syntax error on the empty group).
  if (( ${#ext_list[@]} == 0 )); then
    return 0
  fi

  # Build: -type f ( -name '*.a' -o -name '*.b' ... )
  find_args+=(-type f "(")
  for ext in "${ext_list[@]}"; do
    if ! $first; then
      find_args+=(-o)
    fi
    find_args+=(-name "*.${ext}")
    first=false
  done
  find_args+=(")")

  find "${find_args[@]}" 2>/dev/null || true
}

# Find files to check
mapfile -t files < <(list_candidate_files)
# Placeholder patterns to detect.
# These are extended regexes matched case-insensitively (the scan uses
# `grep -i`), so one spelling of "placeholder" is enough. Listing both
# 'placeholder' and 'PLACEHOLDER' made every such line count twice.
declare -a PLACEHOLDER_PATTERNS=(
  'TODO[:\)]'
  'FIXME[:\)]'
  'XXX[:\)]'
  'HACK[:\)]'
  'placeholder'
  'your-.*-here'
  '<your-'
  'INSERT.?HERE'
  'YOUR_[A-Z_]+'
)

# Generic dummy value patterns (whole words only)
declare -a GENERIC_PATTERNS=(
  '\bfoo\b'
  '\bbar\b'
  '\bbaz\b'
  '\bdummy\b'
)

# Acceptable patterns (don't count these): a line matching any of them is
# skipped by the scan even if it also matches a placeholder pattern.
declare -a ACCEPTABLE_PATTERNS=(
  '\{\{[^}]+\}\}' # {{variable}} template syntax
  '\$\{[^}]+\}'   # ${variable} template syntax
  '\$[A-Z_]+'     # $VARIABLE environment variables
)
#######################################
# Check whether a line contains one of the ACCEPTABLE_PATTERNS (template or
# environment-variable syntax).
#
# The loop variable is local. It used to be the global `pattern`, which
# overwrote the caller's loop variable of the same name; after the first call
# the caller's TODO/FIXME check was comparing against the last acceptable
# pattern and never matched.
#
# Globals:   ACCEPTABLE_PATTERNS (read)
# Arguments: $1 - the line to inspect
# Returns:   0 if the line matches an acceptable pattern, 1 otherwise
#######################################
is_acceptable_pattern() {
  local line="$1"
  local acceptable_re
  for acceptable_re in "${ACCEPTABLE_PATTERNS[@]}"; do
    if grep -qE -- "$acceptable_re" <<< "$line"; then
      return 0 # Is acceptable
    fi
  done
  return 1 # Not acceptable
}
#######################################
# Print the number of fenced code examples in a markdown file.
# Counts lines containing ``` and halves the total, since each example has an
# opening and a closing fence. Prints nothing for non-.md paths; prints 0 when
# the file has no fences or cannot be read.
# Arguments: $1 - file path
# Outputs:   example count on stdout (markdown files only)
# Returns:   0
#######################################
count_code_examples() {
  local file="$1"

  # Only markdown files are inspected.
  [[ "$file" == *.md ]] || return 0

  local fence_lines
  # grep -c exits non-zero both for "no match" and for unreadable files;
  # either way the count is treated as zero.
  fence_lines=$(grep -c '```' "$file" 2>/dev/null) || fence_lines=0

  case "$fence_lines" in
    '' | *[!0-9]*)
      fence_lines=0
      ;;
  esac

  echo "$((fence_lines / 2))"
}
# Check each file.
#
# The loop variables here are deliberately not called `pattern`:
# is_acceptable_pattern iterates with a variable of that name, and sharing it
# meant the TODO/FIXME test and the comment-skip test below ran against the
# wrong regex after the first call (todo_count stayed at 0). Whether a
# placeholder pattern is a TODO-style marker is also decided once per pattern
# instead of spawning grep for every matching line.
todo_marker_re='TODO|FIXME|XXX'

# ${files[@]+...} keeps `set -u` from aborting on an empty list in bash < 4.4.
for file in ${files[@]+"${files[@]}"}; do
  ((files_checked++)) || true

  # Count examples in markdown files
  if [[ "$file" =~ \.md$ ]]; then
    file_examples=$(count_code_examples "$file")
    ((example_count += file_examples)) || true
  fi

  file_issues=0

  # Check for placeholder patterns
  for placeholder_re in "${PLACEHOLDER_PATTERNS[@]}"; do
    is_todo_marker=false
    if [[ "$placeholder_re" =~ $todo_marker_re ]]; then
      is_todo_marker=true
    fi

    # grep -n emits "LINE:CONTENT"; split on the first colon.
    while IFS=: read -r line_num line_content; do
      # Skip if it's an acceptable pattern
      if is_acceptable_pattern "$line_content"; then
        continue
      fi
      ((placeholder_count++)) || true
      ((file_issues++)) || true
      issues+=("$file:$line_num: Placeholder pattern detected")
      # Track TODO/FIXME separately
      if $is_todo_marker; then
        ((todo_count++)) || true
      fi
    done < <(grep -inE -- "$placeholder_re" "$file" 2>/dev/null || true)
  done

  # Check for generic dummy values (only in non-test files)
  if [[ ! "$file" =~ test ]] && [[ ! "$file" =~ example ]] && [[ ! "$file" =~ spec ]]; then
    for generic_re in "${GENERIC_PATTERNS[@]}"; do
      while IFS=: read -r line_num line_content; do
        # Skip code comments explaining these terms
        if grep -qE -- '(#|//|/\*).*'"$generic_re" <<< "$line_content"; then
          continue
        fi
        # Skip if in an acceptable context
        if is_acceptable_pattern "$line_content"; then
          continue
        fi
        # Generic values count toward the per-file total only, not toward
        # placeholder_count.
        ((file_issues++)) || true
        issues+=("$file:$line_num: Generic placeholder value detected")
      done < <(grep -inE -- "$generic_re" "$file" 2>/dev/null || true)
    done
  fi

  # Track files with issues as "path:count"
  if ((file_issues > 0)); then
    files_with_issues+=("$file:$file_issues")
  fi
done
# Quality score: start at 100, subtract 10 per placeholder hit and 5 per
# TODO-style marker, and 20 more when fewer than two code examples were found.
quality_score=$((100 - placeholder_count * 10 - todo_count * 5))
if ((example_count < 2)); then
  quality_score=$((quality_score - 20))
fi

# Clamp at zero.
((quality_score >= 0)) || quality_score=0

# Verdict: below 60 fails, below 80 warns, otherwise passes.
if ((quality_score < 60)); then
  status="fail"
elif ((quality_score < 80)); then
  status="warning"
else
  status="pass"
fi
# Output results

#######################################
# Escape a string so it can be embedded inside a JSON string literal.
# Backslashes, double quotes, tabs, carriage returns and newlines are escaped.
# Previously issue text only had double quotes escaped and file paths were
# emitted raw, so a path containing a quote or backslash broke the JSON.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  text=${text//$'\r'/\\r}
  text=${text//$'\n'/\\n}
  printf '%s' "$text"
}

# Entries of files_with_issues are "path:count". The count is taken after the
# LAST colon (##*:) so that paths containing ':' are split correctly.
if $JSON_OUTPUT; then
  # Build the array bodies first: one item per line, comma after all but the last.
  # ${arr[@]+...} keeps `set -u` from aborting on empty arrays in bash < 4.4.
  issue_items=$(
    for issue in ${issues[@]+"${issues[@]:0:20}"}; do # Limit to first 20 issues
      printf ' "%s",\n' "$(json_escape "$issue")"
    done | sed '$ s/,$//'
  )
  file_items=$(
    for file_info in ${files_with_issues[@]+"${files_with_issues[@]:0:10}"}; do # Limit to first 10 files
      printf ' {"file": "%s", "issue_count": %s},\n' \
        "$(json_escape "${file_info%:*}")" "${file_info##*:}"
    done | sed '$ s/,$//'
  )
  cat <<EOF
{
"files_checked": $files_checked,
"example_count": $example_count,
"placeholder_count": $placeholder_count,
"todo_count": $todo_count,
"files_with_issues": ${#files_with_issues[@]},
"quality_score": $quality_score,
"status": "$status",
"issues": [
$issue_items
],
"files_with_issues_list": [
$file_items
]
}
EOF
else
  # Human-readable output
  echo ""
  echo "Example Quality Validation"
  echo "========================================"
  echo "Files Checked: $files_checked"
  echo "Code Examples Found: $example_count"
  echo "Quality Score: $quality_score/100"
  echo ""
  if ((placeholder_count > 0)) || ((todo_count > 0)); then
    echo "Issues Detected:"
    echo " • Placeholder patterns: $placeholder_count"
    echo " • TODO/FIXME markers: $todo_count"
    echo " • Files with issues: ${#files_with_issues[@]}"
    echo ""
    if ((${#files_with_issues[@]} > 0)); then
      echo "Files with issues:"
      for file_info in "${files_with_issues[@]:0:5}"; do # Show first 5
        file_path="${file_info%:*}"
        file_count="${file_info##*:}"
        printf '%s\n' "$file_path ($file_count issues)"
      done
      if ((${#files_with_issues[@]} > 5)); then
        echo " ... and $((${#files_with_issues[@]} - 5)) more files"
      fi
    fi
    echo ""
    echo "Sample Issues:"
    for issue in ${issues[@]+"${issues[@]:0:5}"}; do # Show first 5
      printf '%s\n' "$issue"
    done
    if ((${#issues[@]} > 5)); then
      echo " ... and $((${#issues[@]} - 5)) more issues"
    fi
  else
    echo "✓ No placeholder patterns detected"
  fi
  if ((example_count < 2)); then
    echo ""
    echo "⚠ Recommendation: Add more code examples (found: $example_count, recommended: 3+)"
  fi
  echo ""
  if [[ "$status" == "pass" ]]; then
    echo "Overall: ✓ PASS"
  elif [[ "$status" == "warning" ]]; then
    echo "Overall: ⚠ WARNINGS"
  else
    echo "Overall: ✗ FAIL"
  fi
  echo ""
fi
# Only a failing verdict produces a non-zero exit code; a warning is not a
# failure and exits 0 just like a pass.
if [[ "$status" != "fail" ]]; then
  exit 0
fi
exit 1

View File

@@ -0,0 +1,344 @@
#!/usr/bin/env python3
# ============================================================================
# License Detector
# ============================================================================
# Purpose: Detect and validate LICENSE file content
# Version: 1.0.0
# Usage: ./license-detector.py <path> [--expected LICENSE] [--json]
# Returns: 0=success, 1=error, JSON output to stdout
# ============================================================================
import sys
import os
import re
import json
import argparse
from pathlib import Path
from typing import Dict, Optional, Tuple
# OSI-approved license patterns.
# Each row is (license id, regex searched for in the LICENSE text,
# confidence reported on a match, human-readable name). Row order matters:
# on equal confidence the earlier row wins in detect_license.
_LICENSE_ROWS = (
    ("MIT", r"Permission is hereby granted, free of charge", 95, "MIT License"),
    ("Apache-2.0", r"Licensed under the Apache License, Version 2\.0", 95, "Apache License 2.0"),
    ("GPL-3.0", r"GNU GENERAL PUBLIC LICENSE.*Version 3", 95, "GNU General Public License v3.0"),
    ("GPL-2.0", r"GNU GENERAL PUBLIC LICENSE.*Version 2", 95, "GNU General Public License v2.0"),
    ("BSD-3-Clause", r"Redistribution and use in source and binary forms.*3\.", 85, "BSD 3-Clause License"),
    ("BSD-2-Clause", r"Redistribution and use in source and binary forms", 80, "BSD 2-Clause License"),
    ("ISC", r"Permission to use, copy, modify, and/or distribute", 90, "ISC License"),
    ("MPL-2.0", r"Mozilla Public License Version 2\.0", 95, "Mozilla Public License 2.0"),
)

LICENSE_PATTERNS = {
    license_id: {
        "pattern": pattern,
        "confidence": confidence,
        "osi_approved": True,
        "full_name": full_name,
    }
    for license_id, pattern, confidence, full_name in _LICENSE_ROWS
}

# License name variations/aliases, mapping each spelling to its license id.
# Insertion order is preserved because the name-only fallback in
# detect_license stops at the first alias it finds.
LICENSE_ALIASES = {
    alias: license_id
    for license_id, aliases in (
        ("MIT", ("MIT License", "MIT license")),
        ("Apache-2.0", ("Apache License 2.0", "Apache 2.0", "Apache-2")),
        ("GPL-3.0", ("GNU GPL v3", "GPLv3")),
        ("GPL-2.0", ("GNU GPL v2", "GPLv2")),
        ("BSD-3-Clause", ("BSD 3-Clause",)),
        ("BSD-2-Clause", ("BSD 2-Clause",)),
    )
    for alias in aliases
}
def find_license_file(path: str) -> Optional[str]:
    """Locate the license file for ``path``.

    Args:
        path: Either a license file itself or a directory to search.

    Returns:
        The license file path as a string, or None when ``path`` is a file
        that does not look like a license, a directory without one of the
        known file names, or does not exist.

    A directly supplied file is accepted when its name contains "license",
    "licence" or "copying" (case-insensitive). Previously only "license" was
    recognised, so ``COPYING`` or ``LICENCE`` was found when the directory was
    given but rejected when the file itself was given.
    """
    path_obj = Path(path)

    # Check if path is directly to a license file
    if path_obj.is_file():
        name = path_obj.name.lower()
        if any(marker in name for marker in ('license', 'licence', 'copying')):
            return str(path_obj)
        return None

    # Search the directory for the conventional file names, in priority order
    if path_obj.is_dir():
        for filename in ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'COPYING.txt', 'LICENCE']:
            license_path = path_obj / filename
            # is_file() rather than exists(): a directory named LICENSE is
            # not a license file and could not be read later anyway.
            if license_path.is_file():
                return str(license_path)

    return None
def read_plugin_manifest(path: str) -> Optional[str]:
    """Return the ``license`` value declared in ``.claude-plugin/plugin.json``.

    Args:
        path: A directory, or a file whose parent directory is used instead.

    Returns:
        The manifest's ``license`` entry, or None when the manifest is absent,
        has no such entry, or cannot be read or parsed (any error is treated
        as "no manifest").
    """
    base_dir = Path(path)
    if base_dir.is_file():
        base_dir = base_dir.parent

    manifest_file = base_dir.joinpath('.claude-plugin', 'plugin.json')
    if manifest_file.exists():
        try:
            data = json.loads(manifest_file.read_text(encoding='utf-8'))
            return data.get('license')
        except Exception:
            # Best effort: an unreadable or malformed manifest is ignored.
            pass
    return None
def detect_license(content: str) -> Tuple[Optional[str], int, bool]:
    """Detect the license type from LICENSE file content.

    Args:
        content: Full text of the license file.

    Returns:
        ``(license_type, confidence, is_complete)``:
        * license_type - key of LICENSE_PATTERNS, or None if nothing matched.
        * confidence   - the matched pattern's confidence, 50 for a
          name-only match, 0 when nothing matched.
        * is_complete  - False when only a license name was found or the
          matched text is shorter than 200 characters.

    Matching runs on a whitespace-normalised copy of the text. The normalised
    copy used to be computed but never used, so a license whose signature
    phrase was split across lines or spaced differently was not recognised.
    """
    # Collapse every run of whitespace (including newlines) to one space so
    # the single-space phrases in the patterns match re-wrapped text.
    content_normalized = ' '.join(content.split())

    best_match = None
    best_confidence = 0

    # Check for license text patterns; the highest confidence wins and, on a
    # tie, the pattern listed first is kept.
    for license_id, license_info in LICENSE_PATTERNS.items():
        pattern = license_info["pattern"]
        if re.search(pattern, content_normalized, re.IGNORECASE | re.DOTALL):
            confidence = license_info["confidence"]
            if confidence > best_confidence:
                best_match = license_id
                best_confidence = confidence

    # A pattern match on very short content is treated as incomplete text
    is_complete = True
    if best_match and len(content.strip()) < 200:
        is_complete = False

    # If no pattern match, check for just license names
    if not best_match:
        for alias, license_id in LICENSE_ALIASES.items():
            if re.search(r'\b' + re.escape(alias) + r'\b', content_normalized, re.IGNORECASE):
                best_match = license_id
                best_confidence = 50  # Lower confidence for name-only
                is_complete = False
                break

    return best_match, best_confidence, is_complete
def normalize_license_name(license_name: str) -> str:
    """Map a license name to its standard id where one is known.

    Resolution order: empty input gives ""; an exact LICENSE_PATTERNS key is
    returned unchanged; an exact LICENSE_ALIASES key gives its id; otherwise
    the name is trimmed, internal whitespace is collapsed, and it is compared
    case-insensitively with each alias. A name that matches nothing is
    returned exactly as it was passed in.
    """
    if not license_name:
        return ""

    # Exact hits first: standard ids, then known aliases
    for table, resolve in (
        (LICENSE_PATTERNS, lambda name: name),
        (LICENSE_ALIASES, lambda name: LICENSE_ALIASES[name]),
    ):
        if license_name in table:
            return resolve(license_name)

    # Case-insensitive alias comparison on the whitespace-normalised name
    collapsed = re.sub(r'\s+', ' ', license_name.strip()).lower()
    return next(
        (license_id for alias, license_id in LICENSE_ALIASES.items() if alias.lower() == collapsed),
        license_name,
    )
def licenses_match(detected: str, expected: str) -> Tuple[bool, str]:
    """Compare a detected license with the expected one.

    Both names are passed through normalize_license_name first.

    Returns:
        ``(matches, match_type)`` where match_type is "exact" (normalised
        names are equal), "alias" (both are known ids sharing a full name),
        "fuzzy" (equal once case, hyphens and spaces are ignored) or
        "mismatch".
    """
    def squash(name: str) -> str:
        # Comparison key that ignores case, hyphens and spaces
        return name.lower().replace('-', '').replace(' ', '')

    left = normalize_license_name(detected)
    right = normalize_license_name(expected)

    if left == right:
        return True, "exact"

    both_known = left in LICENSE_PATTERNS and right in LICENSE_PATTERNS
    if both_known and LICENSE_PATTERNS[left]["full_name"] == LICENSE_PATTERNS[right]["full_name"]:
        return True, "alias"

    if squash(left) == squash(right):
        return True, "fuzzy"

    return False, "mismatch"
def main():
    """Command-line entry point: locate, classify and score a LICENSE file.

    Returns:
        The process exit code: 1 when the LICENSE file is missing or
        unreadable or the final status is "fail", otherwise 0.
    """
    parser = argparse.ArgumentParser(description='Detect and validate LICENSE file')
    parser.add_argument('path', help='Path to LICENSE file or directory containing it')
    parser.add_argument('--expected', help='Expected license type (from plugin.json)', default=None)
    parser.add_argument('--strict', action='store_true', help='Strict validation (requires full text)')
    parser.add_argument('--json', action='store_true', help='Output JSON format')
    args = parser.parse_args()

    def report(payload, text_lines):
        """Print ``payload`` as JSON under --json, else the text lines."""
        if args.json:
            print(json.dumps(payload, indent=2))
        else:
            for text in text_lines:
                print(text)

    # Find LICENSE file
    license_path = find_license_file(args.path)
    if not license_path:
        report(
            {
                "error": "LICENSE file not found",
                "path": args.path,
                "present": False,
                "score": 0,
                "status": "fail",
                "issues": ["LICENSE file not found in specified path"],
            },
            [
                "❌ CRITICAL: LICENSE file not found",
                f"Path: {args.path}",
                "LICENSE file is required for plugin submission.",
            ],
        )
        return 1

    # Read LICENSE content
    try:
        content = Path(license_path).read_text(encoding='utf-8')
    except Exception as e:
        report(
            {
                "error": f"Failed to read LICENSE: {str(e)}",
                "path": license_path,
                "present": True,
                "score": 0,
                "status": "fail",
            },
            [f"❌ ERROR: Failed to read LICENSE: {e}"],
        )
        return 1

    # Detect license
    detected_license, confidence, is_complete = detect_license(content)

    # Fall back to the license declared in plugin.json when --expected is absent
    expected = args.expected or read_plugin_manifest(args.path)

    # Check consistency (only when there is something to compare against)
    if expected:
        matches_manifest, match_type = licenses_match(detected_license or "", expected)
    else:
        matches_manifest, match_type = True, None

    # Determine if OSI approved
    if detected_license in LICENSE_PATTERNS:
        is_osi_approved = LICENSE_PATTERNS[detected_license]["osi_approved"]
    else:
        is_osi_approved = False

    # Collect (message, points) deductions, then derive issues and score
    deductions = []
    if not detected_license:
        deductions.append(("Unable to identify license type", 50))
    elif not is_complete:
        deductions.append(("LICENSE contains only license name, not full text", 20 if args.strict else 10))
    if detected_license and not is_osi_approved:
        deductions.append(("License is not OSI-approved", 30))
    if expected and not matches_manifest:
        deductions.append(
            (f"LICENSE ({detected_license or 'unknown'}) does not match plugin.json ({expected})", 20)
        )
    issues = [message for message, _ in deductions]
    score = max(0, 100 - sum(points for _, points in deductions))

    # Determine status
    status = "pass" if score >= 80 else "warning" if score >= 60 else "fail"

    # Build result
    result = {
        "present": True,
        "path": license_path,
        "detected_license": detected_license,
        "confidence": confidence,
        "is_complete": is_complete,
        "is_osi_approved": is_osi_approved,
        "manifest_license": expected,
        "matches_manifest": matches_manifest,
        "match_type": match_type,
        "score": score,
        "status": status,
        "issues": issues,
    }

    # Human-readable rendering (used only without --json)
    overall = {"pass": "✓ PASS", "warning": "⚠ WARNING"}.get(status, "✗ FAIL")
    lines = [
        "\nLICENSE Validation Results",
        "=" * 50,
        f"File: {license_path}",
        f"Detected: {detected_license or 'Unknown'} (confidence: {confidence}%)",
        f"Score: {score}/100",
        f"\nOSI Approved: {'✓ Yes' if is_osi_approved else '✗ No'}",
        f"Complete Text: {'✓ Yes' if is_complete else '⚠ No (name only)'}",
    ]
    if expected:
        lines += [
            "\nConsistency Check:",
            f" plugin.json: {expected}",
            f" LICENSE file: {detected_license or 'Unknown'}",
            f" Match: {'✓ Yes' if matches_manifest else '✗ No'}",
        ]
    if issues:
        lines.append(f"\nIssues Found: {len(issues)}")
        lines.extend(f"{issue}" for issue in issues)
    lines.append(f"\nOverall: {overall}")
    lines.append("")

    report(result, lines)
    return 1 if status == "fail" else 0


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,311 @@
#!/usr/bin/env python3
# ============================================================================
# README Checker
# ============================================================================
# Purpose: Validate README.md completeness and quality
# Version: 1.0.0
# Usage: ./readme-checker.py <readme-path> [options]
# Returns: 0=success, 1=error, JSON output to stdout
# ============================================================================
import sys
import os
import re
import json
import argparse
from pathlib import Path
from typing import Dict, List, Tuple
# Every section pattern shares this prefix: case-insensitive, anchored at the
# start of the line, a level 1-3 markdown heading, optional whitespace.
_HEADING_PREFIX = r"(?i)^#{1,3}\s*"

# Required sections (case-insensitive patterns), keyed by section name
REQUIRED_SECTIONS = {
    name: _HEADING_PREFIX + title
    for name, title in (
        ("overview", r"(overview|description|about)"),
        ("installation", r"installation"),
        ("usage", r"usage"),
        ("examples", r"(examples?|demonstrations?)"),
        ("license", r"licen[cs]e"),
    )
}

# Optional but recommended sections
RECOMMENDED_SECTIONS = {
    name: _HEADING_PREFIX + title
    for name, title in (
        ("configuration", r"(configuration|setup|config)"),
        ("troubleshooting", r"(troubleshooting|faq|common.?issues)"),
        ("contributing", r"contribut"),
        ("changelog", r"(changelog|version.?history|releases)"),
    )
}
def find_readme(path: str) -> str:
    """Locate the README file for ``path``.

    Args:
        path: Either a README file itself or a directory to search.

    Returns:
        The README path as a string, or None when nothing suitable exists
        (callers test the result for truthiness).

    A directly supplied file is accepted when its name starts with "readme"
    (case-insensitive). In a directory the conventional names are tried first,
    in priority order; if none is present, any regular file whose name starts
    with "readme" is accepted (first in sorted order). Previously the
    directory search knew only four exact spellings, so e.g. ``Readme.md`` or
    ``README.rst`` was reported as missing although the same file passed
    directly was accepted.
    """
    path_obj = Path(path)

    # Check if path is directly to README
    if path_obj.is_file() and path_obj.name.lower().startswith('readme'):
        return str(path_obj)

    # Search for README in directory
    if path_obj.is_dir():
        for filename in ['README.md', 'readme.md', 'README.txt', 'README']:
            readme_path = path_obj / filename
            # is_file() rather than exists(): a directory named README is
            # not a readable README file.
            if readme_path.is_file():
                return str(readme_path)

        # Fallback: same rule as the direct-file branch above
        try:
            candidates = sorted(
                entry for entry in path_obj.iterdir()
                if entry.is_file() and entry.name.lower().startswith('readme')
            )
        except OSError:
            # Unlistable directory: treat as "no README found"
            candidates = []
        if candidates:
            return str(candidates[0])

    return None
def analyze_sections(content: str) -> Tuple[List[str], List[str]]:
    """Split the required sections into those present in the README and those absent.

    Each REQUIRED_SECTIONS pattern is matched against the start of every
    stripped line; the first matching line counts the section as found.

    Returns:
        ``(found_sections, missing_sections)``, each in REQUIRED_SECTIONS order.
    """
    stripped_lines = [raw_line.strip() for raw_line in content.split('\n')]
    found_sections: List[str] = []
    missing_sections: List[str] = []

    for section_name, pattern in REQUIRED_SECTIONS.items():
        heading = re.compile(pattern)
        is_present = any(heading.match(text) for text in stripped_lines)
        (found_sections if is_present else missing_sections).append(section_name)

    return found_sections, missing_sections
def count_examples(content: str) -> int:
    """Return how many fenced code blocks (```...```) the README contains.

    Blocks are matched non-greedily from one fence to the next, so an
    unterminated trailing fence is not counted.
    """
    fenced_block = re.compile(r'```[\s\S]*?```')
    return sum(1 for _ in fenced_block.finditer(content))
def check_quality_issues(content: str) -> List[str]:
    """Report quality problems in README text.

    Two checks are made:
    * Placeholder text: any single pattern (TODO, FIXME, ...) occurring more
      than five times, case-insensitively, is reported.
    * Thin sections: an installation/usage/example section with fewer than
      100 characters of non-blank content is reported.

    Lines inside fenced code blocks are always treated as section content.
    Previously a shell comment such as ``# install deps`` inside a fence was
    taken for a new heading, which cut the real section short and could flag
    the comment itself as a "very short" section.

    Returns:
        A list of human-readable issue descriptions (empty when clean).
    """
    issues = []

    # Check for excessive placeholder text
    placeholder_patterns = [
        r'TODO',
        r'FIXME',
        r'XXX',
        r'placeholder',
        r'your-.*-here',
        r'<your-'
    ]
    for pattern in placeholder_patterns:
        matches = re.findall(pattern, content, re.IGNORECASE)
        if len(matches) > 5:  # More than 5 is excessive
            issues.append(f"Excessive placeholder patterns: {len(matches)} instances of '{pattern}'")

    def fence_marker(stripped: str):
        """Return '```' or '~~~' if the line opens/closes a fence, else None."""
        for char in ('`', '~'):
            if stripped.startswith(char * 3):
                # A backtick fence cannot carry backticks after the marker;
                # that form is inline code such as ```x``` on one line.
                if char == '`' and '`' in stripped.lstrip(char):
                    return None
                return char * 3
        return None

    # Check for very short sections
    current_section = None
    section_lengths = {}
    open_fence = None  # marker of the fence we are inside, if any
    for line in content.split('\n'):
        marker = fence_marker(line.strip())
        toggles_fence = marker is not None and open_fence in (None, marker)
        if toggles_fence:
            open_fence = marker if open_fence is None else None

        is_heading = (
            not toggles_fence
            and open_fence is None
            and re.match(r'^#{1,3}\s+', line) is not None
        )
        if is_heading:
            current_section = line.strip()
            section_lengths[current_section] = 0
        elif current_section and line.strip():
            # Fence lines and code lines count toward the section's length
            section_lengths[current_section] += len(line)

    for section, length in section_lengths.items():
        if length < 100 and any(keyword in section.lower() for keyword in ['installation', 'usage', 'example']):
            issues.append(f"Section '{section}' is very short ({length} chars), consider expanding")

    return issues
def calculate_score(found_sections: List[str], missing_sections: List[str],
                    length: int, example_count: int, quality_issues: List[str]) -> int:
    """Calculate README quality score (0-100).

    Starting from 100, the following are subtracted:
    * 15 per missing required section;
    * 30 if the README is under 200 characters, 10 if under 500;
    * 15 if it has no code examples, 5 if it has fewer than two;
    * 5 per quality issue, capped at 20.
    The result never drops below 0. ``found_sections`` does not affect the score.
    """
    if length < 200:
        length_penalty = 30  # Critical
    elif length < 500:
        length_penalty = 10  # Warning
    else:
        length_penalty = 0

    if example_count == 0:
        example_penalty = 15
    elif example_count < 2:
        example_penalty = 5
    else:
        example_penalty = 0

    total_penalty = (
        15 * len(missing_sections)
        + length_penalty
        + example_penalty
        + min(5 * len(quality_issues), 20)
    )
    return max(0, 100 - total_penalty)
def generate_recommendations(found_sections: List[str], missing_sections: List[str],
                             length: int, example_count: int, quality_issues: List[str]) -> List[Dict]:
    """Generate actionable recommendations.

    One recommendation is produced per missing section, one if the README is
    under 500 characters, one if it has fewer than three code examples, and
    one per quality issue. Each is a dict with the keys priority, action,
    impact, effort and description.

    Returns:
        The recommendations ordered by priority (critical, important,
        recommended) and, within a priority, by descending impact.
    """
    def entry(priority, action, impact, effort, description):
        # Keeps the key order identical for every recommendation
        return {
            "priority": priority,
            "action": action,
            "impact": impact,
            "effort": effort,
            "description": description,
        }

    core_sections = ("overview", "installation", "usage")

    # Missing sections
    recommendations = [
        entry(
            "critical" if section in core_sections else "important",
            f"Add {section.title()} section",
            15,
            "medium" if section == "examples" else "low",
            f"Include a comprehensive {section} section with clear explanations",
        )
        for section in missing_sections
    ]

    # Length issues
    if length < 500:
        severely_short = length < 200
        recommendations.append(entry(
            "critical" if severely_short else "important",
            f"Expand README by {500 - length} characters",
            30 if severely_short else 10,
            "medium",
            "Add more detail to existing sections or include additional sections",
        ))

    # Example issues
    if example_count < 3:
        needed = 3 - example_count
        plural = 's' if needed > 1 else ''
        recommendations.append(entry(
            "important",
            f"Add {needed} more code example{plural}",
            15 if example_count == 0 else 5,
            "medium",
            "Include concrete, copy-pasteable usage examples",
        ))

    # Quality issues
    recommendations.extend(
        entry("recommended", "Address quality issue", 5, "low", issue)
        for issue in quality_issues
    )

    priority_rank = {"critical": 0, "important": 1, "recommended": 2}
    recommendations.sort(key=lambda rec: (priority_rank[rec["priority"]], -rec["impact"]))
    return recommendations
def main():
    """Command-line entry point: analyse a README and print the findings.

    Returns:
        The process exit code: 1 when the README is missing or unreadable or
        the score is below 60, otherwise 0.

    The "not found" and "cannot read" errors are always printed as JSON,
    regardless of --json.
    """
    parser = argparse.ArgumentParser(description='Validate README.md quality')
    parser.add_argument('path', help='Path to README.md or directory containing it')
    parser.add_argument('--sections', help='Comma-separated required sections', default=None)
    parser.add_argument('--min-length', type=int, default=500, help='Minimum character count')
    parser.add_argument('--strict', action='store_true', help='Enable strict validation')
    parser.add_argument('--json', action='store_true', help='Output JSON format')
    args = parser.parse_args()

    def dump(payload):
        print(json.dumps(payload, indent=2))

    # Find README file
    readme_path = find_readme(args.path)
    if not readme_path:
        dump({
            "error": "README.md not found",
            "path": args.path,
            "present": False,
            "score": 0,
            "issues": ["README.md file not found in specified path"],
        })
        return 1

    # Read README content
    try:
        content = Path(readme_path).read_text(encoding='utf-8')
    except Exception as e:
        dump({
            "error": f"Failed to read README: {str(e)}",
            "path": readme_path,
            "present": True,
            "score": 0,
        })
        return 1

    # Analyze README
    length = len(content)
    found_sections, missing_sections = analyze_sections(content)
    example_count = count_examples(content)
    quality_issues = check_quality_issues(content)
    analysis = (found_sections, missing_sections, length, example_count, quality_issues)

    # Score and recommendations are both derived from the same analysis
    score = calculate_score(*analysis)
    recommendations = generate_recommendations(*analysis)

    # Rating: first threshold the score reaches, else "poor"
    rating = next(
        (label for floor, label in (
            (90, "excellent"),
            (75, "good"),
            (60, "fair"),
            (40, "needs_improvement"),
        ) if score >= floor),
        "poor",
    )

    # Status: a pass additionally requires that no required section is missing
    if score >= 60 and not missing_sections:
        status = "pass"
    elif score >= 40:
        status = "warning"
    else:
        status = "fail"

    # Build result
    result = {
        "present": True,
        "path": readme_path,
        "length": length,
        "min_length": args.min_length,
        "meets_min_length": length >= args.min_length,
        "sections": {
            "found": found_sections,
            "missing": missing_sections,
            "required_count": len(REQUIRED_SECTIONS),
            "found_count": len(found_sections),
        },
        "examples": {
            "count": example_count,
            "sufficient": example_count >= 2,
        },
        "quality_issues": quality_issues,
        "score": score,
        "rating": rating,
        "recommendations": recommendations[:10],  # Top 10
        "status": status,
    }

    # Output
    if args.json:
        dump(result)
        return 0 if score >= 60 else 1

    # Human-readable output
    lines = [
        "\nREADME Validation Results",
        "=" * 50,
        f"File: {readme_path}",
        f"Length: {length} characters (min: {args.min_length})",
        f"Score: {score}/100 ({result['rating'].title()})",
        f"\nSections Found: {len(found_sections)}/{len(REQUIRED_SECTIONS)}",
    ]
    lines.extend(f"{section.title()}" for section in found_sections)
    if missing_sections:
        lines.append(f"\nMissing Sections: {len(missing_sections)}")
        lines.extend(f"{section.title()}" for section in missing_sections)
    lines.append(f"\nCode Examples: {example_count}")
    if quality_issues:
        lines.append(f"\nQuality Issues: {len(quality_issues)}")
        lines.extend(f"{issue}" for issue in quality_issues[:5])  # Top 5
    if recommendations:
        lines.append("\nTop Recommendations:")
        lines.extend(
            f" {i}. [{rec['priority'].upper()}] {rec['action']} (+{rec['impact']} pts)"
            for i, rec in enumerate(recommendations[:5], 1)
        )
    lines.append("")
    for text in lines:
        print(text)

    return 0 if score >= 60 else 1


if __name__ == "__main__":
    sys.exit(main())