Initial commit
This commit is contained in:
344
commands/documentation-validation/.scripts/license-detector.py
Executable file
344
commands/documentation-validation/.scripts/license-detector.py
Executable file
@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# ============================================================================
|
||||
# License Detector
|
||||
# ============================================================================
|
||||
# Purpose: Detect and validate LICENSE file content
|
||||
# Version: 1.0.0
|
||||
# Usage: ./license-detector.py <path> [--expected LICENSE] [--strict] [--json]
|
||||
# Returns: exit code 0 on pass/warning, 1 on error or failed validation; JSON on stdout with --json
|
||||
# ============================================================================
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional, Tuple
|
||||
|
||||
# OSI-approved license patterns
|
||||
LICENSE_PATTERNS = {
    # Each entry maps a license identifier to:
    #   pattern      - regex for a phrase characteristic of the license text
    #   confidence   - score reported (as a percentage) when the pattern matches
    #   osi_approved - flag reported as "OSI approved" for this license
    #   full_name    - human-readable license name
    "MIT": {
        "pattern": r"Permission is hereby granted, free of charge",
        "confidence": 95,
        "osi_approved": True,
        "full_name": "MIT License",
    },
    "Apache-2.0": {
        "pattern": r"Licensed under the Apache License, Version 2\.0",
        "confidence": 95,
        "osi_approved": True,
        "full_name": "Apache License 2.0",
    },
    "GPL-3.0": {
        "pattern": r"GNU GENERAL PUBLIC LICENSE.*Version 3",
        "confidence": 95,
        "osi_approved": True,
        "full_name": "GNU General Public License v3.0",
    },
    "GPL-2.0": {
        "pattern": r"GNU GENERAL PUBLIC LICENSE.*Version 2",
        "confidence": 95,
        "osi_approved": True,
        "full_name": "GNU General Public License v2.0",
    },
    "BSD-3-Clause": {
        # The third clause is the non-endorsement clause. Numbered texts carry
        # a literal "3.", but bulleted texts have no clause numbers, so the
        # clause's opening words ("Neither the name") are accepted as well.
        "pattern": (
            r"Redistribution and use in source and binary forms"
            r".*(?:3\.|Neither the name)"
        ),
        "confidence": 85,
        "osi_approved": True,
        "full_name": "BSD 3-Clause License",
    },
    "BSD-2-Clause": {
        # Also matches three-clause texts; the lower confidence lets the
        # BSD-3-Clause entry win when both patterns match.
        "pattern": r"Redistribution and use in source and binary forms",
        "confidence": 80,
        "osi_approved": True,
        "full_name": "BSD 2-Clause License",
    },
    "ISC": {
        "pattern": r"Permission to use, copy, modify, and/or distribute",
        "confidence": 90,
        "osi_approved": True,
        "full_name": "ISC License",
    },
    "MPL-2.0": {
        "pattern": r"Mozilla Public License Version 2\.0",
        "confidence": 95,
        "osi_approved": True,
        "full_name": "Mozilla Public License 2.0",
    },
}
|
||||
|
||||
# License name variations/aliases
|
||||
LICENSE_ALIASES = dict(
    # (informal spelling, canonical identifier) pairs. Insertion order is
    # preserved; code that scans this table stops at the first alias that
    # matches, so earlier pairs take precedence.
    (
        ("MIT License", "MIT"),
        ("MIT license", "MIT"),
        ("Apache License 2.0", "Apache-2.0"),
        ("Apache 2.0", "Apache-2.0"),
        ("Apache-2", "Apache-2.0"),
        ("GNU GPL v3", "GPL-3.0"),
        ("GPLv3", "GPL-3.0"),
        ("GNU GPL v2", "GPL-2.0"),
        ("GPLv2", "GPL-2.0"),
        ("BSD 3-Clause", "BSD-3-Clause"),
        ("BSD 2-Clause", "BSD-2-Clause"),
    )
)
|
||||
|
||||
def find_license_file(path: str) -> Optional[str]:
    """Locate a license file at, or directly inside, ``path``.

    Args:
        path: Path to a license file, or to a directory to search.

    Returns:
        ``path`` itself when it is a regular file whose name contains
        "license", "licence" or "copying" (case-insensitive); otherwise the
        first conventional license file name that exists as a regular file
        directly inside the directory ``path``; ``None`` when nothing
        qualifies.
    """
    target = Path(path)

    if target.is_file():
        # Accept the same name families the directory search looks for, so a
        # file found via its directory is also accepted when passed directly.
        lowered = target.name.lower()
        if any(marker in lowered for marker in ('license', 'licence', 'copying')):
            return str(target)
        return None

    if target.is_dir():
        candidates = (
            'LICENSE', 'LICENSE.txt', 'LICENSE.md',
            'COPYING', 'COPYING.txt', 'LICENCE',
        )
        for name in candidates:
            candidate = target / name
            # is_file() rather than exists(): a directory that happens to be
            # named like a license file cannot be read as license text, so
            # skip it and keep looking.
            if candidate.is_file():
                return str(candidate)

    return None
|
||||
|
||||
def read_plugin_manifest(path: str) -> Optional[str]:
    """Return the ``license`` string declared in the plugin manifest.

    The manifest is read from ``<dir>/.claude-plugin/plugin.json``, where
    ``<dir>`` is ``path`` itself, or its parent directory when ``path`` is a
    file.

    Args:
        path: A directory, or a file inside the directory, to look under.

    Returns:
        The declared license string, or ``None`` when the manifest is
        missing, unreadable, not valid JSON, not a JSON object, or its
        ``license`` entry is absent or not a string.
    """
    base = Path(path)
    if base.is_file():
        base = base.parent

    manifest_path = base / '.claude-plugin' / 'plugin.json'

    try:
        with open(manifest_path, 'r', encoding='utf-8') as handle:
            manifest = json.load(handle)
    except (OSError, ValueError, RecursionError):
        # Best-effort lookup: a missing/unreadable file (OSError), malformed
        # JSON or undecodable bytes (both ValueError subclasses), or absurdly
        # nested JSON just means there is no declared license to report.
        return None

    if not isinstance(manifest, dict):
        return None

    declared = manifest.get('license')
    # Only a plain string is returned; other JSON shapes (object, list,
    # number) would break callers that treat the value as a license name.
    return declared if isinstance(declared, str) else None
|
||||
|
||||
def detect_license(content: str) -> Tuple[Optional[str], int, bool]:
    """Identify the license described by ``content``.

    Every entry of ``LICENSE_PATTERNS`` is tried against the text; the match
    with the highest confidence wins. When no pattern matches, the text is
    scanned for the names in ``LICENSE_ALIASES`` as a lower-confidence
    fallback.

    Args:
        content: Raw text of the license file.

    Returns:
        ``(license_id, confidence, is_complete)``. ``license_id`` is ``None``
        and ``confidence`` is 0 when nothing is recognised. ``is_complete``
        is ``False`` when a pattern matched but the text is shorter than 200
        characters, or when only a license name was found.
    """
    # Collapse every whitespace run to one space so that a phrase wrapped
    # across lines (or indented) still matches the single-spaced patterns.
    text = ' '.join(content.split())

    best_match = None
    best_confidence = 0

    for license_id, license_info in LICENSE_PATTERNS.items():
        if not re.search(license_info["pattern"], text, re.IGNORECASE | re.DOTALL):
            continue
        confidence = license_info["confidence"]
        # Strict ">" keeps the earliest entry when confidences tie.
        if confidence > best_confidence:
            best_match = license_id
            best_confidence = confidence

    # A pattern hit inside a very short file is treated as a notice/name
    # rather than the full license text.
    is_complete = not (best_match and len(content.strip()) < 200)

    if not best_match:
        # Fallback: the file may only name the license.
        for alias, license_id in LICENSE_ALIASES.items():
            if re.search(r'\b' + re.escape(alias) + r'\b', text, re.IGNORECASE):
                best_match = license_id
                best_confidence = 50  # Lower confidence for name-only
                is_complete = False
                break

    return best_match, best_confidence, is_complete
|
||||
|
||||
def normalize_license_name(license_name: str) -> str:
    """Map a license name to its canonical identifier where possible.

    Lookup order: exact identifier in ``LICENSE_PATTERNS``, exact alias in
    ``LICENSE_ALIASES``, then a case-insensitive comparison (after trimming
    and collapsing whitespace) against identifiers and aliases.

    Args:
        license_name: Name as written by a user or in a manifest.

    Returns:
        The canonical identifier when one is recognised; otherwise the
        whitespace-cleaned input. An empty/None input yields ``""``.
    """
    if not license_name:
        return ""

    # Fast paths: already canonical, or a verbatim alias.
    if license_name in LICENSE_PATTERNS:
        return license_name
    if license_name in LICENSE_ALIASES:
        return LICENSE_ALIASES[license_name]

    cleaned = re.sub(r'\s+', ' ', license_name.strip())
    folded = cleaned.lower()

    # Identifiers get the same case-insensitive treatment as aliases, so
    # e.g. "mit" or "apache-2.0" resolve to their canonical spelling.
    for license_id in LICENSE_PATTERNS:
        if folded == license_id.lower():
            return license_id

    for alias, license_id in LICENSE_ALIASES.items():
        if folded == alias.lower():
            return license_id

    # Unknown name: hand back the cleaned form rather than the raw input.
    return cleaned
|
||||
|
||||
def licenses_match(detected: str, expected: str) -> Tuple[bool, str]:
    """Compare a detected license with the expected one.

    Both names are first passed through ``normalize_license_name``.

    Returns:
        ``(matches, match_type)`` where ``match_type`` is ``"exact"`` for
        equal normalized names, ``"alias"`` when both are known identifiers
        sharing a ``full_name``, ``"fuzzy"`` when they are equal ignoring
        case, hyphens and spaces, and ``"mismatch"`` otherwise.
    """
    left = normalize_license_name(detected)
    right = normalize_license_name(expected)

    if left == right:
        return True, "exact"

    # Two distinct identifiers that describe the same license.
    both_known = left in LICENSE_PATTERNS and right in LICENSE_PATTERNS
    if both_known:
        same_name = (
            LICENSE_PATTERNS[left]["full_name"] == LICENSE_PATTERNS[right]["full_name"]
        )
        if same_name:
            return True, "alias"

    def squash(name):
        # Ignore case, hyphens and spaces.
        return name.lower().replace('-', '').replace(' ', '')

    if squash(left) == squash(right):
        return True, "fuzzy"

    return False, "mismatch"
|
||||
|
||||
def main():
    """Command-line entry point: locate, classify and score a LICENSE file.

    Parses the command line, finds the license file, detects its type,
    compares it with the expected license (``--expected`` or, failing that,
    the plugin manifest), and prints either a JSON document (``--json``) or
    a human-readable report to stdout.

    Returns:
        Exit code: 1 when no license file is found, the file cannot be
        read, or the final status is "fail"; 0 otherwise.
    """
    parser = argparse.ArgumentParser(description='Detect and validate LICENSE file')
    parser.add_argument('path', help='Path to LICENSE file or directory containing it')
    parser.add_argument('--expected', help='Expected license type (from plugin.json)', default=None)
    parser.add_argument('--strict', action='store_true', help='Strict validation (requires full text)')
    parser.add_argument('--json', action='store_true', help='Output JSON format')

    args = parser.parse_args()

    # Find LICENSE file
    license_path = find_license_file(args.path)

    if not license_path:
        result = {
            "error": "LICENSE file not found",
            "path": args.path,
            "present": False,
            "score": 0,
            "status": "fail",
            "issues": ["LICENSE file not found in specified path"]
        }
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print("❌ CRITICAL: LICENSE file not found")
            print(f"Path: {args.path}")
            print("LICENSE file is required for plugin submission.")
        return 1

    # Read LICENSE content
    try:
        with open(license_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        # I/O failure or bytes that are not valid UTF-8.
        result = {
            "error": f"Failed to read LICENSE: {e}",
            "path": license_path,
            "present": True,
            "score": 0,
            "status": "fail",
            # Keep the payload shape consistent with the other results so
            # JSON consumers can always read "issues".
            "issues": [f"Failed to read LICENSE: {e}"]
        }
        if args.json:
            print(json.dumps(result, indent=2))
        else:
            print(f"❌ ERROR: Failed to read LICENSE: {e}")
        return 1

    # Detect license
    detected_license, confidence, is_complete = detect_license(content)

    # Read expected license from plugin.json if not provided
    if not args.expected:
        args.expected = read_plugin_manifest(args.path)

    # Check consistency (skipped, and treated as matching, when there is
    # nothing to compare against)
    matches_manifest = True
    match_type = None
    if args.expected:
        matches_manifest, match_type = licenses_match(detected_license or "", args.expected)

    # Determine if OSI approved
    is_osi_approved = False
    if detected_license and detected_license in LICENSE_PATTERNS:
        is_osi_approved = LICENSE_PATTERNS[detected_license]["osi_approved"]

    # Build issues list; each issue deducts from a starting score of 100
    issues = []
    score = 100

    if not detected_license:
        issues.append("Unable to identify license type")
        score -= 50
    elif not is_complete:
        issues.append("LICENSE contains only license name, not full text")
        score -= 20 if args.strict else 10

    if not is_osi_approved and detected_license:
        issues.append("License is not OSI-approved")
        score -= 30

    if args.expected and not matches_manifest:
        issues.append(f"LICENSE ({detected_license or 'unknown'}) does not match plugin.json ({args.expected})")
        score -= 20

    score = max(0, score)

    # Determine status
    if score >= 80:
        status = "pass"
    elif score >= 60:
        status = "warning"
    else:
        status = "fail"

    # Build result
    result = {
        "present": True,
        "path": license_path,
        "detected_license": detected_license,
        "confidence": confidence,
        "is_complete": is_complete,
        "is_osi_approved": is_osi_approved,
        "manifest_license": args.expected,
        "matches_manifest": matches_manifest,
        "match_type": match_type,
        "score": score,
        "status": status,
        "issues": issues
    }

    # Output
    if args.json:
        print(json.dumps(result, indent=2))
    else:
        # Human-readable output
        print("\nLICENSE Validation Results")
        print("=" * 50)
        print(f"File: {license_path}")
        print(f"Detected: {detected_license or 'Unknown'} (confidence: {confidence}%)")
        print(f"Score: {score}/100")
        print(f"\nOSI Approved: {'✓ Yes' if is_osi_approved else '✗ No'}")
        print(f"Complete Text: {'✓ Yes' if is_complete else '⚠ No (name only)'}")

        if args.expected:
            print("\nConsistency Check:")
            print(f"  plugin.json: {args.expected}")
            print(f"  LICENSE file: {detected_license or 'Unknown'}")
            print(f"  Match: {'✓ Yes' if matches_manifest else '✗ No'}")

        if issues:
            print(f"\nIssues Found: {len(issues)}")
            for issue in issues:
                print(f"  • {issue}")

        print(f"\nOverall: {'✓ PASS' if status == 'pass' else '⚠ WARNING' if status == 'warning' else '✗ FAIL'}")
        print()

    return 0 if status != "fail" else 1
|
||||
|
||||
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user