Files
gh-dhofheinz-open-plugins-p…/commands/documentation-validation/.scripts/license-detector.py
2025-11-29 18:20:28 +08:00

345 lines
11 KiB
Python
Executable File

#!/usr/bin/env python3
# ============================================================================
# License Detector
# ============================================================================
# Purpose: Detect and validate LICENSE file content
# Version: 1.0.0
# Usage: ./license-detector.py <path> [--expected LICENSE] [--json]
# Returns: 0=success, 1=error, JSON output to stdout
# ============================================================================
import sys
import os
import re
import json
import argparse
from pathlib import Path
from typing import Dict, Optional, Tuple
# OSI-approved license patterns
LICENSE_PATTERNS = {
"MIT": {
"pattern": r"Permission is hereby granted, free of charge",
"confidence": 95,
"osi_approved": True,
"full_name": "MIT License"
},
"Apache-2.0": {
"pattern": r"Licensed under the Apache License, Version 2\.0",
"confidence": 95,
"osi_approved": True,
"full_name": "Apache License 2.0"
},
"GPL-3.0": {
"pattern": r"GNU GENERAL PUBLIC LICENSE.*Version 3",
"confidence": 95,
"osi_approved": True,
"full_name": "GNU General Public License v3.0"
},
"GPL-2.0": {
"pattern": r"GNU GENERAL PUBLIC LICENSE.*Version 2",
"confidence": 95,
"osi_approved": True,
"full_name": "GNU General Public License v2.0"
},
"BSD-3-Clause": {
"pattern": r"Redistribution and use in source and binary forms.*3\.",
"confidence": 85,
"osi_approved": True,
"full_name": "BSD 3-Clause License"
},
"BSD-2-Clause": {
"pattern": r"Redistribution and use in source and binary forms",
"confidence": 80,
"osi_approved": True,
"full_name": "BSD 2-Clause License"
},
"ISC": {
"pattern": r"Permission to use, copy, modify, and/or distribute",
"confidence": 90,
"osi_approved": True,
"full_name": "ISC License"
},
"MPL-2.0": {
"pattern": r"Mozilla Public License Version 2\.0",
"confidence": 95,
"osi_approved": True,
"full_name": "Mozilla Public License 2.0"
}
}
# License name variations/aliases
LICENSE_ALIASES = {
"MIT License": "MIT",
"MIT license": "MIT",
"Apache License 2.0": "Apache-2.0",
"Apache 2.0": "Apache-2.0",
"Apache-2": "Apache-2.0",
"GNU GPL v3": "GPL-3.0",
"GPLv3": "GPL-3.0",
"GNU GPL v2": "GPL-2.0",
"GPLv2": "GPL-2.0",
"BSD 3-Clause": "BSD-3-Clause",
"BSD 2-Clause": "BSD-2-Clause",
}
def find_license_file(path: str) -> Optional[str]:
"""Find LICENSE file in path."""
path_obj = Path(path)
# Check if path is directly to LICENSE
if path_obj.is_file() and 'license' in path_obj.name.lower():
return str(path_obj)
# Search for LICENSE in directory
if path_obj.is_dir():
for filename in ['LICENSE', 'LICENSE.txt', 'LICENSE.md', 'COPYING', 'COPYING.txt', 'LICENCE']:
license_path = path_obj / filename
if license_path.exists():
return str(license_path)
return None
def read_plugin_manifest(path: str) -> Optional[str]:
"""Read license from plugin.json."""
path_obj = Path(path)
if path_obj.is_file():
path_obj = path_obj.parent
manifest_path = path_obj / '.claude-plugin' / 'plugin.json'
if not manifest_path.exists():
return None
try:
with open(manifest_path, 'r', encoding='utf-8') as f:
manifest = json.load(f)
return manifest.get('license')
except Exception:
return None
def detect_license(content: str) -> Tuple[Optional[str], int, bool]:
"""
Detect license type from content.
Returns: (license_type, confidence, is_complete)
"""
content_normalized = ' '.join(content.split()) # Normalize whitespace
best_match = None
best_confidence = 0
# Check for license text patterns
for license_id, license_info in LICENSE_PATTERNS.items():
pattern = license_info["pattern"]
if re.search(pattern, content, re.IGNORECASE | re.DOTALL):
confidence = license_info["confidence"]
if confidence > best_confidence:
best_match = license_id
best_confidence = confidence
# Check if it's just a name without full text
is_complete = True
if best_match and len(content.strip()) < 200: # Very short content
is_complete = False
# If no pattern match, check for just license names
if not best_match:
for alias, license_id in LICENSE_ALIASES.items():
if re.search(r'\b' + re.escape(alias) + r'\b', content, re.IGNORECASE):
best_match = license_id
best_confidence = 50 # Lower confidence for name-only
is_complete = False
break
return best_match, best_confidence, is_complete
def normalize_license_name(license_name: str) -> str:
"""Normalize license name for comparison."""
if not license_name:
return ""
# Check if it's already a standard ID
if license_name in LICENSE_PATTERNS:
return license_name
# Check aliases
if license_name in LICENSE_ALIASES:
return LICENSE_ALIASES[license_name]
# Normalize common variations
normalized = license_name.strip()
normalized = re.sub(r'\s+', ' ', normalized)
# Try fuzzy matching
for alias, license_id in LICENSE_ALIASES.items():
if normalized.lower() == alias.lower():
return license_id
return license_name
def licenses_match(detected: str, expected: str) -> Tuple[bool, str]:
"""
Check if detected license matches expected.
Returns: (matches, match_type)
"""
detected_norm = normalize_license_name(detected)
expected_norm = normalize_license_name(expected)
if detected_norm == expected_norm:
return True, "exact"
# Check if they're aliases of the same license
if detected_norm in LICENSE_PATTERNS and expected_norm in LICENSE_PATTERNS:
if LICENSE_PATTERNS[detected_norm]["full_name"] == LICENSE_PATTERNS[expected_norm]["full_name"]:
return True, "alias"
# Fuzzy match
if detected_norm.lower().replace('-', '').replace(' ', '') == expected_norm.lower().replace('-', '').replace(' ', ''):
return True, "fuzzy"
return False, "mismatch"
def main():
parser = argparse.ArgumentParser(description='Detect and validate LICENSE file')
parser.add_argument('path', help='Path to LICENSE file or directory containing it')
parser.add_argument('--expected', help='Expected license type (from plugin.json)', default=None)
parser.add_argument('--strict', action='store_true', help='Strict validation (requires full text)')
parser.add_argument('--json', action='store_true', help='Output JSON format')
args = parser.parse_args()
# Find LICENSE file
license_path = find_license_file(args.path)
if not license_path:
result = {
"error": "LICENSE file not found",
"path": args.path,
"present": False,
"score": 0,
"status": "fail",
"issues": ["LICENSE file not found in specified path"]
}
if args.json:
print(json.dumps(result, indent=2))
else:
print("❌ CRITICAL: LICENSE file not found")
print(f"Path: {args.path}")
print("LICENSE file is required for plugin submission.")
return 1
# Read LICENSE content
try:
with open(license_path, 'r', encoding='utf-8') as f:
content = f.read()
except Exception as e:
result = {
"error": f"Failed to read LICENSE: {str(e)}",
"path": license_path,
"present": True,
"score": 0,
"status": "fail"
}
if args.json:
print(json.dumps(result, indent=2))
else:
print(f"❌ ERROR: Failed to read LICENSE: {e}")
return 1
# Detect license
detected_license, confidence, is_complete = detect_license(content)
# Read expected license from plugin.json if not provided
if not args.expected:
args.expected = read_plugin_manifest(args.path)
# Check consistency
matches_manifest = True
match_type = None
if args.expected:
matches_manifest, match_type = licenses_match(detected_license or "", args.expected)
# Determine if OSI approved
is_osi_approved = False
if detected_license and detected_license in LICENSE_PATTERNS:
is_osi_approved = LICENSE_PATTERNS[detected_license]["osi_approved"]
# Build issues list
issues = []
score = 100
if not detected_license:
issues.append("Unable to identify license type")
score -= 50
elif not is_complete:
issues.append("LICENSE contains only license name, not full text")
score -= 20 if args.strict else 10
if not is_osi_approved and detected_license:
issues.append("License is not OSI-approved")
score -= 30
if args.expected and not matches_manifest:
issues.append(f"LICENSE ({detected_license or 'unknown'}) does not match plugin.json ({args.expected})")
score -= 20
score = max(0, score)
# Determine status
if score >= 80:
status = "pass"
elif score >= 60:
status = "warning"
else:
status = "fail"
# Build result
result = {
"present": True,
"path": license_path,
"detected_license": detected_license,
"confidence": confidence,
"is_complete": is_complete,
"is_osi_approved": is_osi_approved,
"manifest_license": args.expected,
"matches_manifest": matches_manifest,
"match_type": match_type,
"score": score,
"status": status,
"issues": issues
}
# Output
if args.json:
print(json.dumps(result, indent=2))
else:
# Human-readable output
print(f"\nLICENSE Validation Results")
print("=" * 50)
print(f"File: {license_path}")
print(f"Detected: {detected_license or 'Unknown'} (confidence: {confidence}%)")
print(f"Score: {score}/100")
print(f"\nOSI Approved: {'✓ Yes' if is_osi_approved else '✗ No'}")
print(f"Complete Text: {'✓ Yes' if is_complete else '⚠ No (name only)'}")
if args.expected:
print(f"\nConsistency Check:")
print(f" plugin.json: {args.expected}")
print(f" LICENSE file: {detected_license or 'Unknown'}")
print(f" Match: {'✓ Yes' if matches_manifest else '✗ No'}")
if issues:
print(f"\nIssues Found: {len(issues)}")
for issue in issues:
print(f"{issue}")
print(f"\nOverall: {'✓ PASS' if status == 'pass' else '⚠ WARNING' if status == 'warning' else '✗ FAIL'}")
print()
return 0 if status != "fail" else 1
if __name__ == "__main__":
sys.exit(main())