393 lines
12 KiB
Python
Executable File
393 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
"""
============================================================================
Keyword Quality Analyzer
============================================================================
Purpose: Analyze keyword quality, count, and relevance for OpenPlugins
Version: 1.0.0
Usage: ./keyword-analyzer.py <keywords> [--min N] [--max N]
Returns: 0=valid, 1=count violation, 2=quality issues, 3=missing params
============================================================================
"""
|
|
|
|
import sys
|
|
import re
|
|
from typing import List, Tuple, Dict
|
|
|
|
# Default bounds on the number of keywords (overridable with --min / --max)
DEFAULT_MIN_KEYWORDS = 3
DEFAULT_MAX_KEYWORDS = 7

# Terms that are too generic or too promotional to be useful keywords
GENERIC_BLOCKLIST = [
    # Generic nouns
    'plugin',
    'tool',
    'utility',
    'helper',
    'app',
    'code',
    'software',
    'program',
    'system',
    # Promotional adjectives
    'awesome',
    'best',
    'perfect',
    'great',
    'super',
    'amazing',
    'cool',
    'nice',
    'good',
    'excellent',
]

# OpenPlugins category names; a keyword should not simply repeat one of these
CATEGORIES = [
    'development',
    'testing',
    'deployment',
    'documentation',
    'security',
    'database',
    'monitoring',
    'productivity',
    'quality',
    'collaboration',
]

# Keywords counted as "functionality" when checking the keyword mix
FUNCTIONALITY_KEYWORDS = [
    'testing',
    'deployment',
    'formatting',
    'linting',
    'migration',
    'generation',
    'automation',
    'analysis',
    'monitoring',
    'scanning',
    'refactoring',
    'debugging',
    'profiling',
    'optimization',
]

# Keywords counted as "technology" when checking the keyword mix
TECHNOLOGY_KEYWORDS = [
    'python',
    'javascript',
    'typescript',
    'docker',
    'kubernetes',
    'react',
    'vue',
    'angular',
    'node',
    'bash',
    'terraform',
    'postgresql',
    'mysql',
    'redis',
    'aws',
    'azure',
    'gcp',
]
|
|
|
|
|
|
def usage():
    """Print the command-line help text, then exit with status 3."""
    help_lines = (
        'Usage: keyword-analyzer.py <keywords> [--min N] [--max N]',
        '',
        'Analyze keyword quality and relevance for OpenPlugins standards.',
        '',
        'Arguments:',
        'keywords Comma-separated list of keywords (required)',
        '--min N Minimum keyword count (default: 3)',
        '--max N Maximum keyword count (default: 7)',
        '',
        'Requirements:',
        '- Count: 3-7 keywords (optimal: 5-6)',
        '- No generic terms (plugin, tool, awesome)',
        '- No marketing fluff (best, perfect, amazing)',
        '- Mix of functionality and technology',
        '- No redundant variations',
        '',
        'Good examples:',
        '"testing,pytest,automation,tdd,python"',
        '"deployment,kubernetes,ci-cd,docker"',
        '"linting,javascript,code-quality"',
        '',
        'Bad examples:',
        '"plugin,tool,awesome" (generic)',
        '"test,testing,tests" (redundant)',
        '"development" (only one, too generic)',
        '',
        'Exit codes:',
        '0 - Valid keyword set',
        '1 - Count violation (too few or too many)',
        '2 - Quality issues (generic terms, duplicates)',
        '3 - Missing required parameters',
    )
    # The text itself ends with a newline; print() then adds one more.
    print("\n".join(help_lines) + "\n")
    sys.exit(3)
|
|
|
|
|
|
def parse_keywords(keyword_string: str) -> List[str]:
    """
    Split a comma-separated keyword string into a normalized list

    Each piece is stripped of surrounding whitespace and lowercased; empty
    pieces are dropped and repeated keywords keep only their first occurrence.

    Returns:
        Unique keywords in first-seen order ([] for an empty input)
    """
    if not keyword_string:
        return []

    normalized = (piece.strip().lower() for piece in keyword_string.split(','))

    # dict keys preserve insertion order, so this removes repeats while
    # keeping each keyword at the position where it first appeared
    return list(dict.fromkeys(token for token in normalized if token))
|
|
|
|
|
|
def check_generic_terms(keywords: List[str]) -> Tuple[List[str], List[str]]:
    """
    Split blocklisted keywords into generic and marketing terms

    Only keywords found in GENERIC_BLOCKLIST are reported; those that are
    also promotional adjectives go to the marketing list, the rest to the
    generic list. Input order is kept within each list.

    Returns:
        (generic_terms, marketing_terms)
    """
    promotional = ('awesome', 'best', 'perfect', 'great', 'super',
                   'amazing', 'cool', 'nice', 'good', 'excellent')

    blocked = [term for term in keywords if term in GENERIC_BLOCKLIST]
    marketing_terms = [term for term in blocked if term in promotional]
    generic_terms = [term for term in blocked if term not in promotional]

    return generic_terms, marketing_terms
|
|
|
|
|
|
def _is_y_plural_pair(first: str, second: str) -> bool:
    """Return True when one word is the "-ies" plural of the other "-y" word."""
    for singular, plural in ((first, second), (second, first)):
        if (len(singular) > 1 and singular.endswith('y')
                and plural == singular[:-1] + 'ies'):
            return True
    return False


def check_redundant_variations(keywords: List[str]) -> List[Tuple[str, str]]:
    """
    Find redundant keyword variations

    A pair of keywords is reported when either:
      - one keyword is a substring of the other (e.g. "test"/"testing",
        "deploy"/"deploys"), or
      - one is the "-ies" plural of the other (e.g. "library"/"libraries").

    The second rule replaces a plural check that could never fire: stripping
    a trailing "s" always yields a prefix, which the substring rule had
    already matched, so plurals that change the stem were never detected.

    NOTE: the substring rule is a heuristic and also pairs unrelated words
    where one happens to contain the other (e.g. "java"/"javascript").

    Returns:
        List of (keyword1, keyword2) pairs that are redundant, where
        keyword1 appears before keyword2 in the input
    """
    redundant = []

    for i, kw1 in enumerate(keywords):
        # Compare each keyword only with the ones after it so every
        # unordered pair is examined exactly once
        for kw2 in keywords[i + 1:]:
            if kw1 in kw2 or kw2 in kw1 or _is_y_plural_pair(kw1, kw2):
                redundant.append((kw1, kw2))

    return redundant
|
|
|
|
|
|
def check_category_duplication(keywords: List[str]) -> List[str]:
    """Return the keywords, in input order, that exactly equal a name in CATEGORIES"""
    return [term for term in keywords if term in CATEGORIES]
|
|
|
|
|
|
def analyze_balance(keywords: List[str]) -> Dict[str, int]:
    """
    Count how many keywords fall into each keyword type

    A keyword listed in FUNCTIONALITY_KEYWORDS counts as functionality;
    otherwise, if listed in TECHNOLOGY_KEYWORDS, as technology; anything
    else counts as other.

    Returns:
        Dict with 'functionality', 'technology' and 'other' counts
    """
    functionality = 0
    technology = 0
    other = 0

    for term in keywords:
        # Functionality is tested first, so it wins if a term is in both lists
        if term in FUNCTIONALITY_KEYWORDS:
            functionality += 1
        elif term in TECHNOLOGY_KEYWORDS:
            technology += 1
        else:
            other += 1

    return {
        'functionality': functionality,
        'technology': technology,
        'other': other,
    }
|
|
|
|
|
|
def calculate_quality_score(
    keywords: List[str],
    generic_terms: List[str],
    marketing_terms: List[str],
    redundant: List[Tuple[str, str]],
    category_dups: List[str],
    min_count: int,
    max_count: int
) -> Tuple[int, List[str]]:
    """
    Score a keyword set and describe every problem found

    The score starts at 10 and loses points per finding:
      - too few keywords: 5; too many keywords: 3
      - each generic term, marketing term, redundant pair or
        single-character keyword: 2
      - each keyword that repeats a category name: 1
      - no functionality and no technology keyword at all: 2

    Returns:
        (score out of 10, never below 0; list of issue messages)
    """
    # (penalty, message) pairs, recorded in the order the checks run
    findings = []

    # Count violations
    total = len(keywords)
    if total < min_count:
        findings.append((5, f"Too few keywords ({total} < {min_count} minimum)"))
    elif total > max_count:
        findings.append((3, f"Too many keywords ({total} > {max_count} maximum)"))

    # Generic terms
    if generic_terms:
        findings.append((
            2 * len(generic_terms),
            f"Generic terms detected: {', '.join(generic_terms)}",
        ))

    # Marketing terms
    if marketing_terms:
        findings.append((
            2 * len(marketing_terms),
            f"Marketing terms detected: {', '.join(marketing_terms)}",
        ))

    # Redundant variations
    if redundant:
        pair_text = ', '.join(f"{a}/{b}" for a, b in redundant)
        findings.append((2 * len(redundant), f"Redundant variations: {pair_text}"))

    # Category duplication
    if category_dups:
        findings.append((
            len(category_dups),
            f"Category name duplication: {', '.join(category_dups)}",
        ))

    # Single-character keywords
    one_letter = [term for term in keywords if len(term) == 1]
    if one_letter:
        findings.append((
            2 * len(one_letter),
            f"Single-character keywords: {', '.join(one_letter)}",
        ))

    # Balance check
    tally = analyze_balance(keywords)
    if not (tally['functionality'] or tally['technology']):
        findings.append((2, "No functional or technical keywords"))

    score = 10 - sum(penalty for penalty, _ in findings)
    issues = [message for _, message in findings]

    return max(0, score), issues
|
|
|
|
|
|
def suggest_improvements(
    keywords: List[str],
    generic_terms: List[str],
    marketing_terms: List[str],
    redundant: List[Tuple[str, str]],
    min_count: int,
    max_count: int
) -> List[str]:
    """
    Build human-readable suggestions for fixing a keyword set

    Suggestions are emitted in a fixed order: blocklisted terms, redundant
    pairs, keyword count, then missing functionality/technology keywords.

    Returns:
        List of suggestion lines (detail lines start with a space)
    """
    advice: List[str] = []

    # Generic/marketing terms
    if generic_terms or marketing_terms:
        advice.extend([
            "Remove generic/marketing terms",
            " Replace with specific functionality (e.g., testing, deployment, formatting)",
        ])

    # Redundant variations: one detail line per offending pair
    if redundant:
        advice.append("Consolidate redundant variations")
        advice.extend(f" Keep one of: {first}, {second}" for first, second in redundant)

    # Keyword count: at most one of these two can apply
    total = len(keywords)
    shortfall = min_count - total
    surplus = total - max_count
    if shortfall > 0:
        advice.extend([
            f"Add {shortfall} more relevant keyword(s)",
            " Consider: specific technologies, use-cases, or functionalities",
        ])
    elif surplus > 0:
        advice.append(f"Remove {surplus} least relevant keyword(s)")

    # Balance
    tally = analyze_balance(keywords)
    if not tally['functionality']:
        advice.append("Add functionality keywords (e.g., testing, automation, deployment)")
    if not tally['technology']:
        advice.append("Add technology keywords (e.g., python, docker, kubernetes)")

    return advice
|
|
|
|
|
|
def _parse_count_limits(option_args: List[str]) -> Tuple[int, int]:
    """
    Read the --min / --max limits from the arguments after the keyword string

    Unrecognized arguments are ignored, and when an option is repeated the
    last value wins. A missing value, a non-integer value, or a minimum
    larger than the maximum prints an error and exits with status 3 instead
    of raising or being silently ignored.

    Returns:
        (min_count, max_count)
    """
    limits = {'--min': DEFAULT_MIN_KEYWORDS, '--max': DEFAULT_MAX_KEYWORDS}

    for position, arg in enumerate(option_args):
        if arg not in limits:
            continue

        if position + 1 >= len(option_args):
            print(f"ERROR: {arg} requires a value\n")
            sys.exit(3)

        raw_value = option_args[position + 1]
        try:
            limits[arg] = int(raw_value)
        except ValueError:
            print(f"ERROR: {arg} expects an integer, got '{raw_value}'\n")
            sys.exit(3)

    min_count, max_count = limits['--min'], limits['--max']
    if min_count > max_count:
        # No keyword count could ever satisfy such a range
        print(f"ERROR: --min ({min_count}) cannot be greater than --max ({max_count})\n")
        sys.exit(3)

    return min_count, max_count


def main():
    """
    Main entry point

    Reads the keyword string and optional --min/--max limits from sys.argv,
    analyzes the keywords, prints a report and exits with:
      0 - count within range and quality score of at least 7
      1 - keyword count outside the allowed range
      2 - count within range but quality score below 7
      3 - missing or invalid command-line parameters
    """
    if len(sys.argv) < 2 or sys.argv[1] in ('-h', '--help'):
        usage()  # prints the help text and exits with status 3

    keyword_string = sys.argv[1]

    # Parse optional arguments
    min_count, max_count = _parse_count_limits(sys.argv[2:])

    # Parse keywords
    keywords = parse_keywords(keyword_string)

    if not keywords:
        print("ERROR: Keywords cannot be empty\n")
        # Report the limits actually in effect rather than the defaults
        print(f"Provide {min_count}-{max_count} relevant keywords describing your plugin.\n")
        print("Examples:")
        print(' "testing,pytest,automation"')
        print(' "deployment,kubernetes,ci-cd"')
        sys.exit(3)

    # Analyze keywords
    count = len(keywords)
    generic_terms, marketing_terms = check_generic_terms(keywords)
    redundant = check_redundant_variations(keywords)
    category_dups = check_category_duplication(keywords)
    balance = analyze_balance(keywords)

    # Calculate quality score
    score, issues = calculate_quality_score(
        keywords, generic_terms, marketing_terms,
        redundant, category_dups, min_count, max_count
    )

    # Determine status: a count violation takes precedence over the score
    if not (min_count <= count <= max_count):
        status = "❌ FAIL"
        exit_code = 1
    elif score >= 9:
        status = "✅ PASS"
        exit_code = 0
    elif score < 7:
        status = "❌ FAIL"
        exit_code = 2
    else:
        status = "⚠️ WARNING"
        exit_code = 0

    # Print results
    print(f"{status}: Keyword validation\n")
    print(f"Keywords: {', '.join(keywords)}")
    print(f"Count: {count} (valid range: {min_count}-{max_count})")
    print(f"Quality Score: {score}/10\n")

    if issues:
        print("Issues Found:")
        for issue in issues:
            print(f" - {issue}")
        print()

    # Balance breakdown
    print("Breakdown:")
    print(f" - Functionality: {balance['functionality']} keywords")
    print(f" - Technology: {balance['technology']} keywords")
    print(f" - Other: {balance['other']} keywords")
    print()

    # Score impact
    if score >= 9:
        print("Quality Score Impact: +10 points (excellent)\n")
        if exit_code == 0:
            print("Excellent keyword selection for discoverability!")
    elif score >= 7:
        print("Quality Score Impact: +7 points (good)\n")
        print("Good keywords, but could be improved.")
    else:
        print("Quality Score Impact: 0 points (fix to gain +10)\n")
        print("Keywords need significant improvement.")

    # Suggestions
    if issues:
        suggestions = suggest_improvements(
            keywords, generic_terms, marketing_terms,
            redundant, min_count, max_count
        )
        if suggestions:
            print("\nSuggestions:")
            for suggestion in suggestions:
                print(f" {suggestion}")

    sys.exit(exit_code)
|
|
|
|
|
|
# Run the analyzer only when this file is executed as a script
if __name__ == "__main__":
    main()
|