Initial commit
This commit is contained in:
392
commands/best-practices/.scripts/keyword-analyzer.py
Executable file
392
commands/best-practices/.scripts/keyword-analyzer.py
Executable file
@@ -0,0 +1,392 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
============================================================================
|
||||
Keyword Quality Analyzer
|
||||
============================================================================
|
||||
Purpose: Analyze keyword quality, count, and relevance for OpenPlugins
|
||||
Version: 1.0.0
|
||||
Usage: ./keyword-analyzer.py <keywords> [--min N] [--max N]
|
||||
Returns: 0=valid, 1=count violation, 2=quality issues, 3=missing params
|
||||
============================================================================
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
from typing import List, Tuple, Dict
|
||||
|
||||
# Default constraints
|
||||
DEFAULT_MIN_KEYWORDS = 3
|
||||
DEFAULT_MAX_KEYWORDS = 7
|
||||
|
||||
# Generic terms to avoid
|
||||
GENERIC_BLOCKLIST = [
|
||||
'plugin', 'tool', 'utility', 'helper', 'app',
|
||||
'code', 'software', 'program', 'system',
|
||||
'awesome', 'best', 'perfect', 'great', 'super',
|
||||
'amazing', 'cool', 'nice', 'good', 'excellent'
|
||||
]
|
||||
|
||||
# OpenPlugins categories (should not be duplicated as keywords)
|
||||
CATEGORIES = [
|
||||
'development', 'testing', 'deployment', 'documentation',
|
||||
'security', 'database', 'monitoring', 'productivity',
|
||||
'quality', 'collaboration'
|
||||
]
|
||||
|
||||
# Common keyword types for balance checking
|
||||
FUNCTIONALITY_KEYWORDS = [
|
||||
'testing', 'deployment', 'formatting', 'linting', 'migration',
|
||||
'generation', 'automation', 'analysis', 'monitoring', 'scanning',
|
||||
'refactoring', 'debugging', 'profiling', 'optimization'
|
||||
]
|
||||
|
||||
TECHNOLOGY_KEYWORDS = [
|
||||
'python', 'javascript', 'typescript', 'docker', 'kubernetes',
|
||||
'react', 'vue', 'angular', 'node', 'bash', 'terraform',
|
||||
'postgresql', 'mysql', 'redis', 'aws', 'azure', 'gcp'
|
||||
]
|
||||
|
||||
|
||||
def usage():
|
||||
"""Print usage information"""
|
||||
print("""Usage: keyword-analyzer.py <keywords> [--min N] [--max N]
|
||||
|
||||
Analyze keyword quality and relevance for OpenPlugins standards.
|
||||
|
||||
Arguments:
|
||||
keywords Comma-separated list of keywords (required)
|
||||
--min N Minimum keyword count (default: 3)
|
||||
--max N Maximum keyword count (default: 7)
|
||||
|
||||
Requirements:
|
||||
- Count: 3-7 keywords (optimal: 5-6)
|
||||
- No generic terms (plugin, tool, awesome)
|
||||
- No marketing fluff (best, perfect, amazing)
|
||||
- Mix of functionality and technology
|
||||
- No redundant variations
|
||||
|
||||
Good examples:
|
||||
"testing,pytest,automation,tdd,python"
|
||||
"deployment,kubernetes,ci-cd,docker"
|
||||
"linting,javascript,code-quality"
|
||||
|
||||
Bad examples:
|
||||
"plugin,tool,awesome" (generic)
|
||||
"test,testing,tests" (redundant)
|
||||
"development" (only one, too generic)
|
||||
|
||||
Exit codes:
|
||||
0 - Valid keyword set
|
||||
1 - Count violation (too few or too many)
|
||||
2 - Quality issues (generic terms, duplicates)
|
||||
3 - Missing required parameters
|
||||
""")
|
||||
sys.exit(3)
|
||||
|
||||
|
||||
def parse_keywords(keyword_string: str) -> List[str]:
|
||||
"""Parse and normalize keyword string"""
|
||||
if not keyword_string:
|
||||
return []
|
||||
|
||||
# Split by comma, strip whitespace, lowercase
|
||||
keywords = [k.strip().lower() for k in keyword_string.split(',')]
|
||||
|
||||
# Remove empty strings
|
||||
keywords = [k for k in keywords if k]
|
||||
|
||||
# Remove duplicates while preserving order
|
||||
seen = set()
|
||||
unique_keywords = []
|
||||
for k in keywords:
|
||||
if k not in seen:
|
||||
seen.add(k)
|
||||
unique_keywords.append(k)
|
||||
|
||||
return unique_keywords
|
||||
|
||||
|
||||
def check_generic_terms(keywords: List[str]) -> Tuple[List[str], List[str]]:
|
||||
"""
|
||||
Check for generic and marketing terms
|
||||
|
||||
Returns:
|
||||
(generic_terms, marketing_terms)
|
||||
"""
|
||||
generic_terms = []
|
||||
marketing_terms = []
|
||||
|
||||
for keyword in keywords:
|
||||
if keyword in GENERIC_BLOCKLIST:
|
||||
if keyword in ['awesome', 'best', 'perfect', 'great', 'super', 'amazing', 'cool', 'nice', 'good', 'excellent']:
|
||||
marketing_terms.append(keyword)
|
||||
else:
|
||||
generic_terms.append(keyword)
|
||||
|
||||
return generic_terms, marketing_terms
|
||||
|
||||
|
||||
def check_redundant_variations(keywords: List[str]) -> List[Tuple[str, str]]:
|
||||
"""
|
||||
Find redundant keyword variations
|
||||
|
||||
Returns:
|
||||
List of (keyword1, keyword2) pairs that are redundant
|
||||
"""
|
||||
redundant = []
|
||||
|
||||
for i, kw1 in enumerate(keywords):
|
||||
for kw2 in keywords[i+1:]:
|
||||
# Check if one is a substring of the other
|
||||
if kw1 in kw2 or kw2 in kw1:
|
||||
redundant.append((kw1, kw2))
|
||||
# Check for plural variations
|
||||
elif kw1.rstrip('s') == kw2 or kw2.rstrip('s') == kw1:
|
||||
redundant.append((kw1, kw2))
|
||||
|
||||
return redundant
|
||||
|
||||
|
||||
def check_category_duplication(keywords: List[str]) -> List[str]:
|
||||
"""Check if any keywords exactly match category names"""
|
||||
duplicates = []
|
||||
for keyword in keywords:
|
||||
if keyword in CATEGORIES:
|
||||
duplicates.append(keyword)
|
||||
return duplicates
|
||||
|
||||
|
||||
def analyze_balance(keywords: List[str]) -> Dict[str, int]:
|
||||
"""
|
||||
Analyze keyword balance across types
|
||||
|
||||
Returns:
|
||||
Dict with counts for each type
|
||||
"""
|
||||
balance = {
|
||||
'functionality': 0,
|
||||
'technology': 0,
|
||||
'other': 0
|
||||
}
|
||||
|
||||
for keyword in keywords:
|
||||
if keyword in FUNCTIONALITY_KEYWORDS:
|
||||
balance['functionality'] += 1
|
||||
elif keyword in TECHNOLOGY_KEYWORDS:
|
||||
balance['technology'] += 1
|
||||
else:
|
||||
balance['other'] += 1
|
||||
|
||||
return balance
|
||||
|
||||
|
||||
def calculate_quality_score(
|
||||
keywords: List[str],
|
||||
generic_terms: List[str],
|
||||
marketing_terms: List[str],
|
||||
redundant: List[Tuple[str, str]],
|
||||
category_dups: List[str],
|
||||
min_count: int,
|
||||
max_count: int
|
||||
) -> Tuple[int, List[str]]:
|
||||
"""
|
||||
Calculate quality score and list issues
|
||||
|
||||
Returns:
|
||||
(score out of 10, list of issues)
|
||||
"""
|
||||
score = 10
|
||||
issues = []
|
||||
|
||||
# Count violations
|
||||
count = len(keywords)
|
||||
if count < min_count:
|
||||
score -= 5
|
||||
issues.append(f"Too few keywords ({count} < {min_count} minimum)")
|
||||
elif count > max_count:
|
||||
score -= 3
|
||||
issues.append(f"Too many keywords ({count} > {max_count} maximum)")
|
||||
|
||||
# Generic terms
|
||||
if generic_terms:
|
||||
score -= len(generic_terms) * 2
|
||||
issues.append(f"Generic terms detected: {', '.join(generic_terms)}")
|
||||
|
||||
# Marketing terms
|
||||
if marketing_terms:
|
||||
score -= len(marketing_terms) * 2
|
||||
issues.append(f"Marketing terms detected: {', '.join(marketing_terms)}")
|
||||
|
||||
# Redundant variations
|
||||
if redundant:
|
||||
score -= len(redundant) * 2
|
||||
redundant_str = ', '.join([f"{a}/{b}" for a, b in redundant])
|
||||
issues.append(f"Redundant variations: {redundant_str}")
|
||||
|
||||
# Category duplication
|
||||
if category_dups:
|
||||
score -= len(category_dups) * 1
|
||||
issues.append(f"Category name duplication: {', '.join(category_dups)}")
|
||||
|
||||
# Single-character keywords
|
||||
single_char = [k for k in keywords if len(k) == 1]
|
||||
if single_char:
|
||||
score -= len(single_char) * 2
|
||||
issues.append(f"Single-character keywords: {', '.join(single_char)}")
|
||||
|
||||
# Balance check
|
||||
balance = analyze_balance(keywords)
|
||||
if balance['functionality'] == 0 and balance['technology'] == 0:
|
||||
score -= 2
|
||||
issues.append("No functional or technical keywords")
|
||||
|
||||
return max(0, score), issues
|
||||
|
||||
|
||||
def suggest_improvements(
|
||||
keywords: List[str],
|
||||
generic_terms: List[str],
|
||||
marketing_terms: List[str],
|
||||
redundant: List[Tuple[str, str]],
|
||||
min_count: int,
|
||||
max_count: int
|
||||
) -> List[str]:
|
||||
"""Generate improvement suggestions"""
|
||||
suggestions = []
|
||||
|
||||
# Remove generic/marketing terms
|
||||
if generic_terms or marketing_terms:
|
||||
suggestions.append("Remove generic/marketing terms")
|
||||
suggestions.append(" Replace with specific functionality (e.g., testing, deployment, formatting)")
|
||||
|
||||
# Consolidate redundant variations
|
||||
if redundant:
|
||||
suggestions.append("Consolidate redundant variations")
|
||||
for kw1, kw2 in redundant:
|
||||
suggestions.append(f" Keep one of: {kw1}, {kw2}")
|
||||
|
||||
# Add more keywords if too few
|
||||
count = len(keywords)
|
||||
if count < min_count:
|
||||
needed = min_count - count
|
||||
suggestions.append(f"Add {needed} more relevant keyword(s)")
|
||||
suggestions.append(" Consider: specific technologies, use-cases, or functionalities")
|
||||
|
||||
# Remove keywords if too many
|
||||
elif count > max_count:
|
||||
excess = count - max_count
|
||||
suggestions.append(f"Remove {excess} least relevant keyword(s)")
|
||||
|
||||
# Balance suggestions
|
||||
balance = analyze_balance(keywords)
|
||||
if balance['functionality'] == 0:
|
||||
suggestions.append("Add functionality keywords (e.g., testing, automation, deployment)")
|
||||
if balance['technology'] == 0:
|
||||
suggestions.append("Add technology keywords (e.g., python, docker, kubernetes)")
|
||||
|
||||
return suggestions
|
||||
|
||||
|
||||
def main():
|
||||
"""Main entry point"""
|
||||
if len(sys.argv) < 2 or sys.argv[1] in ['-h', '--help']:
|
||||
usage()
|
||||
|
||||
keyword_string = sys.argv[1]
|
||||
|
||||
# Parse optional arguments
|
||||
min_count = DEFAULT_MIN_KEYWORDS
|
||||
max_count = DEFAULT_MAX_KEYWORDS
|
||||
|
||||
for i, arg in enumerate(sys.argv[2:], start=2):
|
||||
if arg == '--min' and i + 1 < len(sys.argv):
|
||||
min_count = int(sys.argv[i + 1])
|
||||
elif arg == '--max' and i + 1 < len(sys.argv):
|
||||
max_count = int(sys.argv[i + 1])
|
||||
|
||||
# Parse keywords
|
||||
keywords = parse_keywords(keyword_string)
|
||||
|
||||
if not keywords:
|
||||
print("ERROR: Keywords cannot be empty\n")
|
||||
print("Provide 3-7 relevant keywords describing your plugin.\n")
|
||||
print("Examples:")
|
||||
print(' "testing,pytest,automation"')
|
||||
print(' "deployment,kubernetes,ci-cd"')
|
||||
sys.exit(3)
|
||||
|
||||
# Analyze keywords
|
||||
count = len(keywords)
|
||||
generic_terms, marketing_terms = check_generic_terms(keywords)
|
||||
redundant = check_redundant_variations(keywords)
|
||||
category_dups = check_category_duplication(keywords)
|
||||
balance = analyze_balance(keywords)
|
||||
|
||||
# Calculate quality score
|
||||
score, issues = calculate_quality_score(
|
||||
keywords, generic_terms, marketing_terms,
|
||||
redundant, category_dups, min_count, max_count
|
||||
)
|
||||
|
||||
# Determine status
|
||||
if score >= 9 and min_count <= count <= max_count:
|
||||
status = "✅ PASS"
|
||||
exit_code = 0
|
||||
elif count < min_count or count > max_count:
|
||||
status = "❌ FAIL"
|
||||
exit_code = 1
|
||||
elif score < 7:
|
||||
status = "❌ FAIL"
|
||||
exit_code = 2
|
||||
else:
|
||||
status = "⚠️ WARNING"
|
||||
exit_code = 0
|
||||
|
||||
# Print results
|
||||
print(f"{status}: Keyword validation\n")
|
||||
print(f"Keywords: {', '.join(keywords)}")
|
||||
print(f"Count: {count} (valid range: {min_count}-{max_count})")
|
||||
print(f"Quality Score: {score}/10\n")
|
||||
|
||||
if issues:
|
||||
print("Issues Found:")
|
||||
for issue in issues:
|
||||
print(f" - {issue}")
|
||||
print()
|
||||
|
||||
# Balance breakdown
|
||||
print("Breakdown:")
|
||||
print(f" - Functionality: {balance['functionality']} keywords")
|
||||
print(f" - Technology: {balance['technology']} keywords")
|
||||
print(f" - Other: {balance['other']} keywords")
|
||||
print()
|
||||
|
||||
# Score impact
|
||||
if score >= 9:
|
||||
print("Quality Score Impact: +10 points (excellent)\n")
|
||||
if exit_code == 0:
|
||||
print("Excellent keyword selection for discoverability!")
|
||||
elif score >= 7:
|
||||
print("Quality Score Impact: +7 points (good)\n")
|
||||
print("Good keywords, but could be improved.")
|
||||
else:
|
||||
print("Quality Score Impact: 0 points (fix to gain +10)\n")
|
||||
print("Keywords need significant improvement.")
|
||||
|
||||
# Suggestions
|
||||
if issues:
|
||||
suggestions = suggest_improvements(
|
||||
keywords, generic_terms, marketing_terms,
|
||||
redundant, min_count, max_count
|
||||
)
|
||||
if suggestions:
|
||||
print("\nSuggestions:")
|
||||
for suggestion in suggestions:
|
||||
print(f" {suggestion}")
|
||||
|
||||
sys.exit(exit_code)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
Reference in New Issue
Block a user