#!/usr/bin/env python3
"""
============================================================================
Keyword Quality Analyzer
============================================================================
Purpose: Analyze keyword quality, count, and relevance for OpenPlugins
Version: 1.0.0
Usage:   ./keyword-analyzer.py KEYWORDS [--min N] [--max N]
Returns: 0=valid, 1=count violation, 2=quality issues, 3=missing params
============================================================================
"""

import sys
import re
from itertools import combinations
from typing import List, Tuple, Dict

# Default constraints
DEFAULT_MIN_KEYWORDS = 3
DEFAULT_MAX_KEYWORDS = 7

# Generic terms to avoid (includes the marketing words listed below)
GENERIC_BLOCKLIST = [
    'plugin', 'tool', 'utility', 'helper', 'app', 'code', 'software',
    'program', 'system', 'awesome', 'best', 'perfect', 'great', 'super',
    'amazing', 'cool', 'nice', 'good', 'excellent'
]

# Subset of GENERIC_BLOCKLIST that is reported as "marketing" rather than
# "generic"; kept in one place so the classification cannot drift.
MARKETING_TERMS = frozenset([
    'awesome', 'best', 'perfect', 'great', 'super',
    'amazing', 'cool', 'nice', 'good', 'excellent'
])

# OpenPlugins categories (should not be duplicated as keywords)
CATEGORIES = [
    'development', 'testing', 'deployment', 'documentation', 'security',
    'database', 'monitoring', 'productivity', 'quality', 'collaboration'
]

# Common keyword types for balance checking
FUNCTIONALITY_KEYWORDS = [
    'testing', 'deployment', 'formatting', 'linting', 'migration',
    'generation', 'automation', 'analysis', 'monitoring', 'scanning',
    'refactoring', 'debugging', 'profiling', 'optimization'
]

TECHNOLOGY_KEYWORDS = [
    'python', 'javascript', 'typescript', 'docker', 'kubernetes',
    'react', 'vue', 'angular', 'node', 'bash', 'terraform',
    'postgresql', 'mysql', 'redis', 'aws', 'azure', 'gcp'
]


def usage():
    """Print usage information and exit with status 3."""
    print("""Usage: keyword-analyzer.py KEYWORDS [--min N] [--max N]

Analyze keyword quality and relevance for OpenPlugins standards.

Arguments:
  keywords    Comma-separated list of keywords (required)
  --min N     Minimum keyword count (default: 3)
  --max N     Maximum keyword count (default: 7)

Requirements:
  - Count: 3-7 keywords (optimal: 5-6)
  - No generic terms (plugin, tool, awesome)
  - No marketing fluff (best, perfect, amazing)
  - Mix of functionality and technology
  - No redundant variations

Good examples:
  "testing,pytest,automation,tdd,python"
  "deployment,kubernetes,ci-cd,docker"
  "linting,javascript,code-quality"

Bad examples:
  "plugin,tool,awesome"      (generic)
  "test,testing,tests"       (redundant)
  "development"              (only one, too generic)

Exit codes:
  0 - Valid keyword set
  1 - Count violation (too few or too many)
  2 - Quality issues (generic terms, duplicates)
  3 - Missing required parameters
""")
    sys.exit(3)


def parse_keywords(keyword_string: str) -> List[str]:
    """Parse and normalize a comma-separated keyword string.

    Each entry is stripped and lowercased; empty entries and repeated
    keywords are dropped, keeping the first occurrence order.

    Returns:
        The list of unique, normalized keywords (empty for empty input).
    """
    if not keyword_string:
        return []

    unique_keywords = []
    seen = set()
    for raw in keyword_string.split(','):
        keyword = raw.strip().lower()
        if not keyword or keyword in seen:
            continue
        seen.add(keyword)
        unique_keywords.append(keyword)

    return unique_keywords


def check_generic_terms(keywords: List[str]) -> Tuple[List[str], List[str]]:
    """
    Check for generic and marketing terms

    Returns:
        (generic_terms, marketing_terms)
    """
    generic_terms = []
    marketing_terms = []

    for keyword in keywords:
        if keyword not in GENERIC_BLOCKLIST:
            continue
        if keyword in MARKETING_TERMS:
            marketing_terms.append(keyword)
        else:
            generic_terms.append(keyword)

    return generic_terms, marketing_terms


def check_redundant_variations(keywords: List[str]) -> List[Tuple[str, str]]:
    """
    Find redundant keyword variations

    Two keywords are considered redundant when one is a substring of the
    other (e.g. "test"/"testing", "test"/"tests").

    Returns:
        List of (keyword1, keyword2) pairs that are redundant, in the order
        the pairs appear in ``keywords``.
    """
    # NOTE: a separate "strip trailing 's'" plural comparison is not needed:
    # if kw.rstrip('s') equals the other keyword, that keyword is a prefix of
    # kw and is therefore already caught by the substring test.
    return [
        (kw1, kw2)
        for kw1, kw2 in combinations(keywords, 2)
        if kw1 in kw2 or kw2 in kw1
    ]


def check_category_duplication(keywords: List[str]) -> List[str]:
    """Return the keywords that exactly match a category name."""
    return [keyword for keyword in keywords if keyword in CATEGORIES]


def analyze_balance(keywords: List[str]) -> Dict[str, int]:
    """
    Analyze keyword balance across types

    A keyword found in FUNCTIONALITY_KEYWORDS counts as functionality even
    if it also appears in TECHNOLOGY_KEYWORDS; anything in neither list is
    counted as "other".

    Returns:
        Dict with counts for 'functionality', 'technology' and 'other'
    """
    balance = {
        'functionality': 0,
        'technology': 0,
        'other': 0
    }

    for keyword in keywords:
        if keyword in FUNCTIONALITY_KEYWORDS:
            balance['functionality'] += 1
        elif keyword in TECHNOLOGY_KEYWORDS:
            balance['technology'] += 1
        else:
            balance['other'] += 1

    return balance


def calculate_quality_score(
    keywords: List[str],
    generic_terms: List[str],
    marketing_terms: List[str],
    redundant: List[Tuple[str, str]],
    category_dups: List[str],
    min_count: int,
    max_count: int
) -> Tuple[int, List[str]]:
    """
    Calculate quality score and list issues

    Starts from 10 and subtracts a penalty per detected problem; the result
    is clamped so it never goes below 0.

    Returns:
        (score out of 10, list of issues)
    """
    score = 10
    issues = []

    # Count violations
    count = len(keywords)
    if count < min_count:
        score -= 5
        issues.append(f"Too few keywords ({count} < {min_count} minimum)")
    elif count > max_count:
        score -= 3
        issues.append(f"Too many keywords ({count} > {max_count} maximum)")

    # Generic terms
    if generic_terms:
        score -= len(generic_terms) * 2
        issues.append(f"Generic terms detected: {', '.join(generic_terms)}")

    # Marketing terms
    if marketing_terms:
        score -= len(marketing_terms) * 2
        issues.append(f"Marketing terms detected: {', '.join(marketing_terms)}")

    # Redundant variations
    if redundant:
        score -= len(redundant) * 2
        redundant_str = ', '.join(f"{a}/{b}" for a, b in redundant)
        issues.append(f"Redundant variations: {redundant_str}")

    # Category duplication
    if category_dups:
        score -= len(category_dups)
        issues.append(f"Category name duplication: {', '.join(category_dups)}")

    # Single-character keywords
    single_char = [k for k in keywords if len(k) == 1]
    if single_char:
        score -= len(single_char) * 2
        issues.append(f"Single-character keywords: {', '.join(single_char)}")

    # Balance check
    balance = analyze_balance(keywords)
    if balance['functionality'] == 0 and balance['technology'] == 0:
        score -= 2
        issues.append("No functional or technical keywords")

    return max(0, score), issues


def suggest_improvements(
    keywords: List[str],
    generic_terms: List[str],
    marketing_terms: List[str],
    redundant: List[Tuple[str, str]],
    min_count: int,
    max_count: int
) -> List[str]:
    """Generate improvement suggestions for the detected problems.

    Returns:
        List of human-readable suggestion lines (possibly empty).
    """
    suggestions = []

    # Remove generic/marketing terms
    if generic_terms or marketing_terms:
        suggestions.append("Remove generic/marketing terms")
        suggestions.append(" Replace with specific functionality (e.g., testing, deployment, formatting)")

    # Consolidate redundant variations
    if redundant:
        suggestions.append("Consolidate redundant variations")
        for kw1, kw2 in redundant:
            suggestions.append(f" Keep one of: {kw1}, {kw2}")

    # Add more keywords if too few
    count = len(keywords)
    if count < min_count:
        needed = min_count - count
        suggestions.append(f"Add {needed} more relevant keyword(s)")
        suggestions.append(" Consider: specific technologies, use-cases, or functionalities")
    # Remove keywords if too many
    elif count > max_count:
        excess = count - max_count
        suggestions.append(f"Remove {excess} least relevant keyword(s)")

    # Balance suggestions
    balance = analyze_balance(keywords)
    if balance['functionality'] == 0:
        suggestions.append("Add functionality keywords (e.g., testing, automation, deployment)")
    if balance['technology'] == 0:
        suggestions.append("Add technology keywords (e.g., python, docker, kubernetes)")

    return suggestions


def _parse_count_options(args: List[str]) -> Tuple[int, int]:
    """Extract the --min/--max limits from the arguments after the keywords.

    Unrecognized arguments are ignored. When an option is repeated, the last
    occurrence wins.

    Returns:
        (min_count, max_count), falling back to the module defaults.

    Raises:
        ValueError: if an option has no value, its value is not an integer,
            or the resulting minimum exceeds the maximum.
    """
    limits = {'--min': DEFAULT_MIN_KEYWORDS, '--max': DEFAULT_MAX_KEYWORDS}

    index = 0
    while index < len(args):
        option = args[index]
        if option not in limits:
            index += 1
            continue
        if index + 1 >= len(args):
            raise ValueError(f"{option} requires an integer value")
        raw_value = args[index + 1]
        try:
            limits[option] = int(raw_value)
        except ValueError:
            raise ValueError(
                f"{option} expects an integer, got '{raw_value}'"
            ) from None
        index += 2  # skip the value that was just consumed

    min_count, max_count = limits['--min'], limits['--max']
    if min_count > max_count:
        raise ValueError(
            f"--min ({min_count}) cannot be greater than --max ({max_count})"
        )
    return min_count, max_count


def main():
    """Main entry point: analyze the keywords given on the command line.

    Prints a report to stdout and exits with 0 (pass/warning), 1 (count
    violation), 2 (quality issues) or 3 (missing or invalid parameters).
    """
    if len(sys.argv) < 2 or sys.argv[1] in ['-h', '--help']:
        usage()

    keyword_string = sys.argv[1]

    # Parse optional arguments; report malformed options instead of
    # crashing with a traceback.
    try:
        min_count, max_count = _parse_count_options(sys.argv[2:])
    except ValueError as err:
        print(f"ERROR: {err}\n")
        print("Run with --help for usage information.")
        sys.exit(3)

    # Parse keywords
    keywords = parse_keywords(keyword_string)

    if not keywords:
        print("ERROR: Keywords cannot be empty\n")
        print("Provide 3-7 relevant keywords describing your plugin.\n")
        print("Examples:")
        print(' "testing,pytest,automation"')
        print(' "deployment,kubernetes,ci-cd"')
        sys.exit(3)

    # Analyze keywords
    count = len(keywords)
    generic_terms, marketing_terms = check_generic_terms(keywords)
    redundant = check_redundant_variations(keywords)
    category_dups = check_category_duplication(keywords)
    balance = analyze_balance(keywords)

    # Calculate quality score
    score, issues = calculate_quality_score(
        keywords, generic_terms, marketing_terms,
        redundant, category_dups, min_count, max_count
    )

    # Determine status; a count violation takes precedence over a low score
    if score >= 9 and min_count <= count <= max_count:
        status = "✅ PASS"
        exit_code = 0
    elif count < min_count or count > max_count:
        status = "❌ FAIL"
        exit_code = 1
    elif score < 7:
        status = "❌ FAIL"
        exit_code = 2
    else:
        status = "⚠️ WARNING"
        exit_code = 0

    # Print results
    print(f"{status}: Keyword validation\n")
    print(f"Keywords: {', '.join(keywords)}")
    print(f"Count: {count} (valid range: {min_count}-{max_count})")
    print(f"Quality Score: {score}/10\n")

    if issues:
        print("Issues Found:")
        for issue in issues:
            print(f" - {issue}")
        print()

    # Balance breakdown
    print("Breakdown:")
    print(f" - Functionality: {balance['functionality']} keywords")
    print(f" - Technology: {balance['technology']} keywords")
    print(f" - Other: {balance['other']} keywords")
    print()

    # Score impact
    if score >= 9:
        print("Quality Score Impact: +10 points (excellent)\n")
        if exit_code == 0:
            print("Excellent keyword selection for discoverability!")
    elif score >= 7:
        print("Quality Score Impact: +7 points (good)\n")
        print("Good keywords, but could be improved.")
    else:
        print("Quality Score Impact: 0 points (fix to gain +10)\n")
        print("Keywords need significant improvement.")

    # Suggestions
    if issues:
        suggestions = suggest_improvements(
            keywords, generic_terms, marketing_terms,
            redundant, min_count, max_count
        )
        if suggestions:
            print("\nSuggestions:")
            for suggestion in suggestions:
                print(f" {suggestion}")

    sys.exit(exit_code)


if __name__ == '__main__':
    main()