Initial commit

2025-11-29 18:20:28 +08:00
commit b727790a9e
65 changed files with 16412 additions and 0 deletions
--- a/commands/best-practices/.scripts/keyword-analyzer.py
+++ b/commands/best-practices/.scripts/keyword-analyzer.py
@@ -0,0 +1,392 @@
+#!/usr/bin/env python3
+
+"""
+============================================================================
+Keyword Quality Analyzer
+============================================================================
+Purpose: Analyze keyword quality, count, and relevance for OpenPlugins
+Version: 1.0.0
+Usage: ./keyword-analyzer.py <keywords> [--min N] [--max N]
+Returns: 0=valid, 1=count violation, 2=quality issues, 3=missing params
+============================================================================
+"""
+
+import sys
+import re
+from typing import List, Tuple, Dict
+
+# Keyword-count limits applied when --min / --max are not given
+DEFAULT_MIN_KEYWORDS = 3
+DEFAULT_MAX_KEYWORDS = 7
+
+# Terms that are rejected as keywords
+GENERIC_BLOCKLIST = [
+    # vague product nouns
+    'plugin',
+    'tool',
+    'utility',
+    'helper',
+    'app',
+    'code',
+    'software',
+    'program',
+    'system',
+    # marketing adjectives
+    'awesome',
+    'best',
+    'perfect',
+    'great',
+    'super',
+    'amazing',
+    'cool',
+    'nice',
+    'good',
+    'excellent',
+]
+
+# OpenPlugins category names; a keyword equal to one of these is reported
+# as a category duplication
+CATEGORIES = [
+    'development',
+    'testing',
+    'deployment',
+    'documentation',
+    'security',
+    'database',
+    'monitoring',
+    'productivity',
+    'quality',
+    'collaboration',
+]
+
+# Reference vocabularies used to classify keywords for the balance check
+FUNCTIONALITY_KEYWORDS = [
+    'testing',
+    'deployment',
+    'formatting',
+    'linting',
+    'migration',
+    'generation',
+    'automation',
+    'analysis',
+    'monitoring',
+    'scanning',
+    'refactoring',
+    'debugging',
+    'profiling',
+    'optimization',
+]
+
+TECHNOLOGY_KEYWORDS = [
+    'python',
+    'javascript',
+    'typescript',
+    'docker',
+    'kubernetes',
+    'react',
+    'vue',
+    'angular',
+    'node',
+    'bash',
+    'terraform',
+    'postgresql',
+    'mysql',
+    'redis',
+    'aws',
+    'azure',
+    'gcp',
+]
+
+
+def usage():
+    """Show the command-line help text, then terminate with exit status 3"""
+    help_text = """Usage: keyword-analyzer.py <keywords> [--min N] [--max N]
+
+Analyze keyword quality and relevance for OpenPlugins standards.
+
+Arguments:
+  keywords    Comma-separated list of keywords (required)
+  --min N     Minimum keyword count (default: 3)
+  --max N     Maximum keyword count (default: 7)
+
+Requirements:
+  - Count: 3-7 keywords (optimal: 5-6)
+  - No generic terms (plugin, tool, awesome)
+  - No marketing fluff (best, perfect, amazing)
+  - Mix of functionality and technology
+  - No redundant variations
+
+Good examples:
+  "testing,pytest,automation,tdd,python"
+  "deployment,kubernetes,ci-cd,docker"
+  "linting,javascript,code-quality"
+
+Bad examples:
+  "plugin,tool,awesome" (generic)
+  "test,testing,tests" (redundant)
+  "development" (only one, too generic)
+
+Exit codes:
+  0 - Valid keyword set
+  1 - Count violation (too few or too many)
+  2 - Quality issues (generic terms, duplicates)
+  3 - Missing required parameters
+"""
+    print(help_text)
+    # Same effect as sys.exit(3): the process ends with status 3
+    raise SystemExit(3)
+
+
+def parse_keywords(keyword_string: str) -> List[str]:
+    """
+    Turn a comma-separated keyword string into a clean keyword list
+
+    Each piece is trimmed and lowercased, blank pieces are dropped, and
+    repeated keywords collapse to their first occurrence.
+
+    Returns:
+        Unique keywords in their original order ([] for empty input)
+    """
+    if not keyword_string:
+        return []
+
+    normalized = (piece.strip().lower() for piece in keyword_string.split(','))
+
+    # dict keys keep insertion order, so this de-duplicates without reordering
+    return list(dict.fromkeys(word for word in normalized if word))
+
+
+def check_generic_terms(keywords: List[str]) -> Tuple[List[str], List[str]]:
+    """
+    Split the blocklisted keywords into generic and marketing terms
+
+    Only keywords found in GENERIC_BLOCKLIST are reported; each one lands
+    in exactly one of the two lists, in input order.
+
+    Returns:
+        (generic_terms, marketing_terms)
+    """
+    hype_words = (
+        'awesome', 'best', 'perfect', 'great', 'super',
+        'amazing', 'cool', 'nice', 'good', 'excellent',
+    )
+
+    blocked = [word for word in keywords if word in GENERIC_BLOCKLIST]
+    marketing_terms = [word for word in blocked if word in hype_words]
+    generic_terms = [word for word in blocked if word not in hype_words]
+
+    return generic_terms, marketing_terms
+
+
+def _singularize(word: str) -> str:
+    """Return a naive singular form: '-ies' becomes '-y', one trailing 's' is dropped"""
+    if len(word) > 3 and word.endswith('ies'):
+        return word[:-3] + 'y'
+    if len(word) > 1 and word.endswith('s') and not word.endswith('ss'):
+        return word[:-1]
+    return word
+
+
+def check_redundant_variations(keywords: List[str]) -> List[Tuple[str, str]]:
+    """
+    Find redundant keyword variations
+
+    Two keywords are redundant when one contains the other as a substring
+    (test/testing, test/tests) or when they share the same naive singular
+    form (library/libraries). Each unordered pair is reported at most once,
+    in the order the keywords appear.
+
+    Args:
+        keywords: Keywords to compare pairwise
+
+    Returns:
+        List of (keyword1, keyword2) pairs that are redundant
+    """
+    redundant = []
+
+    for i, kw1 in enumerate(keywords):
+        for kw2 in keywords[i+1:]:
+            # Substring test covers stems and simple "+s" plurals
+            contained = kw1 in kw2 or kw2 in kw1
+            # The singular comparison catches plurals that change the stem,
+            # which the substring test cannot see (query/queries)
+            if contained or _singularize(kw1) == _singularize(kw2):
+                redundant.append((kw1, kw2))
+
+    return redundant
+
+
+def check_category_duplication(keywords: List[str]) -> List[str]:
+    """Return the keywords, in input order, that are exactly a name in CATEGORIES"""
+    return [word for word in keywords if word in CATEGORIES]
+
+
+def analyze_balance(keywords: List[str]) -> Dict[str, int]:
+    """
+    Count how many keywords fall into each keyword type
+
+    A keyword is checked against FUNCTIONALITY_KEYWORDS first, then
+    TECHNOLOGY_KEYWORDS; anything matching neither is counted as 'other'.
+
+    Returns:
+        Dict with 'functionality', 'technology' and 'other' counts
+    """
+    balance = dict.fromkeys(('functionality', 'technology', 'other'), 0)
+
+    for word in keywords:
+        if word in FUNCTIONALITY_KEYWORDS:
+            bucket = 'functionality'
+        elif word in TECHNOLOGY_KEYWORDS:
+            bucket = 'technology'
+        else:
+            bucket = 'other'
+        balance[bucket] += 1
+
+    return balance
+
+
+def calculate_quality_score(
+    keywords: List[str],
+    generic_terms: List[str],
+    marketing_terms: List[str],
+    redundant: List[Tuple[str, str]],
+    category_dups: List[str],
+    min_count: int,
+    max_count: int
+) -> Tuple[int, List[str]]:
+    """
+    Score a keyword set and describe every problem found
+
+    The score starts at 10, loses points for each detected problem and is
+    floored at 0.
+
+    Returns:
+        (score out of 10, list of issues)
+    """
+    # Each entry is (points lost, message shown to the user)
+    deductions = []
+
+    # Keyword count outside the allowed range
+    total = len(keywords)
+    if total < min_count:
+        deductions.append((5, f"Too few keywords ({total} < {min_count} minimum)"))
+    elif total > max_count:
+        deductions.append((3, f"Too many keywords ({total} > {max_count} maximum)"))
+
+    # Two points per generic term
+    if generic_terms:
+        deductions.append(
+            (2 * len(generic_terms), f"Generic terms detected: {', '.join(generic_terms)}")
+        )
+
+    # Two points per marketing term
+    if marketing_terms:
+        deductions.append(
+            (2 * len(marketing_terms), f"Marketing terms detected: {', '.join(marketing_terms)}")
+        )
+
+    # Two points per redundant pair
+    if redundant:
+        pairs = ', '.join(f"{a}/{b}" for a, b in redundant)
+        deductions.append((2 * len(redundant), f"Redundant variations: {pairs}"))
+
+    # One point per keyword that repeats a category name
+    if category_dups:
+        deductions.append(
+            (len(category_dups), f"Category name duplication: {', '.join(category_dups)}")
+        )
+
+    # Two points per one-letter keyword
+    one_letter = [word for word in keywords if len(word) == 1]
+    if one_letter:
+        deductions.append(
+            (2 * len(one_letter), f"Single-character keywords: {', '.join(one_letter)}")
+        )
+
+    # Flat penalty when nothing is a known functionality or technology keyword
+    mix = analyze_balance(keywords)
+    if not (mix['functionality'] or mix['technology']):
+        deductions.append((2, "No functional or technical keywords"))
+
+    score = 10 - sum(points for points, _ in deductions)
+    issues = [message for _, message in deductions]
+
+    return max(0, score), issues
+
+
+def suggest_improvements(
+    keywords: List[str],
+    generic_terms: List[str],
+    marketing_terms: List[str],
+    redundant: List[Tuple[str, str]],
+    min_count: int,
+    max_count: int
+) -> List[str]:
+    """Build the list of suggestion lines for the detected problems"""
+    tips = []
+
+    # Blocklisted terms should be swapped for something specific
+    if generic_terms or marketing_terms:
+        tips.extend([
+            "Remove generic/marketing terms",
+            "  Replace with specific functionality (e.g., testing, deployment, formatting)",
+        ])
+
+    # One indented line per redundant pair
+    if redundant:
+        tips.append("Consolidate redundant variations")
+        tips.extend(f"  Keep one of: {first}, {second}" for first, second in redundant)
+
+    # Count outside the allowed range
+    total = len(keywords)
+    if total < min_count:
+        tips += [
+            f"Add {min_count - total} more relevant keyword(s)",
+            "  Consider: specific technologies, use-cases, or functionalities",
+        ]
+    elif total > max_count:
+        tips.append(f"Remove {total - max_count} least relevant keyword(s)")
+
+    # Nudge toward a mix of functionality and technology keywords
+    mix = analyze_balance(keywords)
+    balance_hints = (
+        ('functionality', "Add functionality keywords (e.g., testing, automation, deployment)"),
+        ('technology', "Add technology keywords (e.g., python, docker, kubernetes)"),
+    )
+    for kind, hint in balance_hints:
+        if mix[kind] == 0:
+            tips.append(hint)
+
+    return tips
+
+
+def main():
+    """
+    Main entry point
+
+    Reads the keyword string from sys.argv[1] and the optional --min/--max
+    limits from the remaining arguments, prints a validation report and
+    exits with:
+        0 - pass, or warning (score 7-8 with a valid count)
+        1 - keyword count outside the allowed range
+        2 - count is valid but the quality score is below 7
+        3 - help requested, missing/empty keywords, or invalid --min/--max
+    """
+    if len(sys.argv) < 2 or sys.argv[1] in ['-h', '--help']:
+        usage()
+
+    keyword_string = sys.argv[1]
+
+    # Parse optional arguments
+    min_count = DEFAULT_MIN_KEYWORDS
+    max_count = DEFAULT_MAX_KEYWORDS
+
+    options = sys.argv[2:]
+    idx = 0
+    while idx < len(options):
+        flag = options[idx]
+        if flag not in ('--min', '--max'):
+            # Unrecognized arguments are ignored
+            idx += 1
+            continue
+
+        if idx + 1 >= len(options):
+            print(f"ERROR: {flag} requires an integer value")
+            sys.exit(3)
+
+        raw_value = options[idx + 1]
+        try:
+            value = int(raw_value)
+        except ValueError:
+            # Report bad input as a usage error instead of a traceback
+            print(f"ERROR: {flag} expects an integer, got '{raw_value}'")
+            sys.exit(3)
+
+        if flag == '--min':
+            min_count = value
+        else:
+            max_count = value
+        # Skip the value so it is not re-read as a flag
+        idx += 2
+
+    # An inverted or negative range can never be satisfied
+    if min_count < 0 or max_count < min_count:
+        print(f"ERROR: Invalid keyword range: {min_count}-{max_count}")
+        sys.exit(3)
+
+    # Parse keywords
+    keywords = parse_keywords(keyword_string)
+
+    if not keywords:
+        print("ERROR: Keywords cannot be empty\n")
+        # Quote the limits actually in effect rather than the defaults
+        print(f"Provide {min_count}-{max_count} relevant keywords describing your plugin.\n")
+        print("Examples:")
+        print('  "testing,pytest,automation"')
+        print('  "deployment,kubernetes,ci-cd"')
+        sys.exit(3)
+
+    # Analyze keywords
+    count = len(keywords)
+    generic_terms, marketing_terms = check_generic_terms(keywords)
+    redundant = check_redundant_variations(keywords)
+    category_dups = check_category_duplication(keywords)
+    balance = analyze_balance(keywords)
+
+    # Calculate quality score
+    score, issues = calculate_quality_score(
+        keywords, generic_terms, marketing_terms,
+        redundant, category_dups, min_count, max_count
+    )
+
+    # Determine status; a count violation takes precedence over a low score
+    if score >= 9 and min_count <= count <= max_count:
+        status = "✅ PASS"
+        exit_code = 0
+    elif count < min_count or count > max_count:
+        status = "❌ FAIL"
+        exit_code = 1
+    elif score < 7:
+        status = "❌ FAIL"
+        exit_code = 2
+    else:
+        status = "⚠️  WARNING"
+        exit_code = 0
+
+    # Print results
+    print(f"{status}: Keyword validation\n")
+    print(f"Keywords: {', '.join(keywords)}")
+    print(f"Count: {count} (valid range: {min_count}-{max_count})")
+    print(f"Quality Score: {score}/10\n")
+
+    if issues:
+        print("Issues Found:")
+        for issue in issues:
+            print(f"  - {issue}")
+        print()
+
+    # Balance breakdown
+    print("Breakdown:")
+    print(f"  - Functionality: {balance['functionality']} keywords")
+    print(f"  - Technology: {balance['technology']} keywords")
+    print(f"  - Other: {balance['other']} keywords")
+    print()
+
+    # Score impact
+    if score >= 9:
+        print("Quality Score Impact: +10 points (excellent)\n")
+        if exit_code == 0:
+            print("Excellent keyword selection for discoverability!")
+    elif score >= 7:
+        print("Quality Score Impact: +7 points (good)\n")
+        print("Good keywords, but could be improved.")
+    else:
+        print("Quality Score Impact: 0 points (fix to gain +10)\n")
+        print("Keywords need significant improvement.")
+
+    # Suggestions
+    if issues:
+        suggestions = suggest_improvements(
+            keywords, generic_terms, marketing_terms,
+            redundant, min_count, max_count
+        )
+        if suggestions:
+            print("\nSuggestions:")
+            for suggestion in suggestions:
+                print(f"  {suggestion}")
+
+    sys.exit(exit_code)
+
+
+# Script entry point: run the analyzer only when executed directly, not on import
+if __name__ == '__main__':
+    main()