#!/usr/bin/env python3
"""
Script to categorize recipes from recipes-all.csv into separate category files.
"""

import csv
import os
from pathlib import Path

# Define categories and their matching patterns (keywords in id, name, or description)
CATEGORIES = {
    'java-basic': {
        'keywords': [
            'ChangeType', 'ChangeMethodName', 'ChangePackage', 'AddAnnotation', 'RemoveAnnotation',
            'ChangeMethodAccessLevel', 'AddImport', 'RemoveImport', 'AddMethodParameter',
            'DeleteMethodArgument', 'ReplaceConstant', 'ReplaceStringLiteral',
            'org.openrewrite.java.ChangeType', 'org.openrewrite.java.ChangeMethod',
            'org.openrewrite.java.ChangePackage', 'org.openrewrite.java.AddComment',
            'org.openrewrite.java.ReplaceAnnotation', 'org.openrewrite.java.ShortenFullyQualifiedTypeReferences'
        ],
        'description': 'Basic Java refactoring operations for types, methods, packages, and annotations'
    },
    'spring-boot': {
        'keywords': [
            'spring.boot', 'SpringBoot', 'spring-boot', 'UpgradeSpringBoot'
        ],
        'description': 'Spring Boot migrations, upgrades, and best practices'
    },
    'security': {
        'keywords': [
            'security', 'vulnerability', 'injection', 'XSS', 'XXE', 'SQL injection',
            'command injection', 'path traversal', 'LDAP injection', 'unencrypted',
            'FindSqlInjection', 'FindXss', 'FindCommandInjection', 'OWASP'
        ],
        'description': 'Security vulnerability detection and fixes'
    },
    'testing': {
        'keywords': [
            'junit', 'JUnit', 'test', 'Test', 'mockito', 'Mockito', 'assertj', 'AssertJ',
            '@Test', 'TestNG'
        ],
        'description': 'Testing framework migrations and best practices'
    },
    'dependencies': {
        'keywords': [
            'maven', 'Maven', 'gradle', 'Gradle', 'dependency', 'Dependency',
            'UpgradeDependency', 'ChangeDependency', 'AddDependency', 'RemoveDependency',
            'pom.xml', 'build.gradle'
        ],
        'description': 'Maven and Gradle dependency management'
    },
    'logging': {
        'keywords': [
            'log4j', 'Log4j', 'logback', 'Logback', 'slf4j', 'SLF4J', 'logging',
            'Logging', 'logger', 'Logger'
        ],
        'description': 'Logging framework configuration and migrations'
    },
    'file-operations': {
        'keywords': [
            'CreateFile', 'DeleteFile', 'MoveFile', 'RenameFile', 'FindAndReplace',
            'ChangeText', 'AppendToTextFile', 'CreateTextFile', 'org.openrewrite.text',
            'org.openrewrite.DeleteSourceFiles', 'org.openrewrite.MoveFile'
        ],
        'description': 'File and text manipulation operations'
    },
    'framework-migrations': {
        'keywords': [
            'MigrateTo', 'UpgradeTo', 'Kafka', 'Hibernate', 'Elasticsearch', 'Quarkus',
            'Jakarta', 'javax', 'migration'
        ],
        'description': 'Major framework and library migrations'
    },
    'xml-yaml-json': {
        'keywords': [
            'xml', 'XML', 'yaml', 'YAML', 'json', 'JSON', 'properties', 'Properties',
            'ChangePropertyValue', 'MergeYaml', 'CreateXmlFile'
        ],
        'description': 'Configuration file operations for XML, YAML, JSON, and properties files'
    },
    'static-analysis': {
        'keywords': [
            'Find', 'Search', 'Analysis', 'complexity', 'unused', 'unreachable',
            'dead code', 'null pointer', 'FindNullPointer', 'FindUnused'
        ],
        'description': 'Code analysis and search recipes for finding patterns and issues'
    }
}

def matches_category(recipe_id, recipe_name, recipe_desc, keywords):
    """Check if a recipe matches any of the keywords for a category."""
    text_to_search = f"{recipe_id} {recipe_name} {recipe_desc}".lower()
    return any(keyword.lower() in text_to_search for keyword in keywords)

def main():
    script_dir = Path(__file__).parent
    references_dir = script_dir.parent / 'references'
    input_file = references_dir / 'recipes-all.csv'

    # Dictionary to store recipes for each category
    categorized_recipes = {cat: [] for cat in CATEGORIES.keys()}

    print(f"Reading recipes from {input_file}...")

    # Read and categorize recipes
    with open(input_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            recipe_id = row['id']
            recipe_name = row['name']
            recipe_desc = row['description']

            # Skip C#, DevCenter, COBOL, and AWS SDK migration recipes globally
            if 'csharp' in recipe_id.lower() or '.csharp.' in recipe_id.lower():
                continue
            if 'io.moderne.devcenter' in recipe_id.lower():
                continue
            if 'cobol' in recipe_id.lower():
                continue
            if 'software.amazon.awssdk' in recipe_id.lower():
                continue

            # Check each category
            for category, config in CATEGORIES.items():
                if matches_category(recipe_id, recipe_name, recipe_desc, config['keywords']):
                    categorized_recipes[category].append({
                        'id': recipe_id,
                        'name': recipe_name,
                        'description': recipe_desc
                    })

    # Write categorized CSV files
    for category, recipes in categorized_recipes.items():
        if recipes:
            output_file = references_dir / f'recipes-{category}.csv'
            print(f"Writing {len(recipes)} recipes to {output_file.name}...")

            with open(output_file, 'w', encoding='utf-8', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(['Fully Qualified Recipe Name', 'Recipe Name', 'Description'])

                for recipe in recipes:
                    writer.writerow([recipe['id'], recipe['name'], recipe['description']])

    # Print summary
    print("\n=== Summary ===")
    for category, recipes in sorted(categorized_recipes.items(), key=lambda x: len(x[1]), reverse=True):
        print(f"{category:25} {len(recipes):4} recipes - {CATEGORIES[category]['description']}")

if __name__ == '__main__':
    main()