#!/usr/bin/env python3 """ Script to create curated "common" versions of large recipe categories. Selects the most practical and commonly used recipes (30-75 each). """ import csv from pathlib import Path def read_recipes(file_path): """Read recipes from a CSV file.""" recipes = [] with open(file_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: recipes.append(row) return recipes def write_recipes(file_path, recipes): """Write recipes to a CSV file.""" with open(file_path, 'w', encoding='utf-8', newline='') as f: writer = csv.writer(f) writer.writerow(['Fully Qualified Recipe Name', 'Recipe Name', 'Description']) for recipe in recipes: writer.writerow([ recipe['Fully Qualified Recipe Name'], recipe['Recipe Name'], recipe['Description'] ]) def curate_testing(recipes): """Select most useful testing recipes - prioritize broad recipes over specific variations.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Skip overly specific sub-recipes (inner classes with $) if '$' in recipe_id: continue # Exclude overly specific assertion patterns for individual numeric types if any(pattern in recipe_name for pattern in [ 'Byte Assert', 'Integer Assert', 'Long Assert', 'Float Assert', 'Double Assert', 'Short Assert', 'BigInteger Assert', 'BigDecimal Assert' ]): continue # Prioritize broad testing migrations and frameworks is_useful = ( # JUnit migrations 'junit' in recipe_id.lower() and any(keyword in recipe_name.lower() for keyword in [ 'junit 5', 'junit5', 'migration', 'upgrade', 'best practices' ]) or # Mockito 'mockito' in recipe_id.lower() or # AssertJ (but only high-level) ('assertj' in recipe_id.lower() and 'adopt' in recipe_name.lower() and not any(num in recipe_name for num in ['Byte', 'Integer', 'Long', 'Float', 'Double'])) or # TestNG 'testng' in recipe_id.lower() or # General testing best practices any(keyword in recipe_name.lower() for keyword in [ 'test', 'assert', 'mock', 'verify', 'cleanup' ]) and 'org.openrewrite.java.testing' in recipe_id ) if is_useful and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 60: break return curated[:60] def curate_framework_migrations(recipes): """Select most useful framework migration recipes - prioritize framework diversity over multiple versions.""" from collections import defaultdict # Group recipes by framework framework_recipes = defaultdict(list) for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Identify the framework framework = None if 'kafka' in recipe_id.lower() or 'kafka' in recipe_name.lower(): framework = 'kafka' elif 'hibernate' in recipe_id.lower() or 'hibernate' in recipe_name.lower(): framework = 'hibernate' elif 'quarkus' in recipe_id.lower() or 'quarkus' in recipe_name.lower(): framework = 'quarkus' elif 'elasticsearch' in recipe_id.lower() or 'elasticsearch' in recipe_name.lower(): framework = 'elasticsearch' elif 'spring.boot' in recipe_id.lower() or ('spring boot' in recipe_name.lower() and 'upgrade' in recipe_name.lower()): framework = 'spring-boot' elif 'spring.security' in recipe_id.lower() or 'spring security' in recipe_name.lower(): framework = 'spring-security' elif 'spring.cloud' in recipe_id.lower() or 'spring cloud' in recipe_name.lower(): framework = 'spring-cloud' elif 'spring.data' in recipe_id.lower() or 'spring data' in recipe_name.lower(): framework = 'spring-data' elif 'jakarta' in recipe_id.lower() or 'javax' in recipe_id.lower(): framework = 'jakarta-ee' elif 'micronaut' in recipe_id.lower() or 'micronaut' in recipe_name.lower(): framework = 'micronaut' elif 'camel' in recipe_id.lower(): framework = 'camel' elif 'vertx' in recipe_id.lower() or 'vert.x' in recipe_name.lower(): framework = 'vertx' elif 'dropwizard' in recipe_id.lower(): framework = 'dropwizard' elif 'guava' in recipe_id.lower(): framework = 'guava' elif 'junit' in recipe_id.lower() or 'junit' in recipe_name.lower(): framework = 'junit' # Only include migration/upgrade recipes if framework and ('MigrateTo' in recipe_id or 'Upgrade' in recipe_id or 'Migrate' in recipe_name or 'Upgrade' in recipe_name): framework_recipes[framework].append(recipe) # Select top 1-2 versions per framework, prioritizing latest versions curated = [] seen_names = set() # Sort frameworks by recipe count to ensure we get diverse coverage for framework in sorted(framework_recipes.keys()): framework_list = framework_recipes[framework] # For each framework, prefer recipes with higher version numbers (assumed to be more recent) # and avoid duplicate recipe names added_for_framework = 0 max_per_framework = 2 # Keep at most 2 versions per framework for recipe in framework_list: recipe_name = recipe['Recipe Name'] if recipe_name not in seen_names and added_for_framework < max_per_framework: curated.append(recipe) seen_names.add(recipe_name) added_for_framework += 1 return curated[:50] def curate_dependencies(recipes): """Select most useful dependency management recipes - prefer Maven+Gradle over build-tool-specific.""" curated = [] seen = set() # First pass: Prioritize org.openrewrite.java.dependencies (works for both Maven and Gradle) for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Exclude C# and DevCenter recipes if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower(): continue # Prioritize org.openrewrite.java.dependencies (works for both Maven and Gradle) if 'org.openrewrite.java.dependencies' in recipe_id and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) # Second pass: Add common Maven operations for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower(): continue # Include Maven recipes for common operations if 'org.openrewrite.maven' in recipe_id and recipe_name not in seen: is_common_maven = any(keyword in recipe_name for keyword in [ 'Change dependency', 'Add dependency', 'Remove dependency', 'Upgrade dependency', 'parent', 'BOM', 'managed', 'plugin' ]) if is_common_maven: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 35: break # Third pass: Add common Gradle operations (but fewer since Maven is covered) for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower(): continue # Include Gradle recipes for operations not covered by Maven if 'org.openrewrite.gradle' in recipe_id and recipe_name not in seen: is_essential_gradle = any(keyword in recipe_name for keyword in [ 'Change dependency', 'Add dependency', 'Remove dependency', 'Upgrade version', 'wrapper', 'plugin' ]) if is_essential_gradle: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 50: break return curated[:50] def curate_xml_yaml_json(recipes): """Select most useful configuration file recipes.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Include recipes from core packages for YAML, XML, JSON, Properties, TOML is_config_operation = ( 'org.openrewrite.yaml' in recipe_id or 'org.openrewrite.xml' in recipe_id or 'org.openrewrite.json' in recipe_id or 'org.openrewrite.properties' in recipe_id or 'org.openrewrite.toml' in recipe_id ) # Also include common operations by name has_config_keywords = any(keyword in recipe_name for keyword in [ 'YAML', 'XML', 'JSON', 'Properties', 'TOML', 'property', 'tag', 'attribute', 'value', 'file' ]) if (is_config_operation or has_config_keywords) and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 50: break return curated[:50] def curate_static_analysis(recipes): """Select most useful static analysis recipes.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Include Find* and Search* recipes from core packages is_search_recipe = ( recipe_name.startswith('Find') or recipe_name.startswith('Search') or 'org.openrewrite.search' in recipe_id or 'org.openrewrite.analysis' in recipe_id ) # Prioritize security-related searches is_security = any(keyword in recipe_name.lower() for keyword in [ 'security', 'vulnerability', 'injection', 'xss', 'xxe' ]) if (is_search_recipe or is_security) and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 50: break return curated[:50] def curate_spring_boot(recipes): """Select most useful Spring Boot recipes.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Prioritize major upgrade recipes and common operations is_major_upgrade = any(version in recipe_name for version in [ '3.5', '3.4', '3.3', '3.2', '3.1', '3.0', '2.7' ]) is_common_operation = any(keyword in recipe_name for keyword in [ 'Upgrade', 'Migrate', 'Best Practices', 'Properties', 'Actuator', 'Configuration', 'Deprecat' ]) # Avoid overly specific internal changes is_specific = any(keyword in recipe_name.lower() for keyword in [ 'replace', 'remove', 'use', 'comment', 'add @valid' ]) if (is_major_upgrade or is_common_operation) and not is_specific and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 60: break return curated[:60] def curate_security(recipes): """Select most useful security recipes.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Prioritize high-impact security issues is_high_impact = any(keyword in recipe_name.lower() for keyword in [ 'sql injection', 'xss', 'xxe', 'command injection', 'path traversal', 'ldap injection', 'vulnerability', 'unencrypted', 'insecure', 'csrf', 'authentication' ]) # Include general security finding recipes is_finding_recipe = recipe_name.startswith('Find') and any(keyword in recipe_name.lower() for keyword in [ 'security', 'vulnerability', 'injection', 'exposure' ]) # Include OWASP related is_owasp = 'owasp' in recipe_id.lower() or 'owasp' in recipe_name.lower() if (is_high_impact or is_finding_recipe or is_owasp) and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 60: break return curated[:60] def curate_logging(recipes): """Select most useful logging recipes.""" curated = [] seen = set() for recipe in recipes: recipe_id = recipe['Fully Qualified Recipe Name'] recipe_name = recipe['Recipe Name'] # Prioritize common logging operations is_common_logging = any(keyword in recipe_name for keyword in [ 'Parameterize', 'SLF4J', 'Log4j', 'Logback', 'System.out', 'System.err', 'printStackTrace', 'Containerize', 'Migration', 'logger', 'Logger' ]) # Exclude overly specific internal operations is_specific = 'internal' in recipe_name.lower() if is_common_logging and not is_specific and recipe_name not in seen: curated.append(recipe) seen.add(recipe_name) if len(curated) >= 50: break return curated[:50] def main(): script_dir = Path(__file__).parent references_dir = script_dir.parent / 'references' # Categories to curate (those with >150 recipes or too large to be useful) categories = { 'testing': curate_testing, 'framework-migrations': curate_framework_migrations, 'dependencies': curate_dependencies, 'xml-yaml-json': curate_xml_yaml_json, 'static-analysis': curate_static_analysis, 'spring-boot': curate_spring_boot, 'security': curate_security, 'logging': curate_logging, } print("Creating curated 'common' versions of large categories...\n") for category, curate_func in categories.items(): input_file = references_dir / f'recipes-{category}.csv' output_file = references_dir / f'recipes-{category}-common.csv' if not input_file.exists(): print(f"Warning: {input_file.name} not found, skipping...") continue # Read all recipes all_recipes = read_recipes(input_file) # Curate selection curated_recipes = curate_func(all_recipes) # Write curated file write_recipes(output_file, curated_recipes) print(f"{category:25} {len(curated_recipes):3} curated from {len(all_recipes):4} total -> {output_file.name}") print("\nCurated files created successfully!") if __name__ == '__main__': main()