#!/usr/bin/env python3
"""
Script to create curated "common" versions of large recipe categories.
Selects the most practical and commonly used recipes (30-75 each).
"""
|
|
|
|
import csv
|
|
from pathlib import Path
|
|
|
|
def read_recipes(file_path):
    """Read every recipe row from a CSV file.

    Args:
        file_path: Path of the CSV file. Its first row is taken as the header.

    Returns:
        A list with one dict per data row, keyed by the header names.
    """
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields (e.g. multi-line descriptions) come back
    # unchanged instead of being translated by the text layer.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f))
|
|
|
|
def write_recipes(file_path, recipes):
    """Write recipes to a CSV file with a fixed three-column layout.

    Args:
        file_path: Destination path; an existing file is overwritten.
        recipes: Iterable of mappings that each provide the three columns
            written below. Any other keys are ignored.
    """
    columns = ('Fully Qualified Recipe Name', 'Recipe Name', 'Description')

    with open(file_path, 'w', encoding='utf-8', newline='') as out:
        sheet = csv.writer(out)
        sheet.writerow(columns)
        # Rows are produced lazily, one per recipe, in column order.
        sheet.writerows(
            [entry[column] for column in columns] for entry in recipes
        )
|
|
|
|
def curate_testing(recipes):
    """Pick up to 60 broadly useful testing recipes.

    Inner-class recipes (ID contains ``$``) and per-numeric-type assertion
    recipes are dropped. Of the rest, a recipe is kept when it is a JUnit
    migration/upgrade, anything Mockito or TestNG, a high-level AssertJ
    adoption, or a general testing recipe from ``org.openrewrite.java.testing``.
    Recipes are taken in input order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 60), in input order.
    """
    limit = 60
    numeric_assert_names = (
        'Byte Assert', 'Integer Assert', 'Long Assert', 'Float Assert',
        'Double Assert', 'Short Assert', 'BigInteger Assert', 'BigDecimal Assert',
    )
    junit_topics = ('junit 5', 'junit5', 'migration', 'upgrade', 'best practices')
    assertj_numeric_types = ('Byte', 'Integer', 'Long', 'Float', 'Double')
    general_topics = ('test', 'assert', 'mock', 'verify', 'cleanup')

    def qualifies(fq_name, title):
        # Inner classes are sub-recipes of something broader.
        if '$' in fq_name:
            return False
        # One recipe per numeric type is too granular for a "common" list.
        if any(pattern in title for pattern in numeric_assert_names):
            return False

        fq_lower = fq_name.lower()
        title_lower = title.lower()

        # JUnit migrations
        if 'junit' in fq_lower and any(topic in title_lower for topic in junit_topics):
            return True
        # Mockito
        if 'mockito' in fq_lower:
            return True
        # AssertJ, high-level adoption only
        if ('assertj' in fq_lower and 'adopt' in title_lower
                and not any(kind in title for kind in assertj_numeric_types)):
            return True
        # TestNG
        if 'testng' in fq_lower:
            return True
        # General testing best practices from the core testing package
        return (any(topic in title_lower for topic in general_topics)
                and 'org.openrewrite.java.testing' in fq_name)

    picked = []
    used_titles = set()
    for entry in recipes:
        fq_name = entry['Fully Qualified Recipe Name']
        title = entry['Recipe Name']
        if not qualifies(fq_name, title) or title in used_titles:
            continue
        picked.append(entry)
        used_titles.add(title)
        if len(picked) >= limit:
            break

    return picked[:limit]
|
|
|
|
def _detect_migration_framework(recipe_id, recipe_name):
    """Return the framework label a recipe belongs to, or None if none matches.

    Matching is case-insensitive. Rules are tried in a fixed order and the
    first hit wins, so e.g. a Kafka recipe living in a Spring package is
    labelled 'kafka'.
    """
    rid = recipe_id.lower()
    rname = recipe_name.lower()

    if 'kafka' in rid or 'kafka' in rname:
        return 'kafka'
    if 'hibernate' in rid or 'hibernate' in rname:
        return 'hibernate'
    if 'quarkus' in rid or 'quarkus' in rname:
        return 'quarkus'
    if 'elasticsearch' in rid or 'elasticsearch' in rname:
        return 'elasticsearch'
    # By name, Spring Boot only counts when the name also says "upgrade".
    if 'spring.boot' in rid or ('spring boot' in rname and 'upgrade' in rname):
        return 'spring-boot'
    if 'spring.security' in rid or 'spring security' in rname:
        return 'spring-security'
    if 'spring.cloud' in rid or 'spring cloud' in rname:
        return 'spring-cloud'
    if 'spring.data' in rid or 'spring data' in rname:
        return 'spring-data'
    if 'jakarta' in rid or 'javax' in rid:
        return 'jakarta-ee'
    if 'micronaut' in rid or 'micronaut' in rname:
        return 'micronaut'
    if 'camel' in rid:
        return 'camel'
    if 'vertx' in rid or 'vert.x' in rname:
        return 'vertx'
    if 'dropwizard' in rid:
        return 'dropwizard'
    if 'guava' in rid:
        return 'guava'
    if 'junit' in rid or 'junit' in rname:
        return 'junit'
    return None


def _highest_version_in_name(recipe_name):
    """Return the largest dotted number in a recipe name as a tuple of ints.

    'Migrate to Spring Boot 3.5' -> (3, 5); a name without digits -> ().
    This is a heuristic: every number in the name is treated as a version.
    """
    import re

    found = re.findall(r'\d+(?:\.\d+)*', recipe_name)
    return max(
        (tuple(int(part) for part in number.split('.')) for number in found),
        default=(),
    )


def curate_framework_migrations(recipes):
    """Select framework migration recipes, favouring framework diversity.

    Each recipe is assigned to at most one framework and kept only if its ID
    or name marks it as a migration/upgrade. At most two recipes are then
    taken per framework, preferring those whose name mentions the highest
    version number (assumed to be the most recent). Recipes with equal or no
    version keep their input order. Recipe names are de-duplicated across the
    whole result.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 50), grouped by framework in
        alphabetical order of the framework label.
    """
    from collections import defaultdict

    max_per_framework = 2  # Keep at most 2 versions per framework
    overall_limit = 50

    # Group migration/upgrade recipes by framework.
    by_framework = defaultdict(list)
    for recipe in recipes:
        recipe_id = recipe['Fully Qualified Recipe Name']
        recipe_name = recipe['Recipe Name']

        framework = _detect_migration_framework(recipe_id, recipe_name)
        if framework is None:
            continue

        # Only include migration/upgrade recipes (case-sensitive markers).
        if ('MigrateTo' in recipe_id or 'Upgrade' in recipe_id or
                'Migrate' in recipe_name or 'Upgrade' in recipe_name):
            by_framework[framework].append(recipe)

    curated = []
    seen_names = set()

    # Alphabetical framework order keeps the output deterministic.
    for framework in sorted(by_framework):
        # sorted() is stable, also with reverse=True, so ties stay in input order.
        newest_first = sorted(
            by_framework[framework],
            key=lambda r: _highest_version_in_name(r['Recipe Name']),
            reverse=True,
        )

        added_for_framework = 0
        for recipe in newest_first:
            if added_for_framework >= max_per_framework:
                break
            recipe_name = recipe['Recipe Name']
            if recipe_name in seen_names:
                continue
            curated.append(recipe)
            seen_names.add(recipe_name)
            added_for_framework += 1

    return curated[:overall_limit]
|
|
|
|
def curate_dependencies(recipes):
    """Select dependency-management recipes, build-tool-agnostic ones first.

    C# and DevCenter recipes are always excluded. The rest is collected in
    three passes, each in input order and de-duplicated by recipe name:

    1. every ``org.openrewrite.java.dependencies`` recipe (works for both
       Maven and Gradle);
    2. common ``org.openrewrite.maven`` operations, while fewer than 35
       recipes have been collected;
    3. essential ``org.openrewrite.gradle`` operations, while fewer than 50
       recipes have been collected.

    Both caps are tested before a recipe is added, so a pass that starts at
    or above its cap contributes nothing.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys. It is read once, so a one-shot iterable works.

    Returns:
        The selected recipe dicts (at most 50).
    """
    maven_keywords = (
        'Change dependency', 'Add dependency', 'Remove dependency',
        'Upgrade dependency', 'parent', 'BOM', 'managed', 'plugin',
    )
    # Fewer Gradle keywords: Maven already covers the shared operations.
    gradle_keywords = (
        'Change dependency', 'Add dependency', 'Remove dependency',
        'Upgrade version', 'wrapper', 'plugin',
    )
    maven_cap = 35
    overall_cap = 50

    # Drop C# and DevCenter recipes once instead of in every pass.
    candidates = []
    for recipe in recipes:
        id_lower = recipe['Fully Qualified Recipe Name'].lower()
        if 'csharp' in id_lower or 'devcenter' in id_lower:
            continue
        candidates.append(recipe)

    curated = []
    seen = set()

    def keep(recipe):
        curated.append(recipe)
        seen.add(recipe['Recipe Name'])

    def add_build_tool_recipes(package, keywords, cap):
        """Append matching recipes from `package` until `cap` is reached."""
        for recipe in candidates:
            if len(curated) >= cap:
                break
            recipe_name = recipe['Recipe Name']
            if (package in recipe['Fully Qualified Recipe Name']
                    and recipe_name not in seen
                    and any(keyword in recipe_name for keyword in keywords)):
                keep(recipe)

    # First pass: build-tool-agnostic recipes, uncapped here and trimmed by
    # the final slice.
    for recipe in candidates:
        if ('org.openrewrite.java.dependencies' in recipe['Fully Qualified Recipe Name']
                and recipe['Recipe Name'] not in seen):
            keep(recipe)

    # Second pass: common Maven operations.
    add_build_tool_recipes('org.openrewrite.maven', maven_keywords, maven_cap)

    # Third pass: essential Gradle operations.
    add_build_tool_recipes('org.openrewrite.gradle', gradle_keywords, overall_cap)

    return curated[:overall_cap]
|
|
|
|
def curate_xml_yaml_json(recipes):
    """Pick up to 50 configuration-file recipes.

    A recipe is kept when its ID contains one of the core YAML, XML, JSON,
    Properties or TOML packages, or when its name contains one of a set of
    configuration-related words (case-sensitive). Recipes are taken in input
    order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 50), in input order.
    """
    limit = 50
    core_packages = (
        'org.openrewrite.yaml',
        'org.openrewrite.xml',
        'org.openrewrite.json',
        'org.openrewrite.properties',
        'org.openrewrite.toml',
    )
    name_hints = (
        'YAML', 'XML', 'JSON', 'Properties', 'TOML', 'property', 'tag',
        'attribute', 'value', 'file',
    )

    selection = []
    taken_names = set()
    for entry in recipes:
        fq_name = entry['Fully Qualified Recipe Name']
        title = entry['Recipe Name']

        from_core_package = any(package in fq_name for package in core_packages)
        mentions_config = any(hint in title for hint in name_hints)

        if not (from_core_package or mentions_config):
            continue
        if title in taken_names:
            continue

        selection.append(entry)
        taken_names.add(title)
        if len(selection) >= limit:
            break

    return selection[:limit]
|
|
|
|
def curate_static_analysis(recipes):
    """Pick up to 50 search and analysis recipes.

    A recipe is kept when its name starts with 'Find' or 'Search', when its
    ID contains the core search or analysis package, or when its name
    mentions a security topic (case-insensitive). Recipes are taken in input
    order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 50), in input order.
    """
    limit = 50
    search_prefixes = ('Find', 'Search')
    search_packages = ('org.openrewrite.search', 'org.openrewrite.analysis')
    security_topics = ('security', 'vulnerability', 'injection', 'xss', 'xxe')

    selection = []
    taken_names = set()
    for entry in recipes:
        fq_name = entry['Fully Qualified Recipe Name']
        title = entry['Recipe Name']

        # Find*/Search* recipes, or anything from the core search packages
        looks_like_search = (
            title.startswith(search_prefixes)
            or any(package in fq_name for package in search_packages)
        )
        # Security-related searches
        title_lower = title.lower()
        about_security = any(topic in title_lower for topic in security_topics)

        if (looks_like_search or about_security) and title not in taken_names:
            selection.append(entry)
            taken_names.add(title)
            if len(selection) >= limit:
                break

    return selection[:limit]
|
|
|
|
def curate_spring_boot(recipes):
    """Pick up to 60 Spring Boot recipes, favouring upgrades and common tasks.

    A recipe is kept when its name mentions one of the listed Spring Boot
    versions or one of the common-operation words (both case-sensitive),
    unless the lower-cased name contains one of the "too specific" words.
    Those are plain substring tests, so 'use' also matches e.g. 'users'.
    Recipes are taken in input order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 60), in input order.
    """
    limit = 60
    major_versions = ('3.5', '3.4', '3.3', '3.2', '3.1', '3.0', '2.7')
    common_words = (
        'Upgrade', 'Migrate', 'Best Practices', 'Properties',
        'Actuator', 'Configuration', 'Deprecat',
    )
    too_specific_words = ('replace', 'remove', 'use', 'comment', 'add @valid')

    selection = []
    taken_names = set()
    for entry in recipes:
        # The ID is not used for filtering. It is still read so rows missing
        # that column fail here, as before.
        _fq_name, title = entry['Fully Qualified Recipe Name'], entry['Recipe Name']

        mentions_version = any(version in title for version in major_versions)
        common_operation = any(word in title for word in common_words)
        title_lower = title.lower()
        too_specific = any(word in title_lower for word in too_specific_words)

        if too_specific or not (mentions_version or common_operation):
            continue
        if title in taken_names:
            continue

        selection.append(entry)
        taken_names.add(title)
        if len(selection) >= limit:
            break

    return selection[:limit]
|
|
|
|
def curate_security(recipes):
    """Pick up to 60 security recipes.

    A recipe is kept when its name mentions a high-impact issue, when it is a
    'Find...' recipe about a security topic, or when its ID or name mentions
    OWASP. Apart from the 'Find' prefix, all name checks are case-insensitive.
    Recipes are taken in input order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 60), in input order.
    """
    limit = 60
    high_impact_topics = (
        'sql injection', 'xss', 'xxe', 'command injection',
        'path traversal', 'ldap injection', 'vulnerability',
        'unencrypted', 'insecure', 'csrf', 'authentication',
    )
    finding_topics = ('security', 'vulnerability', 'injection', 'exposure')

    selection = []
    taken_names = set()
    for entry in recipes:
        fq_name = entry['Fully Qualified Recipe Name']
        title = entry['Recipe Name']
        title_lower = title.lower()

        # High-impact security issues
        high_impact = any(topic in title_lower for topic in high_impact_topics)
        # General security finding recipes
        security_finder = title.startswith('Find') and any(
            topic in title_lower for topic in finding_topics
        )
        # OWASP related
        owasp_related = 'owasp' in fq_name.lower() or 'owasp' in title_lower

        if not (high_impact or security_finder or owasp_related):
            continue
        if title in taken_names:
            continue

        selection.append(entry)
        taken_names.add(title)
        if len(selection) >= limit:
            break

    return selection[:limit]
|
|
|
|
def curate_logging(recipes):
    """Pick up to 50 logging recipes.

    A recipe is kept when its name contains one of the common logging words
    (case-sensitive) and does not contain 'internal' in any letter case.
    Recipes are taken in input order and de-duplicated by recipe name.

    Args:
        recipes: Iterable of dicts with 'Fully Qualified Recipe Name' and
            'Recipe Name' keys.

    Returns:
        The selected recipe dicts (at most 50), in input order.
    """
    limit = 50
    logging_words = (
        'Parameterize', 'SLF4J', 'Log4j', 'Logback', 'System.out',
        'System.err', 'printStackTrace', 'Containerize', 'Migration',
        'logger', 'Logger',
    )

    selection = []
    taken_names = set()
    for entry in recipes:
        # The ID is not used for filtering. It is still read so rows missing
        # that column fail here, as before.
        _fq_name, title = entry['Fully Qualified Recipe Name'], entry['Recipe Name']

        common_logging = any(word in title for word in logging_words)
        internal_only = 'internal' in title.lower()

        if internal_only or not common_logging or title in taken_names:
            continue

        selection.append(entry)
        taken_names.add(title)
        if len(selection) >= limit:
            break

    return selection[:limit]
|
|
|
|
def main():
    """Write a curated ``recipes-<category>-common.csv`` for each large category.

    Input and output files live in the ``references`` directory next to this
    script's parent directory. A category whose input CSV does not exist is
    reported and skipped. For every other category a one-line summary is
    printed.
    """
    references_dir = Path(__file__).parent.parent / 'references'

    # Categories to curate (those with >150 recipes or too large to be useful)
    curators = (
        ('testing', curate_testing),
        ('framework-migrations', curate_framework_migrations),
        ('dependencies', curate_dependencies),
        ('xml-yaml-json', curate_xml_yaml_json),
        ('static-analysis', curate_static_analysis),
        ('spring-boot', curate_spring_boot),
        ('security', curate_security),
        ('logging', curate_logging),
    )

    print("Creating curated 'common' versions of large categories...\n")

    for category, curate_func in curators:
        source_csv = references_dir / f'recipes-{category}.csv'
        if not source_csv.exists():
            print(f"Warning: {source_csv.name} not found, skipping...")
            continue

        target_csv = references_dir / f'recipes-{category}-common.csv'

        # Read everything, narrow it down, then write the curated subset.
        everything = read_recipes(source_csv)
        chosen = curate_func(everything)
        write_recipes(target_csv, chosen)

        print(f"{category:25} {len(chosen):3} curated from {len(everything):4} total -> {target_csv.name}")

    print("\nCurated files created successfully!")
|
|
|
|
# Run the curation only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|