Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:45:33 +08:00
commit 480d09eec9
27 changed files with 8336 additions and 0 deletions

View File

@@ -0,0 +1,413 @@
#!/usr/bin/env python3
"""
Script to create curated "common" versions of large recipe categories.
Selects the most practical and commonly used recipes (30-75 each).
"""
import csv
from pathlib import Path
def read_recipes(file_path):
"""Read recipes from a CSV file."""
recipes = []
with open(file_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
recipes.append(row)
return recipes
def write_recipes(file_path, recipes):
"""Write recipes to a CSV file."""
with open(file_path, 'w', encoding='utf-8', newline='') as f:
writer = csv.writer(f)
writer.writerow(['Fully Qualified Recipe Name', 'Recipe Name', 'Description'])
for recipe in recipes:
writer.writerow([
recipe['Fully Qualified Recipe Name'],
recipe['Recipe Name'],
recipe['Description']
])
def curate_testing(recipes):
"""Select most useful testing recipes - prioritize broad recipes over specific variations."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Skip overly specific sub-recipes (inner classes with $)
if '$' in recipe_id:
continue
# Exclude overly specific assertion patterns for individual numeric types
if any(pattern in recipe_name for pattern in [
'Byte Assert', 'Integer Assert', 'Long Assert', 'Float Assert',
'Double Assert', 'Short Assert', 'BigInteger Assert', 'BigDecimal Assert'
]):
continue
# Prioritize broad testing migrations and frameworks
is_useful = (
# JUnit migrations
'junit' in recipe_id.lower() and any(keyword in recipe_name.lower() for keyword in [
'junit 5', 'junit5', 'migration', 'upgrade', 'best practices'
]) or
# Mockito
'mockito' in recipe_id.lower() or
# AssertJ (but only high-level)
('assertj' in recipe_id.lower() and 'adopt' in recipe_name.lower() and
not any(num in recipe_name for num in ['Byte', 'Integer', 'Long', 'Float', 'Double'])) or
# TestNG
'testng' in recipe_id.lower() or
# General testing best practices
any(keyword in recipe_name.lower() for keyword in [
'test', 'assert', 'mock', 'verify', 'cleanup'
]) and 'org.openrewrite.java.testing' in recipe_id
)
if is_useful and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 60:
break
return curated[:60]
def curate_framework_migrations(recipes):
"""Select most useful framework migration recipes - prioritize framework diversity over multiple versions."""
from collections import defaultdict
# Group recipes by framework
framework_recipes = defaultdict(list)
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Identify the framework
framework = None
if 'kafka' in recipe_id.lower() or 'kafka' in recipe_name.lower():
framework = 'kafka'
elif 'hibernate' in recipe_id.lower() or 'hibernate' in recipe_name.lower():
framework = 'hibernate'
elif 'quarkus' in recipe_id.lower() or 'quarkus' in recipe_name.lower():
framework = 'quarkus'
elif 'elasticsearch' in recipe_id.lower() or 'elasticsearch' in recipe_name.lower():
framework = 'elasticsearch'
elif 'spring.boot' in recipe_id.lower() or ('spring boot' in recipe_name.lower() and 'upgrade' in recipe_name.lower()):
framework = 'spring-boot'
elif 'spring.security' in recipe_id.lower() or 'spring security' in recipe_name.lower():
framework = 'spring-security'
elif 'spring.cloud' in recipe_id.lower() or 'spring cloud' in recipe_name.lower():
framework = 'spring-cloud'
elif 'spring.data' in recipe_id.lower() or 'spring data' in recipe_name.lower():
framework = 'spring-data'
elif 'jakarta' in recipe_id.lower() or 'javax' in recipe_id.lower():
framework = 'jakarta-ee'
elif 'micronaut' in recipe_id.lower() or 'micronaut' in recipe_name.lower():
framework = 'micronaut'
elif 'camel' in recipe_id.lower():
framework = 'camel'
elif 'vertx' in recipe_id.lower() or 'vert.x' in recipe_name.lower():
framework = 'vertx'
elif 'dropwizard' in recipe_id.lower():
framework = 'dropwizard'
elif 'guava' in recipe_id.lower():
framework = 'guava'
elif 'junit' in recipe_id.lower() or 'junit' in recipe_name.lower():
framework = 'junit'
# Only include migration/upgrade recipes
if framework and ('MigrateTo' in recipe_id or 'Upgrade' in recipe_id or
'Migrate' in recipe_name or 'Upgrade' in recipe_name):
framework_recipes[framework].append(recipe)
# Select top 1-2 versions per framework, prioritizing latest versions
curated = []
seen_names = set()
# Sort frameworks by recipe count to ensure we get diverse coverage
for framework in sorted(framework_recipes.keys()):
framework_list = framework_recipes[framework]
# For each framework, prefer recipes with higher version numbers (assumed to be more recent)
# and avoid duplicate recipe names
added_for_framework = 0
max_per_framework = 2 # Keep at most 2 versions per framework
for recipe in framework_list:
recipe_name = recipe['Recipe Name']
if recipe_name not in seen_names and added_for_framework < max_per_framework:
curated.append(recipe)
seen_names.add(recipe_name)
added_for_framework += 1
return curated[:50]
def curate_dependencies(recipes):
"""Select most useful dependency management recipes - prefer Maven+Gradle over build-tool-specific."""
curated = []
seen = set()
# First pass: Prioritize org.openrewrite.java.dependencies (works for both Maven and Gradle)
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Exclude C# and DevCenter recipes
if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower():
continue
# Prioritize org.openrewrite.java.dependencies (works for both Maven and Gradle)
if 'org.openrewrite.java.dependencies' in recipe_id and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
# Second pass: Add common Maven operations
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower():
continue
# Include Maven recipes for common operations
if 'org.openrewrite.maven' in recipe_id and recipe_name not in seen:
is_common_maven = any(keyword in recipe_name for keyword in [
'Change dependency', 'Add dependency', 'Remove dependency',
'Upgrade dependency', 'parent', 'BOM', 'managed', 'plugin'
])
if is_common_maven:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 35:
break
# Third pass: Add common Gradle operations (but fewer since Maven is covered)
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
if 'csharp' in recipe_id.lower() or 'devcenter' in recipe_id.lower():
continue
# Include Gradle recipes for operations not covered by Maven
if 'org.openrewrite.gradle' in recipe_id and recipe_name not in seen:
is_essential_gradle = any(keyword in recipe_name for keyword in [
'Change dependency', 'Add dependency', 'Remove dependency',
'Upgrade version', 'wrapper', 'plugin'
])
if is_essential_gradle:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 50:
break
return curated[:50]
def curate_xml_yaml_json(recipes):
"""Select most useful configuration file recipes."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Include recipes from core packages for YAML, XML, JSON, Properties, TOML
is_config_operation = (
'org.openrewrite.yaml' in recipe_id or
'org.openrewrite.xml' in recipe_id or
'org.openrewrite.json' in recipe_id or
'org.openrewrite.properties' in recipe_id or
'org.openrewrite.toml' in recipe_id
)
# Also include common operations by name
has_config_keywords = any(keyword in recipe_name for keyword in [
'YAML', 'XML', 'JSON', 'Properties', 'TOML', 'property', 'tag',
'attribute', 'value', 'file'
])
if (is_config_operation or has_config_keywords) and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 50:
break
return curated[:50]
def curate_static_analysis(recipes):
"""Select most useful static analysis recipes."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Include Find* and Search* recipes from core packages
is_search_recipe = (
recipe_name.startswith('Find') or
recipe_name.startswith('Search') or
'org.openrewrite.search' in recipe_id or
'org.openrewrite.analysis' in recipe_id
)
# Prioritize security-related searches
is_security = any(keyword in recipe_name.lower() for keyword in [
'security', 'vulnerability', 'injection', 'xss', 'xxe'
])
if (is_search_recipe or is_security) and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 50:
break
return curated[:50]
def curate_spring_boot(recipes):
"""Select most useful Spring Boot recipes."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Prioritize major upgrade recipes and common operations
is_major_upgrade = any(version in recipe_name for version in [
'3.5', '3.4', '3.3', '3.2', '3.1', '3.0', '2.7'
])
is_common_operation = any(keyword in recipe_name for keyword in [
'Upgrade', 'Migrate', 'Best Practices', 'Properties',
'Actuator', 'Configuration', 'Deprecat'
])
# Avoid overly specific internal changes
is_specific = any(keyword in recipe_name.lower() for keyword in [
'replace', 'remove', 'use', 'comment', 'add @valid'
])
if (is_major_upgrade or is_common_operation) and not is_specific and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 60:
break
return curated[:60]
def curate_security(recipes):
"""Select most useful security recipes."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Prioritize high-impact security issues
is_high_impact = any(keyword in recipe_name.lower() for keyword in [
'sql injection', 'xss', 'xxe', 'command injection',
'path traversal', 'ldap injection', 'vulnerability',
'unencrypted', 'insecure', 'csrf', 'authentication'
])
# Include general security finding recipes
is_finding_recipe = recipe_name.startswith('Find') and any(keyword in recipe_name.lower() for keyword in [
'security', 'vulnerability', 'injection', 'exposure'
])
# Include OWASP related
is_owasp = 'owasp' in recipe_id.lower() or 'owasp' in recipe_name.lower()
if (is_high_impact or is_finding_recipe or is_owasp) and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 60:
break
return curated[:60]
def curate_logging(recipes):
"""Select most useful logging recipes."""
curated = []
seen = set()
for recipe in recipes:
recipe_id = recipe['Fully Qualified Recipe Name']
recipe_name = recipe['Recipe Name']
# Prioritize common logging operations
is_common_logging = any(keyword in recipe_name for keyword in [
'Parameterize', 'SLF4J', 'Log4j', 'Logback', 'System.out',
'System.err', 'printStackTrace', 'Containerize', 'Migration',
'logger', 'Logger'
])
# Exclude overly specific internal operations
is_specific = 'internal' in recipe_name.lower()
if is_common_logging and not is_specific and recipe_name not in seen:
curated.append(recipe)
seen.add(recipe_name)
if len(curated) >= 50:
break
return curated[:50]
def main():
script_dir = Path(__file__).parent
references_dir = script_dir.parent / 'references'
# Categories to curate (those with >150 recipes or too large to be useful)
categories = {
'testing': curate_testing,
'framework-migrations': curate_framework_migrations,
'dependencies': curate_dependencies,
'xml-yaml-json': curate_xml_yaml_json,
'static-analysis': curate_static_analysis,
'spring-boot': curate_spring_boot,
'security': curate_security,
'logging': curate_logging,
}
print("Creating curated 'common' versions of large categories...\n")
for category, curate_func in categories.items():
input_file = references_dir / f'recipes-{category}.csv'
output_file = references_dir / f'recipes-{category}-common.csv'
if not input_file.exists():
print(f"Warning: {input_file.name} not found, skipping...")
continue
# Read all recipes
all_recipes = read_recipes(input_file)
# Curate selection
curated_recipes = curate_func(all_recipes)
# Write curated file
write_recipes(output_file, curated_recipes)
print(f"{category:25} {len(curated_recipes):3} curated from {len(all_recipes):4} total -> {output_file.name}")
print("\nCurated files created successfully!")
if __name__ == '__main__':
main()