Files
2025-11-30 08:45:33 +08:00

152 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
Script to categorize recipes from recipes-all.csv into separate category files.
"""
import csv
import os
from pathlib import Path
# Category table: maps a category slug (used in the output file name) to
#   'keywords'    - substrings looked for in a recipe's id, name, or description
#   'description' - one-line summary printed next to the category in the report
# Insertion order is the order in which categories are checked and written.
CATEGORIES = {
    'java-basic': {
        'keywords': [
            # Short recipe class names
            'ChangeType',
            'ChangeMethodName',
            'ChangePackage',
            'AddAnnotation',
            'RemoveAnnotation',
            'ChangeMethodAccessLevel',
            'AddImport',
            'RemoveImport',
            'AddMethodParameter',
            'DeleteMethodArgument',
            'ReplaceConstant',
            'ReplaceStringLiteral',
            # Fully qualified prefixes
            'org.openrewrite.java.ChangeType',
            'org.openrewrite.java.ChangeMethod',
            'org.openrewrite.java.ChangePackage',
            'org.openrewrite.java.AddComment',
            'org.openrewrite.java.ReplaceAnnotation',
            'org.openrewrite.java.ShortenFullyQualifiedTypeReferences',
        ],
        'description': 'Basic Java refactoring operations for types, methods, packages, and annotations',
    },
    'spring-boot': {
        'keywords': [
            'spring.boot',
            'SpringBoot',
            'spring-boot',
            'UpgradeSpringBoot',
        ],
        'description': 'Spring Boot migrations, upgrades, and best practices',
    },
    'security': {
        'keywords': [
            'security',
            'vulnerability',
            'injection',
            'XSS',
            'XXE',
            'SQL injection',
            'command injection',
            'path traversal',
            'LDAP injection',
            'unencrypted',
            'FindSqlInjection',
            'FindXss',
            'FindCommandInjection',
            'OWASP',
        ],
        'description': 'Security vulnerability detection and fixes',
    },
    'testing': {
        'keywords': [
            'junit',
            'JUnit',
            'test',
            'Test',
            'mockito',
            'Mockito',
            'assertj',
            'AssertJ',
            '@Test',
            'TestNG',
        ],
        'description': 'Testing framework migrations and best practices',
    },
    'dependencies': {
        'keywords': [
            'maven',
            'Maven',
            'gradle',
            'Gradle',
            'dependency',
            'Dependency',
            'UpgradeDependency',
            'ChangeDependency',
            'AddDependency',
            'RemoveDependency',
            'pom.xml',
            'build.gradle',
        ],
        'description': 'Maven and Gradle dependency management',
    },
    'logging': {
        'keywords': [
            'log4j',
            'Log4j',
            'logback',
            'Logback',
            'slf4j',
            'SLF4J',
            'logging',
            'Logging',
            'logger',
            'Logger',
        ],
        'description': 'Logging framework configuration and migrations',
    },
    'file-operations': {
        'keywords': [
            'CreateFile',
            'DeleteFile',
            'MoveFile',
            'RenameFile',
            'FindAndReplace',
            'ChangeText',
            'AppendToTextFile',
            'CreateTextFile',
            'org.openrewrite.text',
            'org.openrewrite.DeleteSourceFiles',
            'org.openrewrite.MoveFile',
        ],
        'description': 'File and text manipulation operations',
    },
    'framework-migrations': {
        'keywords': [
            'MigrateTo',
            'UpgradeTo',
            'Kafka',
            'Hibernate',
            'Elasticsearch',
            'Quarkus',
            'Jakarta',
            'javax',
            'migration',
        ],
        'description': 'Major framework and library migrations',
    },
    'xml-yaml-json': {
        'keywords': [
            'xml',
            'XML',
            'yaml',
            'YAML',
            'json',
            'JSON',
            'properties',
            'Properties',
            'ChangePropertyValue',
            'MergeYaml',
            'CreateXmlFile',
        ],
        'description': 'Configuration file operations for XML, YAML, JSON, and properties files',
    },
    'static-analysis': {
        'keywords': [
            'Find',
            'Search',
            'Analysis',
            'complexity',
            'unused',
            'unreachable',
            'dead code',
            'null pointer',
            'FindNullPointer',
            'FindUnused',
        ],
        'description': 'Code analysis and search recipes for finding patterns and issues',
    },
}
def matches_category(recipe_id, recipe_name, recipe_desc, keywords):
    """Tell whether any keyword occurs in the recipe's id, name, or description.

    The three fields are joined with single spaces and lowercased; each
    keyword is lowercased too, so the comparison is a case-insensitive
    substring test. Returns True on the first hit and False when no keyword
    (or an empty keyword list) matches.
    """
    haystack = "{} {} {}".format(recipe_id, recipe_name, recipe_desc).lower()
    for needle in keywords:
        if needle.lower() in haystack:
            return True
    return False
# Substrings that, when found in a lowercased recipe id, exclude the recipe
# from every category: C#, DevCenter, COBOL, and AWS SDK migration recipes.
_EXCLUDED_ID_MARKERS = (
    'csharp',
    'io.moderne.devcenter',
    'cobol',
    'software.amazon.awssdk',
)


def _is_excluded(recipe_id):
    """Return True when the recipe id contains a globally excluded marker."""
    lowered_id = recipe_id.lower()
    return any(marker in lowered_id for marker in _EXCLUDED_ID_MARKERS)


def main():
    """Split ``recipes-all.csv`` into one CSV file per category.

    Reads ``references/recipes-all.csv`` from the directory next to this
    script's own directory, using the ``id``, ``name`` and ``description``
    columns. Each non-excluded recipe is added to every category whose
    keywords match it, so one recipe may appear in several output files.
    For every category with at least one recipe, writes
    ``references/recipes-<category>.csv``; categories with no recipes write
    nothing. Finally prints a per-category summary, largest category first.

    Raises:
        FileNotFoundError: if ``recipes-all.csv`` does not exist.
        KeyError: if the input header lacks one of the required columns.
    """
    script_dir = Path(__file__).parent
    references_dir = script_dir.parent / 'references'
    input_file = references_dir / 'recipes-all.csv'

    # One bucket of recipes per category, in CATEGORIES order.
    categorized_recipes = {category: [] for category in CATEGORIES}

    print(f"Reading recipes from {input_file}...")

    # newline='' lets the csv module handle line endings itself, so newlines
    # embedded in quoted fields are read back unchanged (same as the writer).
    with open(input_file, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            # DictReader fills columns missing from a short row with None;
            # normalize to '' so the string operations below cannot fail.
            recipe_id = row['id'] or ''
            recipe_name = row['name'] or ''
            recipe_desc = row['description'] or ''

            if _is_excluded(recipe_id):
                continue

            # No early exit: a recipe is recorded in every matching category.
            for category, config in CATEGORIES.items():
                if matches_category(recipe_id, recipe_name, recipe_desc, config['keywords']):
                    categorized_recipes[category].append({
                        'id': recipe_id,
                        'name': recipe_name,
                        'description': recipe_desc,
                    })

    # Write categorized CSV files, skipping categories that matched nothing.
    for category, recipes in categorized_recipes.items():
        if not recipes:
            continue
        output_file = references_dir / f'recipes-{category}.csv'
        print(f"Writing {len(recipes)} recipes to {output_file.name}...")
        with open(output_file, 'w', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(['Fully Qualified Recipe Name', 'Recipe Name', 'Description'])
            writer.writerows(
                [recipe['id'], recipe['name'], recipe['description']]
                for recipe in recipes
            )

    # Print summary, categories with the most recipes first.
    print("\n=== Summary ===")
    for category, recipes in sorted(categorized_recipes.items(), key=lambda x: len(x[1]), reverse=True):
        print(f"{category:25} {len(recipes):4} recipes - {CATEGORIES[category]['description']}")
# Run the categorization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()