---
# Configuration template for the feature engineering toolkit plugin.
# This file allows you to customize the feature engineering pipeline.
#
# Lines marked REPLACE_ME / YOUR_VALUE_HERE hold placeholder values that
# should be adapted to your dataset before running the pipeline.

# --- Data Input ---
data_input:
  # Path to the input data file.
  # Supported formats: csv, parquet
  data_path: "data/input.csv"  # REPLACE_ME: Path to your input data
  # File type of the input data
  file_type: "csv"
  # Separator for CSV files (e.g., comma, tab).
  # For a tab, write "\t" in double quotes so YAML expands the escape.
  csv_separator: ","
  # Quote character for CSV files
  csv_quotechar: '"'
  # Encoding of the input file
  encoding: "utf-8"
  # Target variable name
  target_variable: "target"  # REPLACE_ME: Name of your target variable
  # Index column (optional)
  index_column: null  # Set to column name if you have an index column

# --- Feature Creation ---
feature_creation:
  # Enable or disable automated feature creation
  enabled: true
  # List of feature creation techniques to apply.
  # Available techniques: polynomial_features, interaction_terms,
  # aggregation_features, datetime_features
  techniques:
    - "polynomial_features"
    - "interaction_terms"
    - "datetime_features"
  # Polynomial Features configuration
  polynomial_features:
    degree: 2
    include_bias: false
  # Interaction Terms configuration
  interaction_terms:
    interaction_degree: 2
  # Aggregation Features configuration
  # NOTE(review): "aggregation_features" is not listed under `techniques`
  # above; presumably it must be added there for this section to take
  # effect -- confirm against the plugin.
  aggregation_features:
    # Group by column for aggregation (e.g., user_id, product_id)
    group_by: "user_id"  # REPLACE_ME: Column to group by for aggregation
    # Aggregation functions to apply (e.g., mean, sum, count)
    aggregations:
      - "mean"
      - "sum"
  # Datetime Features configuration
  datetime_features:
    # List of datetime columns to extract features from
    datetime_columns:
      - "date"  # REPLACE_ME: Name of your datetime column
    # List of datetime features to extract
    # (e.g., year, month, day, dayofweek)
    features_to_extract:
      - "year"
      - "month"
      - "dayofweek"

# --- Feature Selection ---
feature_selection:
  # Enable or disable feature selection
  enabled: true
  # Selection method.
  # Available methods: variance_threshold, select_k_best,
  # recursive_feature_elimination
  method: "select_k_best"
  # Variance Threshold configuration
  variance_threshold:
    threshold: 0.0
  # Select K Best configuration
  select_k_best:
    k: 10  # REPLACE_ME: Number of top features to select
    score_func: "f_classif"  # Or "f_regression"
  # Recursive Feature Elimination configuration
  recursive_feature_elimination:
    n_features_to_select: 10
    step: 1

# --- Feature Transformation ---
feature_transformation:
  # Enable or disable feature transformation
  enabled: true
  # Transformation method.
  # Available methods: standard_scaler, min_max_scaler, robust_scaler,
  # power_transformer, quantile_transformer
  method: "standard_scaler"
  # Standard Scaler configuration (no specific parameters)
  standard_scaler: {}
  # Min-Max Scaler configuration
  min_max_scaler:
    feature_range: [0, 1]
  # Robust Scaler configuration
  robust_scaler:
    quantile_range: [25.0, 75.0]
  # Power Transformer configuration
  power_transformer:
    method: "yeo-johnson"
    standardize: true
  # Quantile Transformer configuration
  quantile_transformer:
    n_quantiles: 100
    output_distribution: "uniform"

# --- Output ---
output:
  # Path to save the processed data
  output_path: "data/processed.csv"  # REPLACE_ME: Path to save the processed data
  # Save format for the output data
  output_format: "csv"
  # Save the feature engineering report
  save_report: true
  # Path to save the feature engineering report
  report_path: "reports/feature_engineering_report.html"
  # Save the trained model
  # (if applicable, e.g., for feature importance selection)
  save_model: false
  # Path to save the trained model
  model_path: "models/feature_selection_model.pkl"

# --- Advanced Options ---
advanced:
  # Number of CPU cores to use for parallel processing
  n_jobs: -1  # -1 means using all available cores
  # Random seed for reproducibility
  random_state: 42  # YOUR_VALUE_HERE: Set a random seed for reproducibility
  # Verbosity level (0: silent, 1: info, 2: debug)
  verbosity: 1
  # Handle missing values (imputation)
  handle_missing: true
  # Imputation strategy (mean, median, most_frequent, constant)
  imputation_strategy: "mean"
  # Constant value for imputation (if imputation_strategy is "constant")
  imputation_constant: 0