Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:50:58 +08:00
commit 3fb2d73fdf
11 changed files with 488 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
# Assets
Bundled resources for the automl-pipeline-builder skill:
- [ ] pipeline_template.yaml: YAML template for defining the structure and configuration of the AutoML pipeline.
- [ ] example_dataset.csv: Sample dataset that can be used as input for the AutoML pipeline.
- [ ] evaluation_report_template.html: HTML template for generating the model evaluation report.

View File

@@ -0,0 +1,203 @@
<!DOCTYPE html>
<!--
  AutoML model evaluation report template.
  {{placeholder}} tokens are substituted at render time; the syntax looks like
  a Mustache/Jinja-style engine — NOTE(review): confirm which engine the
  pipeline uses before changing any delimiter.
-->
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AutoML Model Evaluation Report</title>
<style>
/* Basic Reset */
body, h1, h2, h3, p, table, th, td {
margin: 0;
padding: 0;
border: 0;
font-size: 100%;
font: inherit;
vertical-align: baseline;
}
/* General Styles */
body {
font-family: sans-serif;
line-height: 1.6;
background-color: #f4f4f4;
color: #333;
padding: 20px;
}
.container {
max-width: 960px;
margin: 0 auto;
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
h1, h2, h3 {
margin-bottom: 15px;
color: #0056b3;
}
h1 {
font-size: 2.5em;
}
h2 {
font-size: 2em;
}
h3 {
font-size: 1.5em;
}
p {
margin-bottom: 15px;
}
/* Table Styles */
table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
th, td {
padding: 12px 15px;
text-align: left;
border-bottom: 1px solid #ddd;
}
th {
background-color: #f0f0f0;
font-weight: bold;
}
/* Responsive Design */
/* Below 768px: tighter padding, smaller headings, horizontally scrollable tables. */
@media (max-width: 768px) {
.container {
padding: 15px;
}
h1 {
font-size: 2em;
}
h2 {
font-size: 1.6em;
}
h3 {
font-size: 1.3em;
}
table {
display: block;
overflow-x: auto;
}
}
/* Specific Styles */
.model-summary {
margin-bottom: 30px;
}
.evaluation-metrics {
margin-bottom: 30px;
}
.visualizations {
margin-bottom: 30px;
}
.conclusion {
margin-bottom: 20px;
}
.visualization-image {
max-width: 100%;
height: auto;
border: 1px solid #ccc;
border-radius: 5px;
margin-bottom: 10px;
}
</style>
</head>
<body>
<div class="container">
<!-- Report Header -->
<h1>AutoML Model Evaluation Report</h1>
<p>Generated on: {{generation_date}}</p>
<!-- Model Summary -->
<section class="model-summary">
<h2>Model Summary</h2>
<p><strong>Model Name:</strong> {{model_name}}</p>
<p><strong>Algorithm:</strong> {{algorithm}}</p>
<p><strong>Dataset:</strong> {{dataset_name}}</p>
<p><strong>Features Used:</strong> {{features_used}}</p>
</section>
<!-- Evaluation Metrics -->
<section class="evaluation-metrics">
<h2>Evaluation Metrics</h2>
<table>
<thead>
<tr>
<th>Metric</th>
<th>Value</th>
</tr>
</thead>
<tbody>
<tr>
<td>Accuracy</td>
<td>{{accuracy}}</td>
</tr>
<tr>
<td>Precision</td>
<td>{{precision}}</td>
</tr>
<tr>
<td>Recall</td>
<td>{{recall}}</td>
</tr>
<tr>
<td>F1-Score</td>
<td>{{f1_score}}</td>
</tr>
<tr>
<td>AUC-ROC</td>
<td>{{auc_roc}}</td>
</tr>
</tbody>
</table>
</section>
<!-- Visualizations -->
<!-- Image placeholders expect a URL or path resolvable by the rendered page. -->
<section class="visualizations">
<h2>Visualizations</h2>
<h3>Confusion Matrix</h3>
<img src="{{confusion_matrix_image}}" alt="Confusion Matrix" class="visualization-image">
<h3>ROC Curve</h3>
<img src="{{roc_curve_image}}" alt="ROC Curve" class="visualization-image">
<h3>Feature Importance</h3>
<img src="{{feature_importance_image}}" alt="Feature Importance" class="visualization-image">
</section>
<!-- Conclusion -->
<section class="conclusion">
<h2>Conclusion</h2>
<p>{{conclusion_text}}</p>
</section>
<!-- Additional Notes -->
<section class="notes">
<h3>Additional Notes</h3>
<p>{{additional_notes}}</p>
</section>
</div>
</body>
</html>

View File

@@ -0,0 +1,36 @@
# Sample dataset for AutoML pipeline builder plugin
# This dataset is a simplified example and may not be suitable for all AutoML tasks.
# Replace it with your actual dataset for optimal results.
# Note: lines beginning with '#' are not standard CSV; strip them before parsing,
# or load with a comment-aware reader (e.g., pandas.read_csv(..., comment='#')).
#
# Columns:
# feature1: Numerical feature (e.g., age, income)
# feature2: Categorical feature (e.g., city, product type) - encoded as strings
# target: Target variable (e.g., churn, conversion) - binary (0 or 1)
feature1,feature2,target
25,New York,0
30,Los Angeles,1
40,Chicago,0
22,Houston,0
35,Phoenix,1
48,Philadelphia,1
28,San Antonio,0
32,San Diego,1
45,Dallas,0
27,San Jose,0
31,Austin,1
38,Jacksonville,0
24,Fort Worth,0
41,Columbus,1
29,Charlotte,0
33,San Francisco,1
46,Indianapolis,1
23,Seattle,0
36,Denver,1
49,Washington,1
# Add more data rows here. Aim for a larger dataset (hundreds or thousands of rows) for better AutoML performance.
# Example:
# 52,Miami,0
# 39,Boston,1
# Consider adding missing values (e.g., empty strings) to test the pipeline's handling of missing data.
# For categorical features with many unique values, consider using techniques like one-hot encoding or target encoding.
1 # Sample dataset for AutoML pipeline builder plugin
2 # This dataset is a simplified example and may not be suitable for all AutoML tasks.
3 # Replace this with your actual dataset for optimal results.
4 #
5 # Columns:
6 # feature1: Numerical feature (e.g., age, income)
7 # feature2: Categorical feature (e.g., city, product type) - encoded as strings
8 # target: Target variable (e.g., churn, conversion) - binary (0 or 1)
9 feature1,feature2,target
10 25,New York,0
11 30,Los Angeles,1
12 40,Chicago,0
13 22,Houston,0
14 35,Phoenix,1
15 48,Philadelphia,1
16 28,San Antonio,0
17 32,San Diego,1
18 45,Dallas,0
19 27,San Jose,0
20 31,Austin,1
21 38,Jacksonville,0
22 24,Fort Worth,0
23 41,Columbus,1
24 29,Charlotte,0
25 33,San Francisco,1
26 46,Indianapolis,1
27 23,Seattle,0
28 36,Denver,1
29 49,Washington,1
30 # Add more data rows here. Aim for a larger dataset (hundreds or thousands of rows) for better AutoML performance.
31 # Example:
32 # 52,Miami,0
33 # 39,Boston,1
34 # Consider adding missing values (e.g., empty strings) to test the pipeline's handling of missing data.
35 # For categorical features with many unique values, consider using techniques like one-hot encoding or target encoding.

View File

@@ -0,0 +1,69 @@
---
# pipeline_template.yaml
#
# Configuration template for an AutoML pipeline run. Replace the
# REPLACE_ME / YOUR_* placeholders before use.
#
# NOTE(review): the source view had lost all leading indentation; the nesting
# below is reconstructed from the section comments and key semantics — confirm
# it against the consuming pipeline code.

# --- General Pipeline Configuration ---
pipeline_name: "AutoML Pipeline - REPLACE_ME"  # Name of the pipeline (e.g., Customer Churn Prediction)
description: "Automated Machine Learning pipeline for REPLACE_ME."  # Short description of the pipeline's purpose
version: "1.0.0"  # Pipeline version (quoted so it stays a string, not a float)

# --- Data Source Configuration ---
data_source:
  type: "csv"  # Type of data source (e.g., csv, database, api)
  location: "data/YOUR_DATASET.csv"  # Path to the data file or connection string
  target_column: "target"  # Name of the target variable column
  index_column: null  # Name of the index column (optional)
  delimiter: ","  # Delimiter for CSV files (e.g., ",", ";", "\t")
  quotechar: '"'  # Quote character for CSV files
  encoding: "utf-8"  # Encoding of the data file

# --- Feature Engineering Configuration ---
feature_engineering:
  enabled: true  # Enable or disable feature engineering
  numeric_imputation: "mean"  # Strategy for missing numerical values (e.g., mean, median, most_frequent, constant)
  categorical_encoding: "onehot"  # Method for encoding categorical features (e.g., onehot, ordinal, target)
  feature_scaling: "standard"  # Scaling method for numeric features (e.g., standard, minmax, robust)
  feature_selection:
    enabled: false  # Enable or disable feature selection
    method: "variance_threshold"  # Feature selection method (e.g., variance_threshold, selectkbest)
    threshold: 0.01  # Threshold for feature selection (depends on the method)

# --- Model Training Configuration ---
model_training:
  algorithm: "xgboost"  # ML algorithm to use (e.g., xgboost, lightgbm, randomforest, logisticregression)
  hyperparameter_tuning:
    enabled: true  # Enable or disable hyperparameter tuning
    method: "random_search"  # Tuning method (e.g., random_search, grid_search, bayesian_optimization)
    n_trials: 50  # Number of trials for hyperparameter tuning
    scoring_metric: "roc_auc"  # Metric to optimize for (e.g., roc_auc, accuracy, f1, precision, recall)
  hyperparameter_space:  # Define hyperparameter ranges for each algorithm
    xgboost:  # Example for XGBoost
      n_estimators: [100, 200, 300]
      learning_rate: [0.01, 0.1, 0.2]
      max_depth: [3, 5, 7]
    # Add hyperparameter spaces for other algorithms as needed

# --- Model Evaluation Configuration ---
model_evaluation:
  split_ratio: 0.2  # Ratio for splitting data into training and validation sets
  scoring_metrics: ["roc_auc", "accuracy", "f1", "precision", "recall"]  # Metrics to evaluate the model
  cross_validation:
    enabled: true  # Enable or disable cross-validation
    n_folds: 5  # Number of folds for cross-validation

# --- Model Deployment Configuration ---
model_deployment:
  enabled: false  # Enable or disable model deployment
  environment: "staging"  # Target deployment environment (e.g., staging, production)
  model_registry: "local"  # Location to store the trained model (e.g., local, s3, gcp)
  model_path: "models/YOUR_MODEL.pkl"  # Path to save the trained model
  api_endpoint: "YOUR_API_ENDPOINT"  # API endpoint for model deployment (if applicable)

# --- Logging Configuration ---
logging:
  level: "INFO"  # Logging level (e.g., DEBUG, INFO, WARNING, ERROR)
  format: "%(asctime)s - %(levelname)s - %(message)s"  # Logging format (quoted: leading % is a YAML directive indicator)
  file_path: "logs/pipeline.log"  # Path to the log file

# --- Error Handling Configuration ---
error_handling:
  on_failure: "email_notification"  # Action on pipeline failure (e.g., email_notification, retry, stop)
  email_recipients: ["YOUR_EMAIL@example.com"]  # Email addresses to notify on failure