Initial commit
This commit is contained in:
7
skills/automl-pipeline-builder/assets/README.md
Normal file
7
skills/automl-pipeline-builder/assets/README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Assets
|
||||
|
||||
Bundled resources for the automl-pipeline-builder skill.
|
||||
|
||||
- [ ] pipeline_template.yaml: YAML template for defining the structure and configuration of the AutoML pipeline.
|
||||
- [ ] example_dataset.csv: Sample dataset that can be used as input for the AutoML pipeline.
|
||||
- [ ] evaluation_report_template.html: HTML template for generating the model evaluation report.
|
||||
@@ -0,0 +1,203 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AutoML Model Evaluation Report</title>
    <style>
        /* Basic Reset: strip default spacing, borders and font styling so the
           rules below start from a known baseline. */
        body, h1, h2, h3, p, table, th, td {
            margin: 0;
            padding: 0;
            border: 0;
            font-size: 100%;
            font: inherit;
            vertical-align: baseline;
        }

        /* General Styles */
        body {
            font-family: sans-serif;
            line-height: 1.6;
            background-color: #f4f4f4;
            color: #333;
            padding: 20px;
        }

        /* Centered white card that holds the whole report. */
        .container {
            max-width: 960px;
            margin: 0 auto;
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }

        h1, h2, h3 {
            margin-bottom: 15px;
            color: #0056b3;
        }

        h1 {
            font-size: 2.5em;
        }

        h2 {
            font-size: 2em;
        }

        h3 {
            font-size: 1.5em;
        }

        p {
            margin-bottom: 15px;
        }

        /* Table Styles */
        table {
            width: 100%;
            border-collapse: collapse;
            margin-bottom: 20px;
        }

        th, td {
            padding: 12px 15px;
            text-align: left;
            border-bottom: 1px solid #ddd;
        }

        /* The reset above removes the default bold from header cells,
           so it is restored explicitly here. */
        th {
            background-color: #f0f0f0;
            font-weight: bold;
        }

        /* Responsive Design */
        @media (max-width: 768px) {
            .container {
                padding: 15px;
            }

            h1 {
                font-size: 2em;
            }

            h2 {
                font-size: 1.6em;
            }

            h3 {
                font-size: 1.3em;
            }

            /* Let wide tables scroll horizontally instead of overflowing
               the container on narrow screens. */
            table {
                display: block;
                overflow-x: auto;
            }
        }

        /* Specific Styles: vertical spacing between report sections. */
        .model-summary {
            margin-bottom: 30px;
        }

        .evaluation-metrics {
            margin-bottom: 30px;
        }

        .visualizations {
            margin-bottom: 30px;
        }

        .conclusion {
            margin-bottom: 20px;
        }

        /* The body also uses a "notes" section; give it the same spacing
           treatment as the other sections. */
        .notes {
            margin-bottom: 20px;
        }

        .visualization-image {
            max-width: 100%;
            height: auto;
            border: 1px solid #ccc;
            border-radius: 5px;
            margin-bottom: 10px;
        }
    </style>
</head>
<body>

    <!--
        Report template. Every {{placeholder}} is meant to be substituted by
        the report generator before the page is served.
        NOTE(review): the templating engine is not visible from this file;
        confirm that it HTML-escapes substituted values.
    -->
    <div class="container">
        <!-- Report Header -->
        <h1>AutoML Model Evaluation Report</h1>
        <p>Generated on: {{generation_date}}</p>

        <!-- Model Summary -->
        <section class="model-summary">
            <h2>Model Summary</h2>
            <p><strong>Model Name:</strong> {{model_name}}</p>
            <p><strong>Algorithm:</strong> {{algorithm}}</p>
            <p><strong>Dataset:</strong> {{dataset_name}}</p>
            <p><strong>Features Used:</strong> {{features_used}}</p>
        </section>

        <!-- Evaluation Metrics -->
        <section class="evaluation-metrics">
            <h2>Evaluation Metrics</h2>
            <table>
                <thead>
                    <tr>
                        <!-- scope="col" ties each header to its column for assistive technology -->
                        <th scope="col">Metric</th>
                        <th scope="col">Value</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>Accuracy</td>
                        <td>{{accuracy}}</td>
                    </tr>
                    <tr>
                        <td>Precision</td>
                        <td>{{precision}}</td>
                    </tr>
                    <tr>
                        <td>Recall</td>
                        <td>{{recall}}</td>
                    </tr>
                    <tr>
                        <td>F1-Score</td>
                        <td>{{f1_score}}</td>
                    </tr>
                    <tr>
                        <td>AUC-ROC</td>
                        <td>{{auc_roc}}</td>
                    </tr>
                </tbody>
            </table>
        </section>

        <!-- Visualizations: each src placeholder is expected to resolve to an image URL or path -->
        <section class="visualizations">
            <h2>Visualizations</h2>
            <h3>Confusion Matrix</h3>
            <img src="{{confusion_matrix_image}}" alt="Confusion Matrix" class="visualization-image">

            <h3>ROC Curve</h3>
            <img src="{{roc_curve_image}}" alt="ROC Curve" class="visualization-image">

            <h3>Feature Importance</h3>
            <img src="{{feature_importance_image}}" alt="Feature Importance" class="visualization-image">
        </section>

        <!-- Conclusion -->
        <section class="conclusion">
            <h2>Conclusion</h2>
            <p>{{conclusion_text}}</p>
        </section>

        <!-- Additional Notes -->
        <section class="notes">
            <h3>Additional Notes</h3>
            <p>{{additional_notes}}</p>
        </section>
    </div>

</body>
</html>
|
||||
36
skills/automl-pipeline-builder/assets/example_dataset.csv
Normal file
36
skills/automl-pipeline-builder/assets/example_dataset.csv
Normal file
@@ -0,0 +1,36 @@
|
||||
# Sample dataset for AutoML pipeline builder plugin
|
||||
# This dataset is a simplified example and may not be suitable for all AutoML tasks.
|
||||
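# Replace this with your actual dataset for optimal results.
# Note: lines beginning with '#' are comments, not data. Load this file with a CSV reader that skips them (e.g., pandas.read_csv(path, comment='#')).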
|
||||
#
|
||||
# Columns:
|
||||
# feature1: Numerical feature (e.g., age, income)
|
||||
# feature2: Categorical feature (e.g., city, product type) - encoded as strings
|
||||
# target: Target variable (e.g., churn, conversion) - binary (0 or 1)
|
||||
|
||||
feature1,feature2,target
|
||||
25,New York,0
|
||||
30,Los Angeles,1
|
||||
40,Chicago,0
|
||||
22,Houston,0
|
||||
35,Phoenix,1
|
||||
48,Philadelphia,1
|
||||
28,San Antonio,0
|
||||
32,San Diego,1
|
||||
45,Dallas,0
|
||||
27,San Jose,0
|
||||
31,Austin,1
|
||||
38,Jacksonville,0
|
||||
24,Fort Worth,0
|
||||
41,Columbus,1
|
||||
29,Charlotte,0
|
||||
33,San Francisco,1
|
||||
46,Indianapolis,1
|
||||
23,Seattle,0
|
||||
36,Denver,1
|
||||
49,Washington,1
|
||||
# Add more data rows here. Aim for a larger dataset (hundreds or thousands of rows) for better AutoML performance.
|
||||
# Example:
|
||||
# 52,Miami,0
|
||||
# 39,Boston,1
|
||||
# Consider adding missing values (i.e., empty fields, such as the row `35,,1`) to test the pipeline's handling of missing data.
|
||||
# For categorical features with many unique values, consider using techniques like one-hot encoding or target encoding.
|
||||
|
69
skills/automl-pipeline-builder/assets/pipeline_template.yaml
Normal file
69
skills/automl-pipeline-builder/assets/pipeline_template.yaml
Normal file
@@ -0,0 +1,69 @@
|
||||
# pipeline_template.yaml
---
# --- General Pipeline Configuration ---
# Name of the pipeline (e.g., Customer Churn Prediction)
pipeline_name: "AutoML Pipeline - REPLACE_ME"
# Short description of the pipeline's purpose
description: "Automated Machine Learning pipeline for REPLACE_ME."
# Pipeline version (quoted so it stays a string)
version: "1.0.0"
# --- Data Source Configuration ---
data_source:
  # Type of data source (e.g., csv, database, api)
  type: "csv"
  # Path to the data file or connection string
  location: "data/YOUR_DATASET.csv"
  # Name of the target variable column
  target_column: "target"
  # Name of the index column (optional)
  index_column: null
  # Delimiter for CSV files (e.g., ",", ";", "\t")
  delimiter: ","
  # Quote character for CSV files
  quotechar: '"'
  # Encoding of the data file
  encoding: "utf-8"
# --- Feature Engineering Configuration ---
feature_engineering:
  # Enable or disable feature engineering
  enabled: true
  # Strategy for handling missing numerical values
  # (e.g., mean, median, most_frequent, constant)
  numeric_imputation: "mean"
  # Method for encoding categorical features (e.g., onehot, ordinal, target)
  categorical_encoding: "onehot"
  # Scaling method for numeric features (e.g., standard, minmax, robust)
  feature_scaling: "standard"
  feature_selection:
    # Enable or disable feature selection
    enabled: false
    # Feature selection method (e.g., variance_threshold, selectkbest)
    method: "variance_threshold"
    # Threshold for feature selection (depends on the method)
    threshold: 0.01
# --- Model Training Configuration ---
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that hyperparameter_space is a sibling of hyperparameter_tuning
# (rather than a child of it) against the consumer's schema.
model_training:
  # Machine learning algorithm to use
  # (e.g., xgboost, lightgbm, randomforest, logisticregression)
  algorithm: "xgboost"
  hyperparameter_tuning:
    # Enable or disable hyperparameter tuning
    enabled: true
    # Hyperparameter tuning method
    # (e.g., random_search, grid_search, bayesian_optimization)
    method: "random_search"
    # Number of trials for hyperparameter tuning
    # NOTE(review): the example xgboost space below has only 3 x 3 x 3 = 27
    # combinations, fewer than n_trials; confirm how the tuner handles this.
    n_trials: 50
    # Metric to optimize for (e.g., roc_auc, accuracy, f1, precision, recall)
    scoring_metric: "roc_auc"
  # Define hyperparameter ranges for each algorithm
  hyperparameter_space:
    # Example for XGBoost
    xgboost:
      n_estimators: [100, 200, 300]
      learning_rate: [0.01, 0.1, 0.2]
      max_depth: [3, 5, 7]
    # Add hyperparameter spaces for other algorithms as needed
# --- Model Evaluation Configuration ---
model_evaluation:
  # Ratio for splitting data into training and validation sets
  # NOTE(review): presumably the fraction held out for validation (0.2 = 20%);
  # confirm against the consumer.
  split_ratio: 0.2
  # List of metrics to evaluate the model
  scoring_metrics: ["roc_auc", "accuracy", "f1", "precision", "recall"]
  cross_validation:
    # Enable or disable cross-validation
    enabled: true
    # Number of folds for cross-validation
    n_folds: 5
# --- Model Deployment Configuration ---
model_deployment:
  # Enable or disable model deployment
  enabled: false
  # Target deployment environment (e.g., staging, production)
  environment: "staging"
  # Location to store the trained model (e.g., local, s3, gcp)
  model_registry: "local"
  # Path to save the trained model
  model_path: "models/YOUR_MODEL.pkl"
  # API endpoint for model deployment (if applicable)
  api_endpoint: "YOUR_API_ENDPOINT"
# --- Logging Configuration ---
logging:
  # Logging level (e.g., DEBUG, INFO, WARNING, ERROR)
  level: "INFO"
  # Logging format (kept quoted: the value starts with "%", a YAML indicator)
  format: "%(asctime)s - %(levelname)s - %(message)s"
  # Path to the log file
  file_path: "logs/pipeline.log"
# --- Error Handling Configuration ---
error_handling:
  # Action to take on pipeline failure (e.g., email_notification, retry, stop)
  on_failure: "email_notification"
  # List of email addresses to notify on failure
  email_recipients: ["YOUR_EMAIL@example.com"]
Reference in New Issue
Block a user