Initial commit
This commit is contained in:
6
skills/regression-analysis-tool/assets/README.md
Normal file
6
skills/regression-analysis-tool/assets/README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# Assets
|
||||
|
||||
Bundled resources for the regression-analysis-tool skill.
|
||||
|
||||
- [ ] example_dataset.csv: Example CSV dataset for demonstrating regression analysis.
|
||||
- [ ] model_visualization_template.py: Python script template for visualizing the regression model and its performance.
|
||||
21
skills/regression-analysis-tool/assets/example_dataset.csv
Normal file
21
skills/regression-analysis-tool/assets/example_dataset.csv
Normal file
@@ -0,0 +1,21 @@
|
||||
# Example dataset for regression analysis with the regression-analysis-tool plugin.
|
||||
# This dataset contains independent variables (X1, X2, X3) and a dependent variable (Y).
|
||||
# Use this dataset as a starting point for running the /run-regression command.
|
||||
# You can replace this data with your own dataset. Ensure your data is clean and properly formatted.
|
||||
# Columns should be comma-separated. The first non-comment row should be the header row.
|
||||
# If you need to include text values, enclose them in double quotes.
|
||||
# This example contains 10 rows, but your dataset can contain more.
|
||||
|
||||
X1,X2,X3,Y
|
||||
10,20,30,100
|
||||
15,25,35,120
|
||||
20,30,40,140
|
||||
25,35,45,160
|
||||
30,40,50,180
|
||||
35,45,55,200
|
||||
40,50,60,220
|
||||
45,55,65,240
|
||||
50,60,70,260
|
||||
55,65,75,280
|
||||
# Add your own data below.
|
||||
#<X1_VALUE>,<X2_VALUE>,<X3_VALUE>,<Y_VALUE>
|
||||
|
@@ -0,0 +1,161 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
This script provides a template for visualizing regression model results.
|
||||
|
||||
It includes functionalities for plotting predicted vs. actual values,
|
||||
residual plots, and other visualizations to assess model performance.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from sklearn.metrics import mean_squared_error, r2_score
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def plot_predicted_vs_actual(
    y_true: np.ndarray, y_pred: np.ndarray, title: str = "Predicted vs. Actual"
) -> None:
    """
    Plots predicted values against actual values.

    Draws a scatter plot of ``y_pred`` against ``y_true`` together with a
    dashed identity line (predicted == actual). The inputs are validated
    before any figure is created, so invalid input does not leave an empty
    figure open. Any failure is reported on stdout instead of being raised.

    Args:
        y_true (np.ndarray): Array of actual values. Any array-like accepted
            by ``np.asarray`` works.
        y_pred (np.ndarray): Array of predicted values, with as many elements
            as ``y_true``.
        title (str, optional): Title of the plot. Defaults to "Predicted vs. Actual".
    """
    try:
        # Coerce first so that lists and pandas Series work like ndarrays.
        actual = np.asarray(y_true)
        predicted = np.asarray(y_pred)

        # Validate up front: failing after plt.figure() would leave a blank
        # figure registered with pyplot.
        if actual.size != predicted.size:
            raise ValueError(
                "y_true and y_pred must have the same number of elements "
                f"(got {actual.size} and {predicted.size})"
            )
        if actual.size == 0:
            raise ValueError("y_true and y_pred must not be empty")

        # Span the identity line over both series so it is not cut short
        # when predictions fall outside the range of the actual values.
        lower = min(actual.min(), predicted.min())
        upper = max(actual.max(), predicted.max())

        plt.figure(figsize=(8, 6))
        plt.scatter(actual, predicted, alpha=0.5)
        plt.xlabel("Actual Values")
        plt.ylabel("Predicted Values")
        plt.title(title)
        plt.plot([lower, upper], [lower, upper], "k--", lw=2)
        plt.show()
    except Exception as e:
        print(f"Error plotting predicted vs. actual: {e}")
|
||||
|
||||
|
||||
def plot_residuals(
    y_true: np.ndarray, y_pred: np.ndarray, title: str = "Residual Plot"
) -> None:
    """
    Draws a scatter plot of the model's residuals against its predictions.

    The residuals are computed as ``y_true - y_pred`` and a dashed horizontal
    line marks zero error. Any exception raised along the way is caught and
    reported on stdout rather than propagated.

    Args:
        y_true (np.ndarray): Array of actual values.
        y_pred (np.ndarray): Array of predicted values.
        title (str, optional): Title of the plot. Defaults to "Residual Plot".
    """
    try:
        # Computed before any figure exists, so incompatible inputs fail early.
        errors = y_true - y_pred

        _, axes = plt.subplots(figsize=(8, 6))
        axes.scatter(y_pred, errors, alpha=0.5)
        axes.set_xlabel("Predicted Values")
        axes.set_ylabel("Residuals")
        axes.set_title(title)
        # Zero-error reference line.
        axes.axhline(y=0, color="k", linestyle="--")
        plt.show()
    except Exception as exc:
        print(f"Error plotting residuals: {exc}")
|
||||
|
||||
|
||||
def visualize_regression_results(
    y_true: np.ndarray, y_pred: np.ndarray, model_name: str = "Regression Model"
) -> None:
    """
    Shows the diagnostic plots for a regression model and prints its metrics.

    Calls ``plot_predicted_vs_actual`` and ``plot_residuals`` in that order,
    then prints the mean squared error and R-squared to stdout, each with four
    decimal places. Any exception is caught and reported on stdout.

    Args:
        y_true (np.ndarray): Array of actual values.
        y_pred (np.ndarray): Array of predicted values.
        model_name (str, optional): Name of the regression model, used as a prefix
            in plot titles and printed metrics. Defaults to "Regression Model".
    """
    try:
        scatter_title = f"{model_name}: Predicted vs. Actual"
        residual_title = f"{model_name}: Residual Plot"
        plot_predicted_vs_actual(y_true, y_pred, title=scatter_title)
        plot_residuals(y_true, y_pred, title=residual_title)

        # Both metrics are computed before anything is printed.
        squared_error = mean_squared_error(y_true, y_pred)
        fit_quality = r2_score(y_true, y_pred)
        report = (
            f"{model_name} - Mean Squared Error: {squared_error:.4f}",
            f"{model_name} - R-squared: {fit_quality:.4f}",
        )
        for line in report:
            print(line)
    except Exception as exc:
        print(f"Error visualizing regression results: {exc}")
|
||||
|
||||
|
||||
def main(
    actual_values_file: str, predicted_values_file: str, model_name: str = "Regression Model"
) -> None:
    """
    Main function to load data and visualize regression results.

    Reads both CSV files, takes the first column of each as the actual and
    predicted values, checks that the two columns are non-empty and equally
    long, and passes them to ``visualize_regression_results``. All errors are
    reported on stdout; nothing is raised to the caller.

    Args:
        actual_values_file (str): Path to the CSV file containing actual values.
        predicted_values_file (str): Path to the CSV file containing predicted values.
        model_name (str, optional): Name of the regression model. Defaults to "Regression Model".
    """
    try:
        # Load data from CSV files
        actual_df = pd.read_csv(actual_values_file)
        predicted_df = pd.read_csv(predicted_values_file)

        # Only the first column of each file is used; other columns are ignored.
        y_true = actual_df.iloc[:, 0].values
        y_pred = predicted_df.iloc[:, 0].values

        # Reject unusable input here with one clear message, instead of
        # letting each plotting/metric step fail separately further down.
        if len(y_true) != len(y_pred):
            print(
                "Error: The input files must contain the same number of rows "
                f"(actual: {len(y_true)}, predicted: {len(y_pred)})."
            )
            return
        if len(y_true) == 0:
            print("Error: The input files contain no data rows.")
            return

        visualize_regression_results(y_true, y_pred, model_name=model_name)

    except FileNotFoundError:
        print("Error: One or both of the input files were not found.")
    except pd.errors.EmptyDataError:
        print("Error: One or both of the input files are empty.")
    except KeyError:
        print("Error: The specified column does not exist in the input file.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point.
    #
    # Normal use:  --actual FILE --predicted FILE [--model_name NAME]
    # Demo use:    --demo [--model_name NAME]
    #
    # The demo data is written to a temporary directory. It must never be
    # written to fixed file names in the working directory, because that
    # would overwrite (and later delete) user files with the same names.
    import os
    import tempfile

    parser = argparse.ArgumentParser(
        description="Visualize regression model results from CSV files."
    )
    parser.add_argument(
        "--actual",
        type=str,
        help="Path to the CSV file containing actual values.",
    )
    parser.add_argument(
        "--predicted",
        type=str,
        help="Path to the CSV file containing predicted values.",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        default="Regression Model",
        help="Name of the regression model (optional).",
    )
    parser.add_argument(
        "--demo",
        action="store_true",
        help=(
            "Visualize randomly generated example data instead of reading "
            "--actual/--predicted (which are ignored when this flag is set)."
        ),
    )

    args = parser.parse_args()

    if args.demo:
        # Synthetic example: 100 uniform random targets plus Gaussian noise.
        example_actual_data = pd.DataFrame({"actual": np.random.rand(100)})
        example_predicted_data = pd.DataFrame(
            {"predicted": example_actual_data["actual"] + np.random.normal(0, 0.1, 100)}
        )
        # The temporary directory and its files are removed on exit, even
        # if visualization fails.
        with tempfile.TemporaryDirectory() as demo_dir:
            demo_actual_path = os.path.join(demo_dir, "actual_values.csv")
            demo_predicted_path = os.path.join(demo_dir, "predicted_values.csv")
            example_actual_data.to_csv(demo_actual_path, index=False)
            example_predicted_data.to_csv(demo_predicted_path, index=False)
            main(demo_actual_path, demo_predicted_path, args.model_name)
    elif args.actual is None or args.predicted is None:
        # Same exit status (2) as argparse's own "required argument" error.
        parser.error(
            "the following arguments are required: --actual, --predicted "
            "(or pass --demo to use generated example data)"
        )
    else:
        main(args.actual, args.predicted, args.model_name)
|
||||
Reference in New Issue
Block a user