#!/usr/bin/env python3 """ This script provides a template for visualizing regression model results. It includes functionalities for plotting predicted vs. actual values, residual plots, and other visualizations to assess model performance. """ import argparse import matplotlib.pyplot as plt import numpy as np import pandas as pd import seaborn as sns from sklearn.metrics import mean_squared_error, r2_score from typing import Optional def plot_predicted_vs_actual( y_true: np.ndarray, y_pred: np.ndarray, title: str = "Predicted vs. Actual" ) -> None: """ Plots predicted values against actual values. Args: y_true (np.ndarray): Array of actual values. y_pred (np.ndarray): Array of predicted values. title (str, optional): Title of the plot. Defaults to "Predicted vs. Actual". """ try: plt.figure(figsize=(8, 6)) plt.scatter(y_true, y_pred, alpha=0.5) plt.xlabel("Actual Values") plt.ylabel("Predicted Values") plt.title(title) plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], "k--", lw=2) plt.show() except Exception as e: print(f"Error plotting predicted vs. actual: {e}") def plot_residuals( y_true: np.ndarray, y_pred: np.ndarray, title: str = "Residual Plot" ) -> None: """ Plots the residuals (errors) of the regression model. Args: y_true (np.ndarray): Array of actual values. y_pred (np.ndarray): Array of predicted values. title (str, optional): Title of the plot. Defaults to "Residual Plot". """ try: residuals = y_true - y_pred plt.figure(figsize=(8, 6)) plt.scatter(y_pred, residuals, alpha=0.5) plt.xlabel("Predicted Values") plt.ylabel("Residuals") plt.title(title) plt.axhline(y=0, color="k", linestyle="--") # Add a horizontal line at y=0 plt.show() except Exception as e: print(f"Error plotting residuals: {e}") def visualize_regression_results( y_true: np.ndarray, y_pred: np.ndarray, model_name: str = "Regression Model" ) -> None: """ Visualizes the regression results, including predicted vs. actual and residual plots. Args: y_true (np.ndarray): Array of actual values. y_pred (np.ndarray): Array of predicted values. model_name (str, optional): Name of the regression model. Defaults to "Regression Model". """ try: plot_predicted_vs_actual( y_true, y_pred, title=f"{model_name}: Predicted vs. Actual" ) plot_residuals(y_true, y_pred, title=f"{model_name}: Residual Plot") # Calculate and print metrics mse = mean_squared_error(y_true, y_pred) r2 = r2_score(y_true, y_pred) print(f"{model_name} - Mean Squared Error: {mse:.4f}") print(f"{model_name} - R-squared: {r2:.4f}") except Exception as e: print(f"Error visualizing regression results: {e}") def main( actual_values_file: str, predicted_values_file: str, model_name: str = "Regression Model" ) -> None: """ Main function to load data and visualize regression results. Args: actual_values_file (str): Path to the CSV file containing actual values. predicted_values_file (str): Path to the CSV file containing predicted values. model_name (str, optional): Name of the regression model. Defaults to "Regression Model". """ try: # Load data from CSV files actual_df = pd.read_csv(actual_values_file) predicted_df = pd.read_csv(predicted_values_file) # Assuming the CSVs have a single column with the values y_true = actual_df.iloc[:, 0].values y_pred = predicted_df.iloc[:, 0].values visualize_regression_results(y_true, y_pred, model_name=model_name) except FileNotFoundError: print("Error: One or both of the input files were not found.") except pd.errors.EmptyDataError: print("Error: One or both of the input files are empty.") except KeyError: print("Error: The specified column does not exist in the input file.") except Exception as e: print(f"An unexpected error occurred: {e}") if __name__ == "__main__": parser = argparse.ArgumentParser( description="Visualize regression model results from CSV files." ) parser.add_argument( "--actual", type=str, required=True, help="Path to the CSV file containing actual values.", ) parser.add_argument( "--predicted", type=str, required=True, help="Path to the CSV file containing predicted values.", ) parser.add_argument( "--model_name", type=str, default="Regression Model", help="Name of the regression model (optional).", ) args = parser.parse_args() # Create dummy data files for example usage example_actual_data = pd.DataFrame({"actual": np.random.rand(100)}) example_predicted_data = pd.DataFrame({"predicted": example_actual_data["actual"] + np.random.normal(0, 0.1, 100)}) example_actual_data.to_csv("actual_values.csv", index=False) example_predicted_data.to_csv("predicted_values.csv", index=False) # Example usage with command-line arguments main(args.actual, args.predicted, args.model_name) # Clean up dummy data files import os os.remove("actual_values.csv") os.remove("predicted_values.csv")