Initial commit

2025-11-29 18:51:59 +08:00
commit 7dbe52e8bb
11 changed files with 577 additions and 0 deletions
--- a/skills/time-series-forecaster/assets/README.md
+++ b/skills/time-series-forecaster/assets/README.md
@@ -0,0 +1,7 @@
+# Assets
+
+Bundled resources for the time-series-forecaster skill.
+
+- [ ] example_data.csv: Example time series data for testing and demonstration purposes.
+- [ ] visualization_template.py: Python script to generate visualizations of the time series data and forecasts.
+- [ ] configuration_template.json: Template for configuring the forecasting model parameters.
--- a/skills/time-series-forecaster/assets/configuration_template.json
+++ b/skills/time-series-forecaster/assets/configuration_template.json
@@ -0,0 +1,114 @@
+{
+  "_comment": "Configuration template for the time-series-forecaster plugin.",
+  "model_name": "Prophet",
+  "_comment": "Name of the forecasting model to use. Options: Prophet, ARIMA, ExponentialSmoothing. Default: Prophet",
+  "model_parameters": {
+    "_comment": "Parameters specific to the chosen model.",
+    "Prophet": {
+      "_comment": "Parameters for the Prophet model (https://facebook.github.io/prophet/docs/parameters.html). Leave empty to use defaults.",
+      "growth": "linear",
+      "_comment": "Type of growth: 'linear' or 'logistic'.",
+      "changepoints": null,
+      "_comment": "List of dates at which to include potential changepoints. If None, potential changepoints are automatically placed.",
+      "n_changepoints": 25,
+      "_comment": "Number of potential changepoints to place automatically.",
+      "changepoint_range": 0.8,
+      "_comment": "Proportion of history in which trend changepoints will be estimated.",
+      "yearly_seasonality": "auto",
+      "_comment": "Fit yearly seasonality. Can be 'auto', True, False, or a number of Fourier terms to generate.",
+      "weekly_seasonality": "auto",
+      "_comment": "Fit weekly seasonality. Can be 'auto', True, False, or a number of Fourier terms to generate.",
+      "daily_seasonality": "auto",
+      "_comment": "Fit daily seasonality. Can be 'auto', True, False, or a number of Fourier terms to generate.",
+      "holidays": null,
+      "_comment": "Pandas DataFrame of holidays with columns 'ds' (date) and 'holiday' (holiday name).",
+      "seasonality_mode": "additive",
+      "_comment": "Type of seasonality: 'additive' or 'multiplicative'.",
+      "seasonality_prior_scale": 10.0,
+      "_comment": "Parameter modulating the strength of the seasonality model.",
+      "holidays_prior_scale": 10.0,
+      "_comment": "Parameter modulating the strength of the holiday model.",
+      "changepoint_prior_scale": 0.05,
+      "_comment": "Parameter modulating the flexibility of the automatic changepoint selection.",
+      "mcmc_samples": 0,
+      "_comment": "Number of MCMC samples to draw. If 0, will do point estimates.",
+      "interval_width": 0.8,
+      "_comment": "Width of the uncertainty intervals provided for the forecast.",
+      "uncertainty_samples": 1000,
+      "_comment": "Number of simulated draws used to estimate uncertainty intervals.",
+      "stan_backend": null,
+      "_comment": "The Stan backend to use. Valid options are: null (default), 'CMDSTANPY', and 'PYSTAN'."
+    },
+    "ARIMA": {
+      "_comment": "Parameters for the ARIMA model (https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima.model.ARIMA.html).",
+      "p": 5,
+      "_comment": "The number of lag observations included in the model, also called the order of the AR part.",
+      "d": 1,
+      "_comment": "The number of times that the raw observations are differenced, also called the degree of differencing.",
+      "q": 0,
+      "_comment": "The size of the moving average window, also called the order of the MA part.",
+      "seasonal_order": [
+        0,
+        0,
+        0,
+        0
+      ],
+      "_comment": "The (P,D,Q,s) order of the seasonal component of the model for seasonality of period s (often 4 or 12). Can be set to None if there is no seasonal component.",
+      "trend": null,
+      "_comment": "Parameter controlling the deterministic trend. Can be a string ('n', 'c', 't', 'ct') or a list of regressor names. 'n' - no trend, 'c' - constant only, 't' - time trend only, 'ct' - constant and time trend."
+    },
+    "ExponentialSmoothing": {
+      "_comment": "Parameters for the Exponential Smoothing model (https://www.statsmodels.org/stable/generated/statsmodels.tsa.holtwinters.ExponentialSmoothing.html).",
+      "seasonal_periods": 12,
+      "_comment": "The number of periods in a complete seasonal cycle.",
+      "trend": "add",
+      "_comment": "Type of trend component. Options: 'add', 'mul', None.",
+      "seasonal": "add",
+      "_comment": "Type of seasonal component. Options: 'add', 'mul', None.",
+      "damped_trend": false,
+      "_comment": "Should the trend component be damped?",
+      "initialization_method": "estimated",
+      "_comment": "Method for initializing the recursions. Options: 'estimated', 'heuristic', 'known'.",
+      "use_boxcox": false,
+      "_comment": "Should the Box-Cox transform be applied to the data?"
+    }
+  },
+  "data_frequency": "D",
+  "_comment": "Frequency of the time series data. Options: 'D' (daily), 'W' (weekly), 'M' (monthly), 'Q' (quarterly), 'Y' (yearly), etc.",
+  "forecast_horizon": 30,
+  "_comment": "Number of periods to forecast into the future.",
+  "confidence_interval": 0.95,
+  "_comment": "Confidence level for the forecast intervals.",
+  "error_metric": "rmse",
+  "_comment": "Metric to use for evaluating model performance. Options: 'rmse', 'mae', 'mse'.",
+  "cross_validation": {
+    "enabled": true,
+    "_comment": "Enable or disable cross-validation.",
+    "initial": "365 days",
+    "_comment": "Amount of time to train the model initially. Example: '365 days', '6 months'.",
+    "period": "90 days",
+    "_comment": "Spacing between cutoff dates. Example: '90 days', '3 months'.",
+    "horizon": "30 days",
+    "_comment": "Size of the forecast horizon. Example: '30 days', '1 month'."
+  },
+  "feature_engineering": {
+    "add_lags": false,
+    "_comment": "Add lagged values of the time series as features.",
+    "num_lags": 7,
+    "_comment": "Number of lags to add if add_lags is true.",
+    "add_seasonal_dummies": false,
+    "_comment": "Add seasonal dummy variables.",
+    "add_time_trend": false,
+    "_comment": "Add a time trend feature."
+  },
+  "anomaly_detection": {
+    "enabled": false,
+    "_comment": "Enable or disable anomaly detection.",
+    "threshold": 0.95,
+    "_comment": "Threshold for anomaly detection."
+  },
+  "output_format": "json",
+  "_comment": "Format of the forecast output. Options: 'json', 'csv'.",
+  "dataset_name": "example_sales_data",
+  "_comment": "Name of the dataset for identification purposes."
+}
--- a/skills/time-series-forecaster/assets/example_data.csv
+++ b/skills/time-series-forecaster/assets/example_data.csv
@@ -0,0 +1,43 @@
+# example_data.csv
+# This file provides example time series data for the time-series-forecaster plugin.
+# It includes a date/time column and a value column.
+# Use this data to test the /forecast-ts command.
+#
+# To use this data with the plugin, save it to your workspace and then run:
+# /forecast-ts --data example_data.csv --target value --periods <number_of_periods>
+#
+# Replace <number_of_periods> with the number of time periods you want to forecast.
+#
+
+timestamp,value
+2023-01-01,10
+2023-01-02,12
+2023-01-03,15
+2023-01-04,13
+2023-01-05,16
+2023-01-06,18
+2023-01-07,20
+2023-01-08,22
+2023-01-09,25
+2023-01-10,23
+2023-01-11,26
+2023-01-12,28
+2023-01-13,30
+2023-01-14,32
+2023-01-15,35
+2023-01-16,33
+2023-01-17,36
+2023-01-18,38
+2023-01-19,40
+2023-01-20,42
+2023-01-21,45
+2023-01-22,43
+2023-01-23,46
+2023-01-24,48
+2023-01-25,50
+2023-01-26,52
+2023-01-27,55
+2023-01-28,53
+2023-01-29,56
+2023-01-30,58
+2023-01-31,60
--- a/skills/time-series-forecaster/assets/visualization_template.py
+++ b/skills/time-series-forecaster/assets/visualization_template.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+
+"""
+visualization_template.py
+
+This module provides functions for visualizing time series data and forecasts.
+It includes functionalities for plotting historical data, forecasts,
+confidence intervals, and residuals.
+
+Example:
+    To use this module, import it and call the desired visualization functions.
+    For example:
+
+    >>> import visualization_template as vt
+    >>> import pandas as pd
+    >>> import matplotlib.pyplot as plt
+    >>> # Assume 'data' is a pandas DataFrame with a 'time' column and a 'value' column
+    >>> # Assume 'forecast' is a pandas DataFrame with a 'time' column and a 'forecast' column
+    >>> vt.plot_time_series(data, forecast, title="Time Series Forecast")
+    >>> plt.show()
+"""
+
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+import numpy as np
+from typing import Optional, Tuple, List
+
+
+def plot_time_series(
+    data: pd.DataFrame,
+    forecast: Optional[pd.DataFrame] = None,
+    confidence_interval: Optional[Tuple[float, float]] = None,
+    title: str = "Time Series Data and Forecast",
+    xlabel: str = "Time",
+    ylabel: str = "Value",
+    figsize: Tuple[int, int] = (12, 6),
+    data_color: str = 'blue',
+    forecast_color: str = 'red',
+    confidence_color: str = 'lightgray'
+) -> None:
+    """
+    Plots the time series data and, optionally, a forecast with confidence intervals.
+
+    All arguments are validated before a figure is created, so an invalid call
+    never leaves a partially drawn figure open. The function draws onto a new
+    pyplot figure and does not call `plt.show()`.
+
+    Args:
+        data (pd.DataFrame): DataFrame containing the time series data.
+                              Must have a column named 'time'; the first other column is plotted as the series.
+        forecast (pd.DataFrame, optional): DataFrame containing the forecast. Defaults to None.
+                                          Must have a column named 'time'. The first column that is not 'time',
+                                          'lower' or 'upper' is plotted as the forecast; if only bound columns
+                                          exist, the first non-'time' column is used instead.
+        confidence_interval (Tuple[float, float], optional): Acts as a switch: when not None (and `forecast` is given),
+                                                            the band between the 'lower' and 'upper' columns of
+                                                            `forecast` is shaded. The tuple values themselves are
+                                                            not used for drawing. Defaults to None.
+        title (str, optional): Title of the plot. Defaults to "Time Series Data and Forecast".
+        xlabel (str, optional): Label for the x-axis. Defaults to "Time".
+        ylabel (str, optional): Label for the y-axis. Defaults to "Value".
+        figsize (Tuple[int, int], optional): Figure size (width, height). Defaults to (12, 6).
+        data_color (str, optional): Color of the historical data line. Defaults to 'blue'.
+        forecast_color (str, optional): Color of the forecast line. Defaults to 'red'.
+        confidence_color (str, optional): Color of the confidence interval shading. Defaults to 'lightgray'.
+
+    Raises:
+        ValueError: If `data` does not have a 'time' column.
+        ValueError: If `data` does not have a column with the time series values.
+        ValueError: If `forecast` is provided but does not have a 'time' column.
+        ValueError: If `forecast` is provided but does not have a column with the forecast values.
+        ValueError: If `confidence_interval` is provided but `forecast` does not have 'lower' and 'upper' columns.
+    """
+
+    # --- Validation (no pyplot calls until every check has passed) ---
+    if 'time' not in data.columns:
+        raise ValueError("Data DataFrame must have a 'time' column.")
+
+    value_column = next((col for col in data.columns if col != 'time'), None)
+    if value_column is None:
+        raise ValueError("Data DataFrame must have a column with the time series values.")
+
+    forecast_column = None
+    shade_interval = False
+    if forecast is not None:
+        if 'time' not in forecast.columns:
+            raise ValueError("Forecast DataFrame must have a 'time' column.")
+
+        candidates = [col for col in forecast.columns if col != 'time']
+        if not candidates:
+            raise ValueError("Forecast DataFrame must have a column with the forecast values.")
+
+        # Prefer a column that is not an interval bound so column order cannot
+        # cause 'lower'/'upper' to be drawn as the forecast line. Fall back to
+        # the first candidate to stay compatible with frames that only carry bounds.
+        forecast_column = next(
+            (col for col in candidates if col not in ('lower', 'upper')),
+            candidates[0],
+        )
+
+        if confidence_interval is not None:
+            if 'lower' not in forecast.columns or 'upper' not in forecast.columns:
+                raise ValueError("Forecast DataFrame must have 'lower' and 'upper' columns for confidence intervals.")
+            shade_interval = True
+
+    # --- Drawing ---
+    plt.figure(figsize=figsize)
+    plt.plot(data['time'], data[value_column], label="Historical Data", color=data_color)
+
+    if forecast is not None:
+        plt.plot(forecast['time'], forecast[forecast_column], label="Forecast", color=forecast_color)
+
+        if shade_interval:
+            plt.fill_between(forecast['time'], forecast['lower'], forecast['upper'], color=confidence_color, alpha=0.5, label="Confidence Interval")
+
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.title(title)
+    plt.legend()
+    plt.grid(True)
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+
+
+def plot_residuals(
+    residuals: pd.Series,
+    title: str = "Residual Plot",
+    xlabel: str = "Time",
+    ylabel: str = "Residuals",
+    figsize: Tuple[int, int] = (12, 6)
+) -> None:
+    """
+    Draws forecast residuals against their index on a new figure.
+
+    The residuals are rendered as a marked line together with a dashed
+    reference line at zero. The figure is left open; `plt.show()` is not called.
+
+    Args:
+        residuals (pd.Series): Residual values; the Series index supplies the x-axis.
+        title (str, optional): Title of the plot. Defaults to "Residual Plot".
+        xlabel (str, optional): Label for the x-axis. Defaults to "Time".
+        ylabel (str, optional): Label for the y-axis. Defaults to "Residuals".
+        figsize (Tuple[int, int], optional): Figure size (width, height). Defaults to (12, 6).
+
+    Raises:
+        TypeError: If `residuals` is not a pandas Series.
+    """
+
+    is_series = isinstance(residuals, pd.Series)
+    if not is_series:
+        raise TypeError("Residuals must be a pandas Series.")
+
+    x_positions, y_values = residuals.index, residuals.values
+
+    plt.figure(figsize=figsize)
+    plt.plot(x_positions, y_values, marker='o', linestyle='-', label="Residuals")
+    # Dashed red baseline at zero makes the sign of each residual easy to read.
+    plt.axhline(y=0, color='r', linestyle='--', label="Zero Line")
+
+    # Text decorations
+    plt.title(title)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+
+    # Layout and cosmetics
+    plt.legend()
+    plt.grid(True)
+    plt.xticks(rotation=45)
+    plt.tight_layout()
+
+
+def plot_acf(
+    data: pd.Series,
+    lags: int = 40,
+    title: str = "Autocorrelation Function (ACF)",
+    figsize: Tuple[int, int] = (12, 6)
+) -> None:
+    """
+    Plots the Autocorrelation Function (ACF) of a time series.
+
+    Arguments are validated before statsmodels is imported, so the documented
+    TypeError/ValueError are raised even when that optional dependency is not
+    installed. The plot is drawn on a new figure; `plt.show()` is not called.
+
+    Args:
+        data (pd.Series): Pandas Series containing the time series data.
+        lags (int, optional): Number of lags to plot. Any integral type (including
+                              NumPy integers) is accepted. Defaults to 40.
+        title (str, optional): Title of the plot. Defaults to "Autocorrelation Function (ACF)".
+        figsize (Tuple[int, int], optional): Figure size (width, height). Defaults to (12, 6).
+
+    Raises:
+        TypeError: If `data` is not a pandas Series.
+        ValueError: If `lags` is not a positive integer.
+    """
+    import numbers
+
+    if not isinstance(data, pd.Series):
+        raise TypeError("Data must be a pandas Series.")
+    if not isinstance(lags, numbers.Integral) or lags <= 0:
+        raise ValueError("Lags must be a positive integer.")
+
+    # Lazy import: statsmodels is only needed for this plot, and importing it
+    # after validation keeps argument errors independent of its availability.
+    from statsmodels.graphics.tsaplots import plot_acf as sm_plot_acf
+
+    _, ax = plt.subplots(figsize=figsize)
+    # int() normalises NumPy integer types to a plain Python int for statsmodels.
+    sm_plot_acf(data, lags=int(lags), ax=ax, title=title)
+    plt.tight_layout()
+
+
+def plot_pacf(
+    data: pd.Series,
+    lags: int = 40,
+    title: str = "Partial Autocorrelation Function (PACF)",
+    figsize: Tuple[int, int] = (12, 6)
+) -> None:
+    """
+    Plots the Partial Autocorrelation Function (PACF) of a time series.
+
+    Arguments are validated before statsmodels is imported, so the documented
+    TypeError/ValueError are raised even when that optional dependency is not
+    installed. The plot is drawn on a new figure; `plt.show()` is not called.
+
+    Args:
+        data (pd.Series): Pandas Series containing the time series data.
+        lags (int, optional): Number of lags to plot. Any integral type (including
+                              NumPy integers) is accepted. Defaults to 40.
+        title (str, optional): Title of the plot. Defaults to "Partial Autocorrelation Function (PACF)".
+        figsize (Tuple[int, int], optional): Figure size (width, height). Defaults to (12, 6).
+
+    Raises:
+        TypeError: If `data` is not a pandas Series.
+        ValueError: If `lags` is not a positive integer.
+    """
+    import numbers
+
+    if not isinstance(data, pd.Series):
+        raise TypeError("Data must be a pandas Series.")
+    if not isinstance(lags, numbers.Integral) or lags <= 0:
+        raise ValueError("Lags must be a positive integer.")
+
+    # Lazy import: statsmodels is only needed for this plot, and importing it
+    # after validation keeps argument errors independent of its availability.
+    from statsmodels.graphics.tsaplots import plot_pacf as sm_plot_pacf
+
+    _, ax = plt.subplots(figsize=figsize)
+    # NOTE(review): statsmodels may reject `lags` that are large relative to
+    # the sample size; that error is not intercepted here - confirm desired behaviour.
+    sm_plot_pacf(data, lags=int(lags), ax=ax, title=title)
+    plt.tight_layout()
+
+
+if __name__ == '__main__':
+    # Demo run: render each plot type once from synthetic random-walk data.
+    try:
+        # History: 100 daily observations of a cumulative-sum random walk.
+        history_index = pd.date_range(start='2023-01-01', periods=100, freq='D')
+        walk = np.random.randn(100).cumsum()
+        history = pd.DataFrame({'time': history_index, 'value': walk})
+
+        # Forecast: 30 daily steps starting the day after the last observation,
+        # continuing from the final history value.
+        horizon_index = pd.date_range(start='2023-04-11', periods=30, freq='D')
+        predicted = np.random.randn(30).cumsum() + walk[-1]
+        band_low = predicted - np.abs(np.random.randn(30).cumsum())
+        band_high = predicted + np.abs(np.random.randn(30).cumsum())
+        prediction_frame = pd.DataFrame({
+            'time': horizon_index,
+            'forecast': predicted,
+            'lower': band_low,
+            'upper': band_high,
+        })
+
+        # History + forecast + shaded band
+        plot_time_series(
+            history,
+            prediction_frame,
+            confidence_interval=(0.05, 0.95),
+            title="Example Time Series Forecast",
+        )
+        plt.show()
+
+        # Residuals: white noise indexed by the history dates
+        noise = pd.Series(np.random.randn(100), index=history_index)
+        plot_residuals(noise, title="Example Residual Plot")
+        plt.show()
+
+        # Autocorrelation of the history values
+        plot_acf(history['value'], lags=20, title="Example ACF Plot")
+        plt.show()
+
+        # Partial autocorrelation of the history values
+        plot_pacf(history['value'], lags=20, title="Example PACF Plot")
+        plt.show()
+
+    except ValueError as err:
+        print(f"ValueError: {err}")
+    except TypeError as err:
+        print(f"TypeError: {err}")
+    except Exception as err:
+        print(f"An unexpected error occurred: {err}")