113 lines
3.3 KiB
Python
113 lines
3.3 KiB
Python
#!/usr/bin/env -S uv --quiet run --active --script
|
|
# /// script
|
|
# requires-python = ">=3.11"
|
|
# dependencies = [
|
|
# "pandas>=2.0.0",
|
|
# "matplotlib>=3.7.0",
|
|
# "rich>=13.0.0",
|
|
# ]
|
|
# [tool.uv]
|
|
# exclude-newer = "2025-10-24T00:00:00Z" # Time-based reproducibility
|
|
# ///
|
|
|
|
"""Data Analysis Script Example
|
|
|
|
This script demonstrates PEP 723 inline script metadata with uv.
|
|
It analyzes CSV data and creates visualizations.
|
|
|
|
Usage:
|
|
# Make executable and run
|
|
chmod +x data_analysis.py
|
|
./data_analysis.py data.csv
|
|
|
|
# Or run directly with uv
|
|
uv run data_analysis.py data.csv
|
|
|
|
# With additional dependencies
|
|
uv run --with seaborn data_analysis.py data.csv
|
|
"""
|
|
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
import matplotlib.pyplot as plt
|
|
import pandas as pd
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
console = Console()
|
|
|
|
|
|
def analyze_data(csv_path: str) -> None:
|
|
"""Analyze CSV data and display summary statistics."""
|
|
# Load data
|
|
df = pd.read_csv(csv_path)
|
|
|
|
console.print(f"[bold green]Analyzing {csv_path}[/bold green]")
|
|
console.print(f"Shape: {df.shape[0]} rows x {df.shape[1]} columns\n")
|
|
|
|
# Display summary statistics in rich table
|
|
table = Table(title="Summary Statistics")
|
|
table.add_column("Column", style="cyan")
|
|
table.add_column("Mean", style="magenta")
|
|
table.add_column("Std", style="green")
|
|
table.add_column("Min", style="yellow")
|
|
table.add_column("Max", style="red")
|
|
|
|
for col in df.select_dtypes(include=["number"]).columns:
|
|
table.add_row(
|
|
col, f"{df[col].mean():.2f}", f"{df[col].std():.2f}", f"{df[col].min():.2f}", f"{df[col].max():.2f}"
|
|
)
|
|
|
|
console.print(table)
|
|
|
|
# Create visualization
|
|
_fig, axes = plt.subplots(1, 2, figsize=(12, 5))
|
|
|
|
# Plot 1: Distribution of first numeric column
|
|
numeric_cols = df.select_dtypes(include=["number"]).columns
|
|
if len(numeric_cols) > 0:
|
|
df[numeric_cols[0]].hist(ax=axes[0], bins=30)
|
|
axes[0].set_title(f"Distribution of {numeric_cols[0]}")
|
|
axes[0].set_xlabel(numeric_cols[0])
|
|
axes[0].set_ylabel("Frequency")
|
|
|
|
# Plot 2: Correlation heatmap
|
|
if len(numeric_cols) > 1:
|
|
corr = df.loc[:, numeric_cols].corr()
|
|
im = axes[1].imshow(corr, cmap="coolwarm", aspect="auto")
|
|
axes[1].set_xticks(range(len(numeric_cols)))
|
|
axes[1].set_yticks(range(len(numeric_cols)))
|
|
axes[1].set_xticklabels(numeric_cols, rotation=45)
|
|
axes[1].set_yticklabels(numeric_cols)
|
|
axes[1].set_title("Correlation Matrix")
|
|
plt.colorbar(im, ax=axes[1])
|
|
|
|
plt.tight_layout()
|
|
output_path = Path(csv_path).stem + "_analysis.png"
|
|
plt.savefig(output_path, dpi=150)
|
|
console.print(f"\n[bold green]✓[/bold green] Saved visualization to {output_path}")
|
|
|
|
|
|
def main() -> None:
|
|
"""Main entry point."""
|
|
if len(sys.argv) < 2:
|
|
console.print("[bold red]Error:[/bold red] Please provide CSV file path")
|
|
console.print("\nUsage: uv run data_analysis.py <csv_file>")
|
|
sys.exit(1)
|
|
|
|
csv_path = sys.argv[1]
|
|
if not Path(csv_path).exists():
|
|
console.print(f"[bold red]Error:[/bold red] File '{csv_path}' not found")
|
|
sys.exit(1)
|
|
|
|
try:
|
|
analyze_data(csv_path)
|
|
except Exception as e:
|
|
console.print(f"[bold red]Error:[/bold red] {e}")
|
|
sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|