Initial commit

2025-11-29 18:51:02 +08:00
commit b092d0393c
11 changed files with 371 additions and 0 deletions
--- a/skills/clustering-algorithm-runner/assets/clustering_visualization.ipynb
+++ b/skills/clustering-algorithm-runner/assets/clustering_visualization.ipynb
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+"""
+Clustering Visualization Notebook
+
+This notebook demonstrates how to visualize clustering results using matplotlib and seaborn.
+
+It assumes that you have already run a clustering algorithm and have cluster assignments for your data.
+
+Instructions:
+1.  Replace the placeholder data loading and clustering results with your actual data.
+2.  Adjust the visualization parameters (e.g., colors, markers) to suit your data and preferences.
+3.  Experiment with different visualization techniques to gain insights into your clustering results.
+"""
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pandas as pd
+import numpy as np  # Import numpy
+
+# --- Placeholder: Load your data here ---
+# Replace this with your actual data loading code
+# For example:
+# data = pd.read_csv("your_data.csv")
+# features = data[["feature1", "feature2"]] # Select the features used for clustering
+
+# Sample data for the demo; this runs unconditionally and overwrites `features`, so remove it once you load your own data
+np.random.seed(42)  # for reproducibility
+num_samples = 100
+features = pd.DataFrame({
+    'feature1': np.random.rand(num_samples),
+    'feature2': np.random.rand(num_samples)
+})
+
+# --- Placeholder: Load your clustering results here ---
+# Replace this with your actual cluster assignments
+# For example:
+# cluster_labels = model.labels_  # Assuming you used scikit-learn
+# Or:
+# cluster_labels = your_clustering_function(features)
+
+# Sample labels for the demo; this runs unconditionally and overwrites `cluster_labels`, so remove it once you load your own
+num_clusters = 3
+cluster_labels = np.random.randint(0, num_clusters, num_samples)
+
+
+# --- Create a DataFrame for visualization ---
+df = features.copy()
+df['cluster'] = cluster_labels
+
+# --- Visualization using matplotlib ---
+plt.figure(figsize=(8, 6))
+plt.title("Clustering Visualization (Matplotlib)")
+
+# Define colors for each cluster
+colors = ['red', 'green', 'blue', 'purple', 'orange'] # Add more colors if needed
+
+for cluster_id in df['cluster'].unique():
+    cluster_data = df[df['cluster'] == cluster_id]
+    plt.scatter(cluster_data['feature1'], cluster_data['feature2'],
+                color=colors[cluster_id % len(colors)],  # Cycle through colors
+                label=f'Cluster {cluster_id}')
+
+plt.xlabel("Feature 1")
+plt.ylabel("Feature 2")
+plt.legend()
+plt.show()
+
+
+# --- Visualization using seaborn ---
+plt.figure(figsize=(8, 6))
+sns.scatterplot(data=df, x='feature1', y='feature2', hue='cluster', palette='viridis')  # hue colors points by cluster label
+plt.title("Clustering Visualization (Seaborn)")
+plt.show()
+
+# --- Additional Visualizations ---
+# You can add more visualizations here, such as:
+# - Pair plots
+# - Box plots
+# - Histograms
+
+# Example: Pair plot
+# sns.pairplot(df, hue='cluster')
+# plt.show()
+
+# --- Summary and Interpretation ---
+# Add your interpretation of the clustering results here.
+# For example:
+# "The clustering algorithm has successfully separated the data into distinct groups based on feature1 and feature2."
+# "Cluster 0 represents the group with low values for both feature1 and feature2."
+# "Cluster 1 represents the group with high values for feature1 and low values for feature2."
+# "Cluster 2 represents the group with high values for both feature1 and feature2."
+
+# --- Next Steps ---
+# Consider the following next steps:
+# - Evaluate the clustering performance using metrics like silhouette score or Davies-Bouldin index.
+# - Tune the clustering algorithm parameters to improve the results.
+# - Investigate the characteristics of each cluster to gain insights into the data.
+# - Use the clustering results for downstream tasks, such as customer segmentation or anomaly detection.