Initial commit
This commit is contained in:
822
skills/seaborn/references/examples.md
Normal file
822
skills/seaborn/references/examples.md
Normal file
@@ -0,0 +1,822 @@
|
||||
# Seaborn Common Use Cases and Examples
|
||||
|
||||
This document provides practical examples for common data visualization scenarios using seaborn.
|
||||
|
||||
## Exploratory Data Analysis
|
||||
|
||||
### Quick Dataset Overview
|
||||
|
||||
```python
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
# Load data
|
||||
df = pd.read_csv('data.csv')
|
||||
|
||||
# Pairwise relationships for all numeric variables
|
||||
sns.pairplot(df, hue='target_variable', corner=True, diag_kind='kde')
|
||||
plt.suptitle('Dataset Overview', y=1.01)
|
||||
plt.savefig('overview.png', dpi=300, bbox_inches='tight')
|
||||
```
|
||||
|
||||
### Distribution Exploration
|
||||
|
||||
```python
|
||||
# Multiple distributions across categories
|
||||
g = sns.displot(
|
||||
data=df,
|
||||
x='measurement',
|
||||
hue='condition',
|
||||
col='timepoint',
|
||||
kind='kde',
|
||||
fill=True,
|
||||
height=3,
|
||||
aspect=1.5,
|
||||
col_wrap=3,
|
||||
common_norm=False
|
||||
)
|
||||
g.set_axis_labels('Measurement Value', 'Density')
|
||||
g.set_titles('{col_name}')
|
||||
```
|
||||
|
||||
### Correlation Analysis
|
||||
|
||||
```python
|
||||
# Compute correlation matrix
|
||||
corr = df.select_dtypes(include='number').corr()
|
||||
|
||||
# Create mask for upper triangle
|
||||
mask = np.triu(np.ones_like(corr, dtype=bool))
|
||||
|
||||
# Plot heatmap
|
||||
fig, ax = plt.subplots(figsize=(10, 8))
|
||||
sns.heatmap(
|
||||
corr,
|
||||
mask=mask,
|
||||
annot=True,
|
||||
fmt='.2f',
|
||||
cmap='coolwarm',
|
||||
center=0,
|
||||
square=True,
|
||||
linewidths=1,
|
||||
cbar_kws={'shrink': 0.8}
|
||||
)
|
||||
plt.title('Correlation Matrix')
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
## Scientific Publications
|
||||
|
||||
### Multi-Panel Figure with Different Plot Types
|
||||
|
||||
```python
|
||||
# Set publication style
|
||||
sns.set_theme(style='ticks', context='paper', font_scale=1.1)
|
||||
sns.set_palette('colorblind')
|
||||
|
||||
# Create figure with custom layout
|
||||
fig = plt.figure(figsize=(12, 8))
|
||||
gs = fig.add_gridspec(2, 3, hspace=0.3, wspace=0.3)
|
||||
|
||||
# Panel A: Time series
|
||||
ax1 = fig.add_subplot(gs[0, :2])
|
||||
sns.lineplot(
|
||||
data=timeseries_df,
|
||||
x='time',
|
||||
y='expression',
|
||||
hue='gene',
|
||||
style='treatment',
|
||||
markers=True,
|
||||
dashes=False,
|
||||
ax=ax1
|
||||
)
|
||||
ax1.set_title('A. Gene Expression Over Time', loc='left', fontweight='bold')
|
||||
ax1.set_xlabel('Time (hours)')
|
||||
ax1.set_ylabel('Expression Level (AU)')
|
||||
|
||||
# Panel B: Distribution comparison
|
||||
ax2 = fig.add_subplot(gs[0, 2])
|
||||
sns.violinplot(
|
||||
data=expression_df,
|
||||
x='treatment',
|
||||
y='expression',
|
||||
inner='box',
|
||||
ax=ax2
|
||||
)
|
||||
ax2.set_title('B. Expression Distribution', loc='left', fontweight='bold')
|
||||
ax2.set_xlabel('Treatment')
|
||||
ax2.set_ylabel('')
|
||||
|
||||
# Panel C: Correlation
|
||||
ax3 = fig.add_subplot(gs[1, 0])
|
||||
sns.scatterplot(
|
||||
data=correlation_df,
|
||||
x='gene1',
|
||||
y='gene2',
|
||||
hue='cell_type',
|
||||
alpha=0.6,
|
||||
ax=ax3
|
||||
)
|
||||
sns.regplot(
|
||||
data=correlation_df,
|
||||
x='gene1',
|
||||
y='gene2',
|
||||
scatter=False,
|
||||
color='black',
|
||||
ax=ax3
|
||||
)
|
||||
ax3.set_title('C. Gene Correlation', loc='left', fontweight='bold')
|
||||
ax3.set_xlabel('Gene 1 Expression')
|
||||
ax3.set_ylabel('Gene 2 Expression')
|
||||
|
||||
# Panel D: Heatmap
|
||||
ax4 = fig.add_subplot(gs[1, 1:])
|
||||
sns.heatmap(
|
||||
sample_matrix,
|
||||
cmap='RdBu_r',
|
||||
center=0,
|
||||
annot=True,
|
||||
fmt='.1f',
|
||||
cbar_kws={'label': 'Log2 Fold Change'},
|
||||
ax=ax4
|
||||
)
|
||||
ax4.set_title('D. Treatment Effects', loc='left', fontweight='bold')
|
||||
ax4.set_xlabel('Sample')
|
||||
ax4.set_ylabel('Gene')
|
||||
|
||||
# Clean up
|
||||
sns.despine()
|
||||
plt.savefig('figure.pdf', dpi=300, bbox_inches='tight')
|
||||
plt.savefig('figure.png', dpi=300, bbox_inches='tight')
|
||||
```
|
||||
|
||||
### Box Plot with Significance Annotations
|
||||
|
||||
```python
|
||||
import numpy as np
|
||||
from scipy import stats
|
||||
|
||||
# Create plot
|
||||
fig, ax = plt.subplots(figsize=(8, 6))
|
||||
sns.boxplot(
|
||||
data=df,
|
||||
x='treatment',
|
||||
y='response',
|
||||
order=['Control', 'Low', 'Medium', 'High'],
|
||||
palette='Set2',
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Add individual points
|
||||
sns.stripplot(
|
||||
data=df,
|
||||
x='treatment',
|
||||
y='response',
|
||||
order=['Control', 'Low', 'Medium', 'High'],
|
||||
color='black',
|
||||
alpha=0.3,
|
||||
size=3,
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Add significance bars
|
||||
def add_significance_bar(ax, x1, x2, y, h, text):
|
||||
ax.plot([x1, x1, x2, x2], [y, y+h, y+h, y], 'k-', lw=1.5)
|
||||
ax.text((x1+x2)/2, y+h, text, ha='center', va='bottom')
|
||||
|
||||
y_max = df['response'].max()
|
||||
add_significance_bar(ax, 0, 3, y_max + 1, 0.5, '***')
|
||||
add_significance_bar(ax, 0, 1, y_max + 3, 0.5, 'ns')
|
||||
|
||||
ax.set_ylabel('Response (μM)')
|
||||
ax.set_xlabel('Treatment Condition')
|
||||
ax.set_title('Treatment Response Analysis')
|
||||
sns.despine()
|
||||
```
|
||||
|
||||
## Time Series Analysis
|
||||
|
||||
### Multiple Time Series with Confidence Bands
|
||||
|
||||
```python
|
||||
# Plot with automatic aggregation
|
||||
fig, ax = plt.subplots(figsize=(10, 6))
|
||||
sns.lineplot(
|
||||
data=timeseries_df,
|
||||
x='timestamp',
|
||||
y='value',
|
||||
hue='sensor',
|
||||
style='location',
|
||||
markers=True,
|
||||
dashes=False,
|
||||
errorbar=('ci', 95),
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Customize
|
||||
ax.set_xlabel('Date')
|
||||
ax.set_ylabel('Measurement (units)')
|
||||
ax.set_title('Sensor Measurements Over Time')
|
||||
ax.legend(title='Sensor & Location', bbox_to_anchor=(1.05, 1), loc='upper left')
|
||||
|
||||
# Format x-axis for dates
|
||||
import matplotlib.dates as mdates
|
||||
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
|
||||
ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
|
||||
plt.xticks(rotation=45, ha='right')
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
### Faceted Time Series
|
||||
|
||||
```python
|
||||
# Create faceted time series
|
||||
g = sns.relplot(
|
||||
data=long_timeseries,
|
||||
x='date',
|
||||
y='measurement',
|
||||
hue='device',
|
||||
col='location',
|
||||
row='metric',
|
||||
kind='line',
|
||||
height=3,
|
||||
aspect=2,
|
||||
errorbar='sd',
|
||||
facet_kws={'sharex': True, 'sharey': False}
|
||||
)
|
||||
|
||||
# Customize facet titles
|
||||
g.set_titles('{row_name} - {col_name}')
|
||||
g.set_axis_labels('Date', 'Value')
|
||||
|
||||
# Rotate x-axis labels
|
||||
for ax in g.axes.flat:
|
||||
ax.tick_params(axis='x', rotation=45)
|
||||
|
||||
g.tight_layout()
|
||||
```
|
||||
|
||||
## Categorical Comparisons
|
||||
|
||||
### Nested Categorical Variables
|
||||
|
||||
```python
|
||||
# Create figure
|
||||
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
||||
|
||||
# Left panel: Grouped bar plot
|
||||
sns.barplot(
|
||||
data=df,
|
||||
x='category',
|
||||
y='value',
|
||||
hue='subcategory',
|
||||
errorbar=('ci', 95),
|
||||
capsize=0.1,
|
||||
ax=axes[0]
|
||||
)
|
||||
axes[0].set_title('Mean Values with 95% CI')
|
||||
axes[0].set_ylabel('Value (units)')
|
||||
axes[0].legend(title='Subcategory')
|
||||
|
||||
# Right panel: Strip + violin plot
|
||||
sns.violinplot(
|
||||
data=df,
|
||||
x='category',
|
||||
y='value',
|
||||
hue='subcategory',
|
||||
inner=None,
|
||||
alpha=0.3,
|
||||
ax=axes[1]
|
||||
)
|
||||
sns.stripplot(
|
||||
data=df,
|
||||
x='category',
|
||||
y='value',
|
||||
hue='subcategory',
|
||||
dodge=True,
|
||||
size=3,
|
||||
alpha=0.6,
|
||||
ax=axes[1]
|
||||
)
|
||||
axes[1].set_title('Distribution of Individual Values')
|
||||
axes[1].set_ylabel('')
|
||||
axes[1].get_legend().remove()
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
### Point Plot for Trends
|
||||
|
||||
```python
|
||||
# Show how values change across categories
|
||||
sns.pointplot(
|
||||
data=df,
|
||||
x='timepoint',
|
||||
y='score',
|
||||
hue='treatment',
|
||||
markers=['o', 's', '^'],
|
||||
linestyles=['-', '--', '-.'],
|
||||
dodge=0.3,
|
||||
capsize=0.1,
|
||||
errorbar=('ci', 95)
|
||||
)
|
||||
|
||||
plt.xlabel('Timepoint')
|
||||
plt.ylabel('Performance Score')
|
||||
plt.title('Treatment Effects Over Time')
|
||||
plt.legend(title='Treatment', bbox_to_anchor=(1.05, 1), loc='upper left')
|
||||
sns.despine()
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
## Regression and Relationships
|
||||
|
||||
### Linear Regression with Facets
|
||||
|
||||
```python
|
||||
# Fit separate regressions for each category
|
||||
g = sns.lmplot(
|
||||
data=df,
|
||||
x='predictor',
|
||||
y='response',
|
||||
hue='treatment',
|
||||
col='cell_line',
|
||||
height=4,
|
||||
aspect=1.2,
|
||||
scatter_kws={'alpha': 0.5, 's': 50},
|
||||
ci=95,
|
||||
palette='Set2'
|
||||
)
|
||||
|
||||
g.set_axis_labels('Predictor Variable', 'Response Variable')
|
||||
g.set_titles('{col_name}')
|
||||
g.tight_layout()
|
||||
```
|
||||
|
||||
### Polynomial Regression
|
||||
|
||||
```python
|
||||
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
|
||||
|
||||
for idx, order in enumerate([1, 2, 3]):
|
||||
sns.regplot(
|
||||
data=df,
|
||||
x='x',
|
||||
y='y',
|
||||
order=order,
|
||||
scatter_kws={'alpha': 0.5},
|
||||
line_kws={'color': 'red'},
|
||||
ci=95,
|
||||
ax=axes[idx]
|
||||
)
|
||||
axes[idx].set_title(f'Order {order} Polynomial Fit')
|
||||
axes[idx].set_xlabel('X Variable')
|
||||
axes[idx].set_ylabel('Y Variable')
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
### Residual Analysis
|
||||
|
||||
```python
|
||||
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
|
||||
|
||||
# Main regression
|
||||
sns.regplot(data=df, x='x', y='y', ax=axes[0, 0])
|
||||
axes[0, 0].set_title('Regression Fit')
|
||||
|
||||
# Residuals vs fitted
|
||||
sns.residplot(data=df, x='x', y='y', lowess=True,
|
||||
scatter_kws={'alpha': 0.5},
|
||||
line_kws={'color': 'red', 'lw': 2},
|
||||
ax=axes[0, 1])
|
||||
axes[0, 1].set_title('Residuals vs Fitted')
|
||||
axes[0, 1].axhline(0, ls='--', color='gray')
|
||||
|
||||
# Q-Q plot (using scipy)
|
||||
from scipy import stats as sp_stats
|
||||
residuals = df['y'] - np.poly1d(np.polyfit(df['x'], df['y'], 1))(df['x'])
|
||||
sp_stats.probplot(residuals, dist="norm", plot=axes[1, 0])
|
||||
axes[1, 0].set_title('Q-Q Plot')
|
||||
|
||||
# Histogram of residuals
|
||||
sns.histplot(residuals, kde=True, ax=axes[1, 1])
|
||||
axes[1, 1].set_title('Residual Distribution')
|
||||
axes[1, 1].set_xlabel('Residuals')
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
## Bivariate and Joint Distributions
|
||||
|
||||
### Joint Plot with Multiple Representations
|
||||
|
||||
```python
|
||||
# Scatter with marginals
|
||||
g = sns.jointplot(
|
||||
data=df,
|
||||
x='var1',
|
||||
y='var2',
|
||||
hue='category',
|
||||
kind='scatter',
|
||||
height=8,
|
||||
ratio=4,
|
||||
space=0.1,
|
||||
joint_kws={'alpha': 0.5, 's': 50},
|
||||
marginal_kws={'kde': True, 'bins': 30}
|
||||
)
|
||||
|
||||
# Add reference lines
|
||||
g.ax_joint.axline((0, 0), slope=1, color='r', ls='--', alpha=0.5, label='y=x')
|
||||
g.ax_joint.legend()
|
||||
|
||||
g.set_axis_labels('Variable 1', 'Variable 2', fontsize=12)
|
||||
```
|
||||
|
||||
### KDE Contour Plot
|
||||
|
||||
```python
|
||||
fig, ax = plt.subplots(figsize=(8, 8))
|
||||
|
||||
# Bivariate KDE with filled contours
|
||||
sns.kdeplot(
|
||||
data=df,
|
||||
x='x',
|
||||
y='y',
|
||||
fill=True,
|
||||
levels=10,
|
||||
cmap='viridis',
|
||||
thresh=0.05,
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Overlay scatter
|
||||
sns.scatterplot(
|
||||
data=df,
|
||||
x='x',
|
||||
y='y',
|
||||
color='white',
|
||||
edgecolor='black',
|
||||
s=50,
|
||||
alpha=0.6,
|
||||
ax=ax
|
||||
)
|
||||
|
||||
ax.set_xlabel('X Variable')
|
||||
ax.set_ylabel('Y Variable')
|
||||
ax.set_title('Bivariate Distribution')
|
||||
```
|
||||
|
||||
### Hexbin with Marginals
|
||||
|
||||
```python
|
||||
# For large datasets
|
||||
g = sns.jointplot(
|
||||
data=large_df,
|
||||
x='x',
|
||||
y='y',
|
||||
kind='hex',
|
||||
height=8,
|
||||
ratio=5,
|
||||
space=0.1,
|
||||
joint_kws={'gridsize': 30, 'cmap': 'viridis'},
|
||||
marginal_kws={'bins': 50, 'color': 'skyblue'}
|
||||
)
|
||||
|
||||
g.set_axis_labels('X Variable', 'Y Variable')
|
||||
```
|
||||
|
||||
## Matrix and Heatmap Visualizations
|
||||
|
||||
### Hierarchical Clustering Heatmap
|
||||
|
||||
```python
|
||||
# Prepare data (samples x features)
|
||||
data_matrix = df.set_index('sample_id')[feature_columns]
|
||||
|
||||
# Create color annotations
|
||||
row_colors = df.set_index('sample_id')['condition'].map({
|
||||
'control': '#1f77b4',
|
||||
'treatment': '#ff7f0e'
|
||||
})
|
||||
|
||||
col_colors = pd.Series(['#2ca02c' if 'gene' in col else '#d62728'
|
||||
for col in data_matrix.columns])
|
||||
|
||||
# Plot
|
||||
g = sns.clustermap(
|
||||
data_matrix,
|
||||
method='ward',
|
||||
metric='euclidean',
|
||||
z_score=0, # Normalize rows
|
||||
cmap='RdBu_r',
|
||||
center=0,
|
||||
row_colors=row_colors,
|
||||
col_colors=col_colors,
|
||||
figsize=(12, 10),
|
||||
dendrogram_ratio=(0.1, 0.1),
|
||||
cbar_pos=(0.02, 0.8, 0.03, 0.15),
|
||||
linewidths=0.5
|
||||
)
|
||||
|
||||
g.ax_heatmap.set_xlabel('Features')
|
||||
g.ax_heatmap.set_ylabel('Samples')
|
||||
plt.savefig('clustermap.png', dpi=300, bbox_inches='tight')
|
||||
```
|
||||
|
||||
### Annotated Heatmap with Custom Colorbar
|
||||
|
||||
```python
|
||||
# Pivot data for heatmap
|
||||
pivot_data = df.pivot(index='row_var', columns='col_var', values='value')
|
||||
|
||||
# Create heatmap
|
||||
fig, ax = plt.subplots(figsize=(10, 8))
|
||||
sns.heatmap(
|
||||
pivot_data,
|
||||
annot=True,
|
||||
fmt='.1f',
|
||||
cmap='RdYlGn',
|
||||
center=pivot_data.mean().mean(),
|
||||
vmin=pivot_data.min().min(),
|
||||
vmax=pivot_data.max().max(),
|
||||
linewidths=0.5,
|
||||
linecolor='gray',
|
||||
cbar_kws={
|
||||
'label': 'Value (units)',
|
||||
'orientation': 'vertical',
|
||||
'shrink': 0.8,
|
||||
'aspect': 20
|
||||
},
|
||||
ax=ax
|
||||
)
|
||||
|
||||
ax.set_title('Variable Relationships', fontsize=14, pad=20)
|
||||
ax.set_xlabel('Column Variable', fontsize=12)
|
||||
ax.set_ylabel('Row Variable', fontsize=12)
|
||||
|
||||
plt.xticks(rotation=45, ha='right')
|
||||
plt.yticks(rotation=0)
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
## Statistical Comparisons
|
||||
|
||||
### Before/After Comparison
|
||||
|
||||
```python
|
||||
# Reshape data for paired comparison
|
||||
df_paired = df.melt(
|
||||
id_vars='subject',
|
||||
value_vars=['before', 'after'],
|
||||
var_name='timepoint',
|
||||
value_name='measurement'
|
||||
)
|
||||
|
||||
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
|
||||
|
||||
# Left: Individual trajectories
|
||||
for subject in df_paired['subject'].unique():
|
||||
subject_data = df_paired[df_paired['subject'] == subject]
|
||||
axes[0].plot(subject_data['timepoint'], subject_data['measurement'],
|
||||
'o-', alpha=0.3, color='gray')
|
||||
|
||||
sns.pointplot(
|
||||
data=df_paired,
|
||||
x='timepoint',
|
||||
y='measurement',
|
||||
color='red',
|
||||
markers='D',
|
||||
scale=1.5,
|
||||
errorbar=('ci', 95),
|
||||
capsize=0.2,
|
||||
ax=axes[0]
|
||||
)
|
||||
axes[0].set_title('Individual Changes')
|
||||
axes[0].set_ylabel('Measurement')
|
||||
|
||||
# Right: Distribution comparison
|
||||
sns.violinplot(
|
||||
data=df_paired,
|
||||
x='timepoint',
|
||||
y='measurement',
|
||||
inner='box',
|
||||
ax=axes[1]
|
||||
)
|
||||
sns.swarmplot(
|
||||
data=df_paired,
|
||||
x='timepoint',
|
||||
y='measurement',
|
||||
color='black',
|
||||
alpha=0.5,
|
||||
size=3,
|
||||
ax=axes[1]
|
||||
)
|
||||
axes[1].set_title('Distribution Comparison')
|
||||
axes[1].set_ylabel('')
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
### Dose-Response Curve
|
||||
|
||||
```python
|
||||
# Create dose-response plot
|
||||
fig, ax = plt.subplots(figsize=(8, 6))
|
||||
|
||||
# Plot individual points
|
||||
sns.stripplot(
|
||||
data=dose_df,
|
||||
x='dose',
|
||||
y='response',
|
||||
order=sorted(dose_df['dose'].unique()),
|
||||
color='gray',
|
||||
alpha=0.3,
|
||||
jitter=0.2,
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Overlay mean with CI
|
||||
sns.pointplot(
|
||||
data=dose_df,
|
||||
x='dose',
|
||||
y='response',
|
||||
order=sorted(dose_df['dose'].unique()),
|
||||
color='blue',
|
||||
markers='o',
|
||||
scale=1.2,
|
||||
errorbar=('ci', 95),
|
||||
capsize=0.1,
|
||||
ax=ax
|
||||
)
|
||||
|
||||
# Fit sigmoid curve
|
||||
from scipy.optimize import curve_fit
|
||||
|
||||
def sigmoid(x, bottom, top, ec50, hill):
|
||||
return bottom + (top - bottom) / (1 + (ec50 / x) ** hill)
|
||||
|
||||
doses_numeric = dose_df['dose'].astype(float)
|
||||
params, _ = curve_fit(sigmoid, doses_numeric, dose_df['response'])
|
||||
|
||||
x_smooth = np.logspace(np.log10(doses_numeric.min()),
|
||||
np.log10(doses_numeric.max()), 100)
|
||||
y_smooth = sigmoid(x_smooth, *params)
|
||||
|
||||
ax.plot(range(len(sorted(dose_df['dose'].unique()))),
|
||||
sigmoid(sorted(doses_numeric.unique()), *params),
|
||||
'r-', linewidth=2, label='Sigmoid Fit')
|
||||
|
||||
ax.set_xlabel('Dose')
|
||||
ax.set_ylabel('Response')
|
||||
ax.set_title('Dose-Response Analysis')
|
||||
ax.legend()
|
||||
sns.despine()
|
||||
```
|
||||
|
||||
## Custom Styling
|
||||
|
||||
### Custom Color Palette from Hex Codes
|
||||
|
||||
```python
|
||||
# Define custom palette
|
||||
custom_palette = ['#E64B35', '#4DBBD5', '#00A087', '#3C5488', '#F39B7F']
|
||||
sns.set_palette(custom_palette)
|
||||
|
||||
# Or use for specific plot
|
||||
sns.scatterplot(
|
||||
data=df,
|
||||
x='x',
|
||||
y='y',
|
||||
hue='category',
|
||||
palette=custom_palette
|
||||
)
|
||||
```
|
||||
|
||||
### Publication-Ready Theme
|
||||
|
||||
```python
|
||||
# Set comprehensive theme
|
||||
sns.set_theme(
|
||||
context='paper',
|
||||
style='ticks',
|
||||
palette='colorblind',
|
||||
font='Arial',
|
||||
font_scale=1.1,
|
||||
rc={
|
||||
'figure.dpi': 300,
|
||||
'savefig.dpi': 300,
|
||||
'savefig.format': 'pdf',
|
||||
'axes.linewidth': 1.0,
|
||||
'axes.labelweight': 'bold',
|
||||
'xtick.major.width': 1.0,
|
||||
'ytick.major.width': 1.0,
|
||||
'xtick.direction': 'out',
|
||||
'ytick.direction': 'out',
|
||||
'legend.frameon': False,
|
||||
'pdf.fonttype': 42, # True Type fonts for PDFs
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
### Diverging Colormap Centered on Zero
|
||||
|
||||
```python
|
||||
# For data with meaningful zero point (e.g., log fold change)
|
||||
from matplotlib.colors import TwoSlopeNorm
|
||||
|
||||
# Find data range
|
||||
vmin, vmax = df['value'].min(), df['value'].max()
|
||||
vcenter = 0
|
||||
|
||||
# Create norm
|
||||
norm = TwoSlopeNorm(vmin=vmin, vcenter=vcenter, vmax=vmax)
|
||||
|
||||
# Plot
|
||||
sns.heatmap(
|
||||
pivot_data,
|
||||
cmap='RdBu_r',
|
||||
norm=norm,
|
||||
center=0,
|
||||
annot=True,
|
||||
fmt='.2f'
|
||||
)
|
||||
```
|
||||
|
||||
## Large Datasets
|
||||
|
||||
### Downsampling Strategy
|
||||
|
||||
```python
|
||||
# For very large datasets, sample intelligently
|
||||
def smart_sample(df, target_size=10000, category_col=None):
|
||||
if len(df) <= target_size:
|
||||
return df
|
||||
|
||||
if category_col:
|
||||
# Stratified sampling
|
||||
return df.groupby(category_col, group_keys=False).apply(
|
||||
lambda x: x.sample(min(len(x), target_size // df[category_col].nunique()))
|
||||
)
|
||||
else:
|
||||
# Simple random sampling
|
||||
return df.sample(target_size)
|
||||
|
||||
# Use sampled data for visualization
|
||||
df_sampled = smart_sample(large_df, target_size=5000, category_col='category')
|
||||
|
||||
sns.scatterplot(data=df_sampled, x='x', y='y', hue='category', alpha=0.5)
|
||||
```
|
||||
|
||||
### Hexbin for Dense Scatter Plots
|
||||
|
||||
```python
|
||||
# For millions of points
|
||||
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
|
||||
|
||||
# Regular scatter (slow)
|
||||
axes[0].scatter(df['x'], df['y'], alpha=0.1, s=1)
|
||||
axes[0].set_title('Scatter (all points)')
|
||||
|
||||
# Hexbin (fast)
|
||||
hb = axes[1].hexbin(df['x'], df['y'], gridsize=50, cmap='viridis', mincnt=1)
|
||||
axes[1].set_title('Hexbin Aggregation')
|
||||
plt.colorbar(hb, ax=axes[1], label='Count')
|
||||
|
||||
plt.tight_layout()
|
||||
```
|
||||
|
||||
## Interactive Elements for Notebooks
|
||||
|
||||
### Adjustable Parameters
|
||||
|
||||
```python
|
||||
from ipywidgets import interact, FloatSlider
|
||||
|
||||
@interact(bandwidth=FloatSlider(min=0.1, max=3.0, step=0.1, value=1.0))
|
||||
def plot_kde(bandwidth):
|
||||
plt.figure(figsize=(10, 6))
|
||||
sns.kdeplot(data=df, x='value', hue='category',
|
||||
bw_adjust=bandwidth, fill=True)
|
||||
plt.title(f'KDE with bandwidth adjustment = {bandwidth}')
|
||||
plt.show()
|
||||
```
|
||||
|
||||
### Dynamic Filtering
|
||||
|
||||
```python
|
||||
from ipywidgets import interact, SelectMultiple
|
||||
|
||||
categories = df['category'].unique().tolist()
|
||||
|
||||
@interact(selected=SelectMultiple(options=categories, value=[categories[0]]))
|
||||
def filtered_plot(selected):
|
||||
filtered_df = df[df['category'].isin(selected)]
|
||||
|
||||
fig, ax = plt.subplots(figsize=(10, 6))
|
||||
sns.violinplot(data=filtered_df, x='category', y='value', ax=ax)
|
||||
ax.set_title(f'Showing {len(selected)} categories')
|
||||
plt.show()
|
||||
```
|
||||
770
skills/seaborn/references/function_reference.md
Normal file
770
skills/seaborn/references/function_reference.md
Normal file
@@ -0,0 +1,770 @@
|
||||
# Seaborn Function Reference
|
||||
|
||||
This document provides a comprehensive reference for all major seaborn functions, organized by category.
|
||||
|
||||
## Relational Plots
|
||||
|
||||
### scatterplot()
|
||||
|
||||
**Purpose:** Create a scatter plot with points representing individual observations.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict of arrays
|
||||
- `x, y` - Variables for x and y axes
|
||||
- `hue` - Grouping variable for color encoding
|
||||
- `size` - Grouping variable for size encoding
|
||||
- `style` - Grouping variable for marker style
|
||||
- `palette` - Color palette name or list
|
||||
- `hue_order` - Order for categorical hue levels
|
||||
- `hue_norm` - Normalization for numeric hue (tuple or Normalize object)
|
||||
- `sizes` - Size range for size encoding (tuple or dict)
|
||||
- `size_order` - Order for categorical size levels
|
||||
- `size_norm` - Normalization for numeric size
|
||||
- `markers` - Marker style(s) (string, list, or dict)
|
||||
- `style_order` - Order for categorical style levels
|
||||
- `legend` - How to draw legend: "auto", "brief", "full", or False
|
||||
- `ax` - Matplotlib axes to plot on
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.scatterplot(data=df, x='height', y='weight',
|
||||
hue='gender', size='age', style='smoker',
|
||||
palette='Set2', sizes=(20, 200))
|
||||
```
|
||||
|
||||
### lineplot()
|
||||
|
||||
**Purpose:** Draw a line plot with automatic aggregation and confidence intervals for repeated measures.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict of arrays
|
||||
- `x, y` - Variables for x and y axes
|
||||
- `hue` - Grouping variable for color encoding
|
||||
- `size` - Grouping variable for line width
|
||||
- `style` - Grouping variable for line style (dashes)
|
||||
- `units` - Grouping variable for sampling units (no aggregation within units)
|
||||
- `estimator` - Function for aggregating across observations (default: mean)
|
||||
- `errorbar` - Method for error bars: "sd", "se", "pi", ("ci", level), ("pi", level), or None
|
||||
- `n_boot` - Number of bootstrap iterations for CI computation
|
||||
- `seed` - Random seed for reproducible bootstrapping
|
||||
- `sort` - Sort data before plotting
|
||||
- `err_style` - "band" or "bars" for error representation
|
||||
- `err_kws` - Additional parameters for error representation
|
||||
- `markers` - Marker style(s) for emphasizing data points
|
||||
- `dashes` - Dash style(s) for lines
|
||||
- `legend` - How to draw legend
|
||||
- `ax` - Matplotlib axes to plot on
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.lineplot(data=timeseries, x='time', y='signal',
|
||||
hue='condition', style='subject',
|
||||
errorbar=('ci', 95), markers=True)
|
||||
```
|
||||
|
||||
### relplot()
|
||||
|
||||
**Purpose:** Figure-level interface for drawing relational plots (scatter or line) onto a FacetGrid.
|
||||
|
||||
**Key Parameters:**
|
||||
All parameters from `scatterplot()` and `lineplot()`, plus:
|
||||
- `kind` - "scatter" or "line"
|
||||
- `col` - Categorical variable for column facets
|
||||
- `row` - Categorical variable for row facets
|
||||
- `col_wrap` - Wrap columns after this many columns
|
||||
- `col_order` - Order for column facet levels
|
||||
- `row_order` - Order for row facet levels
|
||||
- `height` - Height of each facet in inches
|
||||
- `aspect` - Aspect ratio (width = height * aspect)
|
||||
- `facet_kws` - Additional parameters for FacetGrid
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.relplot(data=df, x='time', y='measurement',
|
||||
hue='treatment', style='batch',
|
||||
col='cell_line', row='timepoint',
|
||||
kind='line', height=3, aspect=1.5)
|
||||
```
|
||||
|
||||
## Distribution Plots
|
||||
|
||||
### histplot()
|
||||
|
||||
**Purpose:** Plot univariate or bivariate histograms with flexible binning.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (y optional for bivariate)
|
||||
- `hue` - Grouping variable
|
||||
- `weights` - Variable for weighting observations
|
||||
- `stat` - Aggregate statistic: "count", "frequency", "probability", "percent", "density"
|
||||
- `bins` - Number of bins, bin edges, or method ("auto", "fd", "doane", "scott", "stone", "rice", "sturges", "sqrt")
|
||||
- `binwidth` - Width of bins (overrides bins)
|
||||
- `binrange` - Range for binning (tuple)
|
||||
- `discrete` - Treat x as discrete (centers bars on values)
|
||||
- `cumulative` - Compute cumulative distribution
|
||||
- `common_bins` - Use same bins for all hue levels
|
||||
- `common_norm` - Normalize across hue levels
|
||||
- `multiple` - How to handle hue: "layer", "dodge", "stack", "fill"
|
||||
- `element` - Visual element: "bars", "step", "poly"
|
||||
- `fill` - Fill bars/elements
|
||||
- `shrink` - Scale bar width (for multiple="dodge")
|
||||
- `kde` - Overlay KDE estimate
|
||||
- `kde_kws` - Parameters for KDE
|
||||
- `line_kws` - Parameters for step/poly elements
|
||||
- `thresh` - Minimum count threshold for bins
|
||||
- `pthresh` - Minimum probability threshold
|
||||
- `pmax` - Maximum probability for color scaling
|
||||
- `log_scale` - Log scale for axis (bool or base)
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.histplot(data=df, x='measurement', hue='condition',
|
||||
stat='density', bins=30, kde=True,
|
||||
multiple='layer', alpha=0.5)
|
||||
```
|
||||
|
||||
### kdeplot()
|
||||
|
||||
**Purpose:** Plot univariate or bivariate kernel density estimates.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (y optional for bivariate)
|
||||
- `hue` - Grouping variable
|
||||
- `weights` - Variable for weighting observations
|
||||
- `palette` - Color palette
|
||||
- `hue_order` - Order for hue levels
|
||||
- `hue_norm` - Normalization for numeric hue
|
||||
- `multiple` - How to handle hue: "layer", "stack", "fill"
|
||||
- `common_norm` - Normalize across hue levels
|
||||
- `common_grid` - Use same grid for all hue levels
|
||||
- `cumulative` - Compute cumulative distribution
|
||||
- `bw_method` - Method for bandwidth: "scott", "silverman", or scalar
|
||||
- `bw_adjust` - Bandwidth multiplier (higher = smoother)
|
||||
- `log_scale` - Log scale for axis
|
||||
- `levels` - Number or values for contour levels (bivariate)
|
||||
- `thresh` - Minimum density threshold for contours
|
||||
- `gridsize` - Grid resolution
|
||||
- `cut` - Extension beyond data extremes (in bandwidth units)
|
||||
- `clip` - Data range for curve (tuple)
|
||||
- `fill` - Fill area under curve/contours
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Univariate
|
||||
sns.kdeplot(data=df, x='measurement', hue='condition',
|
||||
fill=True, common_norm=False, bw_adjust=1.5)
|
||||
|
||||
# Bivariate
|
||||
sns.kdeplot(data=df, x='var1', y='var2',
|
||||
fill=True, levels=10, thresh=0.05)
|
||||
```
|
||||
|
||||
### ecdfplot()
|
||||
|
||||
**Purpose:** Plot empirical cumulative distribution functions.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (specify one)
|
||||
- `hue` - Grouping variable
|
||||
- `weights` - Variable for weighting observations
|
||||
- `stat` - "proportion" or "count"
|
||||
- `complementary` - Plot complementary CDF (1 - ECDF)
|
||||
- `palette` - Color palette
|
||||
- `hue_order` - Order for hue levels
|
||||
- `hue_norm` - Normalization for numeric hue
|
||||
- `log_scale` - Log scale for axis
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.ecdfplot(data=df, x='response_time', hue='treatment',
|
||||
stat='proportion', complementary=False)
|
||||
```
|
||||
|
||||
### rugplot()
|
||||
|
||||
**Purpose:** Plot tick marks showing individual observations along an axis.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variable (specify one)
|
||||
- `hue` - Grouping variable
|
||||
- `height` - Height of ticks (proportion of axis)
|
||||
- `expand_margins` - Add margin space for rug
|
||||
- `palette` - Color palette
|
||||
- `hue_order` - Order for hue levels
|
||||
- `hue_norm` - Normalization for numeric hue
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.rugplot(data=df, x='value', hue='category', height=0.05)
|
||||
```
|
||||
|
||||
### displot()
|
||||
|
||||
**Purpose:** Figure-level interface for distribution plots onto a FacetGrid.
|
||||
|
||||
**Key Parameters:**
|
||||
All parameters from `histplot()`, `kdeplot()`, and `ecdfplot()`, plus:
|
||||
- `kind` - "hist", "kde", "ecdf"
|
||||
- `rug` - Add rug plot on marginal axes
|
||||
- `rug_kws` - Parameters for rug plot
|
||||
- `col` - Categorical variable for column facets
|
||||
- `row` - Categorical variable for row facets
|
||||
- `col_wrap` - Wrap columns
|
||||
- `col_order` - Order for column facets
|
||||
- `row_order` - Order for row facets
|
||||
- `height` - Height of each facet
|
||||
- `aspect` - Aspect ratio
|
||||
- `facet_kws` - Additional parameters for FacetGrid
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.displot(data=df, x='measurement', hue='treatment',
|
||||
col='timepoint', kind='kde', fill=True,
|
||||
height=3, aspect=1.5, rug=True)
|
||||
```
|
||||
|
||||
### jointplot()
|
||||
|
||||
**Purpose:** Draw a bivariate plot with marginal univariate plots.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame
|
||||
- `x, y` - Variables for x and y axes
|
||||
- `hue` - Grouping variable
|
||||
- `kind` - "scatter", "kde", "hist", "hex", "reg", "resid"
|
||||
- `height` - Size of the figure (square)
|
||||
- `ratio` - Ratio of joint to marginal axes
|
||||
- `space` - Space between joint and marginal axes
|
||||
- `dropna` - Drop missing values
|
||||
- `xlim, ylim` - Axis limits (tuples)
|
||||
- `marginal_ticks` - Show ticks on marginal axes
|
||||
- `joint_kws` - Parameters for joint plot
|
||||
- `marginal_kws` - Parameters for marginal plots
|
||||
- `hue_order` - Order for hue levels
|
||||
- `palette` - Color palette
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.jointplot(data=df, x='var1', y='var2', hue='group',
|
||||
kind='scatter', height=6, ratio=4,
|
||||
joint_kws={'alpha': 0.5})
|
||||
```
|
||||
|
||||
### pairplot()
|
||||
|
||||
**Purpose:** Plot pairwise relationships in a dataset.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame
|
||||
- `hue` - Grouping variable for color encoding
|
||||
- `hue_order` - Order for hue levels
|
||||
- `palette` - Color palette
|
||||
- `vars` - Variables to plot (default: all numeric)
|
||||
- `x_vars, y_vars` - Variables for x and y axes (non-square grid)
|
||||
- `kind` - "scatter", "kde", "hist", "reg"
|
||||
- `diag_kind` - "auto", "hist", "kde", None
|
||||
- `markers` - Marker style(s)
|
||||
- `height` - Height of each facet
|
||||
- `aspect` - Aspect ratio
|
||||
- `corner` - Plot only lower triangle
|
||||
- `dropna` - Drop missing values
|
||||
- `plot_kws` - Parameters for non-diagonal plots
|
||||
- `diag_kws` - Parameters for diagonal plots
|
||||
- `grid_kws` - Parameters for PairGrid
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.pairplot(data=df, hue='species', palette='Set2',
|
||||
vars=['sepal_length', 'sepal_width', 'petal_length'],
|
||||
corner=True, height=2.5)
|
||||
```
|
||||
|
||||
## Categorical Plots
|
||||
|
||||
### stripplot()
|
||||
|
||||
**Purpose:** Draw a categorical scatterplot with jittered points.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (one categorical, one continuous)
|
||||
- `hue` - Grouping variable
|
||||
- `order` - Order for categorical levels
|
||||
- `hue_order` - Order for hue levels
|
||||
- `jitter` - Amount of jitter: True, float, or False
|
||||
- `dodge` - Separate hue levels side-by-side
|
||||
- `orient` - "v" or "h" (usually inferred)
|
||||
- `color` - Single color for all elements
|
||||
- `palette` - Color palette
|
||||
- `size` - Marker size
|
||||
- `edgecolor` - Marker edge color
|
||||
- `linewidth` - Marker edge width
|
||||
- `native_scale` - Use numeric scale for categorical axis
|
||||
- `formatter` - Formatter for categorical axis
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.stripplot(data=df, x='day', y='total_bill',
|
||||
hue='sex', dodge=True, jitter=0.2)
|
||||
```
|
||||
|
||||
### swarmplot()
|
||||
|
||||
**Purpose:** Draw a categorical scatterplot with non-overlapping points.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `stripplot()`, except:
|
||||
- No `jitter` parameter
|
||||
- `size` - Marker size (important for avoiding overlap)
|
||||
- `warn_thresh` - Threshold for warning about too many points (default: 0.05)
|
||||
|
||||
**Note:** Computationally intensive for large datasets. Use stripplot for >1000 points.
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.swarmplot(data=df, x='day', y='total_bill',
|
||||
hue='time', dodge=True, size=5)
|
||||
```
|
||||
|
||||
### boxplot()
|
||||
|
||||
**Purpose:** Draw a box plot showing quartiles and outliers.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (one categorical, one continuous)
|
||||
- `hue` - Grouping variable
|
||||
- `order` - Order for categorical levels
|
||||
- `hue_order` - Order for hue levels
|
||||
- `orient` - "v" or "h"
|
||||
- `color` - Single color for boxes
|
||||
- `palette` - Color palette
|
||||
- `saturation` - Color saturation intensity
|
||||
- `width` - Width of boxes
|
||||
- `dodge` - Separate hue levels side-by-side
|
||||
- `fliersize` - Size of outlier markers
|
||||
- `linewidth` - Box line width
|
||||
- `whis` - IQR multiplier for whiskers (default: 1.5)
|
||||
- `notch` - Draw notched boxes
|
||||
- `showcaps` - Show whisker caps
|
||||
- `showmeans` - Show mean value
|
||||
- `meanprops` - Properties for mean marker
|
||||
- `boxprops` - Properties for boxes
|
||||
- `whiskerprops` - Properties for whiskers
|
||||
- `capprops` - Properties for caps
|
||||
- `flierprops` - Properties for outliers
|
||||
- `medianprops` - Properties for median line
|
||||
- `native_scale` - Use numeric scale
|
||||
- `formatter` - Formatter for categorical axis
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.boxplot(data=df, x='day', y='total_bill',
|
||||
hue='smoker', palette='Set3',
|
||||
showmeans=True, notch=True)
|
||||
```
|
||||
|
||||
### violinplot()
|
||||
|
||||
**Purpose:** Draw a violin plot combining boxplot and KDE.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `boxplot()`, plus:
|
||||
- `bw_method` - KDE bandwidth method
|
||||
- `bw_adjust` - KDE bandwidth multiplier
|
||||
- `cut` - KDE extension beyond extremes
|
||||
- `density_norm` - "area", "count", "width"
|
||||
- `inner` - "box", "quartile", "point", "stick", None
|
||||
- `split` - Split violins for hue comparison
|
||||
- `scale` - Scaling method: "area", "count", "width"
|
||||
- `scale_hue` - Scale across hue levels
|
||||
- `gridsize` - KDE grid resolution
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.violinplot(data=df, x='day', y='total_bill',
|
||||
hue='sex', split=True, inner='quartile',
|
||||
palette='muted')
|
||||
```
|
||||
|
||||
### boxenplot()
|
||||
|
||||
**Purpose:** Draw enhanced box plot for larger datasets showing more quantiles.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `boxplot()`, plus:
|
||||
- `k_depth` - "tukey", "proportion", "trustworthy", "full", or int
|
||||
- `outlier_prop` - Proportion of data as outliers
|
||||
- `trust_alpha` - Alpha for trustworthy depth
|
||||
- `showfliers` - Show outlier points
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.boxenplot(data=df, x='day', y='total_bill',
|
||||
hue='time', palette='Set2')
|
||||
```
|
||||
|
||||
### barplot()
|
||||
|
||||
**Purpose:** Draw a bar plot with error bars showing statistical estimates.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame, array, or dict
|
||||
- `x, y` - Variables (one categorical, one continuous)
|
||||
- `hue` - Grouping variable
|
||||
- `order` - Order for categorical levels
|
||||
- `hue_order` - Order for hue levels
|
||||
- `estimator` - Aggregation function (default: mean)
|
||||
- `errorbar` - Error representation: "sd", "se", "pi", ("ci", level), ("pi", level), or None
|
||||
- `n_boot` - Bootstrap iterations
|
||||
- `seed` - Random seed
|
||||
- `units` - Identifier for sampling units
|
||||
- `weights` - Observation weights
|
||||
- `orient` - "v" or "h"
|
||||
- `color` - Single bar color
|
||||
- `palette` - Color palette
|
||||
- `saturation` - Color saturation
|
||||
- `width` - Bar width
|
||||
- `dodge` - Separate hue levels side-by-side
|
||||
- `errcolor` - Error bar color
|
||||
- `errwidth` - Error bar line width
|
||||
- `capsize` - Error bar cap width
|
||||
- `native_scale` - Use numeric scale
|
||||
- `formatter` - Formatter for categorical axis
|
||||
- `legend` - Whether to show legend
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.barplot(data=df, x='day', y='total_bill',
|
||||
hue='sex', estimator='median',
|
||||
errorbar=('ci', 95), capsize=0.1)
|
||||
```
|
||||
|
||||
### countplot()
|
||||
|
||||
**Purpose:** Show counts of observations in each categorical bin.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `barplot()`, but:
|
||||
- Only specify one of x or y (the categorical variable)
|
||||
- No estimator or errorbar (shows counts)
|
||||
- `stat` - "count" or "percent"
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.countplot(data=df, x='day', hue='time',
|
||||
palette='pastel', dodge=True)
|
||||
```
|
||||
|
||||
### pointplot()
|
||||
|
||||
**Purpose:** Show point estimates and confidence intervals with connecting lines.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `barplot()`, plus:
|
||||
- `markers` - Marker style(s)
|
||||
- `linestyles` - Line style(s)
|
||||
- `scale` - Scale for markers
|
||||
- `join` - Connect points with lines
|
||||
- `capsize` - Error bar cap width
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.pointplot(data=df, x='time', y='total_bill',
|
||||
hue='sex', markers=['o', 's'],
|
||||
linestyles=['-', '--'], capsize=0.1)
|
||||
```
|
||||
|
||||
### catplot()
|
||||
|
||||
**Purpose:** Figure-level interface for categorical plots onto a FacetGrid.
|
||||
|
||||
**Key Parameters:**
|
||||
All parameters from categorical plots, plus:
|
||||
- `kind` - "strip", "swarm", "box", "violin", "boxen", "bar", "point", "count"
|
||||
- `col` - Categorical variable for column facets
|
||||
- `row` - Categorical variable for row facets
|
||||
- `col_wrap` - Wrap columns
|
||||
- `col_order` - Order for column facets
|
||||
- `row_order` - Order for row facets
|
||||
- `height` - Height of each facet
|
||||
- `aspect` - Aspect ratio
|
||||
- `sharex, sharey` - Share axes across facets
|
||||
- `legend` - Whether to show legend
|
||||
- `legend_out` - Place legend outside figure
|
||||
- `facet_kws` - Additional FacetGrid parameters
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.catplot(data=df, x='day', y='total_bill',
|
||||
hue='smoker', col='time',
|
||||
kind='violin', split=True,
|
||||
height=4, aspect=0.8)
|
||||
```
|
||||
|
||||
## Regression Plots
|
||||
|
||||
### regplot()
|
||||
|
||||
**Purpose:** Plot data and a linear regression fit.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - DataFrame
|
||||
- `x, y` - Variables or data vectors
|
||||
- `x_estimator` - Apply estimator to x bins
|
||||
- `x_bins` - Bin x for estimator
|
||||
- `x_ci` - CI for binned estimates
|
||||
- `scatter` - Show scatter points
|
||||
- `fit_reg` - Plot regression line
|
||||
- `ci` - CI for regression estimate (int or None)
|
||||
- `n_boot` - Bootstrap iterations for CI
|
||||
- `units` - Identifier for sampling units
|
||||
- `seed` - Random seed
|
||||
- `order` - Polynomial regression order
|
||||
- `logistic` - Fit logistic regression
|
||||
- `lowess` - Fit lowess smoother
|
||||
- `robust` - Fit robust regression
|
||||
- `logx` - Log-transform x
|
||||
- `x_partial, y_partial` - Partial regression (regress out variables)
|
||||
- `truncate` - Limit regression line to data range
|
||||
- `dropna` - Drop missing values
|
||||
- `x_jitter, y_jitter` - Add jitter to data
|
||||
- `label` - Label for legend
|
||||
- `color` - Color for all elements
|
||||
- `marker` - Marker style
|
||||
- `scatter_kws` - Parameters for scatter
|
||||
- `line_kws` - Parameters for regression line
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.regplot(data=df, x='total_bill', y='tip',
|
||||
order=2, robust=True, ci=95,
|
||||
scatter_kws={'alpha': 0.5})
|
||||
```
|
||||
|
||||
### lmplot()
|
||||
|
||||
**Purpose:** Figure-level interface for regression plots onto a FacetGrid.
|
||||
|
||||
**Key Parameters:**
|
||||
All parameters from `regplot()`, plus:
|
||||
- `hue` - Grouping variable
|
||||
- `col` - Column facets
|
||||
- `row` - Row facets
|
||||
- `palette` - Color palette
|
||||
- `col_wrap` - Wrap columns
|
||||
- `height` - Facet height
|
||||
- `aspect` - Aspect ratio
|
||||
- `markers` - Marker style(s)
|
||||
- `sharex, sharey` - Share axes
|
||||
- `hue_order` - Order for hue levels
|
||||
- `col_order` - Order for column facets
|
||||
- `row_order` - Order for row facets
|
||||
- `legend` - Whether to show legend
|
||||
- `legend_out` - Place legend outside
|
||||
- `facet_kws` - FacetGrid parameters
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.lmplot(data=df, x='total_bill', y='tip',
|
||||
hue='smoker', col='time', row='sex',
|
||||
height=3, aspect=1.2, ci=None)
|
||||
```
|
||||
|
||||
### residplot()
|
||||
|
||||
**Purpose:** Plot residuals of a regression.
|
||||
|
||||
**Key Parameters:**
|
||||
Same as `regplot()`, but:
|
||||
- Always plots residuals (y - predicted) vs x
|
||||
- Adds horizontal line at y=0
|
||||
- `lowess` - Fit lowess smoother to residuals
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.residplot(data=df, x='x', y='y', lowess=True,
|
||||
scatter_kws={'alpha': 0.5})
|
||||
```
|
||||
|
||||
## Matrix Plots
|
||||
|
||||
### heatmap()
|
||||
|
||||
**Purpose:** Plot rectangular data as a color-encoded matrix.
|
||||
|
||||
**Key Parameters:**
|
||||
- `data` - 2D array-like data
|
||||
- `vmin, vmax` - Anchor values for colormap
|
||||
- `cmap` - Colormap name or object
|
||||
- `center` - Value at colormap center
|
||||
- `robust` - Use robust quantiles for colormap range
|
||||
- `annot` - Annotate cells: True, False, or array
|
||||
- `fmt` - Format string for annotations (e.g., ".2f")
|
||||
- `annot_kws` - Parameters for annotations
|
||||
- `linewidths` - Width of cell borders
|
||||
- `linecolor` - Color of cell borders
|
||||
- `cbar` - Draw colorbar
|
||||
- `cbar_kws` - Colorbar parameters
|
||||
- `cbar_ax` - Axes for colorbar
|
||||
- `square` - Force square cells
|
||||
- `xticklabels, yticklabels` - Tick labels (True, False, int, or list)
|
||||
- `mask` - Boolean array to mask cells
|
||||
- `ax` - Matplotlib axes
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Correlation matrix
|
||||
corr = df.corr()
|
||||
mask = np.triu(np.ones_like(corr, dtype=bool))
|
||||
sns.heatmap(corr, mask=mask, annot=True, fmt='.2f',
|
||||
cmap='coolwarm', center=0, square=True,
|
||||
linewidths=1, cbar_kws={'shrink': 0.8})
|
||||
```
|
||||
|
||||
### clustermap()
|
||||
|
||||
**Purpose:** Plot a hierarchically-clustered heatmap.
|
||||
|
||||
**Key Parameters:**
|
||||
All parameters from `heatmap()`, plus:
|
||||
- `pivot_kws` - Parameters for pivoting (if needed)
|
||||
- `method` - Linkage method: "single", "complete", "average", "weighted", "centroid", "median", "ward"
|
||||
- `metric` - Distance metric for clustering
|
||||
- `standard_scale` - Standardize data: 0 (rows), 1 (columns), or None
|
||||
- `z_score` - Z-score normalize data: 0 (rows), 1 (columns), or None
|
||||
- `row_cluster, col_cluster` - Cluster rows/columns
|
||||
- `row_linkage, col_linkage` - Precomputed linkage matrices
|
||||
- `row_colors, col_colors` - Additional color annotations
|
||||
- `dendrogram_ratio` - Ratio of dendrogram to heatmap
|
||||
- `colors_ratio` - Ratio of color annotations to heatmap
|
||||
- `cbar_pos` - Colorbar position (tuple: x, y, width, height)
|
||||
- `tree_kws` - Parameters for dendrogram
|
||||
- `figsize` - Figure size
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
sns.clustermap(data, method='average', metric='euclidean',
|
||||
z_score=0, cmap='viridis',
|
||||
row_colors=row_colors, col_colors=col_colors,
|
||||
figsize=(12, 12), dendrogram_ratio=0.1)
|
||||
```
|
||||
|
||||
## Multi-Plot Grids
|
||||
|
||||
### FacetGrid
|
||||
|
||||
**Purpose:** Multi-plot grid for plotting conditional relationships.
|
||||
|
||||
**Initialization:**
|
||||
```python
|
||||
g = sns.FacetGrid(data, row=None, col=None, hue=None,
|
||||
col_wrap=None, sharex=True, sharey=True,
|
||||
height=3, aspect=1, palette=None,
|
||||
row_order=None, col_order=None, hue_order=None,
|
||||
hue_kws=None, dropna=False, legend_out=True,
|
||||
despine=True, margin_titles=False,
|
||||
xlim=None, ylim=None, subplot_kws=None,
|
||||
gridspec_kws=None)
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
- `map(func, *args, **kwargs)` - Apply function to each facet
|
||||
- `map_dataframe(func, *args, **kwargs)` - Apply function with full DataFrame
|
||||
- `set_axis_labels(x_var, y_var)` - Set axis labels
|
||||
- `set_titles(template, **kwargs)` - Set subplot titles
|
||||
- `set(kwargs)` - Set attributes on all axes
|
||||
- `add_legend(legend_data, title, label_order, **kwargs)` - Add legend
|
||||
- `savefig(*args, **kwargs)` - Save figure
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
g = sns.FacetGrid(df, col='time', row='sex', hue='smoker',
|
||||
height=3, aspect=1.5, margin_titles=True)
|
||||
g.map(sns.scatterplot, 'total_bill', 'tip', alpha=0.7)
|
||||
g.add_legend()
|
||||
g.set_axis_labels('Total Bill ($)', 'Tip ($)')
|
||||
g.set_titles('{col_name} | {row_name}')
|
||||
```
|
||||
|
||||
### PairGrid
|
||||
|
||||
**Purpose:** Grid for plotting pairwise relationships in a dataset.
|
||||
|
||||
**Initialization:**
|
||||
```python
|
||||
g = sns.PairGrid(data, hue=None, vars=None,
|
||||
x_vars=None, y_vars=None,
|
||||
hue_order=None, palette=None,
|
||||
hue_kws=None, corner=False,
|
||||
diag_sharey=True, height=2.5,
|
||||
aspect=1, layout_pad=0.5,
|
||||
despine=True, dropna=False)
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
- `map(func, **kwargs)` - Apply function to all subplots
|
||||
- `map_diag(func, **kwargs)` - Apply to diagonal
|
||||
- `map_offdiag(func, **kwargs)` - Apply to off-diagonal
|
||||
- `map_upper(func, **kwargs)` - Apply to upper triangle
|
||||
- `map_lower(func, **kwargs)` - Apply to lower triangle
|
||||
- `add_legend(legend_data, **kwargs)` - Add legend
|
||||
- `savefig(*args, **kwargs)` - Save figure
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
g = sns.PairGrid(df, hue='species', vars=['a', 'b', 'c', 'd'],
|
||||
corner=True, height=2.5)
|
||||
g.map_upper(sns.scatterplot, alpha=0.5)
|
||||
g.map_lower(sns.kdeplot)
|
||||
g.map_diag(sns.histplot, kde=True)
|
||||
g.add_legend()
|
||||
```
|
||||
|
||||
### JointGrid
|
||||
|
||||
**Purpose:** Grid for bivariate plot with marginal univariate plots.
|
||||
|
||||
**Initialization:**
|
||||
```python
|
||||
g = sns.JointGrid(data=None, x=None, y=None, hue=None,
|
||||
height=6, ratio=5, space=0.2,
|
||||
dropna=False, xlim=None, ylim=None,
|
||||
marginal_ticks=False, hue_order=None,
|
||||
palette=None)
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
- `plot(joint_func, marginal_func, **kwargs)` - Plot both joint and marginals
|
||||
- `plot_joint(func, **kwargs)` - Plot joint distribution
|
||||
- `plot_marginals(func, **kwargs)` - Plot marginal distributions
|
||||
- `refline(x, y, **kwargs)` - Add reference line
|
||||
- `set_axis_labels(xlabel, ylabel, **kwargs)` - Set axis labels
|
||||
- `savefig(*args, **kwargs)` - Save figure
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
g = sns.JointGrid(data=df, x='x', y='y', hue='group',
|
||||
height=6, ratio=5, space=0.2)
|
||||
g.plot_joint(sns.scatterplot, alpha=0.5)
|
||||
g.plot_marginals(sns.histplot, kde=True)
|
||||
g.set_axis_labels('Variable X', 'Variable Y')
|
||||
```
|
||||
964
skills/seaborn/references/objects_interface.md
Normal file
964
skills/seaborn/references/objects_interface.md
Normal file
@@ -0,0 +1,964 @@
|
||||
# Seaborn Objects Interface
|
||||
|
||||
The `seaborn.objects` interface provides a modern, declarative API for building visualizations through composition. This guide covers the complete objects interface introduced in seaborn 0.12+.
|
||||
|
||||
## Core Concept
|
||||
|
||||
The objects interface separates **what you want to show** (data and mappings) from **how to show it** (marks, stats, and moves). Build plots by:
|
||||
|
||||
1. Creating a `Plot` object with data and aesthetic mappings
|
||||
2. Adding layers with `.add()` combining marks and statistical transformations
|
||||
3. Customizing with `.scale()`, `.label()`, `.limit()`, `.theme()`, etc.
|
||||
4. Rendering with `.show()` or `.save()`
|
||||
|
||||
## Basic Usage
|
||||
|
||||
```python
|
||||
from seaborn import objects as so
|
||||
import pandas as pd
|
||||
|
||||
# Create plot with data and mappings
|
||||
p = so.Plot(data=df, x='x_var', y='y_var')
|
||||
|
||||
# Add mark (visual representation)
|
||||
p = p.add(so.Dot())
|
||||
|
||||
# Display (automatic in Jupyter)
|
||||
p.show()
|
||||
```
|
||||
|
||||
## Plot Class
|
||||
|
||||
The `Plot` class is the foundation of the objects interface.
|
||||
|
||||
### Initialization
|
||||
|
||||
```python
|
||||
so.Plot(data=None, x=None, y=None, color=None, alpha=None,
|
||||
fill=None, fillalpha=None, fillcolor=None, marker=None,
|
||||
pointsize=None, stroke=None, text=None, **variables)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `data` - DataFrame or dict of data vectors
|
||||
- `x, y` - Variables for position
|
||||
- `color` - Variable for color encoding
|
||||
- `alpha` - Variable for transparency
|
||||
- `marker` - Variable for marker shape
|
||||
- `pointsize` - Variable for point size
|
||||
- `stroke` - Variable for line width
|
||||
- `text` - Variable for text labels
|
||||
- `**variables` - Additional mappings using property names
|
||||
|
||||
**Examples:**
|
||||
```python
|
||||
# Basic mapping
|
||||
so.Plot(df, x='total_bill', y='tip')
|
||||
|
||||
# Multiple mappings
|
||||
so.Plot(df, x='total_bill', y='tip', color='day', pointsize='size')
|
||||
|
||||
# All variables in Plot
|
||||
p = so.Plot(df, x='x', y='y', color='cat')
|
||||
p.add(so.Dot()) # Uses all mappings
|
||||
|
||||
# Some variables in add()
|
||||
p = so.Plot(df, x='x', y='y')
|
||||
p.add(so.Dot(), color='cat') # Only this layer uses color
|
||||
```
|
||||
|
||||
### Methods
|
||||
|
||||
#### add()
|
||||
|
||||
Add a layer to the plot with mark and optional stat/move.
|
||||
|
||||
```python
|
||||
Plot.add(mark, *transforms, orient=None, legend=True, data=None,
|
||||
**variables)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `mark` - Mark object defining visual representation
|
||||
- `*transforms` - Stat and/or Move objects for data transformation
|
||||
- `orient` - "x", "y", or "v"/"h" for orientation
|
||||
- `legend` - Include in legend (True/False)
|
||||
- `data` - Override data for this layer
|
||||
- `**variables` - Override or add variable mappings
|
||||
|
||||
**Examples:**
|
||||
```python
|
||||
# Simple mark
|
||||
p.add(so.Dot())
|
||||
|
||||
# Mark with stat
|
||||
p.add(so.Line(), so.PolyFit(order=2))
|
||||
|
||||
# Mark with multiple transforms
|
||||
p.add(so.Bar(), so.Agg(), so.Dodge())
|
||||
|
||||
# Layer-specific mappings
|
||||
p.add(so.Dot(), color='category')
|
||||
p.add(so.Line(), so.Agg(), color='category')
|
||||
|
||||
# Layer-specific data
|
||||
p.add(so.Dot())
|
||||
p.add(so.Line(), data=summary_df)
|
||||
```
|
||||
|
||||
#### facet()
|
||||
|
||||
Create subplots from categorical variables.
|
||||
|
||||
```python
|
||||
Plot.facet(col=None, row=None, order=None, wrap=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `col` - Variable for column facets
|
||||
- `row` - Variable for row facets
|
||||
- `order` - Dict with facet orders (keys: variable names)
|
||||
- `wrap` - Wrap columns after this many
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.facet(col='time', row='sex')
|
||||
p.facet(col='category', wrap=3)
|
||||
p.facet(col='day', order={'day': ['Thur', 'Fri', 'Sat', 'Sun']})
|
||||
```
|
||||
|
||||
#### pair()
|
||||
|
||||
Create pairwise subplots for multiple variables.
|
||||
|
||||
```python
|
||||
Plot.pair(x=None, y=None, wrap=None, cross=True)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `x` - Variables for x-axis pairings
|
||||
- `y` - Variables for y-axis pairings (if None, uses x)
|
||||
- `wrap` - Wrap after this many columns
|
||||
- `cross` - Include all x/y combinations (vs. only diagonal)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Pairs of all variables
|
||||
p = so.Plot(df).pair(x=['a', 'b', 'c'])
|
||||
p.add(so.Dot())
|
||||
|
||||
# Rectangular grid
|
||||
p = so.Plot(df).pair(x=['a', 'b'], y=['c', 'd'])
|
||||
p.add(so.Dot(), alpha=0.5)
|
||||
```
|
||||
|
||||
#### scale()
|
||||
|
||||
Customize how data maps to visual properties.
|
||||
|
||||
```python
|
||||
Plot.scale(**scales)
|
||||
```
|
||||
|
||||
**Parameters:** Keyword arguments with property names and Scale objects
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.scale(
|
||||
x=so.Continuous().tick(every=5),
|
||||
y=so.Continuous().label(like='{x:.1f}'),
|
||||
color=so.Nominal(['#1f77b4', '#ff7f0e', '#2ca02c']),
|
||||
pointsize=(5, 10) # Shorthand for range
|
||||
)
|
||||
```
|
||||
|
||||
#### limit()
|
||||
|
||||
Set axis limits.
|
||||
|
||||
```python
|
||||
Plot.limit(x=None, y=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `x` - Tuple of (min, max) for x-axis
|
||||
- `y` - Tuple of (min, max) for y-axis
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.limit(x=(0, 100), y=(0, 50))
|
||||
```
|
||||
|
||||
#### label()
|
||||
|
||||
Set axis labels and titles.
|
||||
|
||||
```python
|
||||
Plot.label(x=None, y=None, color=None, title=None, **labels)
|
||||
```
|
||||
|
||||
**Parameters:** Keyword arguments with property names and label strings
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.label(
|
||||
x='Total Bill ($)',
|
||||
y='Tip Amount ($)',
|
||||
color='Day of Week',
|
||||
title='Restaurant Tips Analysis'
|
||||
)
|
||||
```
|
||||
|
||||
#### theme()
|
||||
|
||||
Apply matplotlib style settings.
|
||||
|
||||
```python
|
||||
Plot.theme(config, **kwargs)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `config` - Dict of rcParams or seaborn theme dict
|
||||
- `**kwargs` - Individual rcParams
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# Seaborn theme
|
||||
p.theme({**sns.axes_style('whitegrid'), **sns.plotting_context('talk')})
|
||||
|
||||
# Custom rcParams
|
||||
p.theme({'axes.facecolor': 'white', 'axes.grid': True})
|
||||
|
||||
# Individual parameters
|
||||
p.theme(axes_facecolor='white', font_scale=1.2)
|
||||
```
|
||||
|
||||
#### layout()
|
||||
|
||||
Configure subplot layout.
|
||||
|
||||
```python
|
||||
Plot.layout(size=None, extent=None, engine=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `size` - (width, height) in inches
|
||||
- `extent` - (left, bottom, right, top) for subplots
|
||||
- `engine` - "tight", "constrained", or None
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.layout(size=(10, 6), engine='constrained')
|
||||
```
|
||||
|
||||
#### share()
|
||||
|
||||
Control axis sharing across facets.
|
||||
|
||||
```python
|
||||
Plot.share(x=None, y=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `x` - Share x-axis: True, False, or "col"/"row"
|
||||
- `y` - Share y-axis: True, False, or "col"/"row"
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.share(x=True, y=False) # Share x across all, independent y
|
||||
p.share(x='col') # Share x within columns only
|
||||
```
|
||||
|
||||
#### on()
|
||||
|
||||
Plot on existing matplotlib figure or axes.
|
||||
|
||||
```python
|
||||
Plot.on(target)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `target` - matplotlib Figure or Axes object
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
|
||||
so.Plot(df, x='x', y='y').add(so.Dot()).on(axes[0, 0])
|
||||
so.Plot(df, x='x', y='z').add(so.Line()).on(axes[0, 1])
|
||||
```
|
||||
|
||||
#### show()
|
||||
|
||||
Render and display the plot.
|
||||
|
||||
```python
|
||||
Plot.show(**kwargs)
|
||||
```
|
||||
|
||||
**Parameters:** Passed to `matplotlib.pyplot.show()`
|
||||
|
||||
#### save()
|
||||
|
||||
Save the plot to file.
|
||||
|
||||
```python
|
||||
Plot.save(filename, **kwargs)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `filename` - Output filename
|
||||
- `**kwargs` - Passed to `matplotlib.figure.Figure.savefig()`
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.save('plot.png', dpi=300, bbox_inches='tight')
|
||||
p.save('plot.pdf')
|
||||
```
|
||||
|
||||
## Mark Objects
|
||||
|
||||
Marks define how data is visually represented.
|
||||
|
||||
### Dot
|
||||
|
||||
Points/markers for individual observations.
|
||||
|
||||
```python
|
||||
so.Dot(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Fill color
|
||||
- `alpha` - Transparency
|
||||
- `fillcolor` - Alternate color property
|
||||
- `fillalpha` - Alternate alpha property
|
||||
- `edgecolor` - Edge color
|
||||
- `edgealpha` - Edge transparency
|
||||
- `edgewidth` - Edge line width
|
||||
- `marker` - Marker style
|
||||
- `pointsize` - Marker size
|
||||
- `stroke` - Edge width
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Dot(color='blue', pointsize=10))
|
||||
so.Plot(df, x='x', y='y', color='cat').add(so.Dot(alpha=0.5))
|
||||
```
|
||||
|
||||
### Line
|
||||
|
||||
Lines connecting observations.
|
||||
|
||||
```python
|
||||
so.Line(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Line color
|
||||
- `alpha` - Transparency
|
||||
- `linewidth` - Line width
|
||||
- `linestyle` - Line style ("-", "--", "-.", ":")
|
||||
- `marker` - Marker at data points
|
||||
- `pointsize` - Marker size
|
||||
- `edgecolor` - Marker edge color
|
||||
- `edgewidth` - Marker edge width
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Line())
|
||||
so.Plot(df, x='x', y='y', color='cat').add(so.Line(linewidth=2))
|
||||
```
|
||||
|
||||
### Path
|
||||
|
||||
Like Line but connects points in data order (not sorted by x).
|
||||
|
||||
```python
|
||||
so.Path(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
Properties same as `Line`.
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
# For trajectories, loops, etc.
|
||||
so.Plot(trajectory_df, x='x', y='y').add(so.Path())
|
||||
```
|
||||
|
||||
### Bar
|
||||
|
||||
Rectangular bars.
|
||||
|
||||
```python
|
||||
so.Bar(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Fill color
|
||||
- `alpha` - Transparency
|
||||
- `edgecolor` - Edge color
|
||||
- `edgealpha` - Edge transparency
|
||||
- `edgewidth` - Edge line width
|
||||
- `width` - Bar width (data units)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Bar())
|
||||
so.Plot(df, x='x', y='y').add(so.Bar(color='#1f77b4', width=0.5))
|
||||
```
|
||||
|
||||
### Bars
|
||||
|
||||
Multiple bars (for aggregated data with error bars).
|
||||
|
||||
```python
|
||||
so.Bars(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
Properties same as `Bar`. Used with `Agg()` or `Est()` stats.
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Bars(), so.Agg())
|
||||
```
|
||||
|
||||
### Area
|
||||
|
||||
Filled area between line and baseline.
|
||||
|
||||
```python
|
||||
so.Area(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Fill color
|
||||
- `alpha` - Transparency
|
||||
- `edgecolor` - Edge color
|
||||
- `edgealpha` - Edge transparency
|
||||
- `edgewidth` - Edge line width
|
||||
- `baseline` - Baseline value (default: 0)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Area(alpha=0.3))
|
||||
so.Plot(df, x='x', y='y', color='cat').add(so.Area())
|
||||
```
|
||||
|
||||
### Band
|
||||
|
||||
Filled band between two lines (for ranges/intervals).
|
||||
|
||||
```python
|
||||
so.Band(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
Properties same as `Area`. Requires `ymin` and `ymax` mappings or used with `Est()` stat.
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', ymin='lower', ymax='upper').add(so.Band())
|
||||
so.Plot(df, x='x', y='y').add(so.Band(), so.Est())
|
||||
```
|
||||
|
||||
### Range
|
||||
|
||||
Line with markers at endpoints (for ranges).
|
||||
|
||||
```python
|
||||
so.Range(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Line and marker color
|
||||
- `alpha` - Transparency
|
||||
- `linewidth` - Line width
|
||||
- `marker` - Marker style at endpoints
|
||||
- `pointsize` - Marker size
|
||||
- `edgewidth` - Marker edge width
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Range(), so.Est())
|
||||
```
|
||||
|
||||
### Dash
|
||||
|
||||
Short horizontal/vertical lines (for distribution marks).
|
||||
|
||||
```python
|
||||
so.Dash(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Line color
|
||||
- `alpha` - Transparency
|
||||
- `linewidth` - Line width
|
||||
- `width` - Dash length (data units)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Dash())
|
||||
```
|
||||
|
||||
### Text
|
||||
|
||||
Text labels at data points.
|
||||
|
||||
```python
|
||||
so.Text(artist_kws=None, **kwargs)
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- `color` - Text color
|
||||
- `alpha` - Transparency
|
||||
- `fontsize` - Font size
|
||||
- `halign` - Horizontal alignment: "left", "center", "right"
|
||||
- `valign` - Vertical alignment: "bottom", "center", "top"
|
||||
- `offset` - (x, y) offset from point
|
||||
|
||||
Requires `text` mapping.
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y', text='label').add(so.Text())
|
||||
so.Plot(df, x='x', y='y', text='value').add(so.Text(fontsize=10, offset=(0, 5)))
|
||||
```
|
||||
|
||||
## Stat Objects
|
||||
|
||||
Stats transform data before rendering. Compose with marks in `.add()`.
|
||||
|
||||
### Agg
|
||||
|
||||
Aggregate observations by group.
|
||||
|
||||
```python
|
||||
so.Agg(func='mean')
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `func` - Aggregation function: "mean", "median", "sum", "min", "max", "count", or callable
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Bar(), so.Agg('mean'))
|
||||
so.Plot(df, x='x', y='y', color='group').add(so.Line(), so.Agg('median'))
|
||||
```
|
||||
|
||||
### Est
|
||||
|
||||
Estimate central tendency with error intervals.
|
||||
|
||||
```python
|
||||
so.Est(func='mean', errorbar=('ci', 95), n_boot=1000, seed=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `func` - Estimator: "mean", "median", "sum", or callable
|
||||
- `errorbar` - Error representation:
|
||||
- `("ci", level)` - Confidence interval via bootstrap
|
||||
- `("pi", level)` - Percentile interval
|
||||
- `("se", scale)` - Standard error scaled by factor
|
||||
- `"sd"` - Standard deviation
|
||||
- `n_boot` - Bootstrap iterations
|
||||
- `seed` - Random seed
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Bar(), so.Est())
|
||||
so.Plot(df, x='x', y='y').add(so.Line(), so.Est(errorbar='sd'))
|
||||
so.Plot(df, x='x', y='y').add(so.Line(), so.Est(errorbar=('ci', 95)))
|
||||
so.Plot(df, x='x', y='y').add(so.Band(), so.Est())
|
||||
```
|
||||
|
||||
### Hist
|
||||
|
||||
Bin observations and count/aggregate.
|
||||
|
||||
```python
|
||||
so.Hist(stat='count', bins='auto', binwidth=None, binrange=None,
|
||||
common_norm=True, common_bins=True, cumulative=False)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `stat` - "count", "density", "probability", "percent", "frequency"
|
||||
- `bins` - Number of bins, bin method, or edges
|
||||
- `binwidth` - Width of bins
|
||||
- `binrange` - (min, max) range for binning
|
||||
- `common_norm` - Normalize across groups together
|
||||
- `common_bins` - Use same bins for all groups
|
||||
- `cumulative` - Cumulative histogram
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='value').add(so.Bars(), so.Hist())
|
||||
so.Plot(df, x='value').add(so.Bars(), so.Hist(bins=20, stat='density'))
|
||||
so.Plot(df, x='value', color='group').add(so.Area(), so.Hist(cumulative=True))
|
||||
```
|
||||
|
||||
### KDE
|
||||
|
||||
Kernel density estimate.
|
||||
|
||||
```python
|
||||
so.KDE(bw_method='scott', bw_adjust=1, gridsize=200,
|
||||
cut=3, cumulative=False)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `bw_method` - Bandwidth method: "scott", "silverman", or scalar
|
||||
- `bw_adjust` - Bandwidth multiplier
|
||||
- `gridsize` - Resolution of density curve
|
||||
- `cut` - Extension beyond data range (in bandwidth units)
|
||||
- `cumulative` - Cumulative density
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='value').add(so.Line(), so.KDE())
|
||||
so.Plot(df, x='value', color='group').add(so.Area(alpha=0.5), so.KDE())
|
||||
so.Plot(df, x='x', y='y').add(so.Line(), so.KDE(bw_adjust=0.5))
|
||||
```
|
||||
|
||||
### Count
|
||||
|
||||
Count observations per group.
|
||||
|
||||
```python
|
||||
so.Count()
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category').add(so.Bar(), so.Count())
|
||||
```
|
||||
|
||||
### PolyFit
|
||||
|
||||
Polynomial regression fit.
|
||||
|
||||
```python
|
||||
so.PolyFit(order=1)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `order` - Polynomial order (1 = linear, 2 = quadratic, etc.)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Dot())
|
||||
so.Plot(df, x='x', y='y').add(so.Line(), so.PolyFit(order=2))
|
||||
```
|
||||
|
||||
### Perc
|
||||
|
||||
Compute percentiles.
|
||||
|
||||
```python
|
||||
so.Perc(k=5, method='linear')
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `k` - Number of percentile intervals
|
||||
- `method` - Interpolation method
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Band(), so.Perc())
|
||||
```
|
||||
|
||||
## Move Objects
|
||||
|
||||
Moves adjust positions to resolve overlaps or create specific layouts.
|
||||
|
||||
### Dodge
|
||||
|
||||
Shift positions side-by-side.
|
||||
|
||||
```python
|
||||
so.Dodge(empty='keep', gap=0)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `empty` - How to handle empty groups: "keep", "drop", "fill"
|
||||
- `gap` - Gap between dodged elements (proportion)
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value', color='group').add(so.Bar(), so.Dodge())
|
||||
so.Plot(df, x='cat', y='val', color='hue').add(so.Dot(), so.Dodge(gap=0.1))
|
||||
```
|
||||
|
||||
### Stack
|
||||
|
||||
Stack marks vertically.
|
||||
|
||||
```python
|
||||
so.Stack()
|
||||
```
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y', color='category').add(so.Bar(), so.Stack())
|
||||
so.Plot(df, x='x', y='y', color='group').add(so.Area(), so.Stack())
|
||||
```
|
||||
|
||||
### Jitter
|
||||
|
||||
Add random noise to positions.
|
||||
|
||||
```python
|
||||
so.Jitter(width=None, height=None, seed=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `width` - Jitter in x direction (data units or proportion)
|
||||
- `height` - Jitter in y direction
|
||||
- `seed` - Random seed
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='category', y='value').add(so.Dot(), so.Jitter())
|
||||
so.Plot(df, x='cat', y='val').add(so.Dot(), so.Jitter(width=0.2))
|
||||
```
|
||||
|
||||
### Shift
|
||||
|
||||
Shift positions by constant amount.
|
||||
|
||||
```python
|
||||
so.Shift(x=0, y=0)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `x` - Shift in x direction (data units)
|
||||
- `y` - Shift in y direction
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y').add(so.Dot(), so.Shift(x=1))
|
||||
```
|
||||
|
||||
### Norm
|
||||
|
||||
Normalize values.
|
||||
|
||||
```python
|
||||
so.Norm(func='max', where=None, by=None, percent=False)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `func` - Normalization: "max", "sum", "area", or callable
|
||||
- `where` - Apply to which axis: "x", "y", or None
|
||||
- `by` - Grouping variables for separate normalization
|
||||
- `percent` - Show as percentage
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y', color='group').add(so.Area(), so.Norm())
|
||||
```
|
||||
|
||||
## Scale Objects
|
||||
|
||||
Scales control how data values map to visual properties.
|
||||
|
||||
### Continuous
|
||||
|
||||
For numeric data.
|
||||
|
||||
```python
|
||||
so.Continuous(values=None, norm=None, trans=None)
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
- `.tick(at=None, every=None, between=None, minor=None)` - Configure ticks
|
||||
- `.label(like=None, base=None, unit=None)` - Format labels
|
||||
|
||||
**Parameters:**
|
||||
- `values` - Explicit value range (min, max)
|
||||
- `norm` - Normalization function
|
||||
- `trans` - Transformation: "log", "sqrt", "symlog", "logit", "pow10", or callable
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.scale(
|
||||
x=so.Continuous().tick(every=10),
|
||||
y=so.Continuous(trans='log').tick(at=[1, 10, 100]),
|
||||
color=so.Continuous(values=(0, 1)),
|
||||
pointsize=(5, 20) # Shorthand for Continuous range
|
||||
)
|
||||
```
|
||||
|
||||
### Nominal
|
||||
|
||||
For categorical data.
|
||||
|
||||
```python
|
||||
so.Nominal(values=None, order=None)
|
||||
```
|
||||
|
||||
**Parameters:**
|
||||
- `values` - Explicit values (e.g., colors, markers)
|
||||
- `order` - Category order
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.scale(
|
||||
color=so.Nominal(['#1f77b4', '#ff7f0e', '#2ca02c']),
|
||||
marker=so.Nominal(['o', 's', '^']),
|
||||
x=so.Nominal(order=['Low', 'Medium', 'High'])
|
||||
)
|
||||
```
|
||||
|
||||
### Temporal
|
||||
|
||||
For datetime data.
|
||||
|
||||
```python
|
||||
so.Temporal(values=None, trans=None)
|
||||
```
|
||||
|
||||
**Methods:**
|
||||
- `.tick(every=None, between=None)` - Configure ticks
|
||||
- `.label(concise=False)` - Format labels
|
||||
|
||||
**Example:**
|
||||
```python
|
||||
p.scale(x=so.Temporal().tick(every=('month', 1)).label(concise=True))
|
||||
```
|
||||
|
||||
## Complete Examples
|
||||
|
||||
### Layered Plot with Statistics
|
||||
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='total_bill', y='tip', color='time')
|
||||
.add(so.Dot(), alpha=0.5)
|
||||
.add(so.Line(), so.PolyFit(order=2))
|
||||
.scale(color=so.Nominal(['#1f77b4', '#ff7f0e']))
|
||||
.label(x='Total Bill ($)', y='Tip ($)', title='Tips Analysis')
|
||||
.theme({**sns.axes_style('whitegrid')})
|
||||
)
|
||||
```
|
||||
|
||||
### Faceted Distribution
|
||||
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='measurement', color='treatment')
|
||||
.facet(col='timepoint', wrap=3)
|
||||
.add(so.Area(alpha=0.5), so.KDE())
|
||||
.add(so.Dot(), so.Jitter(width=0.1), y=0)
|
||||
.scale(x=so.Continuous().tick(every=5))
|
||||
.label(x='Measurement (units)', title='Treatment Effects Over Time')
|
||||
.share(x=True, y=False)
|
||||
)
|
||||
```
|
||||
|
||||
### Grouped Bar Chart
|
||||
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='category', y='value', color='group')
|
||||
.add(so.Bar(), so.Agg('mean'), so.Dodge())
|
||||
.add(so.Range(), so.Est(errorbar='se'), so.Dodge())
|
||||
.scale(color=so.Nominal(order=['A', 'B', 'C']))
|
||||
.label(y='Mean Value', title='Comparison by Category and Group')
|
||||
)
|
||||
```
|
||||
|
||||
### Complex Multi-Layer
|
||||
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='date', y='value')
|
||||
.add(so.Dot(color='gray', pointsize=3), alpha=0.3)
|
||||
.add(so.Line(color='blue', linewidth=2), so.Agg('mean'))
|
||||
.add(so.Band(color='blue', alpha=0.2), so.Est(errorbar=('ci', 95)))
|
||||
.facet(col='sensor', row='location')
|
||||
.scale(
|
||||
x=so.Temporal().label(concise=True),
|
||||
y=so.Continuous().tick(every=10)
|
||||
)
|
||||
.label(
|
||||
x='Date',
|
||||
y='Measurement',
|
||||
title='Sensor Measurements by Location'
|
||||
)
|
||||
.layout(size=(12, 8), engine='constrained')
|
||||
)
|
||||
```
|
||||
|
||||
## Migration from Function Interface
|
||||
|
||||
### Scatter Plot
|
||||
|
||||
**Function interface:**
|
||||
```python
|
||||
sns.scatterplot(data=df, x='x', y='y', hue='category', size='value')
|
||||
```
|
||||
|
||||
**Objects interface:**
|
||||
```python
|
||||
so.Plot(df, x='x', y='y', color='category', pointsize='value').add(so.Dot())
|
||||
```
|
||||
|
||||
### Line Plot with CI
|
||||
|
||||
**Function interface:**
|
||||
```python
|
||||
sns.lineplot(data=df, x='time', y='measurement', hue='group', errorbar='ci')
|
||||
```
|
||||
|
||||
**Objects interface:**
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='time', y='measurement', color='group')
|
||||
.add(so.Line(), so.Est())
|
||||
)
|
||||
```
|
||||
|
||||
### Histogram
|
||||
|
||||
**Function interface:**
|
||||
```python
|
||||
sns.histplot(data=df, x='value', hue='category', stat='density', kde=True)
|
||||
```
|
||||
|
||||
**Objects interface:**
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='value', color='category')
|
||||
.add(so.Bars(), so.Hist(stat='density'))
|
||||
.add(so.Line(), so.KDE())
|
||||
)
|
||||
```
|
||||
|
||||
### Bar Plot with Error Bars
|
||||
|
||||
**Function interface:**
|
||||
```python
|
||||
sns.barplot(data=df, x='category', y='value', hue='group', errorbar='ci')
|
||||
```
|
||||
|
||||
**Objects interface:**
|
||||
```python
|
||||
(
|
||||
so.Plot(df, x='category', y='value', color='group')
|
||||
.add(so.Bar(), so.Agg(), so.Dodge())
|
||||
.add(so.Range(), so.Est(), so.Dodge())
|
||||
)
|
||||
```
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
1. **Method chaining**: Each method returns a new Plot object, enabling fluent chaining
|
||||
2. **Layer composition**: Combine multiple `.add()` calls to overlay different marks
|
||||
3. **Transform order**: In `.add(mark, stat, move)`, stat applies first, then move
|
||||
4. **Variable priority**: Layer-specific mappings override Plot-level mappings
|
||||
5. **Scale shortcuts**: Use tuples for simple ranges: `color=(min, max)` vs full Scale object
|
||||
6. **Jupyter rendering**: Plots render automatically when returned; use `.show()` otherwise
|
||||
7. **Saving**: Use `.save()` rather than `plt.savefig()` for proper handling
|
||||
8. **Matplotlib access**: Use `.on(ax)` to integrate with matplotlib figures
|
||||
Reference in New Issue
Block a user