| name | description | category |
|---|---|---|
| performance-profiling | Automatically applies when profiling Python performance. Ensures proper use of profiling tools, async profiling, benchmarking, memory analysis, and optimization strategies. | observability |
# Performance Profiling Patterns
When profiling Python applications, follow these patterns for effective performance analysis.
Trigger Keywords: profiling, performance, benchmark, optimization, cProfile, line_profiler, memory_profiler, async profiling, bottleneck, CPU profiling
Agent Integration: Used by performance-engineer, backend-architect, optimization-engineer
## ✅ Correct Pattern: CPU Profiling
```python
import cProfile
import pstats
from functools import wraps
from pstats import SortKey
from typing import Callable, Dict, List, Optional


def profile_function(output_file: Optional[str] = None):
    """
    Decorator to profile function execution.

    Args:
        output_file: Optional file to save profile data
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            profiler = cProfile.Profile()
            profiler.enable()
            try:
                result = func(*args, **kwargs)
                return result
            finally:
                profiler.disable()
                # Print stats
                stats = pstats.Stats(profiler)
                stats.sort_stats(SortKey.CUMULATIVE)
                stats.print_stats(20)  # Top 20 functions
                # Save to file
                if output_file:
                    stats.dump_stats(output_file)
        return wrapper
    return decorator


# Usage
@profile_function(output_file="profile.stats")
def process_data(data: List[Dict]):
    """Process data with profiling."""
    result = []
    for item in data:
        processed = expensive_operation(item)
        result.append(processed)
    return result


# Analyze saved profile
def analyze_profile(stats_file: str):
    """Analyze saved profile data."""
    stats = pstats.Stats(stats_file)

    print("\n=== Top 20 by cumulative time ===")
    stats.sort_stats(SortKey.CUMULATIVE)
    stats.print_stats(20)

    print("\n=== Top 20 by internal time ===")
    stats.sort_stats(SortKey.TIME)
    stats.print_stats(20)

    print("\n=== Top 20 by call count ===")
    stats.sort_stats(SortKey.CALLS)
    stats.print_stats(20)
```

## Line-by-Line Profiling
```python
from functools import wraps
from typing import Callable, List

# Requires: pip install line_profiler
from line_profiler import LineProfiler


def profile_lines(func: Callable) -> Callable:
    """
    Decorator for line-by-line profiling.

    Usage:
        @profile_lines
        def my_function():
            ...

        my_function()
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        profiler = LineProfiler()
        profiler.add_function(func)
        profiler.enable()
        try:
            result = func(*args, **kwargs)
            return result
        finally:
            profiler.disable()
            profiler.print_stats()
    return wrapper


# Usage
@profile_lines
def process_items(items: List[str]) -> List[str]:
    """Process items with line profiling."""
    result = []
    for item in items:
        # Each line's execution time is measured
        cleaned = item.strip().lower()
        if len(cleaned) > 5:
            processed = expensive_transform(cleaned)
            result.append(processed)
    return result


# Alternative: profile several specific functions in one run
def detailed_profile():
    """Profile multiple functions."""
    lp = LineProfiler()
    # Add functions to profile
    lp.add_function(function1)
    lp.add_function(function2)
    lp.add_function(function3)
    # Run with profiling
    lp.enable()
    main_function()
    lp.disable()
    lp.print_stats()
```

## Memory Profiling
```python
import tracemalloc

# Requires: pip install memory-profiler
from memory_profiler import profile as memory_profile


@memory_profile
def memory_intensive_function():
    """Profile memory usage line by line."""
    data = []
    for i in range(1000000):
        data.append(i * 2)
    return data


# Tracemalloc for detailed memory tracking
def track_memory_detailed():
    """Track memory allocations in detail."""
    tracemalloc.start()

    # Code to profile
    data = memory_intensive_function()

    # Get current memory usage
    current, peak = tracemalloc.get_traced_memory()
    print(f"Current memory: {current / 1024 / 1024:.2f} MB")
    print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")

    # Get top memory allocations
    snapshot = tracemalloc.take_snapshot()
    top_stats = snapshot.statistics('lineno')
    print("\n=== Top 10 memory allocations ===")
    for stat in top_stats[:10]:
        print(stat)

    tracemalloc.stop()


# Context manager for memory tracking
class MemoryTracker:
    """Context manager to track memory usage."""

    def __enter__(self):
        tracemalloc.start()
        self.start_memory = tracemalloc.get_traced_memory()[0]
        return self

    def __exit__(self, *args):
        current, peak = tracemalloc.get_traced_memory()
        self.memory_used = current - self.start_memory
        self.peak_memory = peak
        tracemalloc.stop()
        print(f"Memory used: {self.memory_used / 1024 / 1024:.2f} MB")
        print(f"Peak memory: {self.peak_memory / 1024 / 1024:.2f} MB")


# Usage (memory_used is only available after the block exits)
with MemoryTracker() as tracker:
    result = memory_intensive_function()
print(f"Total memory: {tracker.memory_used / 1024 / 1024:.2f} MB")
```

## Async Profiling
```python
import asyncio
import time
from functools import wraps
from typing import Coroutine, Optional


class AsyncProfiler:
    """Profile async function execution."""

    def __init__(self):
        self.timings = {}

    async def profile(self, name: str, coro: Coroutine):
        """
        Profile an async coroutine.

        Args:
            name: Profile label
            coro: Coroutine to profile

        Returns:
            Coroutine result
        """
        start = time.perf_counter()
        try:
            result = await coro
            duration = time.perf_counter() - start
            self.timings[name] = {
                "duration": duration,
                "status": "success"
            }
            return result
        except Exception as e:
            duration = time.perf_counter() - start
            self.timings[name] = {
                "duration": duration,
                "status": "error",
                "error": str(e)
            }
            raise

    def print_stats(self):
        """Print profiling statistics."""
        print("\n=== Async Profile Stats ===")
        print(f"{'Function':<30} {'Duration':>10} {'Status':>10}")
        print("-" * 52)
        for name, stats in sorted(
            self.timings.items(),
            key=lambda x: x[1]["duration"],
            reverse=True
        ):
            duration = stats["duration"]
            status = stats["status"]
            print(f"{name:<30} {duration:>10.4f}s {status:>10}")


# Usage
async def main():
    profiler = AsyncProfiler()

    # Profile async operations
    users = await profiler.profile(
        "fetch_users",
        fetch_users_from_db()
    )
    orders = await profiler.profile(
        "fetch_orders",
        fetch_orders_from_api()
    )
    result = await profiler.profile(
        "process_data",
        process_data(users, orders)
    )

    profiler.print_stats()
    return result


# Decorator version
def profile_async(name: Optional[str] = None):
    """Decorator to profile async functions."""
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            func_name = name or func.__name__
            start = time.perf_counter()
            try:
                result = await func(*args, **kwargs)
                duration = time.perf_counter() - start
                print(f"{func_name}: {duration:.4f}s")
                return result
            except Exception:
                duration = time.perf_counter() - start
                print(f"{func_name}: {duration:.4f}s (error)")
                raise
        return wrapper
    return decorator


@profile_async("fetch_users")
async def fetch_users():
    """Fetch users with profiling."""
    await asyncio.sleep(0.5)
    return ["user1", "user2"]
```

## Benchmarking
```python
import time
import timeit
from statistics import mean, stdev
from typing import Callable, Dict, List


class Benchmark:
    """Benchmark function performance."""

    def __init__(self, iterations: int = 1000):
        self.iterations = iterations
        self.results: Dict[str, List[float]] = {}

    def run(self, name: str, func: Callable, *args, **kwargs):
        """
        Run benchmark for a function.

        Args:
            name: Benchmark name
            func: Function to benchmark
            *args, **kwargs: Function arguments
        """
        times = []
        for _ in range(self.iterations):
            start = time.perf_counter()
            func(*args, **kwargs)
            duration = time.perf_counter() - start
            times.append(duration)
        self.results[name] = times

    def compare(self, name1: str, name2: str):
        """Compare two benchmarks."""
        times1 = self.results[name1]
        times2 = self.results[name2]
        mean1 = mean(times1)
        mean2 = mean(times2)
        improvement = ((mean1 - mean2) / mean1) * 100
        print(f"\n=== Comparison: {name1} vs {name2} ===")
        print(f"{name1}: {mean1 * 1000:.4f}ms ± {stdev(times1) * 1000:.4f}ms")
        print(f"{name2}: {mean2 * 1000:.4f}ms ± {stdev(times2) * 1000:.4f}ms")
        print(f"Improvement of {name2} over {name1}: {improvement:+.2f}%")

    def print_stats(self):
        """Print all benchmark statistics."""
        print("\n=== Benchmark Results ===")
        print(f"{'Name':<30} {'Mean':>12} {'Std Dev':>12} {'Min':>12} {'Max':>12}")
        print("-" * 80)
        for name, times in self.results.items():
            mean_time = mean(times) * 1000
            std_time = stdev(times) * 1000
            min_time = min(times) * 1000
            max_time = max(times) * 1000
            print(
                f"{name:<30} "
                f"{mean_time:>10.4f}ms "
                f"{std_time:>10.4f}ms "
                f"{min_time:>10.4f}ms "
                f"{max_time:>10.4f}ms"
            )


# Usage
bench = Benchmark(iterations=1000)


def double_with_for_loop():
    """Explicit for loop, for comparison against the comprehension."""
    result = []
    for i in range(1000):
        result.append(i * 2)
    return result


# Benchmark different implementations
bench.run("list_comprehension", lambda: [i * 2 for i in range(1000)])
bench.run("map_function", lambda: list(map(lambda i: i * 2, range(1000))))
bench.run("for_loop", double_with_for_loop)

bench.print_stats()
bench.compare("list_comprehension", "map_function")


# Using the timeit module
def benchmark_with_timeit():
    """Benchmark with the timeit module."""
    # Setup code (mymodule is a placeholder for your own module)
    setup = """
from mymodule import function1, function2
data = list(range(10000))
"""
    # Benchmark function1
    time1 = timeit.timeit(
        "function1(data)",
        setup=setup,
        number=1000
    )
    # Benchmark function2
    time2 = timeit.timeit(
        "function2(data)",
        setup=setup,
        number=1000
    )
    print(f"function1: {time1:.4f}s")
    print(f"function2: {time2:.4f}s")
    print(f"Speedup: {time1 / time2:.2f}x")
```

## Performance Testing
```python
import time
from statistics import mean
from typing import Callable

import pytest
import requests


def assert_performance(
    func: Callable,
    max_duration_ms: float,
    iterations: int = 100
):
    """
    Assert that a function meets its performance budget.

    Named without a test_ prefix so pytest does not collect it as a test.

    Args:
        func: Function to measure
        max_duration_ms: Maximum allowed average duration in ms
        iterations: Number of iterations
    """
    durations = []
    for _ in range(iterations):
        start = time.perf_counter()
        func()
        duration = (time.perf_counter() - start) * 1000
        durations.append(duration)
    avg_duration = mean(durations)
    assert avg_duration < max_duration_ms, (
        f"Performance regression: {avg_duration:.2f}ms > {max_duration_ms}ms"
    )


# Pytest performance tests
@pytest.mark.performance
def test_api_response_time():
    """Test API response time is under 100ms."""
    def api_call():
        response = requests.get("http://localhost:8000/api/users")
        return response.json()

    assert_performance(api_call, max_duration_ms=100.0, iterations=50)


@pytest.mark.performance
def test_database_query_time():
    """Test database query time is under 50ms."""
    def db_query():
        # db and User refer to your own session and model
        return db.query(User).filter(User.status == "active").all()

    assert_performance(db_query, max_duration_ms=50.0, iterations=100)
```

## ❌ Anti-Patterns
```python
# ❌ Using time.time() and print() for timing
start = time.time()
result = expensive_function()
print(f"Took {time.time() - start}s")  # Low resolution, single measurement

# ✅ Better: use proper profiling
@profile_function()
def expensive_function():
    ...

# ❌ Single measurement
duration = timeit.timeit(func, number=1)  # Not reliable!

# ✅ Better: multiple iterations
times = [timeit.timeit(func, number=1) for _ in range(100)]
avg_time = mean(times)

# ❌ Not profiling before optimizing
# Just guessing where the bottleneck is

# ✅ Better: profile first, then optimize
@profile_function()
def identify_bottleneck():
    ...

# ❌ Profiling in production without limits
profiler.enable()  # Runs until the process exits

# ✅ Better: time-limited profiling
import signal

def stop_profiling(signum, frame):
    profiler.disable()

signal.signal(signal.SIGALRM, stop_profiling)
profiler.enable()
signal.alarm(60)  # Stop profiling after 60 seconds
```

## Best Practices Checklist
- ✅ Profile before optimizing (measure, don't guess)
- ✅ Use appropriate profiling tool (cProfile, line_profiler)
- ✅ Profile memory usage for memory-intensive code
- ✅ Use async profiling for async code
- ✅ Benchmark different implementations
- ✅ Run multiple iterations for reliability
- ✅ Track performance over time
- ✅ Set performance budgets in tests
- ✅ Profile in realistic conditions
- ✅ Focus on hot paths first
- ✅ Document performance characteristics
- ✅ Use sampling for production profiling (see the sketch after this list)
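
For the last item, here is a minimal sketch of what sampling looks like in-process, assuming a background thread in the target process is acceptable. `SamplingProfiler` and `expensive_function` are hypothetical names used only for illustration; in production an external sampling profiler such as py-spy is usually the safer choice. Unlike cProfile, which traces every call, a sampler records the current stack at a fixed interval, so overhead stays roughly constant regardless of call volume.

```python
# Illustrative sketch only: samples the calling thread's stack periodically.
import collections
import sys
import threading
import time
import traceback


class SamplingProfiler:
    """Periodically sample stack frames instead of tracing every call."""

    def __init__(self, interval: float = 0.01):
        self.interval = interval  # seconds between samples
        self.samples = collections.Counter()
        self._stop = threading.Event()
        self._thread = None

    def _sample_loop(self, target_thread_id: int):
        while not self._stop.is_set():
            frame = sys._current_frames().get(target_thread_id)
            if frame is not None:
                # Record the call stack as (file, line, function) tuples
                stack = tuple(
                    (f.filename, f.lineno, f.name)
                    for f in traceback.extract_stack(frame)
                )
                self.samples[stack] += 1
            time.sleep(self.interval)

    def start(self):
        target_id = threading.get_ident()  # sample the calling thread
        self._thread = threading.Thread(
            target=self._sample_loop, args=(target_id,), daemon=True
        )
        self._thread.start()

    def stop(self):
        self._stop.set()
        self._thread.join()

    def print_stats(self, top: int = 10):
        total = sum(self.samples.values()) or 1
        print("\n=== Top sampled stacks ===")
        for stack, count in self.samples.most_common(top):
            filename, lineno, name = stack[-1]  # innermost frame
            print(f"{count / total:6.1%}  {name} ({filename}:{lineno})")


# Usage
profiler = SamplingProfiler(interval=0.01)
profiler.start()
expensive_function()  # placeholder for the code under investigation
profiler.stop()
profiler.print_stats()
```

The sampling interval trades accuracy against overhead; around 10ms is a common starting point.
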
## Auto-Apply
When profiling performance:
- Use cProfile for CPU profiling
- Use line_profiler for line-by-line analysis
- Use memory_profiler for memory tracking
- Profile async code with AsyncProfiler
- Benchmark alternatives with Benchmark class
- Add performance tests with time limits
- Focus on hot paths identified by profiling
## Related Skills
- monitoring-alerting - For production metrics
- query-optimization - For database performance
- async-await-checker - For async patterns
- pytest-patterns - For performance testing