Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:50:24 +08:00
commit f172746dc6
52 changed files with 17406 additions and 0 deletions

View File

@@ -0,0 +1,433 @@
# Go Performance Optimization
**Load this file when:** Optimizing performance in Go projects
## Profiling Tools
### Built-in pprof
```bash
# CPU profiling
go test -cpuprofile=cpu.prof -bench=.
go tool pprof cpu.prof
# Memory profiling
go test -memprofile=mem.prof -bench=.
go tool pprof mem.prof
# Web UI for profiles
go tool pprof -http=:8080 cpu.prof
# Goroutine profiling
go tool pprof http://localhost:6060/debug/pprof/goroutine
# Heap profiling
go tool pprof http://localhost:6060/debug/pprof/heap
```
### Benchmarking
```go
// Basic benchmark
func BenchmarkFibonacci(b *testing.B) {
for i := 0; i < b.N; i++ {
fibonacci(20)
}
}
// With sub-benchmarks
func BenchmarkSizes(b *testing.B) {
sizes := []int{10, 100, 1000}
for _, size := range sizes {
b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
for i := 0; i < b.N; i++ {
process(size)
}
})
}
}
// Reset timer for setup
func BenchmarkWithSetup(b *testing.B) {
data := setupExpensiveData()
b.ResetTimer() // Don't count setup time
for i := 0; i < b.N; i++ {
process(data)
}
}
```
### Runtime Metrics
```go
import (
"net/http"
_ "net/http/pprof" // Import for side effects
"runtime"
)
func init() {
// Enable profiling endpoint
go func() {
http.ListenAndServe("localhost:6060", nil)
}()
}
// Monitor goroutines
func printStats() {
fmt.Printf("Goroutines: %d\n", runtime.NumGoroutine())
var m runtime.MemStats
runtime.ReadMemStats(&m)
fmt.Printf("Alloc: %d MB\n", m.Alloc/1024/1024)
fmt.Printf("TotalAlloc: %d MB\n", m.TotalAlloc/1024/1024)
}
```
## Memory Management
### Avoiding Allocations
```go
// Bad: Allocates on every call
func process(data []byte) []byte {
result := make([]byte, len(data)) // New allocation
copy(result, data)
return result
}
// Good: Reuse buffer with sync.Pool (note: pooling a *[]byte or *bytes.Buffer
// avoids the per-Put interface allocation that staticcheck SA6002 warns about)
var bufferPool = sync.Pool{
New: func() interface{} {
return make([]byte, 1024)
},
}
func process(data []byte) {
buf := bufferPool.Get().([]byte)
defer bufferPool.Put(buf)
// Process with buf
}
```
### Preallocate Slices
```go
// Bad: Multiple allocations as slice grows
items := []Item{}
for i := 0; i < 1000; i++ {
items = append(items, Item{i}) // Reallocates when cap exceeded
}
// Good: Single allocation
items := make([]Item, 0, 1000)
for i := 0; i < 1000; i++ {
items = append(items, Item{i}) // No reallocation
}
// Or if final size is known
items := make([]Item, 1000)
for i := 0; i < 1000; i++ {
items[i] = Item{i}
}
```
### String vs []byte
```go
// Bad: String concatenation allocates
var result string
for _, s := range strings {
result += s // New allocation each time
}
// Good: Use strings.Builder
var builder strings.Builder
builder.Grow(estimatedSize) // Preallocate
for _, s := range strings {
builder.WriteString(s)
}
result := builder.String()
// For byte operations, work with []byte
data := []byte("hello")
data = append(data, " world"...) // Efficient
```
## Goroutine Optimization
### Worker Pool Pattern
```go
// Bad: Unlimited goroutines
for _, task := range tasks {
go process(task) // Could spawn millions!
}
// Good: Limited worker pool
func workerPool(tasks <-chan Task, workers int) {
var wg sync.WaitGroup
for i := 0; i < workers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
for task := range tasks {
process(task)
}
}()
}
wg.Wait()
}
// Usage
taskChan := make(chan Task, 100)
go workerPool(taskChan, 10) // 10 workers
```
### Channel Patterns
```go
// Buffered channels reduce blocking
ch := make(chan int, 100) // Buffer of 100
// Fan-out pattern for parallel work
func fanOut(in <-chan int, n int) []<-chan int {
outs := make([]<-chan int, n)
for i := 0; i < n; i++ {
out := make(chan int)
outs[i] = out
go func() {
for v := range in {
out <- process(v)
}
close(out)
}()
}
return outs
}
// Fan-in pattern to merge results
func fanIn(channels ...<-chan int) <-chan int {
out := make(chan int)
var wg sync.WaitGroup
for _, ch := range channels {
wg.Add(1)
go func(c <-chan int) {
defer wg.Done()
for v := range c {
out <- v
}
}(ch)
}
go func() {
wg.Wait()
close(out)
}()
return out
}
```
## Data Structure Optimization
### Map Preallocation
```go
// Bad: Map grows as needed
m := make(map[string]int)
for i := 0; i < 10000; i++ {
m[fmt.Sprint(i)] = i // Reallocates periodically
}
// Good: Preallocate
m := make(map[string]int, 10000)
for i := 0; i < 10000; i++ {
m[fmt.Sprint(i)] = i // No reallocation
}
```
### Struct Field Alignment
```go
// Bad: Poor alignment (40 bytes due to padding)
type BadLayout struct {
a bool // 1 byte + 7 padding
b int64 // 8 bytes
c bool // 1 byte + 7 padding
d int64 // 8 bytes
e bool // 1 byte + 7 padding
}
// Good: Optimal alignment (24 bytes)
type GoodLayout struct {
b int64 // 8 bytes
d int64 // 8 bytes
a bool // 1 byte
c bool // 1 byte
e bool // 1 byte + 5 padding
}
```
## I/O Optimization
### Buffered I/O
```go
// Default: bufio.Scanner already buffers internally, but with a small default buffer
file, _ := os.Open("file.txt")
scanner := bufio.NewScanner(file)
// Better for large files: wrap the file in a reader with a custom buffer size
file, _ := os.Open("file.txt")
reader := bufio.NewReaderSize(file, 64*1024) // 64KB buffer
scanner := bufio.NewScanner(reader)
```
### Connection Pooling
```go
// HTTP client with connection pooling
client := &http.Client{
Transport: &http.Transport{
MaxIdleConns: 100,
MaxIdleConnsPerHost: 10,
IdleConnTimeout: 90 * time.Second,
},
Timeout: 10 * time.Second,
}
// Database connection pool
db, _ := sql.Open("postgres", dsn)
db.SetMaxOpenConns(25)
db.SetMaxIdleConns(5)
db.SetConnMaxLifetime(5 * time.Minute)
```
## Performance Anti-Patterns
### Unnecessary Interface Conversions
```go
// Bad: Interface conversion in hot path
func process(items []interface{}) {
for _, item := range items {
v := item.(MyType) // Type assertion overhead
use(v)
}
}
// Good: Use concrete types
func process(items []MyType) {
for _, item := range items {
use(item) // Direct access
}
}
```
### Defer in Loops
```go
// Bad: Defers accumulate in loop
for _, file := range files {
f, _ := os.Open(file)
defer f.Close() // All close calls deferred until function returns!
}
// Good: Close immediately or use function
for _, file := range files {
func() {
f, _ := os.Open(file)
defer f.Close() // Deferred to end of this closure
process(f)
}()
}
```
### Lock Contention
```go
// Bad: Lock held during expensive operation
mu.Lock()
result := expensiveComputation(data)
cache[key] = result
mu.Unlock()
// Good: Minimize lock time
result := expensiveComputation(data)
mu.Lock()
cache[key] = result
mu.Unlock()
// Alternative: sync.Map suits read-mostly workloads with disjoint key sets;
// a plain map guarded by a mutex is usually better otherwise
var cache sync.Map
cache.Store(key, value)
val, ok := cache.Load(key)
```
## Compiler Optimizations
### Escape Analysis
```go
// Bad: Escapes to heap
func makeSlice() *[]int {
s := make([]int, 1000)
return &s // Pointer returned, allocates on heap
}
// Good: Stays on stack
func makeSlice() []int {
s := make([]int, 1000)
return s // Value returned, can stay on stack
}
// Check with: go build -gcflags='-m'
```
### Inline Functions
```go
// Small functions are inlined automatically
func add(a, b int) int {
return a + b // Will be inlined
}
// Prevent inlining if needed: //go:noinline
```
## Performance Checklist
**Before Optimizing:**
- [ ] Profile with pprof to identify bottlenecks
- [ ] Write benchmarks for hot paths
- [ ] Measure allocations with `-benchmem`
- [ ] Check for goroutine leaks
**Go-Specific Optimizations:**
- [ ] Preallocate slices and maps with known capacity
- [ ] Use `strings.Builder` for string concatenation
- [ ] Implement worker pools instead of unlimited goroutines
- [ ] Use buffered channels to reduce blocking
- [ ] Reuse buffers with `sync.Pool`
- [ ] Minimize allocations in hot paths
- [ ] Order struct fields by size (largest first)
- [ ] Use concrete types instead of interfaces in hot paths
- [ ] Avoid `defer` in tight loops
- [ ] Use `sync.Map` for concurrent read-heavy maps
**After Optimizing:**
- [ ] Re-profile to verify improvements
- [ ] Compare benchmarks: `benchstat old.txt new.txt`
- [ ] Check memory allocations decreased
- [ ] Monitor goroutine count in production
- [ ] Use `go test -race` to check for race conditions
## Tools and Packages
**Profiling:**
- `pprof` - Built-in profiler
- `go-torch` - Flamegraph generation (deprecated; `go tool pprof -http` has built-in flame graphs since Go 1.11)
- `benchstat` - Compare benchmark results
- `trace` - Execution tracer
**Optimization:**
- `sync.Pool` - Object pooling
- `sync.Map` - Concurrent map
- `strings.Builder` - Efficient string building
- `bufio` - Buffered I/O
**Analysis:**
- `-gcflags='-m'` - Escape analysis
- `go test -race` - Race detector
- `go test -benchmem` - Memory allocations
- `goleak` - Goroutine leak detection
---
*Go-specific performance optimization with goroutines, channels, and profiling*