---
name: database-migrations
description: Automatically applies when working with database migrations. Ensures proper Alembic patterns, upgrade/downgrade scripts, data migrations, rollback safety, and migration testing.
category: python
---
# Database Migration Patterns
When managing database migrations, follow these patterns for safe, reversible schema changes.
**Trigger Keywords**: migration, alembic, database schema, upgrade, downgrade, migrate, schema change, DDL, database version, revision
**Agent Integration**: Used by `backend-architect`, `database-engineer`, `data-engineer`
## ✅ Correct Pattern: Alembic Setup
```python
# alembic/env.py
from logging.config import fileConfig

from sqlalchemy import engine_from_config, pool

from alembic import context

from app.models import Base  # Import your models
from app.config import settings

# Alembic Config object
config = context.config

# Configure logging
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Set target metadata
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """
    Run migrations in 'offline' mode.

    This configures the context with just a URL
    and not an Engine.
    """
    url = settings.database_url
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
        compare_type=True,  # Detect column type changes
        compare_server_default=True  # Detect default changes
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """
    Run migrations in 'online' mode.

    Creates an Engine and associates a connection with the context.
    """
    configuration = config.get_section(config.config_ini_section)
    configuration["sqlalchemy.url"] = settings.database_url
    connectable = engine_from_config(
        configuration,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=target_metadata,
            compare_type=True,
            compare_server_default=True
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
```
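Migrations are usually run from the CLI, but the same commands can be invoked from code, for example in a deploy script. A minimal sketch, assuming `alembic.ini` sits at the project root:
```python
# A minimal sketch of running migrations programmatically, e.g. from a deploy
# script or at application startup. Assumes alembic.ini is at the project root.
from alembic import command
from alembic.config import Config


def migrate_to_head() -> None:
    """Upgrade the database to the latest revision."""
    config = Config("alembic.ini")
    command.upgrade(config, "head")


if __name__ == "__main__":
    migrate_to_head()
```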
## Create Migration
```python
"""
Create migration with proper naming and structure.
Command:
alembic revision --autogenerate -m "add_user_email_index"
Revision ID: abc123
Revises: xyz456
Create Date: 2025-01-15 10:30:00
"""
from alembic import op
import sqlalchemy as sa
from typing import Sequence, Union
# revision identifiers
revision: str = 'abc123'
down_revision: Union[str, None] = 'xyz456'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""
Apply migration.
Add index on users.email for faster lookups.
"""
op.create_index(
'ix_users_email',
'users',
['email'],
unique=False
)
def downgrade() -> None:
"""
Revert migration.
Remove index on users.email.
"""
op.drop_index('ix_users_email', table_name='users')
```
## Safe Column Additions
```python
"""Add user phone number column"""
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
"""Add phone column with nullable default."""
# Add column as nullable first
op.add_column(
'users',
sa.Column(
'phone',
sa.String(20),
nullable=True # Start nullable!
)
)
# Optionally set default value for existing rows
op.execute(
"""
UPDATE users
SET phone = ''
WHERE phone IS NULL
"""
)
# Then make NOT NULL if needed (separate migration recommended)
# op.alter_column('users', 'phone', nullable=False)
def downgrade() -> None:
"""Remove phone column."""
op.drop_column('users', 'phone')
```
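The commented-out `alter_column` above belongs in its own revision, run only after the backfill is verified. A minimal sketch of that follow-up migration, with hypothetical revision ids:
```python
"""Make users.phone NOT NULL (follow-up migration)"""
from alembic import op
import sqlalchemy as sa

# Hypothetical revision ids for illustration
revision = 'def456'
down_revision = 'abc123'


def upgrade() -> None:
    """Tighten the constraint only after all rows have a value."""
    op.alter_column(
        'users',
        'phone',
        existing_type=sa.String(20),
        nullable=False
    )


def downgrade() -> None:
    """Relax the constraint back to nullable."""
    op.alter_column(
        'users',
        'phone',
        existing_type=sa.String(20),
        nullable=True
    )
```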
## Safe Column Modifications
```python
"""Increase email column length"""
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
"""
Increase email length from 255 to 500.
Safe for PostgreSQL (no table rewrite).
"""
# Check constraints first
with op.batch_alter_table('users') as batch_op:
batch_op.alter_column(
'email',
type_=sa.String(500),
existing_type=sa.String(255),
existing_nullable=False
)
def downgrade() -> None:
"""
Decrease email length back to 255.
WARNING: May fail if existing data exceeds 255 chars.
"""
# Check for data that would be truncated
op.execute(
"""
SELECT COUNT(*)
FROM users
WHERE LENGTH(email) > 255
"""
)
with op.batch_alter_table('users') as batch_op:
batch_op.alter_column(
'email',
type_=sa.String(255),
existing_type=sa.String(500),
existing_nullable=False
)
```
## Data Migration
```python
"""Migrate user status enum values"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
def upgrade() -> None:
"""
Migrate status from 'active'/'inactive' to 'enabled'/'disabled'.
Two-phase approach:
1. Add new column
2. Migrate data
3. Drop old column (in next migration)
"""
# Phase 1: Add new column
op.add_column(
'users',
sa.Column(
'account_status',
sa.Enum('enabled', 'disabled', 'suspended', name='account_status'),
nullable=True
)
)
# Phase 2: Migrate data
connection = op.get_bind()
# Map old values to new values
connection.execute(
text("""
UPDATE users
SET account_status = CASE
WHEN status = 'active' THEN 'enabled'
WHEN status = 'inactive' THEN 'disabled'
ELSE 'disabled'
END
""")
)
# Phase 3: Make NOT NULL (after verifying data)
op.alter_column('users', 'account_status', nullable=False)
# Note: Drop old 'status' column in next migration
# to allow rollback window
def downgrade() -> None:
"""Rollback account_status column."""
op.drop_column('users', 'account_status')
```
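The old `status` column is kept for a rollback window. A minimal sketch of the follow-up migration that drops it, with hypothetical revision ids and assuming `status` was a plain string column:
```python
"""Drop legacy users.status column (follow-up migration)"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text

# Hypothetical revision ids for illustration
revision = 'ghi789'
down_revision = 'abc124'


def upgrade() -> None:
    """Remove the old column once account_status is verified."""
    op.drop_column('users', 'status')


def downgrade() -> None:
    """Restore the old column and backfill it from account_status."""
    op.add_column('users', sa.Column('status', sa.String(20), nullable=True))
    op.get_bind().execute(
        text("""
            UPDATE users
            SET status = CASE
                WHEN account_status = 'enabled' THEN 'active'
                ELSE 'inactive'
            END
        """)
    )
```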
## Foreign Key Changes
```python
"""Add foreign key to orders table"""
from alembic import op
import sqlalchemy as sa
def upgrade() -> None:
"""
Add foreign key constraint.
Ensure referential integrity.
"""
# Create index first for performance
op.create_index(
'ix_orders_user_id',
'orders',
['user_id']
)
# Add foreign key constraint
op.create_foreign_key(
'fk_orders_user_id', # Constraint name
'orders', # Source table
'users', # Target table
['user_id'], # Source columns
['id'], # Target columns
ondelete='CASCADE' # Delete orders when user deleted
)
def downgrade() -> None:
"""Remove foreign key constraint."""
op.drop_constraint(
'fk_orders_user_id',
'orders',
type_='foreignkey'
)
op.drop_index('ix_orders_user_id', table_name='orders')
```
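On large, busy PostgreSQL tables, `create_foreign_key` validates every existing row under a lock. A PostgreSQL-specific sketch (an assumption beyond the portable example above) that adds the constraint as `NOT VALID` first and validates separately:
```python
# PostgreSQL-specific sketch: add the constraint without validating existing
# rows (fast, brief lock), then validate in a second step (weaker lock).
from alembic import op


def upgrade() -> None:
    op.execute(
        "ALTER TABLE orders "
        "ADD CONSTRAINT fk_orders_user_id "
        "FOREIGN KEY (user_id) REFERENCES users (id) NOT VALID"
    )
    # Validation scans existing rows without blocking writes as heavily;
    # on very large tables, run it in a separate migration or maintenance window
    op.execute("ALTER TABLE orders VALIDATE CONSTRAINT fk_orders_user_id")


def downgrade() -> None:
    op.execute("ALTER TABLE orders DROP CONSTRAINT fk_orders_user_id")
```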
## Complex Table Changes
```python
"""Split user table into users and profiles"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text
def upgrade() -> None:
"""
Split users table into users (auth) and profiles (data).
Multi-step migration:
1. Create new profiles table
2. Copy data
3. Add foreign key
4. Drop columns from users
"""
# Step 1: Create profiles table
op.create_table(
'profiles',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('user_id', sa.Integer(), nullable=False),
sa.Column('first_name', sa.String(100), nullable=True),
sa.Column('last_name', sa.String(100), nullable=True),
sa.Column('bio', sa.Text(), nullable=True),
sa.Column('created_at', sa.DateTime(), nullable=False),
sa.PrimaryKeyConstraint('id')
)
# Step 2: Copy data from users to profiles
connection = op.get_bind()
connection.execute(
text("""
INSERT INTO profiles (user_id, first_name, last_name, bio, created_at)
SELECT id, first_name, last_name, bio, created_at
FROM users
""")
)
# Step 3: Add foreign key
op.create_foreign_key(
'fk_profiles_user_id',
'profiles',
'users',
['user_id'],
['id'],
ondelete='CASCADE'
)
# Step 4: Drop old columns (in separate migration recommended)
# op.drop_column('users', 'first_name')
# op.drop_column('users', 'last_name')
# op.drop_column('users', 'bio')
def downgrade() -> None:
"""
Reverse table split.
WARNING: Complex rollback.
"""
# Add columns back to users
op.add_column('users', sa.Column('first_name', sa.String(100)))
op.add_column('users', sa.Column('last_name', sa.String(100)))
op.add_column('users', sa.Column('bio', sa.Text()))
# Copy data back
connection = op.get_bind()
connection.execute(
text("""
UPDATE users
SET first_name = p.first_name,
last_name = p.last_name,
bio = p.bio
FROM profiles p
WHERE users.id = p.user_id
""")
)
# Drop profiles table
op.drop_table('profiles')
```
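Step 4 above is deferred to its own revision so the split can be rolled back. A minimal sketch of that follow-up migration, with hypothetical revision ids:
```python
"""Drop profile columns from users (follow-up migration)"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import text

# Hypothetical revision ids for illustration
revision = 'jkl012'
down_revision = 'mno345'


def upgrade() -> None:
    """Remove the columns now owned by the profiles table."""
    op.drop_column('users', 'first_name')
    op.drop_column('users', 'last_name')
    op.drop_column('users', 'bio')


def downgrade() -> None:
    """Re-add the columns and backfill them from profiles (PostgreSQL syntax)."""
    op.add_column('users', sa.Column('first_name', sa.String(100)))
    op.add_column('users', sa.Column('last_name', sa.String(100)))
    op.add_column('users', sa.Column('bio', sa.Text()))
    op.get_bind().execute(
        text("""
            UPDATE users
            SET first_name = p.first_name,
                last_name = p.last_name,
                bio = p.bio
            FROM profiles p
            WHERE users.id = p.user_id
        """)
    )
```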
## Migration Testing
```python
# tests/test_migrations.py
import pytest
from alembic import command
from alembic.config import Config
from sqlalchemy import create_engine, inspect, text

from app.config import settings


@pytest.fixture
def alembic_config():
    """Create Alembic configuration."""
    config = Config("alembic.ini")
    config.set_main_option("sqlalchemy.url", settings.test_database_url)
    return config


@pytest.fixture
def empty_database():
    """Create empty test database."""
    engine = create_engine(settings.test_database_url)

    # Drop all tables (PostgreSQL)
    with engine.begin() as conn:
        conn.execute(text("DROP SCHEMA public CASCADE"))
        conn.execute(text("CREATE SCHEMA public"))

    yield engine

    engine.dispose()


def test_migrations_upgrade_downgrade(alembic_config, empty_database):
    """
    Test migrations can upgrade and downgrade.

    Ensures all migrations are reversible.
    """
    # Upgrade to head
    command.upgrade(alembic_config, "head")

    # Verify tables exist
    inspector = inspect(empty_database)
    tables = inspector.get_table_names()
    assert "users" in tables
    assert "alembic_version" in tables

    # Downgrade to base
    command.downgrade(alembic_config, "base")

    # Verify tables removed
    inspector = inspect(empty_database)
    tables = inspector.get_table_names()
    assert "users" not in tables


def test_migration_data_integrity(alembic_config, empty_database):
    """
    Test data migration preserves data integrity.

    Insert test data, run migration, verify data.
    """
    # Upgrade to revision before data migration
    command.upgrade(alembic_config, "abc123")

    # Insert test data
    with empty_database.begin() as conn:
        conn.execute(
            text("INSERT INTO users (email, status) VALUES (:email, :status)"),
            {"email": "test@example.com", "status": "active"}
        )

    # Run data migration
    command.upgrade(alembic_config, "abc124")

    # Verify data migrated correctly
    with empty_database.begin() as conn:
        result = conn.execute(
            text("SELECT account_status FROM users WHERE email = :email"),
            {"email": "test@example.com"}
        )
        row = result.fetchone()
        assert row[0] == "enabled"


def test_migration_rollback_safety(alembic_config, empty_database):
    """
    Test rollback doesn't lose data.

    Verify downgrade preserves critical data.
    """
    # Upgrade and insert data
    command.upgrade(alembic_config, "head")

    with empty_database.begin() as conn:
        conn.execute(
            text("INSERT INTO users (email) VALUES (:email)"),
            {"email": "test@example.com"}
        )

    # Downgrade one revision
    command.downgrade(alembic_config, "-1")

    # Verify data still exists
    with empty_database.begin() as conn:
        result = conn.execute(
            text("SELECT COUNT(*) FROM users WHERE email = :email"),
            {"email": "test@example.com"}
        )
        count = result.scalar()
        assert count == 1
```
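A useful companion test catches schema drift: after upgrading to head, Alembic's `compare_metadata` should report no difference between the models and the database. A minimal sketch, assuming the fixtures above and the `Base` from `app.models`:
```python
# A schema-drift test sketch: after upgrading to head, autogenerate should
# find no differences between the models and the live schema.
from alembic import command
from alembic.autogenerate import compare_metadata
from alembic.migration import MigrationContext

from app.models import Base


def test_no_pending_model_changes(alembic_config, empty_database):
    """Fail if the models contain changes no migration captures."""
    command.upgrade(alembic_config, "head")

    with empty_database.connect() as conn:
        context = MigrationContext.configure(conn)
        diff = compare_metadata(context, Base.metadata)

    assert diff == [], f"Models and migrations have drifted: {diff}"
```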
## ❌ Anti-Patterns
```python
# ❌ Making column NOT NULL immediately
def upgrade():
    op.add_column('users', sa.Column('phone', sa.String(), nullable=False))
    # Fails for existing rows!

# ✅ Better: Add as nullable, populate, then make NOT NULL
def upgrade():
    op.add_column('users', sa.Column('phone', sa.String(), nullable=True))
    op.execute("UPDATE users SET phone = '' WHERE phone IS NULL")
    # Make NOT NULL in a separate migration


# ❌ No downgrade implementation
def downgrade():
    pass  # Not reversible!

# ✅ Better: Implement a proper downgrade
def downgrade():
    op.drop_column('users', 'phone')


# ❌ Data migration mixed into a schema migration
def upgrade():
    op.add_column('users', sa.Column('full_name', sa.String()))
    op.execute("UPDATE users SET full_name = first_name || ' ' || last_name")
    op.drop_column('users', 'first_name')
    op.drop_column('users', 'last_name')
    # Too many changes in one migration!

# ✅ Better: Split into multiple migrations
# Migration 1: Add column
# Migration 2: Migrate data
# Migration 3: Drop old columns


# ❌ No constraint naming
def upgrade():
    op.create_foreign_key(None, 'orders', 'users', ['user_id'], ['id'])
    # Auto-generated name!

# ✅ Better: Explicit constraint names
def upgrade():
    op.create_foreign_key('fk_orders_user_id', 'orders', 'users', ['user_id'], ['id'])
```
## Best Practices Checklist
- ✅ Use descriptive migration names
- ✅ Always implement downgrade()
- ✅ Add columns as nullable first
- ✅ Use batch operations for SQLite
- ✅ Name all constraints explicitly
- ✅ Test migrations up and down
- ✅ Split complex changes into multiple migrations
- ✅ Create indexes before foreign keys
- ✅ Use transactions for data migrations
- ✅ Document breaking changes
- ✅ Test with production-like data volumes
- ✅ Keep migrations idempotent when possible (see the sketch below)
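For the last item, idempotency usually means guarding DDL so a re-run or partially applied migration does not fail. A minimal sketch using SQLAlchemy's runtime inspector; the column and index names are illustrative:
```python
# A minimal idempotency sketch: inspect the live schema before creating
# objects, so re-running the migration is a no-op. Names are illustrative.
from alembic import op
import sqlalchemy as sa


def upgrade() -> None:
    inspector = sa.inspect(op.get_bind())

    # Only add the column if it is not already present
    columns = {col["name"] for col in inspector.get_columns("users")}
    if "phone" not in columns:
        op.add_column("users", sa.Column("phone", sa.String(20), nullable=True))

    # Only create the index if it does not already exist
    indexes = {ix["name"] for ix in inspector.get_indexes("users")}
    if "ix_users_phone" not in indexes:
        op.create_index("ix_users_phone", "users", ["phone"])
```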
## Auto-Apply
When creating migrations:
1. Use `alembic revision --autogenerate -m "descriptive_name"`
2. Review generated migration carefully
3. Implement proper downgrade()
4. Add columns as nullable initially
5. Split data migrations from schema migrations
6. Name all constraints explicitly
7. Write tests for complex migrations
8. Document breaking changes in docstring
## Related Skills
- `query-optimization` - For index creation
- `type-safety` - For type hints in migrations
- `pytest-patterns` - For migration testing
- `structured-errors` - For error handling
- `docstring-format` - For migration documentation