Files
gh-rafaelcalleja-claude-mar…/skills/databases/references/postgresql-queries.md
2025-11-30 08:48:52 +08:00

9.9 KiB

PostgreSQL SQL Queries

SQL queries in PostgreSQL: SELECT, JOINs, subqueries, CTEs, window functions, and advanced patterns.

Basic SELECT

Simple Queries

-- Select all columns
SELECT * FROM users;

-- Select specific columns
SELECT id, name, email FROM users;

-- With alias
SELECT name AS full_name, email AS contact_email FROM users;

-- Distinct values
SELECT DISTINCT status FROM orders;

-- Count rows
SELECT COUNT(*) FROM users;
SELECT COUNT(DISTINCT status) FROM orders;

WHERE Clause

-- Equality
SELECT * FROM users WHERE status = 'active';

-- Comparison
SELECT * FROM products WHERE price > 100;
SELECT * FROM orders WHERE total BETWEEN 100 AND 500;

-- Pattern matching
SELECT * FROM users WHERE email LIKE '%@example.com';
SELECT * FROM users WHERE name ILIKE 'john%';  -- case-insensitive

-- IN operator
SELECT * FROM orders WHERE status IN ('pending', 'processing');

-- NULL checks
SELECT * FROM users WHERE deleted_at IS NULL;
SELECT * FROM users WHERE phone_number IS NOT NULL;

-- Logical operators
SELECT * FROM products WHERE price > 100 AND stock > 0;
SELECT * FROM users WHERE status = 'active' OR verified = true;
SELECT * FROM products WHERE NOT (price > 1000);

ORDER BY

-- Ascending (default)
SELECT * FROM users ORDER BY created_at;

-- Descending
SELECT * FROM users ORDER BY created_at DESC;

-- Multiple columns
SELECT * FROM orders ORDER BY status ASC, created_at DESC;

-- NULL handling
SELECT * FROM users ORDER BY last_login NULLS FIRST;
SELECT * FROM users ORDER BY last_login NULLS LAST;

LIMIT and OFFSET

-- Limit results
SELECT * FROM users LIMIT 10;

-- Pagination
SELECT * FROM users ORDER BY id LIMIT 10 OFFSET 20;

-- Alternative: FETCH
SELECT * FROM users OFFSET 20 ROWS FETCH NEXT 10 ROWS ONLY;

JOINs

INNER JOIN

-- Match rows from both tables
SELECT orders.id, orders.total, customers.name
FROM orders
INNER JOIN customers ON orders.customer_id = customers.id;

-- Short syntax
SELECT o.id, o.total, c.name
FROM orders o
JOIN customers c ON o.customer_id = c.id;

-- Multiple joins
SELECT o.id, c.name, p.name AS product
FROM orders o
JOIN customers c ON o.customer_id = c.id
JOIN order_items oi ON oi.order_id = o.id
JOIN products p ON oi.product_id = p.id;

LEFT JOIN (LEFT OUTER JOIN)

-- All rows from left table, matching rows from right
SELECT c.name, o.id AS order_id
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id;

-- Find customers without orders
SELECT c.name
FROM customers c
LEFT JOIN orders o ON c.id = o.customer_id
WHERE o.id IS NULL;

RIGHT JOIN (RIGHT OUTER JOIN)

-- All rows from right table, matching rows from left
SELECT c.name, o.id AS order_id
FROM orders o
RIGHT JOIN customers c ON o.customer_id = c.id;

FULL OUTER JOIN

-- All rows from both tables
SELECT c.name, o.id AS order_id
FROM customers c
FULL OUTER JOIN orders o ON c.id = o.customer_id;

CROSS JOIN

-- Cartesian product (all combinations)
SELECT c.name, p.name
FROM colors c
CROSS JOIN products p;

Self Join

-- Join table to itself
SELECT e1.name AS employee, e2.name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.id;

Subqueries

Scalar Subquery

-- Return single value
SELECT name, salary,
  (SELECT AVG(salary) FROM employees) AS avg_salary
FROM employees;

IN Subquery

-- Match against set of values
SELECT name FROM customers
WHERE id IN (
  SELECT customer_id FROM orders WHERE total > 1000
);

EXISTS Subquery

-- Check if subquery returns any rows
SELECT name FROM customers c
WHERE EXISTS (
  SELECT 1 FROM orders o WHERE o.customer_id = c.id
);

-- NOT EXISTS
SELECT name FROM customers c
WHERE NOT EXISTS (
  SELECT 1 FROM orders o WHERE o.customer_id = c.id
);

Correlated Subquery

-- Subquery references outer query
SELECT name, salary FROM employees e1
WHERE salary > (
  SELECT AVG(salary) FROM employees e2
  WHERE e2.department_id = e1.department_id
);

Common Table Expressions (CTEs)

Simple CTE

-- Named temporary result set
WITH active_users AS (
  SELECT id, name, email FROM users WHERE status = 'active'
)
SELECT * FROM active_users WHERE created_at > '2024-01-01';

Multiple CTEs

WITH
  active_customers AS (
    SELECT id, name FROM customers WHERE active = true
  ),
  recent_orders AS (
    SELECT customer_id, SUM(total) AS total_spent
    FROM orders
    WHERE order_date > CURRENT_DATE - INTERVAL '30 days'
    GROUP BY customer_id
  )
SELECT c.name, COALESCE(o.total_spent, 0) AS spent
FROM active_customers c
LEFT JOIN recent_orders o ON c.id = o.customer_id;

Recursive CTE

-- Tree traversal, hierarchical data
WITH RECURSIVE category_tree AS (
  -- Base case: root categories
  SELECT id, name, parent_id, 0 AS level
  FROM categories
  WHERE parent_id IS NULL

  UNION ALL

  -- Recursive case: child categories
  SELECT c.id, c.name, c.parent_id, ct.level + 1
  FROM categories c
  JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT * FROM category_tree ORDER BY level, name;

-- Employee hierarchy
WITH RECURSIVE org_chart AS (
  SELECT id, name, manager_id, 1 AS level
  FROM employees
  WHERE manager_id IS NULL

  UNION ALL

  SELECT e.id, e.name, e.manager_id, oc.level + 1
  FROM employees e
  JOIN org_chart oc ON e.manager_id = oc.id
)
SELECT * FROM org_chart;

Aggregate Functions

Basic Aggregates

-- COUNT, SUM, AVG, MIN, MAX
SELECT
  COUNT(*) AS total_orders,
  SUM(total) AS total_revenue,
  AVG(total) AS avg_order_value,
  MIN(total) AS min_order,
  MAX(total) AS max_order
FROM orders;

-- COUNT variations
SELECT COUNT(*) FROM users;              -- All rows
SELECT COUNT(phone_number) FROM users;   -- Non-NULL values
SELECT COUNT(DISTINCT status) FROM orders; -- Unique values

GROUP BY

-- Aggregate by groups
SELECT status, COUNT(*) AS count
FROM orders
GROUP BY status;

-- Multiple grouping columns
SELECT customer_id, status, COUNT(*) AS count
FROM orders
GROUP BY customer_id, status;

-- With aggregate functions
SELECT customer_id,
  COUNT(*) AS order_count,
  SUM(total) AS total_spent,
  AVG(total) AS avg_order
FROM orders
GROUP BY customer_id;

HAVING

-- Filter after aggregation
SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
HAVING SUM(total) > 1000;

-- Multiple conditions
SELECT status, COUNT(*) AS count
FROM orders
GROUP BY status
HAVING COUNT(*) > 10;

Window Functions

ROW_NUMBER

-- Assign unique number to each row
SELECT id, name, salary,
  ROW_NUMBER() OVER (ORDER BY salary DESC) AS rank
FROM employees;

-- Partition by group
SELECT id, department, salary,
  ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS dept_rank
FROM employees;

RANK / DENSE_RANK

-- RANK: gaps in ranking for ties
-- DENSE_RANK: no gaps
SELECT id, name, salary,
  RANK() OVER (ORDER BY salary DESC) AS rank,
  DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank
FROM employees;

LAG / LEAD

-- Access previous/next row
SELECT date, revenue,
  LAG(revenue) OVER (ORDER BY date) AS prev_revenue,
  LEAD(revenue) OVER (ORDER BY date) AS next_revenue,
  revenue - LAG(revenue) OVER (ORDER BY date) AS change
FROM daily_sales;

Running Totals

-- Cumulative sum
SELECT date, amount,
  SUM(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
FROM transactions;

-- Simpler syntax
SELECT date, amount,
  SUM(amount) OVER (ORDER BY date) AS running_total
FROM transactions;

Moving Averages

-- 7-day moving average
SELECT date, value,
  AVG(value) OVER (
    ORDER BY date
    ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
  ) AS moving_avg_7d
FROM metrics;

Advanced Patterns

CASE Expressions

-- Simple CASE
SELECT name,
  CASE status
    WHEN 'active' THEN 'Active User'
    WHEN 'pending' THEN 'Pending Verification'
    ELSE 'Inactive'
  END AS status_label
FROM users;

-- Searched CASE
SELECT name, age,
  CASE
    WHEN age < 18 THEN 'Minor'
    WHEN age BETWEEN 18 AND 65 THEN 'Adult'
    ELSE 'Senior'
  END AS age_group
FROM users;

COALESCE

-- Return first non-NULL value
SELECT name, COALESCE(phone_number, email, 'No contact') AS contact
FROM users;

NULLIF

-- Return NULL if values equal
SELECT name, NULLIF(status, 'deleted') AS active_status
FROM users;

Array Operations

-- Array aggregate
SELECT customer_id, ARRAY_AGG(product_id) AS products
FROM order_items
GROUP BY customer_id;

-- Unnest array
SELECT unnest(ARRAY[1, 2, 3, 4, 5]);

-- Array contains
SELECT * FROM products WHERE tags @> ARRAY['featured'];

JSON Operations

-- Query JSON/JSONB
SELECT data->>'name' AS name FROM documents;
SELECT data->'address'->>'city' AS city FROM documents;

-- Check key exists
SELECT * FROM documents WHERE data ? 'email';

-- JSONB operators
SELECT * FROM documents WHERE data @> '{"status": "active"}';

-- JSON aggregation
SELECT json_agg(name) FROM users;
SELECT json_object_agg(id, name) FROM users;

Set Operations

UNION

-- Combine results (removes duplicates)
SELECT name FROM customers
UNION
SELECT name FROM suppliers;

-- Keep duplicates
SELECT name FROM customers
UNION ALL
SELECT name FROM suppliers;

INTERSECT

-- Common rows
SELECT email FROM users
INTERSECT
SELECT email FROM subscribers;

EXCEPT

-- Rows in first query but not second
SELECT email FROM users
EXCEPT
SELECT email FROM unsubscribed;

Best Practices

  1. Use indexes on WHERE, JOIN, ORDER BY columns
  2. **Avoid SELECT *** - specify needed columns
  3. Use EXISTS instead of IN for large subqueries
  4. Filter early - WHERE before JOIN when possible
  5. Use CTEs for readability over nested subqueries
  6. Parameterize queries - prevent SQL injection
  7. Use window functions instead of self-joins
  8. Test with EXPLAIN - analyze query plans