Files
2025-11-29 18:32:40 +08:00

12 KiB

GA4 BigQuery SQL Query Cookbook

Helper Functions (Use at Start of Queries)

-- String parameter extraction
CREATE TEMP FUNCTION GetParam(params ANY TYPE, key STRING)
RETURNS STRING AS (
  (SELECT value.string_value FROM UNNEST(params) WHERE key = key)
);

-- Integer parameter extraction
CREATE TEMP FUNCTION GetParamInt(params ANY TYPE, key STRING)
RETURNS INT64 AS (
  (SELECT value.int_value FROM UNNEST(params) WHERE key = key)
);

-- Float parameter extraction
CREATE TEMP FUNCTION GetParamFloat(params ANY TYPE, key STRING)
RETURNS FLOAT64 AS (
  (SELECT value.float_value FROM UNNEST(params) WHERE key = key)
);

-- Get any parameter type (returns as string)
CREATE TEMP FUNCTION GetParamAny(params ANY TYPE, key STRING)
RETURNS STRING AS (
  (SELECT COALESCE(
    value.string_value,
    CAST(value.int_value AS STRING),
    CAST(value.float_value AS STRING),
    CAST(value.double_value AS STRING)
  ) FROM UNNEST(params) WHERE key = key)
);

Basic Queries

1. Daily Active Users

SELECT
  event_date,
  COUNT(DISTINCT user_pseudo_id) as active_users
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
GROUP BY
  event_date
ORDER BY
  event_date

2. Top Pages by Views

SELECT
  (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as page_location,
  COUNT(*) as page_views,
  COUNT(DISTINCT user_pseudo_id) as unique_users
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name = 'page_view'
GROUP BY
  page_location
ORDER BY
  page_views DESC
LIMIT 20

3. Session Count by Source/Medium

SELECT
  traffic_source.source,
  traffic_source.medium,
  COUNT(DISTINCT CONCAT(user_pseudo_id,
    (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id')
  )) as sessions,
  COUNT(DISTINCT user_pseudo_id) as users
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
GROUP BY
  traffic_source.source,
  traffic_source.medium
ORDER BY
  sessions DESC

E-commerce Queries

4. Revenue by Date

SELECT
  event_date,
  COUNT(DISTINCT ecommerce.transaction_id) as transactions,
  COUNT(DISTINCT user_pseudo_id) as purchasers,
  SUM(ecommerce.purchase_revenue_in_usd) as revenue,
  AVG(ecommerce.purchase_revenue_in_usd) as avg_order_value
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name = 'purchase'
  AND ecommerce.transaction_id IS NOT NULL
GROUP BY
  event_date
ORDER BY
  event_date

5. Top Selling Products

SELECT
  item.item_name,
  item.item_category,
  SUM(item.quantity) as units_sold,
  SUM(item.item_revenue_in_usd) as total_revenue,
  COUNT(DISTINCT ecommerce.transaction_id) as transactions
FROM
  `project.dataset.events_*`,
  UNNEST(items) as item
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name = 'purchase'
GROUP BY
  item.item_name,
  item.item_category
ORDER BY
  total_revenue DESC
LIMIT 20

6. Conversion Funnel Analysis

WITH funnel AS (
  SELECT
    user_pseudo_id,
    MAX(IF(event_name = 'view_item_list', 1, 0)) as viewed_list,
    MAX(IF(event_name = 'view_item', 1, 0)) as viewed_item,
    MAX(IF(event_name = 'add_to_cart', 1, 0)) as added_cart,
    MAX(IF(event_name = 'begin_checkout', 1, 0)) as began_checkout,
    MAX(IF(event_name = 'purchase', 1, 0)) as purchased
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  GROUP BY
    user_pseudo_id
)
SELECT
  SUM(viewed_list) as step1_viewed_list,
  SUM(viewed_item) as step2_viewed_item,
  SUM(added_cart) as step3_added_cart,
  SUM(began_checkout) as step4_began_checkout,
  SUM(purchased) as step5_purchased,
  -- Conversion rates
  ROUND(SUM(viewed_item) / SUM(viewed_list) * 100, 2) as pct_list_to_item,
  ROUND(SUM(added_cart) / SUM(viewed_item) * 100, 2) as pct_item_to_cart,
  ROUND(SUM(began_checkout) / SUM(added_cart) * 100, 2) as pct_cart_to_checkout,
  ROUND(SUM(purchased) / SUM(began_checkout) * 100, 2) as pct_checkout_to_purchase,
  ROUND(SUM(purchased) / SUM(viewed_list) * 100, 2) as overall_conversion_rate
FROM
  funnel

7. Cart Abandonment Rate

SELECT
  event_date,
  COUNT(DISTINCT IF(event_name = 'add_to_cart', user_pseudo_id, NULL)) as users_added_cart,
  COUNT(DISTINCT IF(event_name = 'purchase', user_pseudo_id, NULL)) as users_purchased,
  ROUND((1 - COUNT(DISTINCT IF(event_name = 'purchase', user_pseudo_id, NULL)) /
    COUNT(DISTINCT IF(event_name = 'add_to_cart', user_pseudo_id, NULL))) * 100, 2) as abandonment_rate_pct
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name IN ('add_to_cart', 'purchase')
GROUP BY
  event_date
ORDER BY
  event_date

User Behavior Queries

8. User Journey (Event Sequence)

SELECT
  user_pseudo_id,
  ARRAY_AGG(
    STRUCT(
      event_timestamp,
      event_name,
      (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'page_location') as page
    )
    ORDER BY event_timestamp
  ) as journey
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX = '20250115'
  AND user_pseudo_id = 'USER_ID_HERE'
GROUP BY
  user_pseudo_id

9. Session Duration Distribution

WITH sessions AS (
  SELECT
    CONCAT(user_pseudo_id, '-',
      (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id')
    ) as session_id,
    MAX(event_timestamp) - MIN(event_timestamp) as session_duration_micros
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  GROUP BY
    session_id
)
SELECT
  CASE
    WHEN session_duration_micros < 10000000 THEN '0-10 sec'
    WHEN session_duration_micros < 30000000 THEN '10-30 sec'
    WHEN session_duration_micros < 60000000 THEN '30-60 sec'
    WHEN session_duration_micros < 180000000 THEN '1-3 min'
    WHEN session_duration_micros < 600000000 THEN '3-10 min'
    ELSE '10+ min'
  END as duration_bucket,
  COUNT(*) as session_count
FROM
  sessions
GROUP BY
  duration_bucket
ORDER BY
  MIN(session_duration_micros)

10. New vs Returning Users

WITH first_visits AS (
  SELECT
    user_pseudo_id,
    MIN(event_timestamp) as first_visit_timestamp
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20240101' AND '20250131'
  GROUP BY
    user_pseudo_id
)
SELECT
  event_date,
  COUNT(DISTINCT IF(TIMESTAMP_MICROS(event_timestamp) = TIMESTAMP_MICROS(fv.first_visit_timestamp),
    e.user_pseudo_id, NULL)) as new_users,
  COUNT(DISTINCT IF(TIMESTAMP_MICROS(event_timestamp) > TIMESTAMP_MICROS(fv.first_visit_timestamp),
    e.user_pseudo_id, NULL)) as returning_users
FROM
  `project.dataset.events_*` e
LEFT JOIN
  first_visits fv
ON
  e.user_pseudo_id = fv.user_pseudo_id
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
GROUP BY
  event_date
ORDER BY
  event_date

Attribution Queries

11. First Touch Attribution

WITH first_touch AS (
  SELECT
    user_pseudo_id,
    ARRAY_AGG(
      STRUCT(
        traffic_source.source,
        traffic_source.medium,
        traffic_source.name as campaign
      )
      ORDER BY event_timestamp LIMIT 1
    )[OFFSET(0)] as first_source
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
    AND traffic_source.source IS NOT NULL
  GROUP BY
    user_pseudo_id
),
purchases AS (
  SELECT
    user_pseudo_id,
    COUNT(DISTINCT ecommerce.transaction_id) as purchases,
    SUM(ecommerce.purchase_revenue_in_usd) as revenue
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
    AND event_name = 'purchase'
  GROUP BY
    user_pseudo_id
)
SELECT
  ft.first_source.source,
  ft.first_source.medium,
  ft.first_source.campaign,
  COUNT(DISTINCT ft.user_pseudo_id) as users,
  SUM(p.purchases) as total_purchases,
  SUM(p.revenue) as total_revenue
FROM
  first_touch ft
LEFT JOIN
  purchases p
ON
  ft.user_pseudo_id = p.user_pseudo_id
GROUP BY
  ft.first_source.source,
  ft.first_source.medium,
  ft.first_source.campaign
ORDER BY
  total_revenue DESC

12. Last Touch Attribution

WITH last_touch AS (
  SELECT
    ecommerce.transaction_id,
    ARRAY_AGG(
      STRUCT(
        traffic_source.source,
        traffic_source.medium
      )
      ORDER BY event_timestamp DESC LIMIT 1
    )[OFFSET(0)] as last_source,
    SUM(ecommerce.purchase_revenue_in_usd) as revenue
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
    AND event_name = 'purchase'
  GROUP BY
    ecommerce.transaction_id
)
SELECT
  last_source.source,
  last_source.medium,
  COUNT(DISTINCT transaction_id) as conversions,
  SUM(revenue) as total_revenue
FROM
  last_touch
GROUP BY
  last_source.source,
  last_source.medium
ORDER BY
  total_revenue DESC

Device and Technology

13. Device Category Performance

SELECT
  device.category as device_category,
  COUNT(DISTINCT user_pseudo_id) as users,
  COUNT(DISTINCT CONCAT(user_pseudo_id,
    (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id')
  )) as sessions,
  COUNTIF(event_name = 'purchase') as purchases,
  SUM(IF(event_name = 'purchase', ecommerce.purchase_revenue_in_usd, 0)) as revenue,
  ROUND(COUNTIF(event_name = 'purchase') / COUNT(DISTINCT CONCAT(user_pseudo_id,
    (SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id')
  )) * 100, 2) as conversion_rate_pct
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
GROUP BY
  device.category
ORDER BY
  users DESC

14. Browser and OS Analysis

SELECT
  device.browser,
  device.operating_system,
  COUNT(DISTINCT user_pseudo_id) as users,
  AVG((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'engagement_time_msec')) / 1000 as avg_engagement_sec
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name = 'user_engagement'
GROUP BY
  device.browser,
  device.operating_system
HAVING
  users > 100
ORDER BY
  users DESC

Cohort and Retention

15. Weekly Cohort Retention

WITH cohorts AS (
  SELECT
    user_pseudo_id,
    FORMAT_DATE('%Y-W%V', PARSE_DATE('%Y%m%d', MIN(event_date))) as cohort_week
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  GROUP BY
    user_pseudo_id
),
activity AS (
  SELECT
    user_pseudo_id,
    FORMAT_DATE('%Y-W%V', PARSE_DATE('%Y%m%d', event_date)) as activity_week
  FROM
    `project.dataset.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  GROUP BY
    user_pseudo_id,
    activity_week
)
SELECT
  c.cohort_week,
  a.activity_week,
  COUNT(DISTINCT c.user_pseudo_id) as cohort_size,
  COUNT(DISTINCT a.user_pseudo_id) as active_users,
  ROUND(COUNT(DISTINCT a.user_pseudo_id) / COUNT(DISTINCT c.user_pseudo_id) * 100, 2) as retention_pct
FROM
  cohorts c
LEFT JOIN
  activity a
ON
  c.user_pseudo_id = a.user_pseudo_id
GROUP BY
  c.cohort_week,
  a.activity_week
ORDER BY
  c.cohort_week,
  a.activity_week

Custom Dimensions and Parameters

16. Query Custom Event Parameters

SELECT
  event_name,
  (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'custom_parameter') as custom_value,
  COUNT(*) as event_count
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
  AND event_name = 'custom_event'
GROUP BY
  event_name,
  custom_value
ORDER BY
  event_count DESC

17. User Properties Analysis

SELECT
  (SELECT value.string_value FROM UNNEST(user_properties) WHERE key = 'user_tier') as user_tier,
  COUNT(DISTINCT user_pseudo_id) as users,
  SUM(IF(event_name = 'purchase', ecommerce.purchase_revenue_in_usd, 0)) as total_revenue,
  SUM(IF(event_name = 'purchase', ecommerce.purchase_revenue_in_usd, 0)) /
    COUNT(DISTINCT user_pseudo_id) as revenue_per_user
FROM
  `project.dataset.events_*`
WHERE
  _TABLE_SUFFIX BETWEEN '20250101' AND '20250131'
GROUP BY
  user_tier
ORDER BY
  total_revenue DESC

Performance Tips

  1. Always use _TABLE_SUFFIX filtering (not event_date)
  2. Filter on clustered columns (event_name, event_timestamp)
  3. Select only needed columns
  4. Use LIMIT during development
  5. Create helper functions for repeated UNNEST operations
  6. **Avoid SELECT *** unless necessary
  7. Use materialized views for frequently run queries
  8. Monitor query costs in BigQuery console