CTEs & Recursive Queries
Common Table Expressions (CTEs) make complex SQL readable and maintainable. Recursive CTEs handle hierarchical data that would otherwise require application code. These 5 challenges cover patterns asked at senior-level interviews.
Challenge 1: Multi-Step CTE — Customer Lifetime Value
Schema
CREATE TABLE purchases (
purchase_id INT PRIMARY KEY,
customer_id INT,
amount DECIMAL(10,2),
purchase_date DATE
);
INSERT INTO purchases VALUES
(1, 1, 150.00, '2024-01-05'), (2, 2, 89.99, '2024-01-07'),
(3, 1, 220.00, '2024-01-15'), (4, 3, 499.99, '2024-01-18'),
(5, 2, 175.50, '2024-02-01'), (6, 1, 65.00, '2024-02-10'),
(7, 4, 310.00, '2024-02-14'), (8, 3, 125.00, '2024-02-20'),
(9, 2, 340.00, '2024-03-01'), (10, 1, 190.00, '2024-03-05'),
(11, 5, 55.00, '2024-03-10'), (12, 4, 420.00, '2024-03-15');
Solution
WITH customer_totals AS (
-- Step 1: Aggregate per customer
SELECT
customer_id,
COUNT(*) AS num_purchases,
SUM(amount) AS lifetime_value,
MIN(purchase_date) AS first_purchase,
MAX(purchase_date) AS last_purchase
FROM purchases
GROUP BY customer_id
),
company_total AS (
-- Step 2: Get overall total for percentage calculation
SELECT SUM(lifetime_value) AS total_revenue
FROM customer_totals
),
ranked AS (
-- Step 3: Rank customers and calculate percentages
SELECT
ct.customer_id,
ct.num_purchases,
ct.lifetime_value,
ct.first_purchase,
ct.last_purchase,
RANK() OVER (ORDER BY ct.lifetime_value DESC) AS value_rank,
ROUND(100.0 * ct.lifetime_value / co.total_revenue, 1) AS revenue_pct
FROM customer_totals ct
CROSS JOIN company_total co
)
SELECT * FROM ranked ORDER BY value_rank;
-- Result:
-- customer_id | num_purchases | lifetime_value | first_purchase | last_purchase | value_rank | revenue_pct
-- ------------+---------------+----------------+----------------+---------------+------------+------------
-- 4 | 2 | 730.00 | 2024-02-14 | 2024-03-15 | 1 | 27.8
-- 1 | 4 | 625.00 | 2024-01-05 | 2024-03-05 | 2 | 23.8
-- 3 | 2 | 624.99 | 2024-01-18 | 2024-02-20 | 3 | 23.8
-- 2 | 3 | 605.49 | 2024-01-07 | 2024-03-01 | 4 | 23.1
-- 5 | 1 | 55.00 | 2024-03-10 | 2024-03-10 | 5 | 2.1
Key insight: CTEs let you break complex logic into named, readable steps. Each CTE can reference the previous one. This is far cleaner than nested subqueries and easier to debug. In production dbt models, CTEs are the standard pattern for building transformation logic.
Challenge 2: Recursive CTE — Org Chart Hierarchy
manager_id column, build the full org chart showing each employee's level in the hierarchy, their full management chain, and the total number of direct and indirect reports for each manager.Schema
CREATE TABLE org (
emp_id INT PRIMARY KEY,
name VARCHAR(100),
manager_id INT,
title VARCHAR(100)
);
INSERT INTO org VALUES
(1, 'Sarah', NULL, 'CEO'),
(2, 'Mike', 1, 'VP Engineering'),
(3, 'Lisa', 1, 'VP Sales'),
(4, 'Tom', 2, 'Engineering Manager'),
(5, 'Anna', 2, 'Data Science Lead'),
(6, 'Jake', 3, 'Sales Manager'),
(7, 'Dev1', 4, 'Senior Engineer'),
(8, 'Dev2', 4, 'Engineer'),
(9, 'DS1', 5, 'Data Scientist'),
(10, 'Rep1', 6, 'Sales Rep');
Solution
-- Part 1: Build the hierarchy with recursive CTE
WITH RECURSIVE org_tree AS (
-- Anchor: start with the CEO (no manager)
SELECT
emp_id,
name,
title,
manager_id,
0 AS level,
name AS management_chain
FROM org
WHERE manager_id IS NULL
UNION ALL
-- Recursive: join each employee to their manager in the tree
SELECT
o.emp_id,
o.name,
o.title,
o.manager_id,
ot.level + 1,
ot.management_chain || ' > ' || o.name
FROM org o
INNER JOIN org_tree ot ON o.manager_id = ot.emp_id
)
SELECT
REPEAT(' ', level) || name AS indented_name,
title,
level,
management_chain
FROM org_tree
ORDER BY management_chain;
-- Result:
-- indented_name | title | level | management_chain
-- -----------------+--------------------+-------+----------------------------------
-- Sarah | CEO | 0 | Sarah
-- Lisa | VP Sales | 1 | Sarah > Lisa
-- Jake | Sales Manager | 2 | Sarah > Lisa > Jake
-- Rep1 | Sales Rep | 3 | Sarah > Lisa > Jake > Rep1
-- Mike | VP Engineering | 1 | Sarah > Mike
-- Anna | Data Science Lead | 2 | Sarah > Mike > Anna
-- DS1 | Data Scientist | 3 | Sarah > Mike > Anna > DS1
-- Tom | Eng Manager | 2 | Sarah > Mike > Tom
-- Dev1 | Senior Engineer | 3 | Sarah > Mike > Tom > Dev1
-- Dev2 | Engineer | 3 | Sarah > Mike > Tom > Dev2
-- Part 2: Count direct + indirect reports per manager
WITH RECURSIVE subordinates AS (
SELECT emp_id, manager_id, emp_id AS root_manager
FROM org
WHERE manager_id IS NOT NULL
UNION ALL
SELECT s.emp_id, o.manager_id, s.root_manager
FROM subordinates s
INNER JOIN org o ON s.root_manager = o.emp_id
WHERE o.manager_id IS NOT NULL
)
SELECT
o.name,
o.title,
COUNT(DISTINCT s.emp_id) AS total_reports
FROM org o
LEFT JOIN subordinates s ON o.emp_id = s.root_manager
GROUP BY o.emp_id, o.name, o.title
HAVING COUNT(DISTINCT s.emp_id) > 0
ORDER BY total_reports DESC;
WHERE level < 100 in the recursive member. PostgreSQL also supports CYCLE detection in newer versions.Challenge 3: Recursive CTE — Date Spine Generation
Solution
-- Generate a date spine using recursive CTE
WITH RECURSIVE date_spine AS (
SELECT DATE '2024-01-01' AS dt
UNION ALL
SELECT dt + INTERVAL '1 day'
FROM date_spine
WHERE dt < DATE '2024-01-31'
),
daily_revenue AS (
SELECT
purchase_date,
SUM(amount) AS revenue
FROM purchases
WHERE purchase_date BETWEEN '2024-01-01' AND '2024-01-31'
GROUP BY purchase_date
)
SELECT
ds.dt AS date,
COALESCE(dr.revenue, 0) AS revenue,
SUM(COALESCE(dr.revenue, 0)) OVER (ORDER BY ds.dt) AS cumulative_revenue
FROM date_spine ds
LEFT JOIN daily_revenue dr ON ds.dt = dr.purchase_date
ORDER BY ds.dt;
-- Result (showing sample rows):
-- date | revenue | cumulative_revenue
-- -----------+---------+-------------------
-- 2024-01-01 | 0 | 0
-- 2024-01-02 | 0 | 0
-- ...
-- 2024-01-05 | 150.00 | 150.00
-- 2024-01-06 | 0 | 150.00
-- 2024-01-07 | 89.99 | 239.99
-- ...
-- 2024-01-15 | 220.00 | 459.99
-- ...
-- 2024-01-18 | 499.99 | 959.98
-- ...
-- 2024-01-31 | 0 | 959.98
Key insight: Date spines are essential for time-series analysis. Without them, days with no events disappear from your results, creating misleading gaps. In production, most data warehouses have a pre-built dim_dates table, but interviewers often ask you to generate one on the fly using recursive CTEs.
Challenge 4: CTE — Month-over-Month Growth
Solution
WITH monthly_revenue AS (
SELECT
DATE_TRUNC('month', purchase_date) AS month,
SUM(amount) AS revenue,
COUNT(*) AS num_transactions
FROM purchases
GROUP BY DATE_TRUNC('month', purchase_date)
),
with_prev AS (
SELECT
month,
revenue,
num_transactions,
LAG(revenue) OVER (ORDER BY month) AS prev_month_revenue,
LAG(num_transactions) OVER (ORDER BY month) AS prev_month_txns
FROM monthly_revenue
)
SELECT
month,
revenue,
num_transactions,
COALESCE(prev_month_revenue, 0) AS prev_revenue,
revenue - COALESCE(prev_month_revenue, 0) AS abs_change,
CASE
WHEN prev_month_revenue IS NULL THEN NULL
WHEN prev_month_revenue = 0 THEN NULL
ELSE ROUND(100.0 * (revenue - prev_month_revenue) / prev_month_revenue, 1)
END AS pct_change
FROM with_prev
ORDER BY month;
-- Result:
-- month | revenue | num_txns | prev_revenue | abs_change | pct_change
-- -----------+---------+----------+--------------+------------+-----------
-- 2024-01-01 | 959.98 | 4 | 0 | 959.98 | NULL
-- 2024-02-01 | 675.50 | 4 | 959.98 | -284.48 | -29.6
-- 2024-03-01 | 1005.00 | 4 | 675.50 | 329.50 | 48.8
Key insight: Combining CTEs with window functions (LAG) is the standard pattern for period-over-period analysis. The CASE WHEN for percentage change handles two edge cases: no previous month (NULL) and zero previous revenue (division by zero). Interviewers specifically check whether you handle these edge cases.
Challenge 5: Recursive CTE — Bill of Materials Explosion
Schema
CREATE TABLE bom (
component_id INT PRIMARY KEY,
name VARCHAR(100),
parent_id INT,
unit_cost DECIMAL(10,2),
quantity INT
);
INSERT INTO bom VALUES
(1, 'Laptop', NULL, 0, 1),
(2, 'Motherboard', 1, 50.00, 1),
(3, 'CPU', 2, 200.00, 1),
(4, 'RAM Stick', 2, 45.00, 2),
(5, 'Screen', 1, 120.00, 1),
(6, 'LCD Panel', 5, 80.00, 1),
(7, 'Backlight', 5, 15.00, 1),
(8, 'Battery', 1, 65.00, 1),
(9, 'Keyboard', 1, 25.00, 1),
(10, 'Screw Set', 1, 2.00, 20);
Solution
WITH RECURSIVE bom_tree AS (
-- Anchor: top-level product
SELECT
component_id,
name,
parent_id,
unit_cost,
quantity,
unit_cost * quantity AS line_cost,
0 AS depth,
name AS path
FROM bom
WHERE parent_id IS NULL
UNION ALL
-- Recursive: each sub-component
SELECT
b.component_id,
b.name,
b.parent_id,
b.unit_cost,
b.quantity,
b.unit_cost * b.quantity AS line_cost,
bt.depth + 1,
bt.path || ' > ' || b.name
FROM bom b
INNER JOIN bom_tree bt ON b.parent_id = bt.component_id
)
SELECT
REPEAT(' ', depth) || name AS component,
quantity,
unit_cost,
line_cost,
path
FROM bom_tree
ORDER BY path;
-- Result:
-- component | quantity | unit_cost | line_cost | path
-- -----------------+----------+-----------+-----------+-----------------------------
-- Laptop | 1 | 0.00 | 0.00 | Laptop
-- Battery | 1 | 65.00 | 65.00 | Laptop > Battery
-- Keyboard | 1 | 25.00 | 25.00 | Laptop > Keyboard
-- Motherboard | 1 | 50.00 | 50.00 | Laptop > Motherboard
-- CPU | 1 | 200.00 | 200.00 | Laptop > Motherboard > CPU
-- RAM Stick | 2 | 45.00 | 90.00 | Laptop > Motherboard > RAM Stick
-- Screen | 1 | 120.00 | 120.00 | Laptop > Screen
-- Backlight | 1 | 15.00 | 15.00 | Laptop > Screen > Backlight
-- LCD Panel | 1 | 80.00 | 80.00 | Laptop > Screen > LCD Panel
-- Screw Set | 20 | 2.00 | 40.00 | Laptop > Screw Set
-- Total cost:
SELECT SUM(line_cost) AS total_product_cost FROM bom_tree;
-- total_product_cost: 685.00
Key insight: Recursive CTEs follow a pattern: anchor member (base case) UNION ALL recursive member (references the CTE itself). The recursion terminates when the recursive member returns no rows. This BOM pattern is used in manufacturing, network topology, and any domain with hierarchical data.
Lilly Tech Systems