После базовых вопросов про JOIN и GROUP BY следуют задачи на логику. Вот 8 классических «сложных» задач с объяснением решений.
Задача 1: Consecutive Days — непрерывные активные дни
Условие: Найти пользователей с активностью 7+ дней подряд.
WITH daily_activity AS (
SELECT DISTINCT user_id, DATE(created_at) AS activity_date
FROM user_events
),
consecutive AS (
SELECT
user_id,
activity_date,
activity_date - ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY activity_date) * INTERVAL '1 day' AS group_id
FROM daily_activity
)
SELECT user_id, MIN(activity_date) AS streak_start, COUNT(*) AS streak_length
FROM consecutive
GROUP BY user_id, group_id
HAVING COUNT(*) >= 7
ORDER BY streak_length DESC;
Идея: вычесть порядковый номер из даты. У последовательных дат результат одинаковый → одна группа.
Задача 2: Islands and Gaps — «острова» в данных
Условие: Найти периоды непрерывной активности подписки.
WITH subscriptions AS (
SELECT user_id, active_date
FROM subscription_days
ORDER BY user_id, active_date
),
gaps AS (
SELECT
user_id,
active_date,
LAG(active_date) OVER (PARTITION BY user_id ORDER BY active_date) AS prev_date,
CASE
WHEN active_date - LAG(active_date) OVER (PARTITION BY user_id ORDER BY active_date) > 1
OR LAG(active_date) OVER (PARTITION BY user_id ORDER BY active_date) IS NULL
THEN 1 ELSE 0
END AS is_new_island
FROM subscriptions
),
islands AS (
SELECT
user_id,
active_date,
SUM(is_new_island) OVER (PARTITION BY user_id ORDER BY active_date) AS island_id
FROM gaps
)
SELECT user_id, island_id,
MIN(active_date) AS island_start,
MAX(active_date) AS island_end,
COUNT(*) AS duration_days
FROM islands
GROUP BY user_id, island_id
ORDER BY user_id, island_start;
Задача 3: Sessionization — разбивка событий на сессии
Условие: Разбить события пользователя на сессии (пауза > 30 минут = новая сессия).
WITH events_with_gap AS (
SELECT
user_id,
event_time,
LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) AS prev_event_time,
CASE
WHEN event_time - LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) > INTERVAL '30 minutes'
OR LAG(event_time) OVER (PARTITION BY user_id ORDER BY event_time) IS NULL
THEN 1 ELSE 0
END AS is_new_session
FROM page_views
),
sessions AS (
SELECT
user_id,
event_time,
SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY event_time) AS session_id
FROM events_with_gap
)
SELECT
user_id,
session_id,
MIN(event_time) AS session_start,
MAX(event_time) AS session_end,
COUNT(*) AS events_in_session,
EXTRACT(EPOCH FROM (MAX(event_time) - MIN(event_time)))/60 AS duration_min
FROM sessions
GROUP BY user_id, session_id;
Задача 4: Медиана без PERCENTILE_CONT
Условие: Найти медиану без встроенных функций.
-- Метод: 50-е значение из 100 отсортированных
SELECT AVG(amount) AS median
FROM (
SELECT amount,
ROW_NUMBER() OVER (ORDER BY amount) AS rn,
COUNT(*) OVER () AS total
FROM orders
) t
WHERE rn IN (FLOOR((total + 1) / 2.0), CEIL((total + 1) / 2.0));
-- Работает для чётного и нечётного числа строк
Задача 5: Running N-Day Retention
Условие: Retention-кривая: сколько % пользователей из когорты активны на день N.
WITH cohort AS (
SELECT user_id, MIN(DATE(created_at)) AS cohort_day
FROM users GROUP BY user_id
),
activity AS (
SELECT DISTINCT user_id, DATE(event_time) AS active_day
FROM user_events
),
retention AS (
SELECT
c.cohort_day,
a.active_day - c.cohort_day AS day_number,
COUNT(DISTINCT c.user_id) AS active_users
FROM cohort c
JOIN activity a ON a.user_id = c.user_id
GROUP BY c.cohort_day, day_number
),
cohort_sizes AS (
SELECT cohort_day, COUNT(*) AS size FROM cohort GROUP BY 1
)
SELECT
r.cohort_day,
cs.size AS cohort_size,
r.day_number,
r.active_users,
ROUND(r.active_users::numeric / cs.size * 100, 1) AS retention_pct
FROM retention r
JOIN cohort_sizes cs ON cs.cohort_day = r.cohort_day
WHERE r.day_number BETWEEN 0 AND 30
ORDER BY r.cohort_day, r.day_number;
Задача 6: Найти «дыры» в расписании
Условие: В таблице занятостей (start_time, end_time) найти свободные промежутки.
WITH occupied AS (
SELECT start_time, end_time
FROM bookings
WHERE date = '2026-03-15'
ORDER BY start_time
),
with_next AS (
SELECT
end_time AS gap_start,
LEAD(start_time) OVER (ORDER BY start_time) AS gap_end
FROM occupied
)
SELECT gap_start, gap_end,
gap_end - gap_start AS duration
FROM with_next
WHERE gap_end IS NOT NULL
AND gap_end > gap_start;
Задача 7: Top-N в каждой группе с дополнительным условием
Условие: Найти топ-3 продукта в каждой категории по выручке за последний месяц. Исключить продукты с менее 10 продажами.
WITH monthly_sales AS (
SELECT
p.category,
p.id AS product_id,
p.name,
COUNT(*) AS units_sold,
SUM(oi.amount) AS revenue
FROM order_items oi
JOIN products p ON p.id = oi.product_id
WHERE oi.created_at >= DATE_TRUNC('month', NOW())
GROUP BY p.category, p.id, p.name
HAVING COUNT(*) >= 10 -- минимум 10 продаж
),
ranked AS (
SELECT *,
RANK() OVER (PARTITION BY category ORDER BY revenue DESC) AS rnk
FROM monthly_sales
)
SELECT category, product_id, name, units_sold, revenue, rnk
FROM ranked
WHERE rnk <= 3
ORDER BY category, rnk;
Задача 8: Найти пользователей изменивших поведение
Условие: Найти пользователей, чья активность в последние 7 дней упала более чем в 2 раза по сравнению с предыдущими 7 днями.
WITH current_week AS (
SELECT user_id, COUNT(*) AS events
FROM user_events
WHERE event_date BETWEEN CURRENT_DATE - 7 AND CURRENT_DATE
GROUP BY user_id
),
prev_week AS (
SELECT user_id, COUNT(*) AS events
FROM user_events
WHERE event_date BETWEEN CURRENT_DATE - 14 AND CURRENT_DATE - 8
GROUP BY user_id
)
SELECT
p.user_id,
p.events AS prev_week_events,
COALESCE(c.events, 0) AS curr_week_events,
ROUND(COALESCE(c.events, 0)::numeric / NULLIF(p.events, 0), 2) AS ratio
FROM prev_week p
LEFT JOIN current_week c ON c.user_id = p.user_id
WHERE COALESCE(c.events, 0) < p.events / 2.0 -- упало вдвое
AND p.events >= 5 -- был хоть сколько активен
ORDER BY ratio;
Паттерны для сложных задач
| Задача | Техника |
|---|---|
| Последовательные строки | date - ROW_NUMBER() → равные группы |
| Сессии | LAG + SUM OVER → нарастающий ID сессии |
| «Острова» | SUM(is_new_island) OVER |
| Медиана | ROW_NUMBER + total/2 |
| Топ-N в группе | RANK/ROW_NUMBER OVER (PARTITION BY) |
| Пробелы | LEAD(start) - LAG(end) |
| Изменение поведения | Два CTE + LEFT JOIN + WHERE ratio |
Эти паттерны — строительные блоки для любых сложных аналитических задач.