SQL-запросы для e-commerce аналитика
Схема данных
Типичная схема e-commerce:
users (user_id, name, city, registered_at)products (product_id, name, category, price)orders (order_id, user_id, created_at, amount, status)order_items (order_id, product_id, quantity, price)
1. GMV по дням
SELECT created_at::DATE AS day, SUM(amount) AS gmv
FROM orders
WHERE status = 'paid'
GROUP BY 1 ORDER BY 1;2. AOV по месяцам с MoM
WITH monthly AS (
SELECT DATE_TRUNC('month', created_at)::DATE AS month,
AVG(amount) AS aov
FROM orders WHERE status = 'paid'
GROUP BY 1
)
SELECT month, aov,
LAG(aov) OVER (ORDER BY month) AS prev,
ROUND(100.0 * (aov - LAG(aov) OVER (ORDER BY month)) /
LAG(aov) OVER (ORDER BY month), 2) AS mom_pct
FROM monthly;3. Топ-10 товаров по выручке
SELECT p.product_id, p.name, SUM(oi.price * oi.quantity) AS revenue
FROM products p
JOIN order_items oi USING (product_id)
JOIN orders o USING (order_id)
WHERE o.status = 'paid'
GROUP BY p.product_id, p.name
ORDER BY revenue DESC
LIMIT 10;4. Категории с падением продаж MoM
WITH monthly AS (
SELECT DATE_TRUNC('month', o.created_at)::DATE AS month,
p.category, SUM(oi.price * oi.quantity) AS rev
FROM orders o
JOIN order_items oi USING (order_id)
JOIN products p USING (product_id)
WHERE o.status = 'paid'
GROUP BY 1, 2
)
SELECT month, category, rev,
LAG(rev) OVER (PARTITION BY category ORDER BY month) AS prev_rev,
ROUND(100.0 * (rev - LAG(rev) OVER (PARTITION BY category ORDER BY month)) /
LAG(rev) OVER (PARTITION BY category ORDER BY month), 2) AS change_pct
FROM monthly
WHERE month = DATE_TRUNC('month', CURRENT_DATE)::DATE;5. Клиенты с наибольшей выручкой
SELECT u.user_id, u.name, SUM(o.amount) AS total_rev, COUNT(*) AS orders
FROM users u
JOIN orders o USING (user_id)
WHERE o.status = 'paid'
GROUP BY u.user_id, u.name
ORDER BY total_rev DESC
LIMIT 20;Попробовать силы на подобных вопросах проще всего в тренажёре Карьерник — прямо в Telegram, без регистрации через сайт.
6. Средний размер корзины
SELECT AVG(items_count) AS avg_items_per_order
FROM (
SELECT order_id, COUNT(*) AS items_count
FROM order_items GROUP BY order_id
) t;7. Распределение товаров в корзине (histogram)
SELECT items_count, COUNT(*) AS orders
FROM (
SELECT order_id, COUNT(*) AS items_count
FROM order_items GROUP BY order_id
) t
GROUP BY items_count
ORDER BY items_count;8. Retention D30
WITH first_purchase AS (
SELECT user_id, MIN(created_at)::DATE AS first_date
FROM orders WHERE status = 'paid'
GROUP BY user_id
)
SELECT
COUNT(DISTINCT fp.user_id) FILTER (
WHERE EXISTS (
SELECT 1 FROM orders
WHERE user_id = fp.user_id
AND created_at::DATE = fp.first_date + 30
)
) * 1.0 / COUNT(DISTINCT fp.user_id) AS retention_d30
FROM first_purchase fp;9. RFM-анализ
WITH rfm AS (
SELECT user_id,
CURRENT_DATE - MAX(created_at)::DATE AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders WHERE status = 'paid'
GROUP BY user_id
)
SELECT user_id, recency, frequency, monetary,
NTILE(5) OVER (ORDER BY recency) AS r_score,
NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
NTILE(5) OVER (ORDER BY monetary DESC) AS m_score
FROM rfm;Классификация клиентов по R, F, M. Подробнее про RFM.
10. Товары, часто покупаемые вместе
WITH pairs AS (
SELECT a.product_id AS p1, b.product_id AS p2, COUNT(*) AS freq
FROM order_items a
JOIN order_items b ON a.order_id = b.order_id
AND a.product_id < b.product_id
GROUP BY 1, 2
)
SELECT p1, p2, freq
FROM pairs
ORDER BY freq DESC
LIMIT 10;Market basket analysis — кросс-селл рекомендации.
11. Доля категорий в выручке
SELECT p.category,
SUM(oi.price * oi.quantity) AS rev,
ROUND(100.0 * SUM(oi.price * oi.quantity) /
SUM(SUM(oi.price * oi.quantity)) OVER (), 2) AS pct
FROM products p
JOIN order_items oi USING (product_id)
JOIN orders o USING (order_id)
WHERE o.status = 'paid'
GROUP BY p.category
ORDER BY rev DESC;12. Конверсия «добавление в корзину → покупка»
WITH u AS (
SELECT user_id,
MAX(CASE WHEN event = 'add_to_cart' THEN 1 ELSE 0 END) AS carted,
MAX(CASE WHEN event = 'purchase' THEN 1 ELSE 0 END) AS bought
FROM events
WHERE event_time >= CURRENT_DATE - INTERVAL '7 day'
GROUP BY user_id
)
SELECT
SUM(carted) AS carts,
SUM(bought) AS purchases,
SUM(bought) * 100.0 / NULLIF(SUM(carted), 0) AS conv
FROM u;Пройти 30–50 задач по теме за вечер можно в Telegram-тренажёре. Это то, что отличает «знаю» от «уверенно отвечу на собесе».
13. Время от регистрации до первой покупки
SELECT AVG(first_order - registered) AS avg_days_to_first_order
FROM (
SELECT u.user_id, u.registered_at::DATE AS registered,
MIN(o.created_at)::DATE AS first_order
FROM users u
JOIN orders o USING (user_id)
GROUP BY u.user_id, u.registered_at
) t;14. Retention purchase per month (когортная таблица)
WITH cohort AS (
SELECT user_id, DATE_TRUNC('month', MIN(created_at))::DATE AS cohort_m
FROM orders WHERE status = 'paid' GROUP BY user_id
),
activity AS (
SELECT c.user_id, c.cohort_m,
EXTRACT(MONTH FROM AGE(
DATE_TRUNC('month', o.created_at), c.cohort_m
))::int AS m_num
FROM cohort c
JOIN orders o USING (user_id)
WHERE o.status = 'paid'
)
SELECT cohort_m, m_num, COUNT(DISTINCT user_id) AS active
FROM activity
GROUP BY cohort_m, m_num
ORDER BY cohort_m, m_num;15. Refund rate
SELECT DATE_TRUNC('month', created_at)::DATE AS month,
COUNT(*) FILTER (WHERE status = 'paid') AS paid,
COUNT(*) FILTER (WHERE status = 'refunded') AS refunded,
COUNT(*) FILTER (WHERE status = 'refunded') * 100.0 /
COUNT(*) FILTER (WHERE status IN ('paid', 'refunded')) AS refund_rate
FROM orders
GROUP BY 1 ORDER BY 1;Как использовать
Эти запросы — шаблоны. В продакшне их обычно кладут в Data Mart или dbt-модель для повторного использования.
Читайте также
FAQ
Зачем фильтр status = 'paid'?
Исключает отменённые и несостоявшиеся заказы. Метрики считают только по подтверждённым.
Почему считать по orders, а не order_items?
GMV — по orders (суммарная стоимость). Популярность товаров — по order_items (каждая позиция).
Как оптимизировать такие запросы?
Индексы на user_id, product_id, created_at, status. Partition таблицы по дате для крупных e-commerce.
dbt или VIEW для таких запросов?
Для production — dbt. Для экспериментов — VIEW. Runtime зависит от объёма данных.