Calculate intervals on the database
Currently I collect before calculating the interval. It is probably more efficient to do it on the database.
This SQL statement works:
WITH Lagged AS (
SELECT
t.*,
COALESCE(LAG(created) OVER (ORDER BY created), FIRST_VALUE(created) OVER (ORDER BY created)) as prev_created
FROM
data.chunk t
),
IntervalAndGroup AS (
SELECT
l.*,
CASE
WHEN EXTRACT(EPOCH FROM (created - prev_created)) < 1.1 THEN FALSE
ELSE TRUE
END as is_new_group,
EXTRACT(EPOCH FROM (created - prev_created)) as interval
FROM
Lagged l
)
SELECT
i.*,
SUM(CASE WHEN is_new_group THEN 1 ELSE 0 END) OVER (ORDER BY created ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as session_id
FROM
IntervalAndGroup i;