-- moments/crates/moments-data/migrations/0005_dedup_gitea_events.sql

-- Collapse duplicate Gitea events introduced by polling both the user
-- activity feed and per-org activity feeds.
--
-- Gitea writes one Action row per interested user-context: a push to
-- helexa/cortex by user grenade produces two rows, one with
-- user_id=grenade and one with user_id=helexa. Everything else (op_type,
-- act_user_id, repo_id, ref_name, comment_id, created) is identical.
-- Our prior id scheme (gitea:{action_row_id}) gave them different ids,
-- so the upsert-on-id dedup never fired and the timeline rendered each
-- push twice.
--
-- This migration re-keys every existing gitea row to the same canonical
-- id formula that `parse_gitea_event` now emits, deleting duplicates
-- encountered along the way. Idempotent: the canonical id is derived from
-- the payload alone, so on a second run every row's id already equals its
-- canonical id and neither the DELETE nor the UPDATE touches anything.

-- Build a session-local lookup from each gitea row's current id (old_id) to
-- the canonical id derived from its payload (new_id). Every component is
-- coalesced to a non-NULL default, so new_id is never NULL; concat_ws only
-- skips NULL arguments, so an empty component still keeps its ':' separator.
CREATE TEMP TABLE _gitea_canonical AS
SELECT
    e.id AS old_id,
    concat_ws(
        ':',
        'gitea',
        coalesce(e.payload->>'op_type', ''),
        -- Actor and repo ids: prefer the flat key, then the nested object's id.
        coalesce(e.payload->>'act_user_id', e.payload->'act_user'->>'id', '0'),
        coalesce(e.payload->>'repo_id', e.payload->'repo'->>'id', '0'),
        coalesce(e.payload->>'ref_name', ''),
        coalesce(e.payload->>'comment_id', '0'),
        coalesce(e.payload->>'created', '')
    ) AS new_id
FROM events AS e
WHERE e.source = 'gitea';

-- Remove the surplus rows in each canonical-id group. Within a group the rows
-- are ranked by their current id; the first-ranked row survives (a stable but
-- otherwise arbitrary choice) and every later one is deleted. A group of one,
-- including a row whose id is already canonical, has no later rows, so
-- nothing is deleted for it.
WITH ranked AS (
    SELECT
        old_id,
        row_number() OVER (PARTITION BY new_id ORDER BY old_id) AS group_rank
    FROM _gitea_canonical
)
DELETE FROM events AS doomed
USING ranked
WHERE doomed.id = ranked.old_id
  AND ranked.group_rank >= 2;

-- Re-key each surviving row whose id is not yet canonical. Rows removed by the
-- dedup step no longer match the join, and rows that are already canonical
-- are filtered out, so a second run changes nothing.
--
-- NOTE(review): PostgreSQL checks a non-deferrable PRIMARY KEY row by row, not
-- at statement end, so this statement must never assign an id that another
-- row still holds at that moment. The dedup above leaves one row per canonical
-- id, so that holds unless a surviving row currently carries an id equal to a
-- *different* row's canonical id. Swapping ids between rows inside a single
-- UPDATE is only legal when the key is declared DEFERRABLE (not visible here).
UPDATE events AS target
SET id = canon.new_id
FROM _gitea_canonical AS canon
WHERE target.id = canon.old_id
  AND canon.old_id <> canon.new_id;

-- The lookup is only needed by the DELETE and UPDATE above; discard it
-- explicitly rather than leaving it around until the session ends. The
-- pg_temp qualifier pins the drop to this session's temporary schema.
DROP TABLE pg_temp._gitea_canonical;