From 04277e74931cbbd5102b96c455063a332d1ac347 Mon Sep 17 00:00:00 2001 From: Dylan Jeffers Date: Tue, 12 May 2026 21:10:39 -0700 Subject: [PATCH] perf(for-you): bound my_artist_affinity and follow_set by recency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After capping my_saved_artists (PR #805), the endpoint still times out on prod for power users. Two remaining unbounded CTEs are doing full scans of per-user history: * my_artist_affinity unions saves+reposts+plays for the user. A heavy listener can have hundreds of thousands of play rows; the inner scan dwarfs the rest of the query. Cap each source to its most recent N: 200 saves, 200 reposts, 500 plays. * follow_set is the followee set for the user; a power user can follow several thousand artists, and that set then has to be joined against every recent-track upload in cand_in_network. Cap to 500 most recently followed. Recency is the right axis for all of these caps: older history is a weaker signal of current taste, and the bounds match the magnitude of the hidden costs (plays >> saves ≈ reposts). Existing For You tests have fixtures well under all caps and still pass unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1_users_feed_for_you.go | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/api/v1_users_feed_for_you.go b/api/v1_users_feed_for_you.go index 1e3140b1..79460080 100644 --- a/api/v1_users_feed_for_you.go +++ b/api/v1_users_feed_for_you.go @@ -100,12 +100,19 @@ func (app *ApiServer) v1UsersFeedForYou(c *fiber.Ctx) error { sql := ` WITH + -- Cap to the 500 most-recently-followed users. A power user with + -- thousands of follows pulls a huge hash table here that then has to + -- join against every recent track upload to find in-network candidates, + -- so the query can stall. Recent follows are a better signal of + -- current taste anyway. 
follow_set AS ( SELECT followee_user_id AS user_id FROM follows WHERE follower_user_id = @userId AND is_current = true AND is_delete = false + ORDER BY created_at DESC + LIMIT 500 ), my_saved_tracks AS ( SELECT save_item_id AS track_id @@ -157,20 +164,31 @@ func (app *ApiServer) v1UsersFeedForYou(c *fiber.Ctx) error { ), -- Per-artist engagement strength (saves + reposts + plays of any of -- their tracks by me). Used for the social_boost multiplier. + -- + -- Each sub-select is bounded by recency: a heavy listener can have + -- hundreds of thousands of play rows, and the unbounded union forces + -- a full scan of those rows on every request. Recent engagement is + -- the right signal anyway — old listens say less about current taste. my_artist_affinity AS ( SELECT t.owner_id AS artist_id, LN(1 + COUNT(*)) AS affinity FROM ( - SELECT save_item_id AS track_id FROM saves + (SELECT save_item_id AS track_id FROM saves WHERE user_id = @userId AND save_type = 'track' AND is_current = true AND is_delete = false + ORDER BY created_at DESC + LIMIT 200) UNION ALL - SELECT repost_item_id AS track_id FROM reposts + (SELECT repost_item_id AS track_id FROM reposts WHERE user_id = @userId AND repost_type = 'track' AND is_current = true AND is_delete = false + ORDER BY created_at DESC + LIMIT 200) UNION ALL - SELECT play_item_id AS track_id FROM plays + (SELECT play_item_id AS track_id FROM plays WHERE user_id = @userId + ORDER BY created_at DESC + LIMIT 500) ) eng JOIN tracks t ON t.track_id = eng.track_id GROUP BY t.owner_id