diff --git a/api/v1_events_remix_contests.go b/api/v1_events_remix_contests.go index 3e75696c..859cede3 100644 --- a/api/v1_events_remix_contests.go +++ b/api/v1_events_remix_contests.go @@ -36,14 +36,17 @@ func (app *ApiServer) v1EventsRemixContests(c *fiber.Ctx) error { "u.is_deactivated = false", "u.is_available = true", "(e.entity_type != 'track' OR (t.track_id IS NOT NULL AND t.is_delete = false AND t.is_unlisted = false))", - // Shadow-ban filters — mirror what v1_event_comments.go applies to - // comment authors. Two parallel signals so the filter catches the - // full population: low-quality / impersonator / bot accounts via - // `aggregate_user.score < 0`, and community-flagged users via the - // karma-muted set (sum of muters' follower_count crosses the - // karmaCommentCountThreshold). Hosts in either bucket disappear - // from the discovery list. - "e.user_id NOT IN (SELECT user_id FROM low_abuse_score)", + // Shadow-ban filters — two parallel community signals lifted from + // v1_event_comments / v1_track_comments. A host disappears from the + // discovery list if either: + // 1. They authored a comment that crossed the high-karma-reporter + // threshold (sum of reporters' follower_count exceeds + // karmaCommentCountThreshold) — same threshold that hides the + // comment itself on comment endpoints, just lifted from + // comment_id to author user_id. + // 2. They are in the karma-muted set (sum of muters' + // follower_count crosses karmaCommentCountThreshold). + "e.user_id NOT IN (SELECT user_id FROM karma_reported_authors)", "e.user_id NOT IN (SELECT muted_user_id FROM muted_by_karma)", } @@ -67,8 +70,23 @@ func (app *ApiServer) v1EventsRemixContests(c *fiber.Ctx) error { GROUP BY muted_user_id HAVING SUM(aggregate_user.follower_count) >= @karmaCommentCountThreshold ), - low_abuse_score AS ( - SELECT user_id FROM aggregate_user WHERE score < 0 + -- Comments that crossed the high-karma reporter threshold — identical + -- shape to high_karma_reporters in v1_track_comments / v1_event_comments. + high_karma_reporters AS ( + SELECT comment_reports.comment_id + FROM comment_reports + JOIN aggregate_user ON comment_reports.user_id = aggregate_user.user_id + WHERE comment_reports.is_delete = false + GROUP BY comment_reports.comment_id + HAVING SUM(aggregate_user.follower_count) >= @karmaCommentCountThreshold + ), + -- Authors of any comment in high_karma_reporters. Lifts the per-comment + -- shadow-ban signal up to the user level so the contest list hides + -- hosts whose comments are already being hidden by the same threshold. + karma_reported_authors AS ( + SELECT DISTINCT comments.user_id + FROM comments + JOIN high_karma_reporters ON high_karma_reporters.comment_id = comments.comment_id ) SELECT e.event_id, diff --git a/api/v1_events_remix_contests_test.go b/api/v1_events_remix_contests_test.go index db72adb5..f0d1e448 100644 --- a/api/v1_events_remix_contests_test.go +++ b/api/v1_events_remix_contests_test.go @@ -464,23 +464,29 @@ func TestRemixContestsExcludesUnavailableContent(t *testing.T) { // TestRemixContestsExcludesShadowbannedHosts covers the two parallel // shadow-ban signals applied to the discovery list: -// 1. `aggregate_user.score < 0` (account-quality signal — bots, -// impersonators, fast-challenge runners). +// 1. `karma_reported_authors` — host authored a comment that crossed the +// high-karma-reporter threshold (sum of reporters' follower_count +// >= karmaCommentCountThreshold). Same threshold that hides the +// comment itself on comment endpoints, lifted to author user_id. // 2. The karma-muted set — host has been muted by users whose combined -// follower_count crosses karmaCommentCountThreshold (community-driven -// signal). Same shape used in v1_event_comments for comment authors. +// follower_count crosses karmaCommentCountThreshold. func TestRemixContestsExcludesShadowbannedHosts(t *testing.T) { app := emptyTestApp(t) cleanHostID := 9601 - lowScoreHostID := 9602 + karmaReportedHostID := 9602 karmaMutedHostID := 9603 - highKarmaMuterID := 9604 + highKarmaUserID := 9604 cleanTrackID := 8601 - lowScoreTrackID := 8602 + karmaReportedTrackID := 8602 karmaMutedTrackID := 8603 + // Comment authored by karmaReportedHost on its own track; the high-karma + // user reports it, which should cross the threshold and propagate the + // shadow-ban from comment_id up to the host's user_id. + reportedCommentID := 7701 + start := parseTime(t, "2024-01-02") end := parseTime(t, "2099-01-01") @@ -493,7 +499,7 @@ func TestRemixContestsExcludesShadowbannedHosts(t *testing.T) { }, { "event_id": 802, "event_type": "remix_contest", "entity_type": "track", - "entity_id": lowScoreTrackID, "user_id": lowScoreHostID, + "entity_id": karmaReportedTrackID, "user_id": karmaReportedHostID, "created_at": start, "end_date": end, }, { @@ -504,36 +510,39 @@ func TestRemixContestsExcludesShadowbannedHosts(t *testing.T) { }, "users": []map[string]any{ {"user_id": cleanHostID, "handle": "clean_host"}, - {"user_id": lowScoreHostID, "handle": "low_score_host"}, + {"user_id": karmaReportedHostID, "handle": "karma_reported_host"}, {"user_id": karmaMutedHostID, "handle": "karma_muted_host"}, - {"user_id": highKarmaMuterID, "handle": "high_karma_muter"}, + {"user_id": highKarmaUserID, "handle": "high_karma_user"}, }, "tracks": []map[string]any{ {"track_id": cleanTrackID, "owner_id": cleanHostID, "created_at": start}, - {"track_id": lowScoreTrackID, "owner_id": lowScoreHostID, "created_at": start}, + {"track_id": karmaReportedTrackID, "owner_id": karmaReportedHostID, "created_at": start}, {"track_id": karmaMutedTrackID, "owner_id": karmaMutedHostID, "created_at": start}, }, + "comments": []map[string]any{ + { + "comment_id": reportedCommentID, "user_id": karmaReportedHostID, + "entity_id": karmaReportedTrackID, "entity_type": "Track", "text": "reported comment", + }, + }, + "comment_reports": []map[string]any{ + {"comment_id": reportedCommentID, "user_id": highKarmaUserID}, + }, "muted_users": []map[string]any{ - // High-karma muter mutes the karma-muted host — combined with the + // High-karma user mutes the karma-muted host — combined with the // follower_count bump below, this should cross the threshold. - {"user_id": highKarmaMuterID, "muted_user_id": karmaMutedHostID}, + {"user_id": highKarmaUserID, "muted_user_id": karmaMutedHostID}, }, } database.Seed(app.pool.Replicas[0], fixtures) - // `aggregate_user` rows are created by the users trigger; tweak the two - // fields we care about: score on the low-score host, and the muter's - // follower_count so the karma-muted CTE actually trips. + // `aggregate_user` rows are created by the users trigger; bump the + // high-karma user's follower_count past the threshold so both + // karma_reported_authors (via comment_reports) and muted_by_karma + // (via muted_users) trip on their respective host. _, err := app.pool.Exec(context.Background(), - `UPDATE aggregate_user SET score = $1 WHERE user_id = $2`, - -1, lowScoreHostID, - ) - if err != nil { - t.Fatal(err) - } - _, err = app.pool.Exec(context.Background(), `UPDATE aggregate_user SET follower_count = $1 WHERE user_id = $2`, - karmaCommentCountThreshold+1, highKarmaMuterID, + karmaCommentCountThreshold+1, highKarmaUserID, ) if err != nil { t.Fatal(err) @@ -550,11 +559,11 @@ func TestRemixContestsExcludesShadowbannedHosts(t *testing.T) { }) }) - t.Run("host with score < 0 is excluded", func(t *testing.T) { + t.Run("host with karma-reported comment is excluded", func(t *testing.T) { _, body := testGet(t, app, "/v1/events/remix-contests") eventIds := pluckStrings(body, "data.#.event_id") assert.NotContains(t, eventIds, trashid.MustEncodeHashID(802), - "contest hosted by a user with aggregate_user.score < 0 must not be returned") + "contest hosted by a user who authored a comment in high_karma_reporters must not be returned") }) t.Run("karma-muted host is excluded", func(t *testing.T) {