diff --git a/app/controllers/discourse_dumbcourse/app_controller.rb b/app/controllers/discourse_dumbcourse/app_controller.rb index 42ac059..989e333 100644 --- a/app/controllers/discourse_dumbcourse/app_controller.rb +++ b/app/controllers/discourse_dumbcourse/app_controller.rb @@ -72,10 +72,19 @@ def show # unicode glyph. Auto-syncs with whatever is uploaded; never raises. custom_reaction_emojis = begin - ::Emoji.custom.each_with_object({}) do |e, h| - url = (e.url rescue nil) - h[e.name] = url if url.present? - end + ::Emoji + .custom + .each_with_object({}) do |e, h| + url = + ( + begin + e.url + rescue StandardError + nil + end + ) + h[e.name] = url if url.present? + end rescue StandardError {} end diff --git a/app/controllers/discourse_mod_categories/messages_controller.rb b/app/controllers/discourse_mod_categories/messages_controller.rb index 079bd0c..db408d0 100644 --- a/app/controllers/discourse_mod_categories/messages_controller.rb +++ b/app/controllers/discourse_mod_categories/messages_controller.rb @@ -274,6 +274,13 @@ def update_post_whisper post.reload end + # Toggle the search index in lockstep with the whisper state so a + # search leak doesn't outlive the arm/disarm click. Armed → + # `post_search_data` row is wiped. Disarmed → the post is re-indexed + # so it becomes discoverable now that it's public. Listener lives + # in sub_plugins/mod_categories.rb. + DiscourseEvent.trigger(:mod_whisper_state_changed, post, armed) + render json: serialized_post_whisper_state(post.reload) end diff --git a/db/migrate/20260701000001_purge_whisper_post_search_data.rb b/db/migrate/20260701000001_purge_whisper_post_search_data.rb new file mode 100644 index 0000000..30967c7 --- /dev/null +++ b/db/migrate/20260701000001_purge_whisper_post_search_data.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +# One-time backfill: strip whisper posts out of `post_search_data`. +# +# Every existing whisper — every row in `post_custom_fields` where +# `name = 'mod_whisper_target_user_ids'` — was originally indexed into +# `post_search_data` before the SearchIndexer gate was in place. That +# tsvector is what leaked whisper text to non-audience users via +# full-text search. This migration deletes those rows in one pass so +# the leak stops on deploy, without waiting for each whisper to be +# re-touched. +# +# `delete_all` bypasses ActiveRecord callbacks — `post_search_data` has +# none we care about, so this is safe and fast. Idempotent: re-running +# just deletes zero rows. +class PurgeWhisperPostSearchData < ActiveRecord::Migration[7.0] + def up + execute(<<~SQL) + DELETE FROM post_search_data + WHERE post_id IN ( + SELECT post_id + FROM post_custom_fields + WHERE name = 'mod_whisper_target_user_ids' + ) + SQL + end + + def down + # Irreversible — the tsvector is derived from posts.raw and would + # need to be recomputed by SearchIndexer per row, which is out of + # scope for a migration and would re-introduce the leak. + end +end diff --git a/lib/discourse_mod_categories/search_extension.rb b/lib/discourse_mod_categories/search_extension.rb new file mode 100644 index 0000000..b609e5b --- /dev/null +++ b/lib/discourse_mod_categories/search_extension.rb @@ -0,0 +1,75 @@ +# frozen_string_literal: true + +module DiscourseModCategories + # Prepended onto ::Search — belt-and-suspenders companion to + # `SearchIndexerExtension`. The indexer gate deletes whisper rows from + # `post_search_data`, so tsquery matching cannot surface them in the + # first place. This filter runs on the result set anyway, so any + # whisper post that somehow survives (indexer race, backfill hasn't + # caught up, custom field written after the first index write) is + # still stripped before the search endpoint serializes it. + # + # Order-independence: whether this or SmartSearch's prepend runs first + # in the `super` chain, variant searches instantiate fresh `Search` + # objects that traverse the whole chain. So variants get filtered + # regardless of which entry point runs first. + # + # The filter drops whisper `posts` for non-staff viewers, then removes + # any topic whose only appearance in the result set was via a now- + # dropped whisper. Topics matched by title stay put — they have their + # own presence in `topics_id_set` independent of post matches. + # + # rescue StandardError → return the base result unchanged so a future + # Discourse refactor to `Search::GroupedSearchResults` can't 500 a + # search request. The DB-level gate already prevents leaks; the + # fallback here only degrades post-filter behavior for a partial + # deploy state. + module SearchExtension + def execute(*args, **kwargs) + result = super + begin + filter_whispers!(result) + rescue StandardError => e + # Only the post-filter is rescued — a raise from core `super` above + # propagates untouched (we're not a circuit breaker for core search). + # The DB-level indexer gate already prevents the leak; this fallback + # only degrades the belt-and-suspenders pass. + ::Rails.logger.warn( + "[jtech-tools] Search whisper post-filter fell back: #{e.class}: #{e.message}", + ) + end + result + end + + private + + def filter_whispers!(result) + return unless defined?(::SiteSetting) && ::SiteSetting.mod_whisper_enabled + return unless result.respond_to?(:posts) + posts = result.posts + return unless posts.is_a?(Array) + return if posts.empty? + + viewer = @guardian.respond_to?(:user) ? @guardian.user : nil + return if viewer&.staff? + + post_ids = posts.map(&:id).compact + return if post_ids.empty? + + blocked = + DiscourseModCategories::UserActionWhisperFilter.blocked_whisper_post_ids(post_ids, viewer) + return if blocked.empty? + blocked_set = blocked.to_set + + dropped_topic_ids = posts.select { |p| blocked_set.include?(p.id) }.map(&:topic_id).uniq + posts.reject! { |p| blocked_set.include?(p.id) } + + if result.respond_to?(:topics) && result.topics.is_a?(Array) + remaining_topic_ids = posts.map(&:topic_id).to_set + result.topics.reject! do |t| + dropped_topic_ids.include?(t.id) && !remaining_topic_ids.include?(t.id) + end + end + end + end +end diff --git a/lib/discourse_mod_categories/search_indexer_extension.rb b/lib/discourse_mod_categories/search_indexer_extension.rb new file mode 100644 index 0000000..58df538 --- /dev/null +++ b/lib/discourse_mod_categories/search_indexer_extension.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module DiscourseModCategories + # Prevent whisper posts from ever landing in `post_search_data` (the + # tsvector index Discourse's full-text search queries). If the row does + # not exist, no ts_query can match it — so no non-audience viewer can + # discover whisper text through search, and no `is:unseen` advanced + # filter can shortcut around Guardian by hitting the index directly. + # + # This is the DB-level gate. Query-time filters (WhisperQueryFilter and + # SearchWhisperFilter) are the belt-and-suspenders layer on top. + # + # Behavior: + # * On `SearchIndexer.index(post)` for a whisper post: skip the write + # AND delete any pre-existing row (defense against a race where the + # custom field is set slightly after the first index attempt). + # * On `SearchIndexer.index(post)` for a non-whisper post: pass through + # to super — normal indexing runs. + # * Non-Post inputs (Topic, Category, User…) pass through untouched. + # + # A post's whisper state is read from `post_custom_fields` directly rather + # than `post.custom_fields` — the latter may lag behind the DB in some + # code paths (post reloaded from cache, custom fields not preloaded). + # A single indexed lookup keyed on (post_id, name) is cheap. + # + # rescue StandardError → fall back to `super` so a schema change to + # `post_custom_fields` (or a NULL post argument) can never break search + # indexing for public posts. + module SearchIndexerExtension + def index(obj, force: false) + if whisper_post?(obj) + ::PostSearchData.where(post_id: obj.id).delete_all + return + end + super + rescue StandardError => e + ::Rails.logger.warn( + "[jtech-tools] SearchIndexer whisper gate fell back: #{e.class}: #{e.message}", + ) + super + end + + private + + def whisper_post?(obj) + return false unless defined?(::SiteSetting) && ::SiteSetting.mod_whisper_enabled + return false unless obj.is_a?(::Post) + return false unless obj.id + + ::PostCustomField.where( + post_id: obj.id, + name: DiscourseModCategories::POST_WHISPER_TARGETS_FIELD, + ).exists? + end + end +end diff --git a/spec/requests/whisper_search_leak_spec.rb b/spec/requests/whisper_search_leak_spec.rb new file mode 100644 index 0000000..1509eb6 --- /dev/null +++ b/spec/requests/whisper_search_leak_spec.rb @@ -0,0 +1,154 @@ +# frozen_string_literal: true + +require "rails_helper" + +# Regression coverage for the whisper search leak +# (forums.jtechforums.org/t/whispers-being-indexed-in-search/7800): whisper +# text was reachable through full-text search by non-audience users, because +# Discourse indexes every post into `post_search_data` and runs `ts_query` +# against it BEFORE any per-user visibility check. The `is:unseen` advanced +# filter widened the same hole. +# +# Fix under test (DB-level primary, per the reporter's own suggestion that +# "whispers should not be indexed"): +# 1. SearchIndexerExtension keeps whisper rows OUT of `post_search_data` +# entirely — so no tsquery can match them, for anyone. A whisper is +# simply not full-text-searchable; the audience still reads it in the +# topic stream, they just can't discover it via /search. +# 2. SearchExtension post-filters any whisper row that lingers (index-gate +# race, backfill pending) so a stranger still can't see it, while the +# audience can — the only path where a whisper surfaces in search. +RSpec.describe "Whisper search leak" do + fab!(:admin) + fab!(:author, :user) + fab!(:target, :user) + fab!(:stranger, :user) + fab!(:topic) + fab!(:op) { Fabricate(:post, topic: topic, user: author, raw: "ordinary opening post body") } + fab!(:whisper_post) do + Fabricate(:post, topic: topic, user: author, raw: "the secret whisper word Boooo lives here") + end + + let(:targets_field) { DiscourseModCategories::POST_WHISPER_TARGETS_FIELD } + + before do + SiteSetting.mod_categories_enabled = true + SiteSetting.mod_whisper_enabled = true + SiteSetting.auto_silence_fast_typers_on_first_post = false + + SearchIndexer.enable + SearchIndexer.index(op, force: true) + SearchIndexer.index(topic, force: true) + end + + after { SearchIndexer.disable } + + def mark_whisper!(post = whisper_post) + post.custom_fields[targets_field] = [target.id] + post.save_custom_fields(true) + post.reload + end + + def search_post_ids(term, viewer) + guardian = viewer ? Guardian.new(viewer) : Guardian.new + Search.new(term, guardian: guardian).execute.posts.map(&:id) + end + + describe "SearchIndexer gate" do + it "never writes a post_search_data row for a whisper" do + mark_whisper! + SearchIndexer.index(whisper_post, force: true) + expect(PostSearchData.where(post_id: whisper_post.id)).not_to exist + end + + it "deletes a pre-existing row when a public post becomes a whisper" do + SearchIndexer.index(whisper_post, force: true) + expect(PostSearchData.where(post_id: whisper_post.id)).to exist + + mark_whisper! + SearchIndexer.index(whisper_post, force: true) + expect(PostSearchData.where(post_id: whisper_post.id)).not_to exist + end + + it "still indexes ordinary public posts" do + SearchIndexer.index(whisper_post, force: true) + expect(PostSearchData.where(post_id: whisper_post.id)).to exist + end + end + + describe "search results (whisper deindexed)" do + before do + mark_whisper! + SearchIndexer.index(whisper_post, force: true) # gate deletes the row + end + + it "is unsearchable by a stranger" do + expect(search_post_ids("Boooo", stranger)).not_to include(whisper_post.id) + end + + it "is unsearchable by an anonymous viewer" do + expect(search_post_ids("Boooo", nil)).not_to include(whisper_post.id) + end + + # A whisper is removed from the index outright, so even the audience + # can't full-text-search it. They still read it in the topic stream — + # this only removes the /search discovery path. + it "is unsearchable even by a target, because the row is gone" do + expect(search_post_ids("Boooo", target)).not_to include(whisper_post.id) + end + + it "is unsearchable even by staff" do + expect(search_post_ids("Boooo", admin)).not_to include(whisper_post.id) + end + end + + # Belt-and-suspenders: a REAL post_search_data row lingers because the post + # was indexed while public, then became a whisper without a re-index. The + # query-time filter must still strip it for a stranger while letting the + # audience through. + describe "query-time filter on a lingering row" do + fab!(:lingering) do + Fabricate(:post, topic: topic, user: author, raw: "lingering whisper term Zzxq stays put") + end + + before do + SearchIndexer.index(lingering, force: true) # real tsvector row, still public + mark_whisper!(lingering) # marked whisper via custom field, NOT re-indexed + end + + it "keeps the real row (no re-index was triggered)" do + expect(PostSearchData.where(post_id: lingering.id)).to exist + end + + it "strips the lingering whisper from a stranger" do + expect(search_post_ids("Zzxq", stranger)).not_to include(lingering.id) + end + + it "strips the lingering whisper from an anonymous viewer" do + expect(search_post_ids("Zzxq", nil)).not_to include(lingering.id) + end + + it "still surfaces the lingering whisper to the audience" do + expect(search_post_ids("Zzxq", target)).to include(lingering.id) + end + + it "still surfaces the lingering whisper to staff" do + expect(search_post_ids("Zzxq", admin)).to include(lingering.id) + end + end + + describe "convert to public re-indexes" do + it "makes a disarmed whisper discoverable again" do + mark_whisper! + SearchIndexer.index(whisper_post, force: true) + expect(search_post_ids("Boooo", stranger)).not_to include(whisper_post.id) + + PostCustomField.where(post_id: whisper_post.id, name: targets_field).destroy_all + whisper_post.reload + DiscourseEvent.trigger(:mod_whisper_state_changed, whisper_post, false) + + expect(PostSearchData.where(post_id: whisper_post.id)).to exist + expect(search_post_ids("Boooo", stranger)).to include(whisper_post.id) + end + end +end diff --git a/sub_plugins/mod_categories.rb b/sub_plugins/mod_categories.rb index 46d1706..011163c 100644 --- a/sub_plugins/mod_categories.rb +++ b/sub_plugins/mod_categories.rb @@ -7,6 +7,8 @@ require_relative "../lib/discourse_mod_categories/whisper_query_filter" require_relative "../lib/discourse_mod_categories/staff_notifier" require_relative "../lib/discourse_mod_categories/user_action_whisper_filter" +require_relative "../lib/discourse_mod_categories/search_indexer_extension" +require_relative "../lib/discourse_mod_categories/search_extension" register_asset "stylesheets/topic-footer-message.scss" register_asset "stylesheets/whisper.scss" @@ -1373,6 +1375,57 @@ def stream(opts = nil) end end + # 3. Full-text search leaked whisper text to non-audience users. Discourse's + # `SearchIndexer` writes every post's tsvector into `post_search_data`, and + # `Search#execute` runs `ts_query` against that table BEFORE any per-user + # visibility check. So a whisper's raw contents were discoverable via + # `/search?q=` — including through the `is:unseen` + # advanced filter, which shortcuts around Guardian by hitting the index + # directly. Two-layer fix, DB-level primary: + # + # (a) `SearchIndexer` gate — skip whisper posts during indexing AND delete + # any existing row. The whisper never sits in `post_search_data`, so + # no tsquery can match it and no advanced filter can reach it. + # (b) `Search#execute` post-filter — belt-and-suspenders. If a row + # survives the indexer gate (race, backfill pending, custom field + # written after first index write), the result-set filter still + # drops it before serialization. + # + # The migration `20260701000001_purge_whisper_post_search_data.rb` runs the + # backfill: one SQL DELETE that strips every already-indexed whisper on + # deploy so the leak stops without waiting for each post to be re-touched. + reloadable_patch do + ::SearchIndexer.singleton_class.prepend(::DiscourseModCategories::SearchIndexerExtension) + ::Search.prepend(::DiscourseModCategories::SearchExtension) if defined?(::Search) + end + + # When staff convert a whisper → public via update_post_whisper (disarm), + # the whisper custom field is removed, so the SearchIndexer gate would + # now let the post through — but nothing triggers a re-index by default. + # Force one so the newly-public post becomes discoverable. + # + # The inverse (public → whisper) is handled by the gate itself: the next + # `SearchIndexer.index` call for that post_id will delete its row, and + # we also fire one explicitly here to close the window between the arm + # and the next natural re-index (post edit, cook, etc.). + DiscourseEvent.on(:mod_whisper_state_changed) do |post, armed| + next unless SiteSetting.mod_whisper_enabled + next unless post.is_a?(::Post) + + begin + if armed + ::PostSearchData.where(post_id: post.id).delete_all + else + ::SearchIndexer.index(post, force: true) + end + rescue StandardError => e + ::Rails.logger.warn( + "[jtech-tools] whisper search-index toggle failed for post=#{post.id}: " \ + "#{e.class}: #{e.message}", + ) + end + end + # 4. Second filter dropdown on /u/{username}/notifications. The frontend # initializer (assets/javascripts/discourse/initializers/notifications-type-filter.js) # sends ?type=. Two cases: