Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions app/controllers/discourse_dumbcourse/app_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,19 @@ def show
# unicode glyph. Auto-syncs with whatever is uploaded; never raises.
custom_reaction_emojis =
begin
::Emoji.custom.each_with_object({}) do |e, h|
url = (e.url rescue nil)
h[e.name] = url if url.present?
end
::Emoji
.custom
.each_with_object({}) do |e, h|
url =
(
begin
e.url
rescue StandardError
nil
end
)
h[e.name] = url if url.present?
end
rescue StandardError
{}
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,13 @@ def update_post_whisper
post.reload
end

# Toggle the search index in lockstep with the whisper state so a
# search leak doesn't outlive the arm/disarm click. Armed →
# `post_search_data` row is wiped. Disarmed → the post is re-indexed
# so it becomes discoverable now that it's public. Listener lives
# in sub_plugins/mod_categories.rb.
DiscourseEvent.trigger(:mod_whisper_state_changed, post, armed)

render json: serialized_post_whisper_state(post.reload)
end

Expand Down
33 changes: 33 additions & 0 deletions db/migrate/20260701000001_purge_whisper_post_search_data.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# frozen_string_literal: true

# One-time backfill: strip whisper posts out of `post_search_data`.
#
# Every existing whisper — every row in `post_custom_fields` where
# `name = 'mod_whisper_target_user_ids'` — was originally indexed into
# `post_search_data` before the SearchIndexer gate was in place. That
# tsvector is what leaked whisper text to non-audience users via
# full-text search. This migration deletes those rows in one pass so
# the leak stops on deploy, without waiting for each whisper to be
# re-touched.
#
# `delete_all` bypasses ActiveRecord callbacks — `post_search_data` has
# none we care about, so this is safe and fast. Idempotent: re-running
# just deletes zero rows.
class PurgeWhisperPostSearchData < ActiveRecord::Migration[7.0]
def up
execute(<<~SQL)
DELETE FROM post_search_data
WHERE post_id IN (
SELECT post_id
FROM post_custom_fields
WHERE name = 'mod_whisper_target_user_ids'
)
SQL
end

def down
# Irreversible — the tsvector is derived from posts.raw and would
# need to be recomputed by SearchIndexer per row, which is out of
# scope for a migration and would re-introduce the leak.
end
end
75 changes: 75 additions & 0 deletions lib/discourse_mod_categories/search_extension.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# frozen_string_literal: true

module DiscourseModCategories
# Prepended onto ::Search — belt-and-suspenders companion to
# `SearchIndexerExtension`. The indexer gate deletes whisper rows from
# `post_search_data`, so tsquery matching cannot surface them in the
# first place. This filter runs on the result set anyway, so any
# whisper post that somehow survives (indexer race, backfill hasn't
# caught up, custom field written after the first index write) is
# still stripped before the search endpoint serializes it.
#
# Order-independence: whether this or SmartSearch's prepend runs first
# in the `super` chain, variant searches instantiate fresh `Search`
# objects that traverse the whole chain. So variants get filtered
# regardless of which entry point runs first.
#
# The filter drops whisper `posts` for non-staff viewers, then removes
# any topic whose only appearance in the result set was via a now-
# dropped whisper. Topics matched by title stay put — they have their
# own presence in `topics_id_set` independent of post matches.
#
# rescue StandardError → return the base result unchanged so a future
# Discourse refactor to `Search::GroupedSearchResults` can't 500 a
# search request. The DB-level gate already prevents leaks; the
# fallback here only degrades post-filter behavior for a partial
# deploy state.
module SearchExtension
def execute(*args, **kwargs)
result = super
begin
filter_whispers!(result)
rescue StandardError => e
# Only the post-filter is rescued — a raise from core `super` above
# propagates untouched (we're not a circuit breaker for core search).
# The DB-level indexer gate already prevents the leak; this fallback
# only degrades the belt-and-suspenders pass.
::Rails.logger.warn(
"[jtech-tools] Search whisper post-filter fell back: #{e.class}: #{e.message}",
)
end
result
end

private

def filter_whispers!(result)
return unless defined?(::SiteSetting) && ::SiteSetting.mod_whisper_enabled
return unless result.respond_to?(:posts)
posts = result.posts
return unless posts.is_a?(Array)
return if posts.empty?

viewer = @guardian.respond_to?(:user) ? @guardian.user : nil
return if viewer&.staff?

post_ids = posts.map(&:id).compact
return if post_ids.empty?

blocked =
DiscourseModCategories::UserActionWhisperFilter.blocked_whisper_post_ids(post_ids, viewer)
return if blocked.empty?
blocked_set = blocked.to_set

dropped_topic_ids = posts.select { |p| blocked_set.include?(p.id) }.map(&:topic_id).uniq
posts.reject! { |p| blocked_set.include?(p.id) }

if result.respond_to?(:topics) && result.topics.is_a?(Array)
remaining_topic_ids = posts.map(&:topic_id).to_set
result.topics.reject! do |t|
dropped_topic_ids.include?(t.id) && !remaining_topic_ids.include?(t.id)
end
end
end
end
end
56 changes: 56 additions & 0 deletions lib/discourse_mod_categories/search_indexer_extension.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# frozen_string_literal: true

module DiscourseModCategories
# Prevent whisper posts from ever landing in `post_search_data` (the
# tsvector index Discourse's full-text search queries). If the row does
# not exist, no ts_query can match it — so no non-audience viewer can
# discover whisper text through search, and no `is:unseen` advanced
# filter can shortcut around Guardian by hitting the index directly.
#
# This is the DB-level gate. Query-time filters (WhisperQueryFilter and
# SearchWhisperFilter) are the belt-and-suspenders layer on top.
#
# Behavior:
# * On `SearchIndexer.index(post)` for a whisper post: skip the write
# AND delete any pre-existing row (defense against a race where the
# custom field is set slightly after the first index attempt).
# * On `SearchIndexer.index(post)` for a non-whisper post: pass through
# to super — normal indexing runs.
# * Non-Post inputs (Topic, Category, User…) pass through untouched.
#
# A post's whisper state is read from `post_custom_fields` directly rather
# than `post.custom_fields` — the latter may lag behind the DB in some
# code paths (post reloaded from cache, custom fields not preloaded).
# A single indexed lookup keyed on (post_id, name) is cheap.
#
# rescue StandardError → fall back to `super` so a schema change to
# `post_custom_fields` (or a NULL post argument) can never break search
# indexing for public posts.
module SearchIndexerExtension
def index(obj, force: false)
if whisper_post?(obj)
::PostSearchData.where(post_id: obj.id).delete_all
return
end
super
rescue StandardError => e
::Rails.logger.warn(
"[jtech-tools] SearchIndexer whisper gate fell back: #{e.class}: #{e.message}",
)
super
end

private

def whisper_post?(obj)
return false unless defined?(::SiteSetting) && ::SiteSetting.mod_whisper_enabled
return false unless obj.is_a?(::Post)
return false unless obj.id

::PostCustomField.where(
post_id: obj.id,
name: DiscourseModCategories::POST_WHISPER_TARGETS_FIELD,
).exists?
end
end
end
154 changes: 154 additions & 0 deletions spec/requests/whisper_search_leak_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# frozen_string_literal: true

require "rails_helper"

# Regression coverage for the whisper search leak
# (forums.jtechforums.org/t/whispers-being-indexed-in-search/7800): whisper
# text was reachable through full-text search by non-audience users, because
# Discourse indexes every post into `post_search_data` and runs `ts_query`
# against it BEFORE any per-user visibility check. The `is:unseen` advanced
# filter widened the same hole.
#
# Fix under test (DB-level primary, per the reporter's own suggestion that
# "whispers should not be indexed"):
# 1. SearchIndexerExtension keeps whisper rows OUT of `post_search_data`
# entirely — so no tsquery can match them, for anyone. A whisper is
# simply not full-text-searchable; the audience still reads it in the
# topic stream, they just can't discover it via /search.
# 2. SearchExtension post-filters any whisper row that lingers (index-gate
# race, backfill pending) so a stranger still can't see it, while the
# audience can — the only path where a whisper surfaces in search.
RSpec.describe "Whisper search leak" do
fab!(:admin)
fab!(:author, :user)
fab!(:target, :user)
fab!(:stranger, :user)
fab!(:topic)
fab!(:op) { Fabricate(:post, topic: topic, user: author, raw: "ordinary opening post body") }
fab!(:whisper_post) do
Fabricate(:post, topic: topic, user: author, raw: "the secret whisper word Boooo lives here")
end

let(:targets_field) { DiscourseModCategories::POST_WHISPER_TARGETS_FIELD }

before do
SiteSetting.mod_categories_enabled = true
SiteSetting.mod_whisper_enabled = true
SiteSetting.auto_silence_fast_typers_on_first_post = false

SearchIndexer.enable
SearchIndexer.index(op, force: true)
SearchIndexer.index(topic, force: true)
end

after { SearchIndexer.disable }

def mark_whisper!(post = whisper_post)
post.custom_fields[targets_field] = [target.id]
post.save_custom_fields(true)
post.reload
end

def search_post_ids(term, viewer)
guardian = viewer ? Guardian.new(viewer) : Guardian.new
Search.new(term, guardian: guardian).execute.posts.map(&:id)
end

describe "SearchIndexer gate" do
it "never writes a post_search_data row for a whisper" do
mark_whisper!
SearchIndexer.index(whisper_post, force: true)
expect(PostSearchData.where(post_id: whisper_post.id)).not_to exist
end

it "deletes a pre-existing row when a public post becomes a whisper" do
SearchIndexer.index(whisper_post, force: true)
expect(PostSearchData.where(post_id: whisper_post.id)).to exist

mark_whisper!
SearchIndexer.index(whisper_post, force: true)
expect(PostSearchData.where(post_id: whisper_post.id)).not_to exist
end

it "still indexes ordinary public posts" do
SearchIndexer.index(whisper_post, force: true)
expect(PostSearchData.where(post_id: whisper_post.id)).to exist
end
end

describe "search results (whisper deindexed)" do
before do
mark_whisper!
SearchIndexer.index(whisper_post, force: true) # gate deletes the row
end

it "is unsearchable by a stranger" do
expect(search_post_ids("Boooo", stranger)).not_to include(whisper_post.id)
end

it "is unsearchable by an anonymous viewer" do
expect(search_post_ids("Boooo", nil)).not_to include(whisper_post.id)
end

# A whisper is removed from the index outright, so even the audience
# can't full-text-search it. They still read it in the topic stream —
# this only removes the /search discovery path.
it "is unsearchable even by a target, because the row is gone" do
expect(search_post_ids("Boooo", target)).not_to include(whisper_post.id)
end

it "is unsearchable even by staff" do
expect(search_post_ids("Boooo", admin)).not_to include(whisper_post.id)
end
end

# Belt-and-suspenders: a REAL post_search_data row lingers because the post
# was indexed while public, then became a whisper without a re-index. The
# query-time filter must still strip it for a stranger while letting the
# audience through.
describe "query-time filter on a lingering row" do
fab!(:lingering) do
Fabricate(:post, topic: topic, user: author, raw: "lingering whisper term Zzxq stays put")
end

before do
SearchIndexer.index(lingering, force: true) # real tsvector row, still public
mark_whisper!(lingering) # marked whisper via custom field, NOT re-indexed
end

it "keeps the real row (no re-index was triggered)" do
expect(PostSearchData.where(post_id: lingering.id)).to exist
end

it "strips the lingering whisper from a stranger" do
expect(search_post_ids("Zzxq", stranger)).not_to include(lingering.id)
end

it "strips the lingering whisper from an anonymous viewer" do
expect(search_post_ids("Zzxq", nil)).not_to include(lingering.id)
end

it "still surfaces the lingering whisper to the audience" do
expect(search_post_ids("Zzxq", target)).to include(lingering.id)
end

it "still surfaces the lingering whisper to staff" do
expect(search_post_ids("Zzxq", admin)).to include(lingering.id)
end
end

describe "convert to public re-indexes" do
it "makes a disarmed whisper discoverable again" do
mark_whisper!
SearchIndexer.index(whisper_post, force: true)
expect(search_post_ids("Boooo", stranger)).not_to include(whisper_post.id)

PostCustomField.where(post_id: whisper_post.id, name: targets_field).destroy_all
whisper_post.reload
DiscourseEvent.trigger(:mod_whisper_state_changed, whisper_post, false)

expect(PostSearchData.where(post_id: whisper_post.id)).to exist
expect(search_post_ids("Boooo", stranger)).to include(whisper_post.id)
end
end
end
Loading