From c972fc0076cb9bde458f3eac0fbe3b686ddf2e23 Mon Sep 17 00:00:00 2001
From: Phat Pham <phat@onroads.xyz>
Date: Tue, 23 Jun 2026 18:28:06 +0700
Subject: [PATCH 1/3] feat: add AI-powered "For You" feed personalization

Reranks the feed by free-text reader interests using Cloudflare Workers
AI, with graceful degradation to chronological order on any model or
ranking failure. Settings live in the existing Reader settings dialog
(disabled by default); personalization is opt-in per browser.

The backend ranks once per (interests, source filter, freshness) via the
Cache API and re-projects the cached order onto a freshly-fetched item
page each request, so pagination behaves exactly like /api/items
(true offset/limit, real has_next) without re-invoking the LLM per page.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 core/personalize/rank.ts               | 120 +++++++++++
 core/personalize/test/fakeLlmRanker.ts |  17 ++
 core/personalize/test/rank.spec.ts     | 144 +++++++++++++
 core/ports.ts                          |  14 ++
 core/render.ts                         |  16 ++
 platforms/cloudflare/src/env.d.ts      |   4 +
 platforms/cloudflare/src/index.ts      | 275 +++++++++++++++++++++++--
 platforms/cloudflare/src/llmRanker.ts  |  87 ++++++++
 platforms/cloudflare/wrangler.toml     |   9 +
 web-static/static/app.js               | 166 +++++++++++++--
 web-static/static/style.css            |  36 ++++
 11 files changed, 854 insertions(+), 34 deletions(-)
 create mode 100644 core/personalize/rank.ts
 create mode 100644 core/personalize/test/fakeLlmRanker.ts
 create mode 100644 core/personalize/test/rank.spec.ts
 create mode 100644 platforms/cloudflare/src/llmRanker.ts
diff --git a/core/personalize/rank.ts b/core/personalize/rank.ts
new file mode 100644
index 0000000..750e31f
--- /dev/null
+++ b/core/personalize/rank.ts
@@ -0,0 +1,120 @@
+// Pure helpers for the "For You" LLM ranking feature. No I/O — adapters
+// (e.g. platforms/cloudflare/src/llmRanker.ts) own the actual model call and
+// use these to build the prompt and interpret the response.
+
+import type { FeedItem } from "../domain.ts";
+
+export const MAX_INTERESTS_LENGTH = 300;
+const MAX_ITEM_TEXT_LENGTH = 160;
+
+export function buildRankingPrompt(
+  items: FeedItem[],
+  interests: string,
+): string {
+  const lines = items.map((item, i) => {
+    const title = item.title.trim().slice(0, MAX_ITEM_TEXT_LENGTH);
+    const summary = (item.summary ?? "").trim().slice(0, MAX_ITEM_TEXT_LENGTH);
+    return `${i}: ${title}${summary ? ` — ${summary}` : ""}`;
+  });
+  return [
+    "You are ranking a list of feed items by relevance to a reader's stated interests.",
+    `Reader interests: ${interests.trim().slice(0, MAX_INTERESTS_LENGTH)}`,
+    "Items (index: title — summary):",
+    ...lines,
+    "",
+    "Respond with ONLY a JSON array of the item indices above, ordered from most to least relevant to the interests. Include every index exactly once. No other text, no markdown.",
+  ].join("\n");
+}
+
+/**
+ * Defensively extracts a ranked index list from a raw model response.
+ * Never throws — a malformed or empty response yields []. Indices outside
+ * [0, itemCount) or repeated are dropped (first occurrence wins).
+ */
+export function parseRankedIndices(raw: string, itemCount: number): number[] {
+  const match = /\[[\s\S]*\]/.exec(raw);
+  if (!match) return [];
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(match[0]);
+  } catch {
+    return [];
+  }
+  if (!Array.isArray(parsed)) return [];
+
+  const seen = new Set<number>();
+  const out: number[] = [];
+  for (const value of parsed) {
+    const index = typeof value === "number" ? value : Number(value);
+    if (
+      !Number.isInteger(index) ||
+      index < 0 ||
+      index >= itemCount ||
+      seen.has(index)
+    ) {
+      continue;
+    }
+    seen.add(index);
+    out.push(index);
+  }
+  return out;
+}
+
+/**
+ * Places ranked items first (in model order), then appends any remaining
+ * items in their original order — guarantees a full, valid list even when
+ * the ranker returns a partial or empty result.
+ */
+export function mergeRankedOrder(
+  items: FeedItem[],
+  rankedIndices: number[],
+): FeedItem[] {
+  const used = new Set<number>();
+  const out: FeedItem[] = [];
+  for (const index of rankedIndices) {
+    const item = items[index];
+    if (!item || used.has(index)) continue;
+    used.add(index);
+    out.push(item);
+  }
+  for (let i = 0; i < items.length; i++) {
+    if (!used.has(i)) out.push(items[i]!);
+  }
+  return out;
+}
+
+/** Stable identifier for a FeedItem — the domain type has no numeric id,
+ * so (source, externalId) is the natural key for referencing an item
+ * across requests (e.g. in a cached ranking). */
+export function itemKey(item: FeedItem): string {
+  return `${item.source} ${item.externalId}`;
+}
+
+/**
+ * Projects a previously-computed ranked key order (from a cached ranking,
+ * possibly over a different/smaller pool) onto a freshly-fetched item
+ * list: ranked items appear first, in cached order; anything not in
+ * rankedKeys — new items, or items the cache doesn't cover — keeps its
+ * original relative order at the end. Unlike mergeRankedOrder, this
+ * tolerates the two lists having different lengths or contents, since
+ * `items` is re-fetched live while rankedKeys may be stale or partial.
+ */
+export function mergeRankedKeysOrder(
+  items: FeedItem[],
+  rankedKeys: string[],
+): FeedItem[] {
+  const byKey = new Map(items.map((item) => [itemKey(item), item]));
+  const used = new Set<string>();
+  const out: FeedItem[] = [];
+  for (const key of rankedKeys) {
+    const item = byKey.get(key);
+    if (!item || used.has(key)) continue;
+    used.add(key);
+    out.push(item);
+  }
+  for (const item of items) {
+    if (!used.has(itemKey(item))) out.push(item);
+  }
+  return out;
+}
diff --git a/core/personalize/test/fakeLlmRanker.ts b/core/personalize/test/fakeLlmRanker.ts
new file mode 100644
index 0000000..9f2725f
--- /dev/null
+++ b/core/personalize/test/fakeLlmRanker.ts
@@ -0,0 +1,17 @@
+import type { FeedItem } from "../../domain.ts";
+import type { LlmRanker } from "../../ports.ts";
+
+/** In-memory LlmRanker for tests — no network calls. `script` decides the
+ * returned ranking (or throws, to simulate a transport/availability failure). */
+export class FakeLlmRanker implements LlmRanker {
+  constructor(
+    private readonly script: (
+      items: FeedItem[],
+      interests: string,
+    ) => number[],
+  ) {}
+
+  async rank(items: FeedItem[], interests: string): Promise<number[]> {
+    return this.script(items, interests);
+  }
+}
diff --git a/core/personalize/test/rank.spec.ts b/core/personalize/test/rank.spec.ts
new file mode 100644
index 0000000..1829e92
--- /dev/null
+++ b/core/personalize/test/rank.spec.ts
@@ -0,0 +1,144 @@
+import { describe, expect, it } from "vitest";
+import type { FeedItem } from "../../domain.ts";
+import {
+  buildRankingPrompt,
+  itemKey,
+  mergeRankedKeysOrder,
+  mergeRankedOrder,
+  parseRankedIndices,
+} from "../rank.ts";
+import { FakeLlmRanker } from "./fakeLlmRanker.ts";
+
+function item(overrides: Partial<FeedItem>): FeedItem {
+  return {
+    source: "hackernews",
+    externalId: "1",
+    title: "title",
+    url: "https://example.com",
+    sourceRank: 1,
+    metadata: {},
+    ...overrides,
+  };
+}
+
+describe("buildRankingPrompt", () => {
+  it("numbers items by index and includes title, summary, and interests", () => {
+    const items = [
+      item({ title: "Rust async runtime", summary: "a new executor" }),
+      item({ title: "No summary item" }),
+    ];
+    const prompt = buildRankingPrompt(items, "rust, distributed systems");
+    expect(prompt).toContain("0: Rust async runtime — a new executor");
+    expect(prompt).toContain("1: No summary item");
+    expect(prompt).toContain("Reader interests: rust, distributed systems");
+  });
+
+  it("truncates oversized interests text", () => {
+    const prompt = buildRankingPrompt([], "x".repeat(1000));
+    const line = prompt.split("\n").find((l) => l.startsWith("Reader interests:"))!;
+    expect(line.length).toBeLessThan(320);
+  });
+});
+
+describe("parseRankedIndices", () => {
+  it("parses a well-formed JSON array", () => {
+    expect(parseRankedIndices("[2, 0, 1]", 3)).toEqual([2, 0, 1]);
+  });
+
+  it("tolerates surrounding prose/markdown around the array", () => {
+    expect(parseRankedIndices("Sure! ```json\n[1, 0]\n```", 2)).toEqual([
+      1, 0,
+    ]);
+  });
+
+  it("drops out-of-range and duplicate indices", () => {
+    expect(parseRankedIndices("[1, 1, 5, -1, 0]", 2)).toEqual([1, 0]);
+  });
+
+  it("returns [] for garbage output instead of throwing", () => {
+    expect(parseRankedIndices("not even close to json", 3)).toEqual([]);
+    expect(parseRankedIndices("", 3)).toEqual([]);
+  });
+});
+
+describe("mergeRankedOrder", () => {
+  const items = [item({ externalId: "a" }), item({ externalId: "b" }), item({ externalId: "c" })];
+
+  it("places ranked items first in model order, then appends the rest", () => {
+    const merged = mergeRankedOrder(items, [2, 0]);
+    expect(merged.map((i) => i.externalId)).toEqual(["c", "a", "b"]);
+  });
+
+  it("passes through original order when ranking is empty", () => {
+    expect(mergeRankedOrder(items, []).map((i) => i.externalId)).toEqual([
+      "a",
+      "b",
+      "c",
+    ]);
+  });
+
+  it("ignores indices it doesn't recognize without throwing", () => {
+    const merged = mergeRankedOrder(items, [99, 1]);
+    expect(merged.map((i) => i.externalId)).toEqual(["b", "a", "c"]);
+  });
+});
+
+describe("mergeRankedKeysOrder", () => {
+  const items = [
+    item({ source: "hackernews", externalId: "a" }),
+    item({ source: "github", externalId: "b" }),
+    item({ source: "hackernews", externalId: "c" }),
+  ];
+
+  it("places ranked items first by cached key order, then appends the rest", () => {
+    const merged = mergeRankedKeysOrder(items, [
+      itemKey(items[2]!),
+      itemKey(items[0]!),
+    ]);
+    expect(merged.map((i) => i.externalId)).toEqual(["c", "a", "b"]);
+  });
+
+  it("tolerates a cached key for an item that no longer exists", () => {
+    const merged = mergeRankedKeysOrder(items, [
+      "hackernews missing-id",
+      itemKey(items[1]!),
+    ]);
+    expect(merged.map((i) => i.externalId)).toEqual(["b", "a", "c"]);
+  });
+
+  it("tolerates duplicate cached keys", () => {
+    const key = itemKey(items[1]!);
+    const merged = mergeRankedKeysOrder(items, [key, key]);
+    expect(merged.map((i) => i.externalId)).toEqual(["b", "a", "c"]);
+  });
+
+  it("passes through original order when there is no cached ranking", () => {
+    expect(mergeRankedKeysOrder(items, []).map((i) => i.externalId)).toEqual([
+      "a",
+      "b",
+      "c",
+    ]);
+  });
+});
+
+describe("FakeLlmRanker + degrade pattern", () => {
+  const items = [item({ externalId: "a" }), item({ externalId: "b" })];
+
+  it("a successful rank reorders items", async () => {
+    const ranker = new FakeLlmRanker(() => [1, 0]);
+    const ranked = await ranker.rank(items, "anything");
+    expect(mergeRankedOrder(items, ranked).map((i) => i.externalId)).toEqual([
+      "b",
+      "a",
+    ]);
+  });
+
+  it("a thrown error is the caller's signal to degrade to chronological order", async () => {
+    const ranker = new FakeLlmRanker(() => {
+      throw new Error("model unavailable");
+    });
+    await expect(ranker.rank(items, "anything")).rejects.toThrow(
+      "model unavailable",
+    );
+  });
+});
diff --git a/core/ports.ts b/core/ports.ts
index 44bca93..6a5655a 100644
--- a/core/ports.ts
+++ b/core/ports.ts
@@ -29,3 +29,17 @@ export interface FeedRepository {
   ): Promise<FeedItem[]>;
   countTotalItems(): Promise<number>;
 }
+
+/**
+ * Ranks `items` by relevance to a free-text `interests` description.
+ * Returns a best-effort ordering of 0-based indices into `items`, most
+ * relevant first — FeedItem has no stable numeric id, so position in the
+ * input array is the only identifier the ranker needs. The result may be a
+ * subset (the caller appends any indices the ranker omitted, in their
+ * original order) and may be empty if ranking failed entirely; it must
+ * never throw for a malformed model response, only for genuine
+ * transport/availability failures.
+ */
+export interface LlmRanker {
+  rank(items: FeedItem[], interests: string): Promise<number[]>;
+}
diff --git a/core/render.ts b/core/render.ts
index 4e3f5a0..c3a683a 100644
--- a/core/render.ts
+++ b/core/render.ts
@@ -298,6 +298,21 @@ export function renderIndexPage(data: PageData): string {
           </div>
         </section>
         </div>
+        <section class="config-section" aria-labelledby="config-personalization-title">
+          <div class="config-section-header">
+            <h3 id="config-personalization-title" class="config-section-title">AI personalization</h3>
+            <p class="config-section-copy">Rerank your feed by your interests using AI. Stored only in this browser.</p>
+          </div>
+          <div class="config-options">
+            <label class="config-option">
+              <input type="checkbox" data-ai-personalization-toggle />
+              <span class="config-option-body">
+                <span class="config-option-title">Enable personalized ranking</span>
+              </span>
+            </label>
+          </div>
+          <textarea class="interests-input" data-interests-input rows="3" maxlength="300" placeholder="fullstack development, software engineering, computer science, AI, machine learning, LLMs"></textarea>
+        </section>
         <section class="config-section" aria-labelledby="config-sources-title">
           <div class="config-section-header">
             <h3 id="config-sources-title" class="config-section-title">Sources</h3>
@@ -389,6 +404,7 @@ export function renderIndexPage(data: PageData): string {
     </dialog>
 ${errorsBlock}
     <main class="shell page-body">
+      <p class="personalized-indicator is-hidden" data-personalized-indicator>✦ Personalized by AI</p>
       <section class="cards-grid${cardsHiddenClass}" data-card-grid data-current-source="${escapeHtml(data.currentSource)}" data-page-size="${data.pageSize}" data-has-next="${data.hasNext ? "true" : "false"}" aria-busy="false">
         ${data.cards.map(renderCard).join("\n        ")}
       </section>
diff --git a/platforms/cloudflare/src/env.d.ts b/platforms/cloudflare/src/env.d.ts
index 919a4f8..3ba7539 100644
--- a/platforms/cloudflare/src/env.d.ts
+++ b/platforms/cloudflare/src/env.d.ts
@@ -5,8 +5,12 @@ export interface Env {
    * refreshes as fresh invocations (fresh 10ms CPU budget each), instead of
    * a public-internet self-fetch that would need a known hostname. */
   SELF: Fetcher;
+  /** Workers AI binding backing the "For You" LlmRanker — see
+   * platforms/cloudflare/src/llmRanker.ts. No secret needed. */
+  AI: Ai;
   REFRESH_SECRET: string;
   APP_VERSION?: string;
   FEEDREADER_ITEMS_PER_SOURCE?: string;
   FEEDREADER_USER_AGENT?: string;
+  FEEDREADER_PERSONALIZE_POOL_SIZE?: string;
 }
diff --git a/platforms/cloudflare/src/index.ts b/platforms/cloudflare/src/index.ts
index a70de10..6328496 100644
--- a/platforms/cloudflare/src/index.ts
+++ b/platforms/cloudflare/src/index.ts
@@ -3,7 +3,7 @@
 // served by the [assets] binding before any request reaches fetch() below
 // — see wrangler.toml and docs/RUNBOOK.md.
 
-import type { RefreshOutcome } from "../../../core/domain.ts";
+import type { CardView, RefreshOutcome, SyncState } from "../../../core/domain.ts";
 import {
   buildCards,
   buildErrors,
@@ -14,7 +14,15 @@ import {
 } from "../../../core/service.ts";
 import { build, type Source } from "../../../core/sources/index.ts";
 import { renderIndexPage } from "../../../core/render.ts";
+import {
+  MAX_INTERESTS_LENGTH,
+  itemKey,
+  mergeRankedKeysOrder,
+  mergeRankedOrder,
+} from "../../../core/personalize/rank.ts";
+import { paginate } from "../../../core/sources/listInMemory.ts";
 import { D1Repository } from "./repository.ts";
+import { CloudflareLlmRanker } from "./llmRanker.ts";
 import type { Env } from "./env.d.ts";
 
 const PAGE_SIZE = 12;
@@ -25,6 +33,15 @@ const KNOWN_SOURCES = new Set([
   "alphaxiv",
 ]);
 
+// Ranking the LLM sees is capped (cheap prompt, see env var below); pagination
+// is served from a larger in-memory pool. 500 mirrors the "dataset is small"
+// assumption already baked into D1Repository.listFeedItems/countTotalItems.
+const PERSONALIZE_FULL_POOL_LIMIT = 500;
+// Backstop only — the cache key already changes whenever the underlying
+// source data refreshes (see latestSuccessAt below), so this just bounds
+// staleness if a source somehow stops refreshing.
+const PERSONALIZE_CACHE_TTL_SECONDS = 21600;
+
 export default {
   async fetch(request: Request, env: Env): Promise<Response> {
     const url = new URL(request.url);
@@ -34,6 +51,15 @@ export default {
     if (url.pathname === "/") return handleHome(url, env, repo);
     if (url.pathname === "/healthz") return handleHealthz(sources, repo);
     if (url.pathname === "/api/items") return handleItemsApi(url, repo);
+    if (url.pathname === "/api/personalize") {
+      if (request.method !== "POST") {
+        return new Response("method not allowed", {
+          status: 405,
+          headers: { Allow: "POST" },
+        });
+      }
+      return handlePersonalize(request, env, repo);
+    }
     if (url.pathname === "/api/refresh") {
       if (request.method !== "POST") {
         return new Response("method not allowed", {
@@ -138,21 +164,244 @@ async function handleItemsApi(url: URL, repo: D1Repository): Promise<Response> {
     offset,
     limit,
     has_next: hasNext,
-    items: cards.map((card) => ({
-      source: card.source,
-      index: card.index,
-      title: card.title,
-      url: card.url,
-      brief: card.brief ?? null,
-      brief_prefix: card.briefPrefix ?? null,
-      brief_suffix: card.briefSuffix ?? null,
-      brief_date_iso: card.briefDateIso ?? null,
-      brief_date_kind: card.briefDateKind,
-      host: card.host,
-    })),
+    items: cardsToJson(cards),
+  });
+}
+
+function cardsToJson(cards: CardView[]) {
+  return cards.map((card) => ({
+    source: card.source,
+    index: card.index,
+    title: card.title,
+    url: card.url,
+    brief: card.brief ?? null,
+    brief_prefix: card.briefPrefix ?? null,
+    brief_suffix: card.briefSuffix ?? null,
+    brief_date_iso: card.briefDateIso ?? null,
+    brief_date_kind: card.briefDateKind,
+    host: card.host,
+  }));
+}
+
+/**
+ * Client sends only interests (plus the currently-active source filter and
+ * normal limit/offset paging); the candidate pool and final ordering are
+ * entirely server-decided — the client never supplies item ids.
+ *
+ * The LLM is only ever consulted for the *first* page request for a given
+ * (interests, source filter, source-data freshness) combination — the
+ * resulting ranked order is cached via the platform Cache API, keyed so it
+ * naturally invalidates whenever the underlying sources next refresh (see
+ * latestSuccessAt). Every page request — including the first — re-fetches
+ * fresh item rows from D1 and reorders them according to that ranking, so
+ * pagination never re-invokes the model and item content (title/score/etc.)
+ * is never served stale from the cache, only the ordering is.
+ *
+ * On any ranking failure (bad model output, or every model in the fallback
+ * chain throwing) it serves the pool's chronological order and sets
+ * `degraded: true` rather than erroring the request — and deliberately does
+ * not cache that outcome, so the next request tries ranking again instead of
+ * being stuck degraded for the cache's lifetime.
+ */
+async function handlePersonalize(
+  request: Request,
+  env: Env,
+  repo: D1Repository,
+): Promise<Response> {
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return Response.json({ error: "invalid JSON body" }, { status: 400 });
+  }
+
+  const { interests, source, sources, limit, offset } =
+    parsePersonalizeBody(body);
+  if (interests === "") {
+    return Response.json({ error: "interests is required" }, { status: 400 });
+  }
+
+  const fullPool = await repo.listFeedItems(
+    PERSONALIZE_FULL_POOL_LIMIT,
+    0,
+    source,
+    sources,
+    "",
+  );
+  if (fullPool.length === 0) {
+    return Response.json({
+      generated_at: new Date().toISOString(),
+      degraded: false,
+      has_next: false,
+      items: [],
+    });
+  }
+
+  const states = await repo.listSourceStates();
+  const freshness = latestSuccessAt(states, source, sources);
+  const cacheRequest = personalizeCacheRequest(
+    interests,
+    source,
+    sources,
+    freshness,
+  );
+
+  let rankedKeys = await readRankedKeysCache(cacheRequest);
+  let degraded = false;
+  if (rankedKeys === null) {
+    let poolSize = parsePositiveInt(
+      env.FEEDREADER_PERSONALIZE_POOL_SIZE ?? null,
+      60,
+    );
+    if (poolSize > 100) poolSize = 100;
+    const candidates = fullPool.slice(0, poolSize);
+    try {
+      const ranker = new CloudflareLlmRanker(env.AI);
+      const ranked = await ranker.rank(candidates, interests);
+      if (ranked.length > 0) {
+        rankedKeys = mergeRankedOrder(candidates, ranked).map(itemKey);
+        await writeRankedKeysCache(cacheRequest, rankedKeys);
+      } else {
+        rankedKeys = [];
+        degraded = true;
+      }
+    } catch {
+      rankedKeys = [];
+      degraded = true;
+    }
+  }
+
+  const ordered = mergeRankedKeysOrder(fullPool, rankedKeys);
+  const page = paginate(ordered, limit, offset);
+  const hasNext = offset + page.length < ordered.length;
+  const cards = buildCards(page, offset);
+
+  return Response.json({
+    generated_at: new Date().toISOString(),
+    degraded,
+    has_next: hasNext,
+    items: cardsToJson(cards),
   });
 }
 
+function parsePersonalizeBody(body: unknown): {
+  interests: string;
+  source: string;
+  sources: string[];
+  limit: number;
+  offset: number;
+} {
+  const record =
+    typeof body === "object" && body !== null
+      ? (body as Record<string, unknown>)
+      : {};
+  const interests = normalizeInterests(record.interests);
+  const source = normalizeSource(
+    typeof record.source === "string" ? record.source : "",
+  );
+  const querySource = source === "all" ? "" : source;
+  const rawSources = Array.isArray(record.sources)
+    ? record.sources
+        .filter((value): value is string => typeof value === "string")
+        .join(",")
+    : "";
+  const sources = querySource === "" ? normalizeSourceList(rawSources) : [];
+  let limit = parsePositiveInt(bodyNumberAsString(record.limit), PAGE_SIZE);
+  if (limit > 100) limit = 100;
+  const offset = parseNonNegativeInt(bodyNumberAsString(record.offset), 0);
+  return { interests, source: querySource, sources, limit, offset };
+}
+
+function bodyNumberAsString(value: unknown): string | null {
+  if (typeof value === "number" && Number.isFinite(value)) return String(value);
+  if (typeof value === "string") return value;
+  return null;
+}
+
+function normalizeInterests(raw: unknown): string {
+  if (typeof raw !== "string") return "";
+  return raw.trim().slice(0, MAX_INTERESTS_LENGTH);
+}
+
+/** Latest successful refresh among the sources in scope — used so the
+ * personalize ranking cache invalidates itself whenever new items arrive,
+ * without needing an explicit purge. */
+function latestSuccessAt(
+  states: Record<string, SyncState>,
+  source: string,
+  sources: string[],
+): string {
+  const relevant =
+    source !== ""
+      ? [source]
+      : sources.length > 0
+        ? sources
+        : Array.from(KNOWN_SOURCES);
+  let latest = "";
+  for (const key of relevant) {
+    const at = states[key]?.lastSuccessAt ?? "";
+    if (at > latest) latest = at;
+  }
+  return latest;
+}
+
+/** Synthetic GET request used purely as a Cache API key — never fetched.
+ * `.invalid` is reserved by RFC 2606 for exactly this kind of placeholder
+ * use, so it's guaranteed not to collide with a real host. */
+function personalizeCacheRequest(
+  interests: string,
+  source: string,
+  sources: string[],
+  freshness: string,
+): Request {
+  const raw = `rank:v1:${interests} ${source} ${sources.join(",")} ${freshness}`;
+  const url = `https://feedreader-internal.invalid/personalize-rank?k=${encodeURIComponent(raw)}`;
+  return new Request(url, { method: "GET" });
+}
+
+// tsconfig includes the DOM lib (needed for linkedom-based source parsing
+// elsewhere), whose own `CacheStorage` type shadows the Workers one and
+// lacks `.default` — cast through `unknown` once to recover it.
+function defaultPersonalizeCache(): Cache {
+  return (caches as unknown as { default: Cache }).default;
+}
+
+async function readRankedKeysCache(
+  cacheRequest: Request,
+): Promise<string[] | null> {
+  try {
+    const hit = await defaultPersonalizeCache().match(cacheRequest);
+    if (!hit) return null;
+    const data = (await hit.json()) as unknown;
+    return Array.isArray(data)
+      ? data.filter((value): value is string => typeof value === "string")
+      : null;
+  } catch {
+    // Cache API is best-effort (and a documented no-op in local `wrangler
+    // dev`) — any failure here is just a cache miss, never an error.
+    return null;
+  }
+}
+
+async function writeRankedKeysCache(
+  cacheRequest: Request,
+  rankedKeys: string[],
+): Promise<void> {
+  try {
+    await defaultPersonalizeCache().put(
+      cacheRequest,
+      new Response(JSON.stringify(rankedKeys), {
+        headers: {
+          "content-type": "application/json",
+          "cache-control": `max-age=${PERSONALIZE_CACHE_TTL_SECONDS}`,
+        },
+      }),
+    );
+  } catch {
+    // Same as above — a failed write just means the next request re-ranks.
+  }
+}
+
 async function handleRefresh(env: Env, sources: Source[]): Promise<Response> {
   const outcomes = await fanOutRefresh(env, sources);
   const allOk = outcomes.every((outcome) => outcome.ok);
diff --git a/platforms/cloudflare/src/llmRanker.ts b/platforms/cloudflare/src/llmRanker.ts
new file mode 100644
index 0000000..e4ea349
--- /dev/null
+++ b/platforms/cloudflare/src/llmRanker.ts
@@ -0,0 +1,87 @@
+// Workers AI-backed LlmRanker. Tries each model in MODEL_CHAIN in order —
+// falling through on error or unparseable output — before giving up. Workers
+// AI (env.AI) is a first-party binding (no API key/secret to manage) with a
+// 10,000 Neurons/day free allocation, so it's the provider used here instead
+// of a third-party gateway like OpenRouter. Model ids below are confirmed
+// non-deprecated in Cloudflare's Workers AI catalog as of 2026-06-23 — see
+// https://developers.cloudflare.com/workers-ai/models/ before changing them,
+// since Workers AI does retire model ids on a published schedule.
+
+import type { FeedItem } from "../../../core/domain.ts";
+import type { LlmRanker } from "../../../core/ports.ts";
+import { buildRankingPrompt, parseRankedIndices } from "../../../core/personalize/rank.ts";
+
+const MAX_OUTPUT_TOKENS = 512;
+
+type ModelCall = (ai: Ai, prompt: string) => Promise<string>;
+
+function responseText(result: unknown): string {
+  if (typeof result === "string") return result;
+  if (result && typeof result === "object" && "response" in result) {
+    const response = (result as { response?: unknown }).response;
+    if (typeof response === "string") return response;
+  }
+  return "";
+}
+
+// Primary, then three fallbacks of decreasing similarity (different size,
+// then a different model vendor entirely) so a single model family outage
+// doesn't take down personalization.
+const MODEL_CHAIN: ModelCall[] = [
+  (ai, prompt) =>
+    ai
+      .run("@cf/meta/llama-3.1-8b-instruct-fp8", {
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: MAX_OUTPUT_TOKENS,
+      })
+      .then(responseText),
+  (ai, prompt) =>
+    ai
+      .run("@cf/meta/llama-3.3-70b-instruct-fp8-fast", {
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: MAX_OUTPUT_TOKENS,
+      })
+      .then(responseText),
+  (ai, prompt) =>
+    ai
+      .run("@cf/mistral/mistral-7b-instruct-v0.2-lora", {
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: MAX_OUTPUT_TOKENS,
+      })
+      .then(responseText),
+  (ai, prompt) =>
+    ai
+      .run("@cf/meta/llama-3.2-3b-instruct", {
+        messages: [{ role: "user", content: prompt }],
+        max_tokens: MAX_OUTPUT_TOKENS,
+      })
+      .then(responseText),
+];
+
+export class CloudflareLlmRanker implements LlmRanker {
+  constructor(private readonly ai: Ai) {}
+
+  async rank(items: FeedItem[], interests: string): Promise<number[]> {
+    if (items.length === 0 || interests.trim() === "") return [];
+    const prompt = buildRankingPrompt(items, interests);
+
+    let lastError: unknown;
+    for (const call of MODEL_CHAIN) {
+      try {
+        const raw = await call(this.ai, prompt);
+        const ranked = parseRankedIndices(raw, items.length);
+        if (ranked.length > 0) return ranked;
+      } catch (error) {
+        lastError = error;
+      }
+    }
+    // Every model either errored or returned unparseable output. Throw only
+    // if at least one was a genuine error — a clean run of parse failures
+    // alone resolves to [] (caller treats that as "no ranking available",
+    // not a hard failure).
+    if (lastError) {
+      throw lastError instanceof Error ? lastError : new Error(String(lastError));
+    }
+    return [];
+  }
+}
diff --git a/platforms/cloudflare/wrangler.toml b/platforms/cloudflare/wrangler.toml
index a8951c7..a2e071f 100644
--- a/platforms/cloudflare/wrangler.toml
+++ b/platforms/cloudflare/wrangler.toml
@@ -40,10 +40,19 @@ migrations_dir = "migrations"
 binding = "SELF"
 service = "feedreader"
 
+# Workers AI binding for the "For You" personalization ranker (see
+# platforms/cloudflare/src/llmRanker.ts). First-party binding — no secret to
+# provision; 10,000 Neurons/day free allocation.
+[ai]
+binding = "AI"
+
 [vars]
 APP_VERSION = "dev"
 FEEDREADER_ITEMS_PER_SOURCE = "20"
 FEEDREADER_USER_AGENT = "feedreader/0.1"
+# Candidate pool size ranked per /api/personalize call — bounds prompt size
+# and Workers AI cost.
+FEEDREADER_PERSONALIZE_POOL_SIZE = "60"
 
 # REFRESH_SECRET is a secret, not a var — set it with:
 #   wrangler secret put REFRESH_SECRET --config platforms/cloudflare/wrangler.toml
diff --git a/web-static/static/app.js b/web-static/static/app.js
index b7c62c0..c4a81a0 100644
--- a/web-static/static/app.js
+++ b/web-static/static/app.js
@@ -32,6 +32,13 @@
   const themeOptions = Array.from(
     document.querySelectorAll("[data-theme-option]"),
   );
+  const aiPersonalizationToggle = document.querySelector(
+    "[data-ai-personalization-toggle]",
+  );
+  const interestsInput = document.querySelector("[data-interests-input]");
+  const personalizedIndicator = document.querySelector(
+    "[data-personalized-indicator]",
+  );
   const connectionIndicator = document.querySelector(
     "[data-connection-indicator]",
   );
@@ -74,12 +81,19 @@
   const visitedLinksStorageKey = "feedreader.visited";
   const visitedLinksLimit = 500;
   const themeStorageKey = "feedreader.theme";
+  const interestsStorageKey = "feedreader.interests";
+  const aiPersonalizationStorageKey = "feedreader.aiPersonalizationEnabled";
+  const interestsMaxLength = 300;
+  const defaultInterests = "engineering, AI/ML, startups";
   const metaThemeColor = document.querySelector('meta[name="theme-color"]');
 
   let activeFilter = cardsGrid?.dataset.currentSource || "all";
   let selectedSources = loadSelectedSources();
   let uiDensity = loadUIDensity();
   let visitedLinks = loadVisitedLinks();
+  let interests = loadInterests();
+  let aiPersonalizationEnabled = interests !== "" && loadAiPersonalizationEnabled();
+  let personalizedActive = false;
   let activeQuery = (searchInput?.value || "").trim();
   let searchOpen = Boolean(activeQuery);
   let loadedCount = cardsGrid
@@ -295,6 +309,31 @@
     localStorage.setItem(densityConfigStorageKey, uiDensity);
   }
 
+  function loadInterests() {
+    try {
+      const stored = localStorage.getItem(interestsStorageKey);
+      return stored === null ? defaultInterests : stored.trim();
+    } catch {
+      return defaultInterests;
+    }
+  }
+
+  function persistInterests(next) {
+    localStorage.setItem(interestsStorageKey, next);
+  }
+
+  function loadAiPersonalizationEnabled() {
+    try {
+      return localStorage.getItem(aiPersonalizationStorageKey) === "true";
+    } catch {
+      return false;
+    }
+  }
+
+  function persistAiPersonalizationEnabled(next) {
+    localStorage.setItem(aiPersonalizationStorageKey, next ? "true" : "false");
+  }
+
   function syncThemeOptions() {
     themeOptions.forEach((option) => {
       option.checked = option.value === root.dataset.theme;
@@ -336,6 +375,16 @@
     return [...selectedSources];
   }
 
+  function personalizationConfigured() {
+    return aiPersonalizationEnabled && interests !== "";
+  }
+
+  function shouldShowPersonalized() {
+    return (
+      activeFilter === "all" && personalizationConfigured() && !activeQuery
+    );
+  }
+
   function renderFilters() {
     if (!filterNav) return;
     const keys = visibleFilterKeys();
@@ -350,12 +399,26 @@
       .join(""); // Fixed local source definitions only; all interpolated values are escaped above.
   }
 
+  function renderPersonalizedIndicator() {
+    if (!personalizedIndicator) return;
+    personalizedIndicator.classList.toggle("is-hidden", !personalizedActive);
+  }
+
   function syncConfigOptions() {
     configOptions.forEach((option) => {
       option.checked = selectedSources.includes(option.value);
     });
   }
 
+  function syncPersonalizationOptions() {
+    if (aiPersonalizationToggle) {
+      aiPersonalizationToggle.checked = aiPersonalizationEnabled;
+    }
+    if (interestsInput) {
+      interestsInput.value = interests;
+    }
+  }
+
   function syncDensityOptions() {
     densityOptions.forEach((option) => {
       option.checked = option.value === uiDensity;
@@ -585,30 +648,51 @@
     showLoadingToast = true,
   }) => {
     syncConnectivityState();
+    const personalize = shouldShowPersonalized();
     const requestId = ++requestSequence;
     setFeedLoading(true, {
       mode: append ? "append" : "replace",
       message:
-        loadingMessage || (append ? "Loading more items…" : "Loading feed…"),
+        loadingMessage ||
+        (append
+          ? "Loading more items…"
+          : personalize
+            ? "Personalizing feed…"
+            : "Loading feed…"),
       showLoadingToast,
     });
 
-    const url = new URL("/api/items", window.location.origin);
-    url.searchParams.set("limit", String(pageSize));
-    url.searchParams.set("offset", String(offset));
-    if (source && source !== "all") {
-      url.searchParams.set("source", source);
-    } else if (shouldRestrictAllSources()) {
-      url.searchParams.set("sources", selectedSources.join(","));
-    }
-    if (query) {
-      url.searchParams.set("q", query);
-    }
-
     try {
-      const response = await fetch(url.toString(), {
-        headers: { Accept: "application/json" },
-      });
+      let response;
+      if (personalize) {
+        const body = { interests, limit: pageSize, offset };
+        if (shouldRestrictAllSources()) {
+          body.sources = selectedSources;
+        }
+        response = await fetch("/api/personalize", {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json",
+          },
+          body: JSON.stringify(body),
+        });
+      } else {
+        const url = new URL("/api/items", window.location.origin);
+        url.searchParams.set("limit", String(pageSize));
+        url.searchParams.set("offset", String(offset));
+        if (source && source !== "all") {
+          url.searchParams.set("source", source);
+        } else if (shouldRestrictAllSources()) {
+          url.searchParams.set("sources", selectedSources.join(","));
+        }
+        if (query) {
+          url.searchParams.set("q", query);
+        }
+        response = await fetch(url.toString(), {
+          headers: { Accept: "application/json" },
+        });
+      }
       if (!response.ok) {
         throw new Error(`fetch failed: ${response.status}`);
       }
@@ -645,6 +729,16 @@
       }
       loadedCount = append ? loadedCount + items.length : items.length;
       applyVisitedLinkState();
+
+      if (personalize) {
+        personalizedActive = !payload.degraded;
+        if (!append && payload.degraded) {
+          showToast("Personalization unavailable, showing latest", "error");
+        }
+      } else {
+        personalizedActive = false;
+      }
+      renderPersonalizedIndicator();
       renderFeedBody();
     } finally {
       if (requestId === requestSequence) {
@@ -788,6 +882,7 @@
     syncConfigOptions();
     syncDensityOptions();
     syncThemeOptions();
+    syncPersonalizationOptions();
     if (typeof configDialog?.showModal === "function" && !configDialog.open) {
       configDialog.showModal();
       syncDialogOpenState(true);
@@ -869,12 +964,25 @@
     syncDialogOpenState(false);
   }
 
-  async function applyDialogSettings(nextSources, nextDensity, nextTheme) {
+  async function applyDialogSettings(
+    nextSources,
+    nextDensity,
+    nextTheme,
+    nextPersonalizationEnabled,
+    nextInterests,
+  ) {
     const normalizedSources = normalizeSelectedSources(nextSources);
     if (normalizedSources.length === 0) {
       showToast("Select at least one source", "error");
       return;
     }
+    const normalizedInterests = (nextInterests || "")
+      .trim()
+      .slice(0, interestsMaxLength);
+    if (nextPersonalizationEnabled && !normalizedInterests) {
+      showToast("Add a few interests to enable For You", "error");
+      return;
+    }
     const normalizedDensity = normalizeUIDensity(nextDensity);
     const normalizedTheme = nextTheme === "light" ? "light" : "dark";
     const sourcesChanged =
@@ -884,6 +992,9 @@
       );
     const densityChanged = normalizedDensity !== uiDensity;
     const themeChanged = normalizedTheme !== root.dataset.theme;
+    const interestsChanged = normalizedInterests !== interests;
+    const personalizationEnabledChanged =
+      nextPersonalizationEnabled !== aiPersonalizationEnabled;
 
     if (themeChanged) {
       applyTheme(normalizedTheme);
@@ -893,7 +1004,17 @@
       applyUIDensity(normalizedDensity);
     }
 
-    if (!sourcesChanged) {
+    if (interestsChanged) {
+      interests = normalizedInterests;
+      persistInterests(interests);
+    }
+
+    if (personalizationEnabledChanged) {
+      aiPersonalizationEnabled = nextPersonalizationEnabled;
+      persistAiPersonalizationEnabled(aiPersonalizationEnabled);
+    }
+
+    if (!sourcesChanged && !interestsChanged && !personalizationEnabledChanged) {
       syncDensityOptions();
       closeConfigDialog();
       return;
@@ -904,7 +1025,7 @@
     syncConfigOptions();
     syncDensityOptions();
     closeConfigDialog();
-    await refetchCurrentView({ loadingMessage: "Loading selected sources…" });
+    await refetchCurrentView({ loadingMessage: "Loading feed…" });
   }
 
   if (filterNav) {
@@ -953,6 +1074,8 @@
           currentSourceSelection(),
           currentDensitySelection(),
           currentThemeSelection(),
+          aiPersonalizationToggle?.checked ?? false,
+          interestsInput?.value ?? "",
         );
       } catch (error) {
         showToast("Failed to apply reader settings", "error");
@@ -1190,9 +1313,10 @@
   applyVisitedLinkState();
   localizeBriefDates();
   const shouldBootstrapRefetch =
-    activeFilter === "all"
+    shouldShowPersonalized() ||
+    (activeFilter === "all"
       ? selectedSources.length !== availableSources.length
-      : !selectedSources.includes(activeFilter);
+      : !selectedSources.includes(activeFilter));
   ensureActiveFilterIsVisible();
   renderFilters();
   renderSearch();
diff --git a/web-static/static/style.css b/web-static/static/style.css
index d244611..53a2e85 100644
--- a/web-static/static/style.css
+++ b/web-static/static/style.css
@@ -272,6 +272,29 @@ button {
   border-color: color-mix(in srgb, var(--accent) 44%, var(--border));
 }
 
+.interests-input {
+  width: 100%;
+  min-height: 4.5rem;
+  border-radius: 0.75rem;
+  border: 1px solid var(--border);
+  background: var(--surface);
+  color: var(--text);
+  box-shadow: var(--shadow);
+  padding: 0.65rem 0.86rem;
+  font-size: 16px;
+  font-family: inherit;
+  resize: vertical;
+}
+
+.interests-input::placeholder {
+  color: var(--muted);
+}
+
+.interests-input:focus {
+  outline: none;
+  border-color: color-mix(in srgb, var(--accent) 44%, var(--border));
+}
+
 .search-icon-close {
   display: none;
 }
@@ -712,6 +735,19 @@ body.is-dialog-open {
   margin-bottom: 0.8rem;
 }
 
+.personalized-indicator {
+  display: inline-flex;
+  align-items: center;
+  gap: 0.3rem;
+  margin: 0 0 0.7rem;
+  padding: 0.25rem 0.7rem;
+  border-radius: 999px;
+  border: 1px solid color-mix(in srgb, var(--accent) 36%, var(--border));
+  background: var(--accent-soft);
+  color: var(--text);
+  font-size: 0.8rem;
+}
+
 .error-banner,
 .empty-state {
   padding: 0.8rem 0.9rem;

From f353720f4254a5f1eaec53d0e73765405f31c6b2 Mon Sep 17 00:00:00 2001
From: Phat Pham <phat@onroads.xyz>
Date: Wed, 24 Jun 2026 00:42:13 +0700
Subject: [PATCH 2/3] feat: move "For You" personalization from per-request LLM
 rerank to embed-at-ingestion + similarity-at-query

Embeds each item once at ingestion time (Workers AI bge-base-en-v1.5) instead
of sending the full candidate pool through an LLM on every personalize
request. At query time, only the interests string is embedded and the pool
is ranked by cosine similarity in-worker; the existing LLM ranker now runs
only as an optional polish pass over the top similarity hits
(FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE, 0 disables it).

This also fixes the old all-or-nothing degraded fallback: the response now
reports personalization: "llm" | "similarity" | "none" instead of a boolean,
so a failed/disabled LLM polish step still serves a similarity-personalized
order instead of falling all the way back to chronological.
---
 core/personalize/rank.ts                      |  17 ++-
 core/personalize/similarity.ts                |  57 ++++++++
 core/personalize/test/fakeEmbedder.ts         |  15 ++
 core/personalize/test/rank.spec.ts            |  22 +++
 core/personalize/test/similarity.spec.ts      |  80 +++++++++++
 core/ports.ts                                 |  44 ++++++
 core/service.ts                               |  57 +++++++-
 core/test/fakeFeedRepository.ts               |  79 +++++++++++
 core/test/service.spec.ts                     |  73 +++++++++-
 docs/RUNBOOK.md                               |  24 ++++
 .../migrations/0002_add_item_embeddings.sql   |   7 +
 platforms/cloudflare/src/embedder.ts          |  31 ++++
 platforms/cloudflare/src/env.d.ts             |   6 +-
 platforms/cloudflare/src/index.ts             | 133 +++++++++++++-----
 platforms/cloudflare/src/repository.ts        | 108 +++++++++++---
 platforms/cloudflare/wrangler.toml            |   8 +-
 web-static/static/app.js                      |   4 +-
 17 files changed, 695 insertions(+), 70 deletions(-)
 create mode 100644 core/personalize/similarity.ts
 create mode 100644 core/personalize/test/fakeEmbedder.ts
 create mode 100644 core/personalize/test/similarity.spec.ts
 create mode 100644 core/test/fakeFeedRepository.ts
 create mode 100644 platforms/cloudflare/migrations/0002_add_item_embeddings.sql
 create mode 100644 platforms/cloudflare/src/embedder.ts

diff --git a/core/personalize/rank.ts b/core/personalize/rank.ts
index 750e31f..4d9b4c7 100644
--- a/core/personalize/rank.ts
+++ b/core/personalize/rank.ts
@@ -63,8 +63,13 @@ export function parseRankedIndices(raw: string, itemCount: number): number[] {
 
 /**
  * Places ranked items first (in model order), then appends any remaining
- * items in their original order — guarantees a full, valid list even when
- * the ranker returns a partial or empty result.
+ * items in whatever order `items` arrived in — guarantees a full, valid
+ * list even when the ranker returns a partial or empty result. Callers may
+ * pass any base order, not just chronological — e.g. handlePersonalize
+ * feeds this similarity-ranked input (core/personalize/similarity.ts) when
+ * running the LLM as a polish pass over a pre-filtered candidate pool, and
+ * the "remainder" preserves that similarity order rather than reverting to
+ * chronological.
  */
 export function mergeRankedOrder(
   items: FeedItem[],
@@ -96,9 +101,11 @@ export function itemKey(item: FeedItem): string {
  * possibly over a different/smaller pool) onto a freshly-fetched item
  * list: ranked items appear first, in cached order; anything not in
  * rankedKeys — new items, or items the cache doesn't cover — keeps its
- * original relative order at the end. Unlike mergeRankedOrder, this
- * tolerates the two lists having different lengths or contents, since
- * `items` is re-fetched live while rankedKeys may be stale or partial.
+ * relative order from `items` at the end (which, like mergeRankedOrder,
+ * need not be chronological — see that function's comment). Unlike
+ * mergeRankedOrder, this tolerates the two lists having different lengths
+ * or contents, since `items` is re-fetched live while rankedKeys may be
+ * stale or partial.
  */
 export function mergeRankedKeysOrder(
   items: FeedItem[],
diff --git a/core/personalize/similarity.ts b/core/personalize/similarity.ts
new file mode 100644
index 0000000..9261ed1
--- /dev/null
+++ b/core/personalize/similarity.ts
@@ -0,0 +1,57 @@
+// Pure helpers for ranking by embedding similarity — the retrieval half of
+// the "For You" feature's retrieve-then-rerank pipeline. No I/O; adapters
+// (e.g. platforms/cloudflare/src/embedder.ts) own the actual model call and
+// platforms/cloudflare/src/index.ts's handlePersonalize wires this together
+// with the existing LLM polish step (core/personalize/rank.ts).
+
+import type { FeedItem } from "../domain.ts";
+import { itemKey } from "./rank.ts";
+
+export function cosineSimilarity(a: number[], b: number[]): number {
+  let dot = 0;
+  let normA = 0;
+  let normB = 0;
+  const length = Math.min(a.length, b.length);
+  for (let i = 0; i < length; i++) {
+    dot += a[i]! * b[i]!;
+    normA += a[i]! * a[i]!;
+    normB += b[i]! * b[i]!;
+  }
+  if (normA === 0 || normB === 0) return 0;
+  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
+}
+
+/**
+ * Sorts `items` by cosine similarity to `interestsVector`, descending.
+ * Items with no entry in `embeddings` (not yet embedded at ingestion time,
+ * or embedding generation failed that cycle) sink to the end, keeping
+ * their relative order from `items` — a stable partition, not a random
+ * placement, so an all-unembedded pool degrades to a no-op rather than
+ * reshuffling. mergeRankedOrder/mergeRankedKeysOrder in rank.ts then treat
+ * this function's output as the new "base order" for the LLM polish step.
+ */
+export function rankBySimilarity(
+  items: FeedItem[],
+  embeddings: Map<string, number[]>,
+  interestsVector: number[],
+): FeedItem[] {
+  const scored: { item: FeedItem; score: number; index: number }[] = [];
+  const unscored: FeedItem[] = [];
+  items.forEach((item, index) => {
+    const vector = embeddings.get(itemKey(item));
+    if (vector) {
+      scored.push({
+        item,
+        score: cosineSimilarity(vector, interestsVector),
+        index,
+      });
+    } else {
+      unscored.push(item);
+    }
+  });
+  scored.sort((a, b) => {
+    if (b.score !== a.score) return b.score - a.score;
+    return a.index - b.index;
+  });
+  return [...scored.map((entry) => entry.item), ...unscored];
+}
diff --git a/core/personalize/test/fakeEmbedder.ts b/core/personalize/test/fakeEmbedder.ts
new file mode 100644
index 0000000..6d273f2
--- /dev/null
+++ b/core/personalize/test/fakeEmbedder.ts
@@ -0,0 +1,15 @@
+import type { Embedder } from "../../ports.ts";
+
+/** In-memory Embedder for tests — no network calls. `script` decides the
+ * returned vectors (or throws, to simulate a transport/availability
+ * failure), same pattern as FakeLlmRanker. */
+export class FakeEmbedder implements Embedder {
+  readonly calls: string[][] = [];
+
+  constructor(private readonly script: (texts: string[]) => number[][]) {}
+
+  async embed(texts: string[]): Promise<number[][]> {
+    this.calls.push(texts);
+    return this.script(texts);
+  }
+}
diff --git a/core/personalize/test/rank.spec.ts b/core/personalize/test/rank.spec.ts
index 1829e92..75432e3 100644
--- a/core/personalize/test/rank.spec.ts
+++ b/core/personalize/test/rank.spec.ts
@@ -81,6 +81,16 @@ describe("mergeRankedOrder", () => {
     const merged = mergeRankedOrder(items, [99, 1]);
     expect(merged.map((i) => i.externalId)).toEqual(["b", "a", "c"]);
   });
+
+  it("preserves a non-chronological base order for the remainder, e.g. a similarity-ranked pool", () => {
+    const shuffled = [
+      item({ externalId: "c" }),
+      item({ externalId: "a" }),
+      item({ externalId: "b" }),
+    ];
+    const merged = mergeRankedOrder(shuffled, [2]);
+    expect(merged.map((i) => i.externalId)).toEqual(["b", "c", "a"]);
+  });
 });
 
 describe("mergeRankedKeysOrder", () => {
@@ -119,6 +129,18 @@ describe("mergeRankedKeysOrder", () => {
       "c",
     ]);
   });
+
+  it("preserves a non-chronological base order for the remainder, e.g. a similarity-ranked pool", () => {
+    const shuffled = [
+      item({ source: "hackernews", externalId: "c" }),
+      item({ source: "hackernews", externalId: "a" }),
+      item({ source: "github", externalId: "b" }),
+    ];
+    const merged = mergeRankedKeysOrder(shuffled, [
+      itemKey(shuffled[2]!),
+    ]);
+    expect(merged.map((i) => i.externalId)).toEqual(["b", "c", "a"]);
+  });
 });
 
 describe("FakeLlmRanker + degrade pattern", () => {
diff --git a/core/personalize/test/similarity.spec.ts b/core/personalize/test/similarity.spec.ts
new file mode 100644
index 0000000..1357b3c
--- /dev/null
+++ b/core/personalize/test/similarity.spec.ts
@@ -0,0 +1,80 @@
+import { describe, expect, it } from "vitest";
+import type { FeedItem } from "../../domain.ts";
+import { itemKey } from "../rank.ts";
+import { cosineSimilarity, rankBySimilarity } from "../similarity.ts";
+
+function item(overrides: Partial<FeedItem>): FeedItem {
+  return {
+    source: "hackernews",
+    externalId: "1",
+    title: "title",
+    url: "https://example.com",
+    sourceRank: 1,
+    metadata: {},
+    ...overrides,
+  };
+}
+
+describe("cosineSimilarity", () => {
+  it("is 1 for identical vectors", () => {
+    expect(cosineSimilarity([1, 2, 3], [1, 2, 3])).toBeCloseTo(1);
+  });
+
+  it("is 0 for orthogonal vectors", () => {
+    expect(cosineSimilarity([1, 0], [0, 1])).toBeCloseTo(0);
+  });
+
+  it("is -1 for opposite vectors", () => {
+    expect(cosineSimilarity([1, 2], [-1, -2])).toBeCloseTo(-1);
+  });
+
+  it("is 0 rather than NaN for a zero vector", () => {
+    expect(cosineSimilarity([0, 0], [1, 2])).toBe(0);
+  });
+});
+
+describe("rankBySimilarity", () => {
+  it("sorts items strictly by descending similarity when all have vectors", () => {
+    const a = item({ externalId: "a" });
+    const b = item({ externalId: "b" });
+    const c = item({ externalId: "c" });
+    const interests = [1, 0];
+    const embeddings = new Map([
+      [itemKey(a), [0, 1]], // orthogonal -> 0
+      [itemKey(b), [1, 0]], // identical -> 1
+      [itemKey(c), [0.7, 0.3]], // partial match
+    ]);
+    const ranked = rankBySimilarity([a, b, c], embeddings, interests);
+    expect(ranked.map((i) => i.externalId)).toEqual(["b", "c", "a"]);
+  });
+
+  it("sinks un-embedded items to the end, preserving their relative order", () => {
+    const embedded = item({ externalId: "embedded" });
+    const first = item({ externalId: "first" });
+    const second = item({ externalId: "second" });
+    const embeddings = new Map([[itemKey(embedded), [1, 0]]]);
+    const ranked = rankBySimilarity(
+      [first, embedded, second],
+      embeddings,
+      [1, 0],
+    );
+    // embedded item sorts first; the two un-embedded items keep their
+    // original relative order ("first" before "second") rather than being
+    // reshuffled.
+    expect(ranked.map((i) => i.externalId)).toEqual([
+      "embedded",
+      "first",
+      "second",
+    ]);
+  });
+
+  it("degrades to a no-op when no item has a stored embedding", () => {
+    const items = [
+      item({ externalId: "a" }),
+      item({ externalId: "b" }),
+      item({ externalId: "c" }),
+    ];
+    const ranked = rankBySimilarity(items, new Map(), [1, 0]);
+    expect(ranked.map((i) => i.externalId)).toEqual(["a", "b", "c"]);
+  });
+});
diff --git a/core/ports.ts b/core/ports.ts
index 6a5655a..f3085e7 100644
--- a/core/ports.ts
+++ b/core/ports.ts
@@ -8,10 +8,19 @@
 import type { FeedItem, SyncState } from "./domain.ts";
 
 export interface FeedRepository {
+  /**
+   * Upserts `items` for `source`. `embeddings` is keyed by `itemKey()`
+   * (see core/personalize/rank.ts) and may be empty or a partial subset of
+   * `items` — a key's absence means "leave that item's stored embedding
+   * untouched" (preserve whatever is already on the row), not "clear it".
+   * Embedding generation is best-effort at the call site, so this must
+   * accept an empty map without complaint.
+   */
   saveSnapshot(
     source: string,
     fetchedAtIso: string,
     items: FeedItem[],
+    embeddings: Map<string, number[]>,
   ): Promise<void>;
   recordFailure(
     source: string,
@@ -28,6 +37,29 @@ export interface FeedRepository {
     searchQuery: string,
   ): Promise<FeedItem[]>;
   countTotalItems(): Promise<number>;
+  /**
+   * Of the given `externalIds` for `source`, returns the subset that
+   * already have a stored embedding — used at ingestion time to embed only
+   * items that don't have one yet, instead of re-embedding the whole batch
+   * every refresh cycle.
+   */
+  listEmbeddedKeys(
+    source: string,
+    externalIds: string[],
+  ): Promise<Set<string>>;
+  /**
+   * Like `listFeedItems`, but for the /api/personalize candidate pool:
+   * always starts at offset 0, ignores search, and also returns each
+   * item's stored embedding (keyed by `itemKey()`) alongside the items
+   * themselves in one round trip. An item with no stored embedding yet is
+   * simply absent from the map — callers must treat that as "no vector",
+   * not an error.
+   */
+  listFeedItemsForRanking(
+    limit: number,
+    source: string,
+    sources: string[],
+  ): Promise<{ items: FeedItem[]; embeddings: Map<string, number[]> }>;
 }
 
 /**
@@ -43,3 +75,15 @@ export interface FeedRepository {
 export interface LlmRanker {
   rank(items: FeedItem[], interests: string): Promise<number[]>;
 }
+
+/**
+ * Embeds free-text into vectors for similarity ranking. One vector per
+ * input string, same order as `texts`, on success. Must throw only for
+ * genuine transport/availability failures (never return a partial or
+ * malformed result silently) — callers treat a thrown error as "skip
+ * embedding for this batch/request", the same resilience posture as
+ * LlmRanker.
+ */
+export interface Embedder {
+  embed(texts: string[]): Promise<number[][]>;
+}
diff --git a/core/service.ts b/core/service.ts
index 0e39902..2538c97 100644
--- a/core/service.ts
+++ b/core/service.ts
@@ -9,19 +9,26 @@ import type {
   RefreshOutcome,
   SourceSnapshot,
 } from "./domain.ts";
-import type { FeedRepository } from "./ports.ts";
+import type { Embedder, FeedRepository } from "./ports.ts";
+import { itemKey } from "./personalize/rank.ts";
 import type { Source } from "./sources/index.ts";
 
+const MAX_EMBEDDING_TEXT_LENGTH = 1000;
+
 export async function refreshAll(
   sources: Source[],
   repo: FeedRepository,
+  embedder: Embedder,
 ): Promise<RefreshOutcome[]> {
-  return Promise.all(sources.map((source) => refreshOne(source, repo)));
+  return Promise.all(
+    sources.map((source) => refreshOne(source, repo, embedder)),
+  );
 }
 
 export async function refreshOne(
   source: Source,
   repo: FeedRepository,
+  embedder: Embedder,
 ): Promise<RefreshOutcome> {
   const attemptedAtIso = new Date().toISOString();
   let items: FeedItem[];
@@ -37,8 +44,9 @@ export async function refreshOne(
     await repo.recordFailure(source.key(), attemptedAtIso, message);
     return { source: source.key(), ok: false, itemCount: 0, error: message };
   }
+  const embeddings = await embedNewItems(source.key(), items, repo, embedder);
   try {
-    await repo.saveSnapshot(source.key(), attemptedAtIso, items);
+    await repo.saveSnapshot(source.key(), attemptedAtIso, items, embeddings);
   } catch (error) {
     const message = errorMessage(error);
     await repo.recordFailure(source.key(), attemptedAtIso, message);
@@ -47,6 +55,49 @@ export async function refreshOne(
   return { source: source.key(), ok: true, itemCount: items.length };
 }
 
+/**
+ * Embeds only the items in this batch that don't already have a stored
+ * embedding (per repo.listEmbeddedKeys), in one batched call — so a
+ * source's ~20 mostly-unchanged trending items don't get re-embedded every
+ * hourly refresh. Embedding is best-effort: any failure here (transport,
+ * or the embedder returning fewer vectors than requested) yields an empty
+ * map rather than throwing, so it never blocks saveSnapshot — unembedded
+ * items simply sink to the end of similarity-ranked results until a later
+ * refresh succeeds.
+ */
+async function embedNewItems(
+  sourceKey: string,
+  items: FeedItem[],
+  repo: FeedRepository,
+  embedder: Embedder,
+): Promise<Map<string, number[]>> {
+  const alreadyEmbedded = await repo.listEmbeddedKeys(
+    sourceKey,
+    items.map((item) => item.externalId),
+  );
+  const unembedded = items.filter(
+    (item) => !alreadyEmbedded.has(item.externalId),
+  );
+  if (unembedded.length === 0) return new Map();
+  try {
+    const vectors = await embedder.embed(unembedded.map(textForEmbedding));
+    const out = new Map<string, number[]>();
+    unembedded.forEach((item, i) => {
+      const vector = vectors[i];
+      if (vector) out.set(itemKey(item), vector);
+    });
+    return out;
+  } catch {
+    return new Map();
+  }
+}
+
+function textForEmbedding(item: FeedItem): string {
+  const summary = item.summary?.trim();
+  const text = summary ? `${item.title}. ${summary}` : item.title;
+  return text.slice(0, MAX_EMBEDDING_TEXT_LENGTH);
+}
+
 export async function dashboard(
   sources: Source[],
   repo: FeedRepository,
diff --git a/core/test/fakeFeedRepository.ts b/core/test/fakeFeedRepository.ts
new file mode 100644
index 0000000..43cebea
--- /dev/null
+++ b/core/test/fakeFeedRepository.ts
@@ -0,0 +1,79 @@
+import type { FeedItem, SyncState } from "../domain.ts";
+import type { FeedRepository } from "../ports.ts";
+
+interface SaveSnapshotCall {
+  source: string;
+  fetchedAtIso: string;
+  items: FeedItem[];
+  embeddings: Map<string, number[]>;
+}
+
+interface ListEmbeddedKeysCall {
+  source: string;
+  externalIds: string[];
+}
+
+/** In-memory FeedRepository for tests — no D1/network. Records
+ * saveSnapshot/listEmbeddedKeys calls for assertions and lets tests
+ * pre-seed which (source, externalId) pairs already have a stored
+ * embedding, so refreshOne's "only embed what's missing" logic can be
+ * exercised without a real database. */
+export class FakeFeedRepository implements FeedRepository {
+  readonly saveSnapshotCalls: SaveSnapshotCall[] = [];
+  readonly listEmbeddedKeysCalls: ListEmbeddedKeysCall[] = [];
+  private readonly embeddedKeys: Set<string>;
+
+  constructor(preEmbedded: { source: string; externalId: string }[] = []) {
+    this.embeddedKeys = new Set(
+      preEmbedded.map(({ source, externalId }) => `${source} ${externalId}`),
+    );
+  }
+
+  async saveSnapshot(
+    source: string,
+    fetchedAtIso: string,
+    items: FeedItem[],
+    embeddings: Map<string, number[]>,
+  ): Promise<void> {
+    this.saveSnapshotCalls.push({ source, fetchedAtIso, items, embeddings });
+  }
+
+  async recordFailure(): Promise<void> {}
+
+  async listSourceStates(): Promise<Record<string, SyncState>> {
+    return {};
+  }
+
+  async getCurrentItems(): Promise<FeedItem[]> {
+    return [];
+  }
+
+  async listFeedItems(): Promise<FeedItem[]> {
+    return [];
+  }
+
+  async countTotalItems(): Promise<number> {
+    return 0;
+  }
+
+  async listEmbeddedKeys(
+    source: string,
+    externalIds: string[],
+  ): Promise<Set<string>> {
+    this.listEmbeddedKeysCalls.push({ source, externalIds });
+    const out = new Set<string>();
+    for (const externalId of externalIds) {
+      if (this.embeddedKeys.has(`${source} ${externalId}`)) {
+        out.add(externalId);
+      }
+    }
+    return out;
+  }
+
+  async listFeedItemsForRanking(): Promise<{
+    items: FeedItem[];
+    embeddings: Map<string, number[]>;
+  }> {
+    return { items: [], embeddings: new Map() };
+  }
+}
diff --git a/core/test/service.spec.ts b/core/test/service.spec.ts
index 181273b..b5442c1 100644
--- a/core/test/service.spec.ts
+++ b/core/test/service.spec.ts
@@ -2,7 +2,11 @@
 // (cardBriefPrefix/cardBriefSuffix/cardStatFragments) for each source.
 import { describe, expect, it } from "vitest";
 import type { FeedItem, SourceSnapshot } from "../domain.ts";
-import { buildCards, buildErrors } from "../service.ts";
+import { FakeEmbedder } from "../personalize/test/fakeEmbedder.ts";
+import { itemKey } from "../personalize/rank.ts";
+import { buildCards, buildErrors, refreshOne } from "../service.ts";
+import { FakeFeedRepository } from "./fakeFeedRepository.ts";
+import type { Source } from "../sources/index.ts";
 
 function item(overrides: Partial<FeedItem>): FeedItem {
   return {
@@ -118,3 +122,70 @@ describe("buildErrors", () => {
     ]);
   });
 });
+
+function fakeSource(fetch: () => Promise<FeedItem[]>): Source {
+  return {
+    key: () => "hackernews",
+    label: () => "Hacker News",
+    homepageUrl: () => "https://news.ycombinator.com",
+    fetch,
+  };
+}
+
+describe("refreshOne with embeddings", () => {
+  it("embeds only items that don't already have a stored embedding", async () => {
+    const fresh = item({ externalId: "fresh", title: "fresh item" });
+    const stale = item({ externalId: "stale", title: "stale item" });
+    const repo = new FakeFeedRepository([
+      { source: "hackernews", externalId: "stale" },
+    ]);
+    const embedder = new FakeEmbedder((texts) => texts.map(() => [1, 0]));
+
+    const outcome = await refreshOne(
+      fakeSource(async () => [fresh, stale]),
+      repo,
+      embedder,
+    );
+
+    expect(outcome.ok).toBe(true);
+    expect(embedder.calls).toEqual([["fresh item"]]);
+    expect(repo.saveSnapshotCalls).toHaveLength(1);
+    const embeddings = repo.saveSnapshotCalls[0]!.embeddings;
+    expect(embeddings.has(itemKey(fresh))).toBe(true);
+    expect(embeddings.has(itemKey(stale))).toBe(false);
+  });
+
+  it("still saves the snapshot when the embedder throws, with an empty embeddings map", async () => {
+    const repo = new FakeFeedRepository();
+    const embedder = new FakeEmbedder(() => {
+      throw new Error("model unavailable");
+    });
+
+    const outcome = await refreshOne(
+      fakeSource(async () => [item({ externalId: "a" })]),
+      repo,
+      embedder,
+    );
+
+    expect(outcome.ok).toBe(true);
+    expect(repo.saveSnapshotCalls).toHaveLength(1);
+    expect(repo.saveSnapshotCalls[0]!.embeddings.size).toBe(0);
+  });
+
+  it("short-circuits before any embed call when the source fetch fails", async () => {
+    const repo = new FakeFeedRepository();
+    const embedder = new FakeEmbedder(() => [[1, 0]]);
+
+    const outcome = await refreshOne(
+      fakeSource(async () => {
+        throw new Error("network down");
+      }),
+      repo,
+      embedder,
+    );
+
+    expect(outcome.ok).toBe(false);
+    expect(embedder.calls).toHaveLength(0);
+    expect(repo.saveSnapshotCalls).toHaveLength(0);
+  });
+});
diff --git a/docs/RUNBOOK.md b/docs/RUNBOOK.md
index dbf187d..f713ce8 100644
--- a/docs/RUNBOOK.md
+++ b/docs/RUNBOOK.md
@@ -35,6 +35,30 @@ npm run db:migrate:remote   # deployed D1 database
 
 New migrations go in `platforms/cloudflare/migrations/` as `NNNN_description.sql`, following on from `0001_init.sql`.
 
+## "For You" embedding pipeline
+
+`/api/personalize` is retrieve-then-rerank, not a single LLM call: each item gets a `@cf/baai/bge-base-en-v1.5` embedding once at ingestion time (`core/service.ts`'s `refreshOne`, stored in `items.embedding_json`/`embedding_model`), and at request time only the interests string gets embedded — ranking the ~500-item pool by cosine similarity (`core/personalize/similarity.ts`) needs no LLM call. The existing `CloudflareLlmRanker` then runs only as an optional "polish" pass over the top `FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE` similarity hits (default 30; set to `0` to disable the LLM step entirely and serve similarity order on its own — no redeploy needed, just a var change).
+
+**Re-embedding trigger is presence-only, not staleness.** `D1Repository.listEmbeddedKeys` skips any item that already has a non-null `embedding_json`, so a source's mostly-unchanged trending items don't get re-embedded every hourly refresh — but if an item's title/summary is edited in place after first ingestion (rare for these sources), its embedding silently goes stale with no automatic re-embed. Existing rows from before this column existed self-backfill within ~1 hour of deploy, since the hourly refresh re-upserts every currently-live item regardless.
+
+**Neuron budget**: the LLM polish step (one chat-model call over up to ~30 item titles/summaries) dominates Workers AI spend, not the embedder (one batched embed call per source per refresh cycle, only for items missing a vector, plus one tiny embed call per personalize cache-miss request). Under budget pressure, the first lever to pull is `FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE`, not the embedding model.
+
+**Manual verification** (no D1/Miniflare integration tests exist for this — same convention as the rest of `D1Repository`):
+
+```bash
+npm run db:migrate:local
+npm run dev
+# trigger one source's refresh, then inspect:
+wrangler d1 execute feedreader --local --config platforms/cloudflare/wrangler.toml \
+  --command "SELECT external_id, embedding_json IS NOT NULL AS has_embedding FROM items WHERE source='hackernews' LIMIT 5"
+# trigger that source's refresh again and re-run the query — embedding_json
+# should be unchanged (not re-computed) for the same rows, confirming the
+# ON CONFLICT ... coalesce(excluded.embedding_json, items.embedding_json)
+# preserve behavior in repository.ts.
+```
+
+With real Workers AI credentials, exercise `/api/personalize` with two paraphrased interests strings (e.g. "rust and distributed systems" vs "distributed systems, rust") and confirm comparable rankings despite the literal string difference — that's the concrete capability this pipeline is meant to deliver over the old cache-key-by-literal-string approach.
+
 ## Reading cron execution history
 
 Use Cloudflare dashboard → Workers & Pages → `feedreader` → Triggers → Cron Triggers, or `wrangler tail` while a scheduled run is expected, to confirm `sync_state.last_attempt_at` is updating without user-triggered traffic.
diff --git a/platforms/cloudflare/migrations/0002_add_item_embeddings.sql b/platforms/cloudflare/migrations/0002_add_item_embeddings.sql
new file mode 100644
index 0000000..78593b6
--- /dev/null
+++ b/platforms/cloudflare/migrations/0002_add_item_embeddings.sql
@@ -0,0 +1,7 @@
+-- Embedding vector for each item, generated once at ingestion time (see
+-- core/service.ts's refreshOne) and reused across every /api/personalize
+-- request — no per-request LLM call needed for the retrieval step. NULL
+-- until a refresh cycle successfully embeds the item; embedding_model
+-- records which model produced the stored vector, for future migrations.
+ALTER TABLE items ADD COLUMN embedding_json TEXT;
+ALTER TABLE items ADD COLUMN embedding_model TEXT;
diff --git a/platforms/cloudflare/src/embedder.ts b/platforms/cloudflare/src/embedder.ts
new file mode 100644
index 0000000..2bd632c
--- /dev/null
+++ b/platforms/cloudflare/src/embedder.ts
@@ -0,0 +1,31 @@
+// Workers AI-backed Embedder, used both at ingestion time (core/service.ts's
+// refreshOne, embedding each item's title+summary once) and at request time
+// (handlePersonalize, embedding the interests string). Same first-party env.AI
+// binding as llmRanker.ts — no separate secret/binding to provision.
+//
+// @cf/baai/bge-base-en-v1.5: 768-dim, mean-pooled by default. Cloudflare's
+// own model docs (developers.cloudflare.com/workers-ai/models/bge-base-en-v1.5/)
+// don't call for a different prefix/instruction on the query side vs the
+// document side, so item text and interests text are embedded identically
+// here — no asymmetric-retrieval prefix needed. Confirmed non-deprecated as
+// of 2026-06-23; check Cloudflare's catalog before changing the model id.
+
+import type { Embedder } from "../../../core/ports.ts";
+
+// Exported so repository.ts can record provenance (embedding_model column)
+// without duplicating the model id string.
+export const MODEL_ID = "@cf/baai/bge-base-en-v1.5";
+
+export class CloudflareEmbedder implements Embedder {
+  constructor(private readonly ai: Ai) {}
+
+  async embed(texts: string[]): Promise<number[][]> {
+    if (texts.length === 0) return [];
+    const output = await this.ai.run(MODEL_ID, { text: texts });
+    const data = (output as { data?: number[][] }).data;
+    if (!Array.isArray(data)) {
+      throw new Error(`${MODEL_ID} returned no embedding data`);
+    }
+    return data;
+  }
+}
diff --git a/platforms/cloudflare/src/env.d.ts b/platforms/cloudflare/src/env.d.ts
index 3ba7539..21256ae 100644
--- a/platforms/cloudflare/src/env.d.ts
+++ b/platforms/cloudflare/src/env.d.ts
@@ -12,5 +12,9 @@ export interface Env {
   APP_VERSION?: string;
   FEEDREADER_ITEMS_PER_SOURCE?: string;
   FEEDREADER_USER_AGENT?: string;
-  FEEDREADER_PERSONALIZE_POOL_SIZE?: string;
+  /** Size of the top-similarity slice the LLM polishes per /api/personalize
+   * call — 0 disables the LLM polish pass entirely, serving the
+   * embedding-similarity order on its own. See src/index.ts's
+   * handlePersonalize. */
+  FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE?: string;
 }
diff --git a/platforms/cloudflare/src/index.ts b/platforms/cloudflare/src/index.ts
index 24f39ad..96eb6e6 100644
--- a/platforms/cloudflare/src/index.ts
+++ b/platforms/cloudflare/src/index.ts
@@ -20,9 +20,11 @@ import {
   mergeRankedKeysOrder,
   mergeRankedOrder,
 } from "../../../core/personalize/rank.ts";
+import { rankBySimilarity } from "../../../core/personalize/similarity.ts";
 import { paginate } from "../../../core/sources/listInMemory.ts";
 import { D1Repository } from "./repository.ts";
 import { CloudflareLlmRanker } from "./llmRanker.ts";
+import { CloudflareEmbedder } from "./embedder.ts";
 import type { Env } from "./env.d.ts";
 
 const PAGE_SIZE = 12;
@@ -305,8 +307,17 @@ function cardsToJson(cards: CardView[]) {
  * normal limit/offset paging); the candidate pool and final ordering are
  * entirely server-decided — the client never supplies item ids.
  *
- * The LLM is only ever consulted for the *first* page request for a given
- * (interests, source filter, source-data freshness) combination — the
+ * Retrieve-then-rerank: items are embedded once at ingestion time (see
+ * core/service.ts's refreshOne), so the request-time path only embeds the
+ * interests string and sorts the pool by cosine similarity
+ * (core/personalize/similarity.ts) — no LLM call needed for this step, and
+ * it's robust to paraphrased interests in a way a literal cache-key string
+ * match isn't. The LLM ranker then runs only as an optional "polish" pass
+ * on the top FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE similarity hits
+ * (default 30, 0 disables it) instead of the whole candidate pool.
+ *
+ * The polish step is only ever consulted for the *first* page request for a
+ * given (interests, source filter, source-data freshness) combination — the
  * resulting ranked order is cached via the platform Cache API, keyed so it
  * naturally invalidates whenever the underlying sources next refresh (see
  * latestSuccessAt). Every page request — including the first — re-fetches
@@ -314,11 +325,14 @@ function cardsToJson(cards: CardView[]) {
  * pagination never re-invokes the model and item content (title/score/etc.)
  * is never served stale from the cache, only the ordering is.
  *
- * On any ranking failure (bad model output, or every model in the fallback
- * chain throwing) it serves the pool's chronological order and sets
- * `degraded: true` rather than erroring the request — and deliberately does
- * not cache that outcome, so the next request tries ranking again instead of
- * being stuck degraded for the cache's lifetime.
+ * `personalization` in the response tells the client which tier was
+ * actually served: "llm" (similarity + LLM polish both succeeded),
+ * "similarity" (similarity ranking served on its own — polish disabled, or
+ * it failed but similarity is still a real personalized order, unlike
+ * yesterday's flat chronological fallback), or "none" (interests embedding
+ * itself failed, or the pool was empty — chronological order, same as
+ * before). Only "none" is cached as a non-result (not cached at all), so
+ * the next request retries rather than being stuck there.
  */
 async function handlePersonalize(
   request: Request,
@@ -338,17 +352,15 @@ async function handlePersonalize(
     return Response.json({ error: "interests is required" }, { status: 400 });
   }
 
-  const fullPool = await repo.listFeedItems(
+  const { items: fullPool, embeddings } = await repo.listFeedItemsForRanking(
     PERSONALIZE_FULL_POOL_LIMIT,
-    0,
     source,
     sources,
-    "",
   );
   if (fullPool.length === 0) {
     return Response.json({
       generated_at: new Date().toISOString(),
-      degraded: false,
+      personalization: "none",
       has_next: false,
       items: [],
     });
@@ -363,28 +375,58 @@ async function handlePersonalize(
     freshness,
   );
 
-  let rankedKeys = await readRankedKeysCache(cacheRequest);
-  let degraded = false;
-  if (rankedKeys === null) {
-    let poolSize = parsePositiveInt(
-      env.FEEDREADER_PERSONALIZE_POOL_SIZE ?? null,
-      60,
-    );
-    if (poolSize > 100) poolSize = 100;
-    const candidates = fullPool.slice(0, poolSize);
+  const cached = await readRankedKeysCache(cacheRequest);
+  let rankedKeys: string[];
+  let personalization: "llm" | "similarity" | "none";
+  if (cached) {
+    rankedKeys = cached.rankedKeys;
+    personalization = cached.personalization;
+  } else {
+    let interestsVector: number[] | null = null;
     try {
-      const ranker = new CloudflareLlmRanker(env.AI);
-      const ranked = await ranker.rank(candidates, interests);
-      if (ranked.length > 0) {
-        rankedKeys = mergeRankedOrder(candidates, ranked).map(itemKey);
-        await writeRankedKeysCache(cacheRequest, rankedKeys);
+      const embedder = new CloudflareEmbedder(env.AI);
+      const [vector] = await embedder.embed([interests]);
+      interestsVector = vector ?? null;
+    } catch {
+      interestsVector = null;
+    }
+
+    if (interestsVector) {
+      const bySimilarity = rankBySimilarity(
+        fullPool,
+        embeddings,
+        interestsVector,
+      );
+      let polishPoolSize = parseNonNegativeInt(
+        env.FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE ?? null,
+        30,
+      );
+      if (polishPoolSize > 100) polishPoolSize = 100;
+
+      if (polishPoolSize > 0) {
+        const candidates = bySimilarity.slice(0, polishPoolSize);
+        try {
+          const ranker = new CloudflareLlmRanker(env.AI);
+          const ranked = await ranker.rank(candidates, interests);
+          if (ranked.length > 0) {
+            rankedKeys = mergeRankedOrder(candidates, ranked).map(itemKey);
+            personalization = "llm";
+          } else {
+            rankedKeys = bySimilarity.map(itemKey);
+            personalization = "similarity";
+          }
+        } catch {
+          rankedKeys = bySimilarity.map(itemKey);
+          personalization = "similarity";
+        }
       } else {
-        rankedKeys = [];
-        degraded = true;
+        rankedKeys = bySimilarity.map(itemKey);
+        personalization = "similarity";
       }
-    } catch {
+      await writeRankedKeysCache(cacheRequest, { personalization, rankedKeys });
+    } else {
       rankedKeys = [];
-      degraded = true;
+      personalization = "none";
     }
   }
 
@@ -395,7 +437,7 @@ async function handlePersonalize(
 
   return Response.json({
     generated_at: new Date().toISOString(),
-    degraded,
+    personalization,
     has_next: hasNext,
     items: cardsToJson(cards),
   });
@@ -461,16 +503,31 @@ function defaultPersonalizeCache(): Cache {
   return (caches as unknown as { default: Cache }).default;
 }
 
+interface CachedRanking {
+  personalization: "llm" | "similarity";
+  rankedKeys: string[];
+}
+
 async function readRankedKeysCache(
   cacheRequest: Request,
-): Promise<string[] | null> {
+): Promise<CachedRanking | null> {
   try {
     const hit = await defaultPersonalizeCache().match(cacheRequest);
     if (!hit) return null;
-    const data = (await hit.json()) as unknown;
-    return Array.isArray(data)
-      ? data.filter((value): value is string => typeof value === "string")
-      : null;
+    const data = (await hit.json()) as Partial<CachedRanking> | null;
+    if (
+      !data ||
+      (data.personalization !== "llm" && data.personalization !== "similarity") ||
+      !Array.isArray(data.rankedKeys)
+    ) {
+      return null;
+    }
+    return {
+      personalization: data.personalization,
+      rankedKeys: data.rankedKeys.filter(
+        (value): value is string => typeof value === "string",
+      ),
+    };
   } catch {
     // Cache API is best-effort (and a documented no-op in local `wrangler
     // dev`) — any failure here is just a cache miss, never an error.
@@ -480,12 +537,12 @@ async function readRankedKeysCache(
 
 async function writeRankedKeysCache(
   cacheRequest: Request,
-  rankedKeys: string[],
+  cached: CachedRanking,
 ): Promise<void> {
   try {
     await defaultPersonalizeCache().put(
       cacheRequest,
-      new Response(JSON.stringify(rankedKeys), {
+      new Response(JSON.stringify(cached), {
         headers: {
           "content-type": "application/json",
           "cache-control": `max-age=${PERSONALIZE_CACHE_TTL_SECONDS}`,
@@ -516,7 +573,7 @@ async function handleInternalRefresh(
   const sourceKey = url.pathname.slice("/internal/refresh/".length);
   const source = sources.find((s) => s.key() === sourceKey);
   if (!source) return new Response("unknown source", { status: 404 });
-  const outcome = await refreshOne(source, repo);
+  const outcome = await refreshOne(source, repo, new CloudflareEmbedder(env.AI));
   return Response.json(outcome);
 }
 
diff --git a/platforms/cloudflare/src/repository.ts b/platforms/cloudflare/src/repository.ts
index 6da4ef2..e18e5d2 100644
--- a/platforms/cloudflare/src/repository.ts
+++ b/platforms/cloudflare/src/repository.ts
@@ -6,7 +6,9 @@
 
 import type { FeedItem, SyncState } from "../../../core/domain.ts";
 import type { FeedRepository } from "../../../core/ports.ts";
+import { itemKey } from "../../../core/personalize/rank.ts";
 import { paginate, sortFeedItems } from "../../../core/sources/listInMemory.ts";
+import { MODEL_ID as EMBEDDING_MODEL_ID } from "./embedder.ts";
 
 interface ItemRow {
   source: string;
@@ -23,6 +25,10 @@ interface ItemRow {
   first_seen_at: string;
 }
 
+interface RankingItemRow extends ItemRow {
+  embedding_json: string | null;
+}
+
 interface SyncStateRow {
   source: string;
   last_attempt_at: string | null;
@@ -33,6 +39,10 @@ interface SyncStateRow {
 
 const ITEM_COLUMNS =
   "source, external_id, title, url, summary, author, score, comments_url, published_at, source_rank, metadata_json, first_seen_at";
+// Only listFeedItemsForRanking needs embedding_json — keeping it off the
+// base ITEM_COLUMNS list means the home page and /api/items don't pay for
+// reading every item's (multi-KB) vector on every request.
+const RANKING_ITEM_COLUMNS = `${ITEM_COLUMNS}, embedding_json`;
 
 export class D1Repository implements FeedRepository {
   constructor(private readonly db: D1Database) {}
@@ -41,12 +51,14 @@ export class D1Repository implements FeedRepository {
     source: string,
     fetchedAtIso: string,
     items: FeedItem[],
+    embeddings: Map<string, number[]>,
   ): Promise<void> {
     const upsertItem = this.db.prepare(`
       INSERT INTO items (
         source, external_id, title, url, summary, author, score, comments_url,
-        published_at, source_rank, metadata_json, first_seen_at, last_seen_at, updated_at
-      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+        published_at, source_rank, metadata_json, embedding_json, embedding_model,
+        first_seen_at, last_seen_at, updated_at
+      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
       ON CONFLICT(source, external_id) DO UPDATE SET
         title = excluded.title,
         url = excluded.url,
@@ -57,11 +69,14 @@ export class D1Repository implements FeedRepository {
         published_at = coalesce(items.published_at, excluded.published_at),
         source_rank = excluded.source_rank,
         metadata_json = excluded.metadata_json,
+        embedding_json = coalesce(excluded.embedding_json, items.embedding_json),
+        embedding_model = coalesce(excluded.embedding_model, items.embedding_model),
         last_seen_at = excluded.last_seen_at,
         updated_at = items.updated_at
     `);
-    const statements = items.map((item) =>
-      upsertItem.bind(
+    const statements = items.map((item) => {
+      const vector = embeddings.get(itemKey(item));
+      return upsertItem.bind(
         item.source,
         item.externalId,
         item.title,
@@ -73,11 +88,13 @@ export class D1Repository implements FeedRepository {
         item.publishedAt ?? null,
         item.sourceRank,
         JSON.stringify(item.metadata ?? {}),
+        vector ? JSON.stringify(vector) : null,
+        vector ? EMBEDDING_MODEL_ID : null,
         fetchedAtIso,
         fetchedAtIso,
         fetchedAtIso,
-      ),
-    );
+      );
+    });
 
     const upsertSyncState = this.db
       .prepare(
@@ -159,17 +176,7 @@ export class D1Repository implements FeedRepository {
     searchQuery: string,
   ): Promise<FeedItem[]> {
     let query = `SELECT ${ITEM_COLUMNS} FROM items`;
-    const conditions: string[] = [];
-    const args: unknown[] = [];
-
-    const trimmedSource = source.trim();
-    if (trimmedSource !== "") {
-      conditions.push("source = ?");
-      args.push(trimmedSource);
-    } else if (sources.length > 0) {
-      conditions.push(`source IN (${sources.map(() => "?").join(",")})`);
-      args.push(...sources);
-    }
+    const { conditions, args } = sourceFilterClause(source, sources);
 
     for (const term of searchTermsSql(searchQuery)) {
       conditions.push(
@@ -199,6 +206,73 @@ export class D1Repository implements FeedRepository {
       .first<{ count: number }>();
     return row?.count ?? 0;
   }
+
+  async listEmbeddedKeys(
+    source: string,
+    externalIds: string[],
+  ): Promise<Set<string>> {
+    if (externalIds.length === 0) return new Set();
+    const { results } = await this.db
+      .prepare(
+        `
+      SELECT external_id FROM items
+      WHERE source = ? AND embedding_json IS NOT NULL
+        AND external_id IN (${externalIds.map(() => "?").join(",")})
+    `,
+      )
+      .bind(source, ...externalIds)
+      .all<{ external_id: string }>();
+    return new Set(results.map((row) => row.external_id));
+  }
+
+  async listFeedItemsForRanking(
+    limit: number,
+    source: string,
+    sources: string[],
+  ): Promise<{ items: FeedItem[]; embeddings: Map<string, number[]> }> {
+    let query = `SELECT ${RANKING_ITEM_COLUMNS} FROM items`;
+    const { conditions, args } = sourceFilterClause(source, sources);
+    if (conditions.length > 0) {
+      query += ` WHERE ${conditions.join(" AND ")}`;
+    }
+
+    const { results } = await this.db
+      .prepare(query)
+      .bind(...args)
+      .all<RankingItemRow>();
+
+    const embeddings = new Map<string, number[]>();
+    const items: FeedItem[] = [];
+    for (const row of results) {
+      const item = rowToFeedItem(row);
+      items.push(item);
+      if (row.embedding_json) {
+        try {
+          embeddings.set(itemKey(item), JSON.parse(row.embedding_json));
+        } catch {
+          // Corrupt/legacy row — treat as unembedded rather than throwing.
+        }
+      }
+    }
+    return { items: paginate(sortFeedItems(items), limit, 0), embeddings };
+  }
+}
+
+function sourceFilterClause(
+  source: string,
+  sources: string[],
+): { conditions: string[]; args: unknown[] } {
+  const trimmedSource = source.trim();
+  if (trimmedSource !== "") {
+    return { conditions: ["source = ?"], args: [trimmedSource] };
+  }
+  if (sources.length > 0) {
+    return {
+      conditions: [`source IN (${sources.map(() => "?").join(",")})`],
+      args: [...sources],
+    };
+  }
+  return { conditions: [], args: [] };
 }
 
 function rowToFeedItem(row: ItemRow): FeedItem {
diff --git a/platforms/cloudflare/wrangler.toml b/platforms/cloudflare/wrangler.toml
index a2e071f..4b99f9c 100644
--- a/platforms/cloudflare/wrangler.toml
+++ b/platforms/cloudflare/wrangler.toml
@@ -50,9 +50,11 @@ binding = "AI"
 APP_VERSION = "dev"
 FEEDREADER_ITEMS_PER_SOURCE = "20"
 FEEDREADER_USER_AGENT = "feedreader/0.1"
-# Candidate pool size ranked per /api/personalize call — bounds prompt size
-# and Workers AI cost.
-FEEDREADER_PERSONALIZE_POOL_SIZE = "60"
+# Size of the top-similarity slice the LLM polishes per /api/personalize
+# call (0 disables the LLM polish pass, serving similarity order on its
+# own) — bounds prompt size and Workers AI cost. See src/index.ts's
+# handlePersonalize and src/embedder.ts.
+FEEDREADER_PERSONALIZE_POLISH_POOL_SIZE = "30"
 
 # REFRESH_SECRET is a secret, not a var — set it with:
 #   wrangler secret put REFRESH_SECRET --config platforms/cloudflare/wrangler.toml
diff --git a/web-static/static/app.js b/web-static/static/app.js
index cdf1d09..cf2a1e4 100644
--- a/web-static/static/app.js
+++ b/web-static/static/app.js
@@ -771,8 +771,8 @@
       applyVisitedLinkState();
 
       if (personalize) {
-        personalizedActive = !payload.degraded;
-        if (!append && payload.degraded) {
+        personalizedActive = payload.personalization !== "none";
+        if (!append && payload.personalization === "none") {
           showToast("Personalization unavailable, showing latest", "error");
         }
       } else {

From b59d0c107241e5b6a618f81be7ac3943c6dd157a Mon Sep 17 00:00:00 2001
From: Phat Pham <phat@onroads.xyz>
Date: Wed, 24 Jun 2026 00:59:56 +0700
Subject: [PATCH 3/3] fix: bump static asset cache-busting versions for
 app.js/style.css

Both files gained real content changes in this branch (personalization
toggle/interests UI, personalization field handling) without bumping their
?v= query strings, so a CDN/browser cache could keep serving the pre-change
script/styles after deploy.
---
 core/render.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/core/render.ts b/core/render.ts
index 75e9328..7af1452 100644
--- a/core/render.ts
+++ b/core/render.ts
@@ -151,8 +151,8 @@ export function renderIndexPage(data: PageData): string {
     <link rel="icon" href="/favicon.svg?v=8" sizes="any" type="image/svg+xml" />
     <link rel="shortcut icon" href="/favicon.svg?v=8" type="image/svg+xml" />
     <link rel="apple-touch-icon" href="/apple-touch-icon.png?v=8" />
-    <link rel="stylesheet" href="/static/style.css?v=45" />
-    <script src="/static/app.js?v=36" defer></script>
+    <link rel="stylesheet" href="/static/style.css?v=46" />
+    <script src="/static/app.js?v=37" defer></script>
   </head>
   <body data-app-version="${escapeHtml(data.appVersion)}">
     <header class="shell page-header">