diff --git a/docs/byok_guide.md b/docs/byok_guide.md index aae702b29..b185d5a0d 100644 --- a/docs/byok_guide.md +++ b/docs/byok_guide.md @@ -10,6 +10,7 @@ The BYOK (Bring Your Own Knowledge) feature in Lightspeed Core enables users to * [What is BYOK?](#what-is-byok) * [How BYOK Works](#how-byok-works) + * [Prioritization of BYOK content](#prioritization-of-byok-content) * [Prerequisites](#prerequisites) * [Configuration Guide](#configuration-guide) * [Step 1: Prepare Your Knowledge Sources](#step-1-prepare-your-knowledge-sources) @@ -77,17 +78,45 @@ Both modes rely on: - **Vector Database**: Your indexed knowledge sources stored as vector embeddings - **Embedding Model**: Converts queries and documents into vector representations for similarity matching -Inline RAG additionally supports: -- **Score Multiplier**: Optional weight applied per BYOK vector store when mixing multiple sources. Allows custom prioritization of content. +### Prioritization of BYOK content -> [!NOTE] -> OKP and BYOK scores are not directly comparable (different scoring systems), so -> `score_multiplier` does not apply to OKP results. To control the amount of retrieved -> context, set the `BYOK_RAG_MAX_CHUNKS` and `OKP_RAG_MAX_CHUNKS` constants in `src/constants.py` -> (defaults: 10 and 5 respectively). For Tool RAG, use `TOOL_RAG_MAX_CHUNKS` (default: 10). -> The `INLINE_RAG_MAX_CHUNKS` constant (value: 10) caps the final merged inline RAG -> chunks (BYOK + OKP) delivered to the LLM. Tool RAG is controlled independently -> by `TOOL_RAG_MAX_CHUNKS`. +When multiple BYOK stores are configured for Inline RAG, their results are merged and ranked. Two mechanisms control prioritization: + +- **Score Multiplier** (`score_multiplier`): A per-store weight applied to raw similarity scores during Inline RAG. Values > 1.0 boost a store's results; values < 1.0 reduce them. Only affects BYOK stores — OKP scores use a different scoring system and are not comparable. + +- **Reranker**: When enabled, a cross-encoder model re-scores the merged chunk pool (BYOK + OKP) using semantic similarity to the query. This normalizes scores across sources, making OKP and BYOK results directly comparable. BYOK score boosts are applied after reranking. + +**Chunk limits** control how many chunks flow through the pipeline. Configure them in `lightspeed-stack.yaml`: + +| Config path | Default | Description | +|-------------|---------|-------------| +| `rag.byok.max_chunks` | 10 | Total chunks fetched across all BYOK stores | +| `rag.okp.max_chunks` | 5 | Chunks fetched from OKP | +| `rag.retrieval.inline.max_chunks` | 10 | Final cap on merged inline RAG chunks delivered to the LLM | +| `rag.retrieval.tool.max_chunks` | 10 | Max chunks retrieved via Tool RAG (`file_search`) | + +```mermaid +flowchart TD + subgraph Sources["Source Fetching"] + B1["BYOK Store 1"] --> BPool + B2["BYOK Store 2"] --> BPool + BN["BYOK Store N"] --> BPool + BPool["BYOK Pool\ncapped at rag.byok.max_chunks"] + OKP["OKP (Solr)\ncapped at rag.okp.max_chunks"] + end + + BPool --> Pool["Merged Pool\n(all chunks, sorted by score)"] + OKP --> Pool + + Pool --> Decision{Reranker\nenabled?} + + Decision -->|Yes| Rerank["Cross-Encoder Rerank\n+ BYOK score boost"] + Decision -->|No| Cut + + Rerank --> Cut["Top K cut\nrag.retrieval.inline.max_chunks"] + + Cut --> Context["Final Inline RAG Context"] +``` --- @@ -288,7 +317,7 @@ byok_rag: > [!NOTE] > pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). -> It must be configured directly in the Llama Stack configuration file. +> It must be configured directly in the `run.yaml` configuration file. ```yaml vector_io: @@ -342,11 +371,11 @@ rag: ### Example 2: Multiple Knowledge Sources with pgvector -A configuration combining a local FAISS store (via `byok_rag`) with a remote pgvector store (configured directly in the Llama Stack configuration file): +A configuration combining a local FAISS store (via `byok_rag`) with a remote pgvector store (configured directly in the `run.yaml` configuration file): > [!NOTE] > pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). -> The pgvector provider must be configured directly in the Llama Stack configuration file. +> The pgvector provider must be configured directly in the `run.yaml` configuration file. **`lightspeed-stack.yaml`** — FAISS store and RAG strategy: @@ -373,7 +402,7 @@ rag: - local-docs ``` -**Llama Stack configuration file** — pgvector provider: +**`run.yaml` configuration file** — pgvector provider: ```yaml vector_io: diff --git a/docs/openapi.json b/docs/openapi.json index 0150e2055..0c6401ed1 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -6729,7 +6729,6 @@ "authorization": { "access_rules": [] }, - "byok_rag": [], "conversation_cache": {}, "database": { "sqlite": { @@ -6760,6 +6759,26 @@ "period": 1 } }, + "rag": { + "byok": { + "max_chunks": 10, + "stores": [] + }, + "okp": { + "max_chunks": 5, + "offline": true + }, + "retrieval": { + "inline": { + "max_chunks": 10, + "sources": [] + }, + "tool": { + "max_chunks": 10, + "sources": [] + } + } + }, "service": { "access_log": true, "auth_enabled": false, @@ -11806,63 +11825,28 @@ ], "title": "Body_create_file_v1_files_post" }, - "ByokRag": { + "ByokConfiguration": { "properties": { - "rag_id": { - "type": "string", - "minLength": 1, - "title": "RAG ID", - "description": "Unique RAG ID" - }, - "rag_type": { - "type": "string", - "minLength": 1, - "title": "RAG type", - "description": "Type of RAG database.", - "default": "inline::faiss" - }, - "embedding_model": { - "type": "string", - "minLength": 1, - "title": "Embedding model", - "description": "Embedding model identification", - "default": "sentence-transformers/all-mpnet-base-v2" - }, - "embedding_dimension": { + "max_chunks": { "type": "integer", "exclusiveMinimum": 0.0, - "title": "Embedding dimension", - "description": "Dimensionality of embedding vectors.", - "default": 768 - }, - "vector_db_id": { - "type": "string", - "minLength": 1, - "title": "Vector DB ID", - "description": "Vector database identification." - }, - "db_path": { - "type": "string", - "title": "DB path", - "description": "Path to RAG database." + "title": "Max BYOK chunks", + "description": "Maximum total number of chunks returned across all BYOK stores.", + "default": 10 }, - "score_multiplier": { - "type": "number", - "exclusiveMinimum": 0.0, - "title": "Score multiplier", - "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.", - "default": 1.0 + "stores": { + "items": { + "$ref": "#/components/schemas/RagStore" + }, + "type": "array", + "title": "BYOK RAG stores", + "description": "List of BYOK RAG store configurations." } }, "additionalProperties": false, "type": "object", - "required": [ - "rag_id", - "vector_db_id", - "db_path" - ], - "title": "ByokRag", - "description": "BYOK (Bring Your Own Knowledge) RAG configuration." + "title": "ByokConfiguration", + "description": "BYOK (Bring Your Own Knowledge) configuration." }, "CORSConfiguration": { "properties": { @@ -12067,14 +12051,6 @@ "title": "Approvals configuration", "description": "Settings for human-in-the-loop approval of MCP tool invocations" }, - "byok_rag": { - "items": { - "$ref": "#/components/schemas/ByokRag" - }, - "type": "array", - "title": "BYOK RAG configuration", - "description": "BYOK RAG configuration. This configuration can be used to reconfigure Llama Stack through its run.yaml configuration file" - }, "a2a_state": { "$ref": "#/components/schemas/A2AStateConfiguration", "title": "A2A state configuration", @@ -12121,12 +12097,7 @@ "rag": { "$ref": "#/components/schemas/RagConfiguration", "title": "RAG configuration", - "description": "Configuration for all RAG strategies (inline and tool-based)." - }, - "okp": { - "$ref": "#/components/schemas/OkpConfiguration", - "title": "OKP configuration", - "description": "OKP provider settings. Only used when 'okp' is listed in rag.inline or rag.tool." + "description": "Unified RAG configuration: BYOK stores, OKP provider, and retrieval strategies (inline and tool-based)." }, "reranker": { "$ref": "#/components/schemas/RerankerConfiguration", @@ -12179,7 +12150,6 @@ "authorization": { "access_rules": [] }, - "byok_rag": [], "conversation_cache": {}, "database": { "sqlite": { @@ -12210,6 +12180,26 @@ "period": 1 } }, + "rag": { + "byok": { + "max_chunks": 10, + "stores": [] + }, + "okp": { + "max_chunks": 5, + "offline": true + }, + "retrieval": { + "inline": { + "max_chunks": 10, + "sources": [] + }, + "tool": { + "max_chunks": 10, + "sources": [] + } + } + }, "service": { "access_log": true, "auth_enabled": false, @@ -14757,12 +14747,19 @@ ], "title": "OKP chunk filter query", "description": "Additional OKP filter query applied to every OKP search request. Use Solr boolean syntax, e.g. 'product:ansible AND product:*openshift*'." + }, + "max_chunks": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max OKP chunks", + "description": "Maximum number of chunks fetched from OKP.", + "default": 5 } }, "additionalProperties": false, "type": "object", "title": "OkpConfiguration", - "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.inline`` or ``rag.tool``." + "description": "OKP (Offline Knowledge Portal) provider configuration.\n\nControls provider-specific behaviour for the OKP vector store.\nOnly relevant when ``\"okp\"`` is listed in ``rag.retrieval.inline.sources``\nor ``rag.retrieval.tool.sources``." }, "OpenAIResponseAnnotationCitation": { "properties": { @@ -17761,27 +17758,84 @@ }, "RagConfiguration": { "properties": { - "inline": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Inline RAG IDs", - "description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)." + "byok": { + "$ref": "#/components/schemas/ByokConfiguration", + "title": "BYOK configuration", + "description": "Bring Your Own Knowledge store configurations and settings." }, - "tool": { - "items": { - "type": "string" - }, - "type": "array", - "title": "Tool RAG IDs", - "description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)." + "okp": { + "$ref": "#/components/schemas/OkpConfiguration", + "title": "OKP configuration", + "description": "OKP provider settings. Only used when 'okp' is listed in retrieval.inline.sources or retrieval.tool.sources." + }, + "retrieval": { + "$ref": "#/components/schemas/RetrievalConfiguration", + "title": "Retrieval configuration", + "description": "Inline and tool retrieval strategy settings." } }, "additionalProperties": false, "type": "object", "title": "RagConfiguration", - "description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``[]`` (no tool RAG).\n\nIf no RAG strategy is defined (inline and tool are empty),\nthe RAG tool will register all stores available to llama-stack." + "description": "Unified RAG configuration.\n\nGroups all RAG-related settings: BYOK stores, OKP provider, and\nretrieval strategies (inline and tool)." + }, + "RagStore": { + "properties": { + "rag_id": { + "type": "string", + "minLength": 1, + "title": "RAG ID", + "description": "Unique RAG ID" + }, + "backend": { + "type": "string", + "minLength": 1, + "title": "RAG backend", + "description": "Storage backend type (e.g. 'faiss').", + "default": "faiss" + }, + "embedding_model": { + "type": "string", + "minLength": 1, + "title": "Embedding model", + "description": "Embedding model identification", + "default": "sentence-transformers/all-mpnet-base-v2" + }, + "embedding_dimension": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Embedding dimension", + "description": "Dimensionality of embedding vectors.", + "default": 768 + }, + "vector_db_id": { + "type": "string", + "minLength": 1, + "title": "Vector DB ID", + "description": "Vector database identification." + }, + "db_path": { + "type": "string", + "title": "DB path", + "description": "Path to RAG database." + }, + "score_multiplier": { + "type": "number", + "exclusiveMinimum": 0.0, + "title": "Score multiplier", + "description": "Multiplier applied to relevance scores from this vector store. Used to weight results when querying multiple knowledge sources. Values > 1 boost this store's results; values < 1 reduce them.", + "default": 1.0 + } + }, + "additionalProperties": false, + "type": "object", + "required": [ + "rag_id", + "vector_db_id", + "db_path" + ], + "title": "RagStore", + "description": "BYOK (Bring Your Own Knowledge) RAG store configuration." }, "ReadinessResponse": { "properties": { @@ -18688,6 +18742,47 @@ ], "sse_example": "event: response.created\ndata: {\"type\":\"response.created\",\"sequence_number\":0,\"response\":{\"id\":\"resp_abc\",\"object\":\"response\",\"created_at\":1704067200,\"status\":\"in_progress\",\"model\":\"openai/gpt-4o-mini\",\"output\":[],\"store\":true,\"text\":{\"format\":{\"type\":\"text\"}},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{},\"output_text\":\"\"}}\n\nevent: response.output_item.added\ndata: {\"type\":\"response.output_item.added\",\"sequence_number\":1,\"response_id\":\"resp_abc\",\"output_index\":0,\"item\":{\"id\":\"msg_abc\",\"type\":\"message\",\"status\":\"in_progress\",\"role\":\"assistant\",\"content\":[]}}\n\n...\n\nevent: response.completed\ndata: {\"type\":\"response.completed\",\"sequence_number\":30,\"response\":{\"id\":\"resp_abc\",\"object\":\"response\",\"created_at\":1704067200,\"status\":\"completed\",\"model\":\"openai/gpt-4o-mini\",\"output\":[{\"id\":\"msg_abc\",\"type\":\"message\",\"status\":\"completed\",\"role\":\"assistant\",\"content\":[{\"type\":\"output_text\",\"text\":\"Hello! How can I help?\",\"annotations\":[]}]}],\"store\":true,\"text\":{\"format\":{\"type\":\"text\"}},\"usage\":{\"input_tokens\":10,\"output_tokens\":6,\"total_tokens\":16,\"input_tokens_details\":{\"cached_tokens\":0},\"output_tokens_details\":{\"reasoning_tokens\":0}},\"conversation\":\"0d21ba731f21f798dc9680125d5d6f49\",\"available_quotas\":{\"daily\":1000,\"monthly\":50000},\"output_text\":\"Hello! How can I help?\"}}\n\ndata: [DONE]\n\n" }, + "RetrievalConfiguration": { + "properties": { + "inline": { + "$ref": "#/components/schemas/RetrievalStrategyConfiguration", + "title": "Inline retrieval", + "description": "Inline RAG: context injected before the LLM request." + }, + "tool": { + "$ref": "#/components/schemas/RetrievalStrategyConfiguration", + "title": "Tool retrieval", + "description": "Tool RAG: LLM can call file_search on demand." + } + }, + "additionalProperties": false, + "type": "object", + "title": "RetrievalConfiguration", + "description": "Configuration for inline and tool retrieval strategies." + }, + "RetrievalStrategyConfiguration": { + "properties": { + "sources": { + "items": { + "type": "string" + }, + "type": "array", + "title": "RAG source IDs", + "description": "RAG IDs to use for this retrieval strategy. Use 'okp' to include the OKP vector store." + }, + "max_chunks": { + "type": "integer", + "exclusiveMinimum": 0.0, + "title": "Max chunks", + "description": "Maximum number of chunks returned by this retrieval strategy.", + "default": 10 + } + }, + "additionalProperties": false, + "type": "object", + "title": "RetrievalStrategyConfiguration", + "description": "Configuration for a single retrieval strategy (inline or tool)." + }, "RlsapiV1Attachment": { "properties": { "contents": { diff --git a/docs/rag_guide.md b/docs/rag_guide.md index 490e413ef..f22e46a22 100644 --- a/docs/rag_guide.md +++ b/docs/rag_guide.md @@ -34,6 +34,38 @@ Lightspeed Core Stack (LCS) supports two complementary RAG strategies: Both strategies can be enabled independently via the `rag` section of `lightspeed-stack.yaml`. See [BYOK Feature Documentation](byok_guide.md) for configuration details. +> [!NOTE] +> **Backward compatibility:** if neither `retrieval.inline.sources` nor `retrieval.tool.sources` is +> configured, all registered vector stores (BYOK and OKP) are automatically exposed as +> Tool RAG (`file_search`). Inline RAG is **not** enabled in this fallback — only Tool RAG. + +### Inline RAG chunk flow + +```mermaid +flowchart TD + subgraph Sources["Source Fetching"] + B1["BYOK Store 1"] --> BPool + B2["BYOK Store 2"] --> BPool + BN["BYOK Store N"] --> BPool + BPool["BYOK Pool\ncapped at byok.max_chunks"] + OKP["OKP (Solr)\ncapped at okp.max_chunks"] + end + + BPool --> Pool["Merged Pool\n(all chunks, sorted by score)"] + OKP --> Pool + + Pool --> Decision{Reranker\nenabled?} + + Decision -->|Yes| Rerank["Cross-Encoder Rerank\n+ BYOK score boost"] + Decision -->|No| Cut + + Rerank --> Cut["Top K cut\nretrieval.inline.max_chunks"] + + Cut --> Context["Final Inline RAG Context"] +``` + +Each BYOK store is queried in parallel, and the merged BYOK results are capped at `byok.max_chunks` total. OKP fetches up to `okp.max_chunks`. Together these form the reranking pool. If the reranker is enabled, the full pool is reranked with a cross-encoder and BYOK score boosts are applied. The result is capped at `retrieval.inline.max_chunks`. + The **Embedding Model** is used to convert queries and documents into vector representations for similarity matching. > [!NOTE] @@ -90,7 +122,7 @@ This example shows how to configure a remote PostgreSQL database with the [pgvec > [!NOTE] > pgvector is not yet supported via `byok_rag` in `lightspeed-stack.yaml` (see [LCORE-2437](https://redhat.atlassian.net/browse/LCORE-2437)). -> It must be configured directly in the Llama Stack configuration file. +> It must be configured directly in the `run.yaml` configuration file. > You will need to install PostgreSQL with a matching version to pgvector, then log in with `psql` and enable the extension with: > ```sql @@ -313,15 +345,16 @@ Example: **Chunk volume:** OKP and BYOK scores are not directly comparable (different scoring systems), so -`score_multiplier` (a BYOK-only concept) does not apply to OKP results. To control -the number of retrieved chunks, set the constants in `src/constants.py`: - -| Constant | Value | Description | -|----------|-------|-------------| -| `INLINE_RAG_MAX_CHUNKS` | 10 | Hard upper bound on the final merged inline RAG chunks (BYOK + OKP) delivered to the LLM | -| `OKP_RAG_MAX_CHUNKS` | 5 | Fetch hint for OKP (Inline RAG); controls how many chunks enter the reranking pool | -| `BYOK_RAG_MAX_CHUNKS` | 10 | Fetch hint for BYOK stores (Inline RAG); controls how many chunks enter the reranking pool | -| `TOOL_RAG_MAX_CHUNKS` | 10 | Max chunks retrieved via Tool RAG (`file_search`); independent from `INLINE_RAG_MAX_CHUNKS` | +`score_multiplier` (a BYOK-only concept) does not apply to OKP results. However, when +the reranker is enabled, it normalizes scores across sources using a cross-encoder model. +To control the number of retrieved chunks, configure `max_chunks` in `lightspeed-stack.yaml`: + +| Config path | Default | Description | +|-------------|---------|-------------| +| `rag.retrieval.inline.max_chunks` | 10 | Hard upper bound on the final merged inline RAG chunks (BYOK + OKP) delivered to the LLM | +| `rag.okp.max_chunks` | 5 | Fetch limit for OKP (Inline RAG); controls how many chunks enter the reranking pool | +| `rag.byok.max_chunks` | 10 | Fetch limit for BYOK stores (Inline RAG); controls how many chunks enter the reranking pool | +| `rag.retrieval.tool.max_chunks` | 10 | Max chunks retrieved via Tool RAG (`file_search`); independent from inline max_chunks | **Limitations:** diff --git a/examples/lightspeed-stack-byok-okp-rag.yaml b/examples/lightspeed-stack-byok-okp-rag.yaml index 7cbb36fc8..08760dfa0 100644 --- a/examples/lightspeed-stack-byok-okp-rag.yaml +++ b/examples/lightspeed-stack-byok-okp-rag.yaml @@ -34,40 +34,45 @@ quota_handlers: scheduler: # scheduler ticks in seconds period: 10 -byok_rag: - - rag_id: ocp-docs # referenced in rag.inline / rag.tool - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: vs_123 # Vector store ID (from index generation) - db_path: /tmp/ocp.faiss - score_multiplier: 1.0 # Weight for this vector store's results (Inline RAG only) - - rag_id: knowledge-base # referenced in rag.inline / rag.tool - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: vs_456 # Vector store ID (from index generation) - db_path: /tmp/kb.faiss - score_multiplier: 1.2 # Weight for this vector store's results (Inline RAG only) - # RAG configuration rag: - # Inline RAG: context injected before the LLM request from the listed sources - # List rag_ids from byok_rag, or 'okp' to include OKP - inline: - - ocp-docs - - knowledge-base - - okp - # Tool RAG: LLM can call file_search on demand to retrieve context - # List rag_ids from byok_rag, or 'okp' to include OKP - # Omit to use all registered BYOK stores (backward compatibility) - tool: - - ocp-docs - - knowledge-base + byok: + max_chunks: 10 # Max total chunks across all BYOK stores + stores: + - rag_id: ocp-docs # Referenced in retrieval.inline / retrieval.tool + backend: faiss + embedding_dimension: 1024 + vector_db_id: vs_123 # Llama-stack vector_store_id + db_path: /tmp/ocp.faiss + score_multiplier: 1.0 # Weight for this vector store's results (Inline RAG only) + - rag_id: knowledge-base # Referenced in retrieval.inline / retrieval.tool + backend: faiss + embedding_dimension: 384 + vector_db_id: vs_456 # Llama-stack vector_store_id + db_path: /tmp/kb.faiss + score_multiplier: 1.2 # Weight for this vector store's results (Inline RAG only) + + # OKP provider settings (only used when 'okp' is listed in retrieval sources) + okp: + offline: true # true = use parent_id for source URLs, false = use reference_url + max_chunks: 5 # Max chunks fetched from OKP + # Additional Solr filter query applied to every OKP search request. + # Use Solr boolean syntax + # chunk_filter_query: "product:*ansible* AND product:*openshift*" -# OKP provider settings (only used when 'okp' is listed in rag.inline or rag.tool) -okp: - offline: true # true = use parent_id for source URLs, false = use reference_url - # Additional Solr filter query applied to every OKP search request. - # Use Solr boolean syntax - # chunk_filter_query: "product:*ansible* AND product:*openshift*" + retrieval: + # Inline RAG: context injected before the LLM request from the listed sources + # List rag_ids from byok stores, or 'okp' to include OKP + inline: + sources: + - ocp-docs + - knowledge-base + - okp + max_chunks: 10 # Cap on merged inline result + # Tool RAG: LLM can call file_search on demand to retrieve context + # List rag_ids from byok stores, or 'okp' to include OKP + tool: + sources: + - ocp-docs + - knowledge-base + max_chunks: 10 # Tool RAG limit diff --git a/examples/quota-limiter-configuration-sqlite.yaml b/examples/quota-limiter-configuration-sqlite.yaml index 2bceaafb7..ba48040a7 100644 --- a/examples/quota-limiter-configuration-sqlite.yaml +++ b/examples/quota-limiter-configuration-sqlite.yaml @@ -33,17 +33,19 @@ conversation_cache: ssl_mode: disable gss_encmode: disable -#byok_rag: -# - rag_id: ocp_docs -# rag_type: inline::faiss -# embedding_dimension: 1024 -# vector_db_id: vector_byok_1 -# db_path: /tmp/ocp.faiss -# - rag_id: knowledge_base -# rag_type: inline::faiss -# embedding_dimension: 384 -# vector_db_id: vector_byok_2 -# db_path: /tmp/kb.faiss +#rag: +# byok: +# stores: +# - rag_id: ocp_docs +# backend: faiss +# embedding_dimension: 1024 +# vector_db_id: vector_byok_1 +# db_path: /tmp/ocp.faiss +# - rag_id: knowledge_base +# backend: faiss +# embedding_dimension: 384 +# vector_db_id: vector_byok_2 +# db_path: /tmp/kb.faiss quota_handlers: sqlite: diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py index c60c6db64..2537ebef9 100644 --- a/src/app/endpoints/rags.py +++ b/src/app/endpoints/rags.py @@ -24,7 +24,7 @@ RAGInfoResponse, RAGListResponse, ) -from models.config import Action, ByokRag +from models.config import Action, RagStore from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) @@ -107,7 +107,7 @@ async def rags_endpoint_handler( raise HTTPException(**response.model_dump()) from e -def _resolve_rag_id_to_vector_db_id(rag_id: str, byok_rags: list[ByokRag]) -> str: +def _resolve_rag_id_to_vector_db_id(rag_id: str, byok_rags: list[RagStore]) -> str: """Resolve a user-facing rag_id to the llama-stack vector_db_id. Checks if the given ID matches a rag_id in the BYOK config and returns @@ -166,7 +166,7 @@ async def get_rag_endpoint_handler( # Resolve user-facing rag_id to llama-stack vector_db_id vector_db_id = _resolve_rag_id_to_vector_db_id( - rag_id, configuration.configuration.byok_rag + rag_id, configuration.configuration.rag.byok.stores ) try: diff --git a/src/client.py b/src/client.py index 8fd1e0370..d4127cf6d 100644 --- a/src/client.py +++ b/src/client.py @@ -91,10 +91,14 @@ def _enrich_library_config(self, input_config_path: str) -> str: config = configuration.configuration # Enrichment: BYOK RAG - enrich_byok_rag(ls_config, [b.model_dump() for b in config.byok_rag]) + enrich_byok_rag(ls_config, [s.model_dump() for s in config.rag.byok.stores]) # Enrichment: Solr - enabled when "okp" appears in either inline or tool list - enrich_solr(ls_config, config.rag.model_dump(), config.okp.model_dump()) + rag_config_for_solr = { + "inline": config.rag.retrieval.inline.sources, + "tool": config.rag.retrieval.tool.sources, + } + enrich_solr(ls_config, rag_config_for_solr, config.rag.okp.model_dump()) # Enrichment: Azure Entra ID deferred auth entra_id_config = ( diff --git a/src/configuration.py b/src/configuration.py index e65c8c230..758ec93ad 100644 --- a/src/configuration.py +++ b/src/configuration.py @@ -479,7 +479,7 @@ def okp(self) -> "OkpConfiguration": """Return OKP configuration.""" if self._configuration is None: raise LogicError("logic error: configuration is not loaded") - return self._configuration.okp + return self._configuration.rag.okp @property def reranker(self) -> "RerankerConfiguration": @@ -502,12 +502,15 @@ def rag_id_mapping(self) -> dict[str, str]: if self._configuration is None: raise LogicError("logic error: configuration is not loaded") byok_mapping = { - brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag + store.vector_db_id: store.rag_id + for store in self._configuration.rag.byok.stores } - rag = self._configuration.rag + retrieval = self._configuration.rag.retrieval okp_id = constants.OKP_RAG_ID - okp_enabled = okp_id in (rag.inline or []) or okp_id in (rag.tool or []) + okp_enabled = okp_id in (retrieval.inline.sources or []) or okp_id in ( + retrieval.tool.sources or [] + ) okp_mapping = ( {constants.SOLR_DEFAULT_VECTOR_STORE_ID: okp_id} if okp_enabled else {} ) @@ -527,8 +530,8 @@ def score_multiplier_mapping(self) -> dict[str, float]: if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return { - brag.vector_db_id: brag.score_multiplier - for brag in self._configuration.byok_rag + store.vector_db_id: store.score_multiplier + for store in self._configuration.rag.byok.stores } @property @@ -543,7 +546,7 @@ def inline_solr_enabled(self) -> bool: """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") - return constants.OKP_RAG_ID in self._configuration.rag.inline + return constants.OKP_RAG_ID in self._configuration.rag.retrieval.inline.sources def resolve_index_name( self, vector_store_id: str, rag_id_mapping: Optional[dict[str, str]] = None diff --git a/src/constants.py b/src/constants.py index cae458f51..745933435 100644 --- a/src/constants.py +++ b/src/constants.py @@ -169,9 +169,11 @@ CACHE_TYPE_NOOP: Final[str] = "noop" # BYOK RAG -# Default RAG type for bring-your-own-knowledge RAG configurations, that type -# needs to be supported by Llama Stack -DEFAULT_RAG_TYPE: Final[str] = "inline::faiss" +# Backends that have enrichment support in llama_stack_configuration.py +SUPPORTED_RAG_BACKENDS: Final[frozenset[str]] = frozenset({"faiss"}) + +# Default RAG backend for bring-your-own-knowledge RAG configurations +DEFAULT_RAG_BACKEND: Final[str] = "faiss" # Default sentence transformer model for embedding generation, that type needs # to be supported by Llama Stack and configured properly in providers and @@ -188,16 +190,14 @@ USER_QUOTA_LIMITER: Final[str] = "user_limiter" CLUSTER_QUOTA_LIMITER: Final[str] = "cluster_limiter" -# Hard cap on total RAG chunks delivered to the LLM across all sources -INLINE_RAG_MAX_CHUNKS: Final[int] = 10 +# Default chunk limits (used as Pydantic field defaults in RagConfiguration) +DEFAULT_INLINE_RAG_MAX_CHUNKS: Final[int] = 10 +DEFAULT_TOOL_RAG_MAX_CHUNKS: Final[int] = 10 +DEFAULT_BYOK_RAG_MAX_CHUNKS: Final[int] = 10 +DEFAULT_OKP_RAG_MAX_CHUNKS: Final[int] = 5 # RAG as a tool constants DEFAULT_RAG_TOOL: Final[str] = "file_search" -TOOL_RAG_MAX_CHUNKS: Final[int] = 10 # retrieved from RAG as a tool - -# Inline RAG constants -BYOK_RAG_MAX_CHUNKS: Final[int] = 10 # retrieved from BYOK RAG -OKP_RAG_MAX_CHUNKS: Final[int] = 5 # retrieved from OKP RAG # Score multiplier applied to BYOK chunks after cross-encoder reranking (Solr chunks unchanged) BYOK_RAG_RERANK_BOOST: Final[float] = 1.2 diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index ca0775bcf..845b35be4 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -17,6 +17,17 @@ logger = get_logger(__name__) +BACKEND_TO_LLAMA_STACK_PROVIDER: dict[str, str] = { + "faiss": "inline::faiss", + # "pgvector": "remote::pgvector", # TODO(are-ces): add enrichment support +} + +if constants.DEFAULT_RAG_BACKEND not in BACKEND_TO_LLAMA_STACK_PROVIDER: + raise ValueError( + f"DEFAULT_RAG_BACKEND '{constants.DEFAULT_RAG_BACKEND}' has no entry in " + f"BACKEND_TO_LLAMA_STACK_PROVIDER — add a mapping before changing the default." + ) + class YamlDumper(yaml.Dumper): # pylint: disable=too-many-ancestors """Custom YAML dumper with proper indentation levels.""" @@ -335,13 +346,22 @@ def construct_vector_io_providers_section( continue existing_ids.add(provider_id) added += 1 + + backend = brag.get("backend", constants.DEFAULT_RAG_BACKEND) + provider_type = BACKEND_TO_LLAMA_STACK_PROVIDER.get(backend) + if provider_type is None: + raise ValueError( + f"Unsupported backend '{backend}' for BYOK RAG '{rag_id}'. " + f"Supported backends: {list(BACKEND_TO_LLAMA_STACK_PROVIDER.keys())}" + ) + output.append( { "provider_id": provider_id, - "provider_type": brag.get("rag_type", "inline::faiss"), + "provider_type": provider_type, "config": { "persistence": { - "namespace": "vector_io::faiss", + "namespace": f"vector_io::{backend}", "backend": backend_name, } }, @@ -585,10 +605,18 @@ def generate_configuration( enrich_azure_entra_id_inference(ls_config, config.get("azure_entra_id")) # Enrichment: BYOK RAG - enrich_byok_rag(ls_config, config.get("byok_rag", [])) + rag_section = config.get("rag", {}) + byok_stores = rag_section.get("byok", {}).get("stores", []) + enrich_byok_rag(ls_config, byok_stores) # Enrichment: Solr - enabled when "okp" appears in either inline or tool list - enrich_solr(ls_config, config.get("rag", {}), config.get("okp", {})) + retrieval = rag_section.get("retrieval", {}) + rag_config_for_solr = { + "inline": retrieval.get("inline", {}).get("sources", []), + "tool": retrieval.get("tool", {}).get("sources", []), + } + okp_config = rag_section.get("okp", {}) + enrich_solr(ls_config, rag_config_for_solr, okp_config) dedupe_providers_vector_io(ls_config) diff --git a/src/models/api/responses/successful/configuration.py b/src/models/api/responses/successful/configuration.py index d41e8ff20..f62795bce 100644 --- a/src/models/api/responses/successful/configuration.py +++ b/src/models/api/responses/successful/configuration.py @@ -79,7 +79,19 @@ class ConfigurationResponse(AbstractSuccessfulResponse): "sqlite": None, "postgres": None, }, - "byok_rag": [], + "rag": { + "byok": {"max_chunks": 10, "stores": []}, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, + }, "quota_handlers": { "sqlite": None, "postgres": None, diff --git a/src/models/config.py b/src/models/config.py index 923d720f0..9d92d9c5e 100644 --- a/src/models/config.py +++ b/src/models/config.py @@ -1762,8 +1762,8 @@ def config( return None -class ByokRag(ConfigurationBase): - """BYOK (Bring Your Own Knowledge) RAG configuration.""" +class RagStore(ConfigurationBase): + """BYOK (Bring Your Own Knowledge) RAG store configuration.""" rag_id: str = Field( ..., @@ -1772,13 +1772,24 @@ class ByokRag(ConfigurationBase): description="Unique RAG ID", ) - rag_type: str = Field( - constants.DEFAULT_RAG_TYPE, + backend: str = Field( + constants.DEFAULT_RAG_BACKEND, min_length=1, - title="RAG type", - description="Type of RAG database.", + title="RAG backend", + description="Storage backend type (e.g. 'faiss').", ) + @field_validator("backend") + @classmethod + def validate_backend(cls, value: str) -> str: + """Reject unsupported backend values at config load time.""" + if value not in constants.SUPPORTED_RAG_BACKENDS: + raise ValueError( + f"Unsupported RAG backend '{value}'. " + f"Supported backends: {sorted(constants.SUPPORTED_RAG_BACKENDS)}" + ) + return value + embedding_model: str = Field( constants.DEFAULT_EMBEDDING_MODEL, min_length=1, @@ -1936,35 +1947,56 @@ class QuotaHandlersConfiguration(ConfigurationBase): ) -class RagConfiguration(ConfigurationBase): - """RAG strategy configuration. +class RetrievalStrategyConfiguration(ConfigurationBase): + """Configuration for a single retrieval strategy (inline or tool).""" - Controls which RAG sources are used for inline and tool-based retrieval. + sources: list[str] = Field( + default_factory=list, + title="RAG source IDs", + description="RAG IDs to use for this retrieval strategy. " + f"Use '{constants.OKP_RAG_ID}' to include the OKP vector store.", + ) - Each strategy lists RAG IDs to include. The special ID ``"okp"`` defined in constants, - activates the OKP provider; all other IDs refer to entries in ``byok_rag``. + max_chunks: PositiveInt = Field( + default=constants.DEFAULT_INLINE_RAG_MAX_CHUNKS, + title="Max chunks", + description="Maximum number of chunks returned by this retrieval strategy.", + ) - Backward compatibility: - - ``inline`` defaults to ``[]`` (no inline RAG). - - ``tool`` defaults to ``[]`` (no tool RAG). - If no RAG strategy is defined (inline and tool are empty), - the RAG tool will register all stores available to llama-stack. - """ +class RetrievalConfiguration(ConfigurationBase): + """Configuration for inline and tool retrieval strategies.""" - inline: list[str] = Field( - default_factory=list, - title="Inline RAG IDs", - description="RAG IDs whose sources are injected as context before the LLM call. " - f"Use '{constants.OKP_RAG_ID}' to enable OKP inline RAG. Empty by default (no inline RAG).", + inline: RetrievalStrategyConfiguration = Field( + default_factory=lambda: RetrievalStrategyConfiguration( + max_chunks=constants.DEFAULT_INLINE_RAG_MAX_CHUNKS, + ), + title="Inline retrieval", + description="Inline RAG: context injected before the LLM request.", ) - tool: list[str] = Field( + tool: RetrievalStrategyConfiguration = Field( + default_factory=lambda: RetrievalStrategyConfiguration( + max_chunks=constants.DEFAULT_TOOL_RAG_MAX_CHUNKS, + ), + title="Tool retrieval", + description="Tool RAG: LLM can call file_search on demand.", + ) + + +class ByokConfiguration(ConfigurationBase): + """BYOK (Bring Your Own Knowledge) configuration.""" + + max_chunks: PositiveInt = Field( + default=constants.DEFAULT_BYOK_RAG_MAX_CHUNKS, + title="Max BYOK chunks", + description="Maximum total number of chunks returned across all BYOK stores.", + ) + + stores: list[RagStore] = Field( default_factory=list, - title="Tool RAG IDs", - description="RAG IDs made available to the LLM as a file_search tool. " - f"Use '{constants.OKP_RAG_ID}' to include the OKP vector store. " - "When omitted, all registered BYOK vector stores are used (backward compatibility).", + title="BYOK RAG stores", + description="List of BYOK RAG store configurations.", ) @@ -1972,7 +2004,8 @@ class OkpConfiguration(ConfigurationBase): """OKP (Offline Knowledge Portal) provider configuration. Controls provider-specific behaviour for the OKP vector store. - Only relevant when ``"okp"`` is listed in ``rag.inline`` or ``rag.tool``. + Only relevant when ``"okp"`` is listed in ``rag.retrieval.inline.sources`` + or ``rag.retrieval.tool.sources``. """ rhokp_url: Optional[AnyHttpUrl] = Field( @@ -1997,6 +2030,58 @@ class OkpConfiguration(ConfigurationBase): "Use Solr boolean syntax, e.g. 'product:ansible AND product:*openshift*'.", ) + max_chunks: PositiveInt = Field( + default=constants.DEFAULT_OKP_RAG_MAX_CHUNKS, + title="Max OKP chunks", + description="Maximum number of chunks fetched from OKP.", + ) + + +class RagConfiguration(ConfigurationBase): + """Unified RAG configuration. + + Groups all RAG-related settings: BYOK stores, OKP provider, and + retrieval strategies (inline and tool). + """ + + byok: ByokConfiguration = Field( + default_factory=ByokConfiguration, + title="BYOK configuration", + description="Bring Your Own Knowledge store configurations and settings.", + ) + + okp: OkpConfiguration = Field( + default_factory=OkpConfiguration, + title="OKP configuration", + description=f"OKP provider settings. Only used when '{constants.OKP_RAG_ID}' " + "is listed in retrieval.inline.sources or retrieval.tool.sources.", + ) + + retrieval: RetrievalConfiguration = Field( + default_factory=RetrievalConfiguration, + title="Retrieval configuration", + description="Inline and tool retrieval strategy settings.", + ) + + @model_validator(mode="after") + def validate_retrieval_sources(self) -> Self: + """Reject retrieval source IDs not declared in byok.stores or OKP.""" + # pylint: disable=no-member + known_ids = {store.rag_id for store in self.byok.stores} + known_ids.add(constants.OKP_RAG_ID) + + for strategy_name in ("inline", "tool"): + strategy = getattr(self.retrieval, strategy_name) + unknown = set(strategy.sources) - known_ids + if unknown: + raise ValueError( + f"retrieval.{strategy_name}.sources contains unknown RAG IDs: " + f"{sorted(unknown)}. " + f"Declared IDs: {sorted(known_ids)}" + ) + + return self + class RerankerConfiguration(ConfigurationBase): """Reranker configuration for RAG chunk reranking.""" @@ -2171,13 +2256,6 @@ class Configuration(ConfigurationBase): description="Settings for human-in-the-loop approval of MCP tool invocations", ) - byok_rag: list[ByokRag] = Field( - default_factory=list, - title="BYOK RAG configuration", - description="BYOK RAG configuration. This configuration can be used to " - "reconfigure Llama Stack through its run.yaml configuration file", - ) - a2a_state: A2AStateConfiguration = Field( default_factory=A2AStateConfiguration, title="A2A state configuration", @@ -2216,14 +2294,8 @@ class Configuration(ConfigurationBase): rag: RagConfiguration = Field( default_factory=RagConfiguration, title="RAG configuration", - description="Configuration for all RAG strategies (inline and tool-based).", - ) - - okp: OkpConfiguration = Field( - default_factory=OkpConfiguration, - title="OKP configuration", - description=f"OKP provider settings. Only used when '{constants.OKP_RAG_ID}' is listed " - "in rag.inline or rag.tool.", + description="Unified RAG configuration: BYOK stores, OKP provider, " + "and retrieval strategies (inline and tool-based).", ) reranker: RerankerConfiguration = Field( @@ -2344,20 +2416,20 @@ def validate_rlsapi_v1_quota_configuration(self) -> Self: def validate_reranker_auto_enable(self) -> Self: """Automatically enable reranker when both BYOK and OKP RAG are configured. - When users have both BYOK entries in byok_rag and OKP - configured in the RAG strategies, automatically - enable the reranker if it's not explicitly disabled. This improves result - quality when multiple knowledge sources are available. + When users have both BYOK stores and OKP configured in the RAG + retrieval strategies, automatically enable the reranker if it's not + explicitly disabled. This improves result quality when multiple + knowledge sources are available. Returns: Self: The validated configuration instance with reranker potentially enabled. """ # Check if BYOK RAG entries are configured - has_byok = len(self.byok_rag) > 0 - - # Check if OKP is configured in either inline or tool RAG strategies # pylint: disable=no-member - has_okp = constants.OKP_RAG_ID in self.rag.inline + has_byok = len(self.rag.byok.stores) > 0 + + # Check if OKP is configured in inline RAG strategy + has_okp = constants.OKP_RAG_ID in self.rag.retrieval.inline.sources # If both BYOK and OKP are present and reranker is using default settings, # ensure it's enabled for optimal results @@ -2368,10 +2440,10 @@ def validate_reranker_auto_enable(self) -> Self: and not self.reranker.enabled ): logger.info( - "Automatically enabling reranker: Both BYOK RAG (%d entries) or " - "other inline RAG and OKP are configured. Reranking improves result " - "quality when multiple knowledge sources are available.", - len(self.byok_rag), + "Automatically enabling reranker: Both BYOK RAG (%d stores) and " + "OKP are configured. Reranking improves result quality when " + "multiple knowledge sources are available.", + len(self.rag.byok.stores), ) self.reranker.enabled = True diff --git a/src/utils/responses.py b/src/utils/responses.py index a6d098199..899318065 100644 --- a/src/utils/responses.py +++ b/src/utils/responses.py @@ -107,7 +107,7 @@ ToolResultSummary, TurnSummary, ) -from models.config import ByokRag +from models.config import RagStore from models.database.conversations import UserConversation from utils.mcp_headers import ( McpHeaders, @@ -239,16 +239,20 @@ async def prepare_tools( # pylint: disable=too-many-arguments,too-many-position # If rag.inline is configured, but not rag.tool, tool RAG is disabled. # 3. All registered vector DBs: fallback when neither rag.tool nor rag.inline are configured. # IDs fetched from llama-stack are already internal and need no translation. - byok_rags = configuration.configuration.byok_rag + byok_stores = configuration.configuration.rag.byok.stores - is_tool_rag_enabled = len(configuration.configuration.rag.tool) > 0 - is_inline_rag_enabled = len(configuration.configuration.rag.inline) > 0 + is_tool_rag_enabled = ( + len(configuration.configuration.rag.retrieval.tool.sources) > 0 + ) + is_inline_rag_enabled = ( + len(configuration.configuration.rag.retrieval.inline.sources) > 0 + ) if vector_store_ids is not None: - effective_ids = resolve_vector_store_ids(vector_store_ids, byok_rags) + effective_ids = resolve_vector_store_ids(vector_store_ids, byok_stores) elif is_tool_rag_enabled: effective_ids = resolve_vector_store_ids( - configuration.configuration.rag.tool, byok_rags + configuration.configuration.rag.retrieval.tool.sources, byok_stores ) elif not is_inline_rag_enabled: effective_ids = await get_vector_store_ids(client, None) @@ -620,7 +624,7 @@ def filter_tools_by_allowed_entries( def resolve_vector_store_ids( - vector_store_ids: list[str], byok_rags: list[ByokRag] + vector_store_ids: list[str], byok_rags: list[RagStore] ) -> list[str]: """Translate customer-facing rag_ids to llama-stack vector_db_ids. @@ -648,7 +652,7 @@ def resolve_vector_store_ids( def translate_tools_vector_store_ids( - tools: list[InputTool], byok_rags: list[ByokRag] + tools: list[InputTool], byok_rags: list[RagStore] ) -> list[InputTool]: """Translate user-facing vector_store_ids to llama-stack IDs in each file_search tool. @@ -688,7 +692,7 @@ def get_rag_tools(vector_store_ids: list[str]) -> Optional[list[InputToolFileSea InputToolFileSearch( type="file_search", vector_store_ids=vector_store_ids, - max_num_results=constants.TOOL_RAG_MAX_CHUNKS, + max_num_results=configuration.rag.retrieval.tool.max_chunks, ) ] @@ -1728,8 +1732,8 @@ async def _resolve_client_tools( # Per-request override of vector stores (user-facing rag_ids) vector_store_ids = extract_vector_store_ids_from_tools(tools) or None # Translate user-facing rag_ids to llama-stack vector_store_ids in each file_search tool - byok_rags = configuration.configuration.byok_rag - prepared_tools = translate_tools_vector_store_ids(tools, byok_rags) + byok_stores = configuration.configuration.rag.byok.stores + prepared_tools = translate_tools_vector_store_ids(tools, byok_stores) prepared_tools = apply_mcp_headers_to_explicit_tools( prepared_tools, token, mcp_headers, request_headers ) @@ -1823,7 +1827,7 @@ async def resolve_tool_choice( ) else: # Pass tools explicitly configured for this request - byok_rags = configuration.configuration.byok_rag + byok_rags = configuration.configuration.rag.byok.stores prepared_tools = translate_tools_vector_store_ids(tools, byok_rags) prepared_tools = apply_mcp_headers_to_explicit_tools( prepared_tools, token, mcp_headers, request_headers diff --git a/src/utils/vector_search.py b/src/utils/vector_search.py index 7267ddb92..aa7213ca3 100644 --- a/src/utils/vector_search.py +++ b/src/utils/vector_search.py @@ -250,7 +250,7 @@ async def _query_store_for_byok_rag( vector_store_id: str, query: str, weight: float, - max_chunks: int = constants.BYOK_RAG_MAX_CHUNKS, + max_chunks: int = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS, ) -> list[dict[str, Any]]: """Query a single vector store for BYOK RAG. @@ -443,7 +443,6 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals client: AsyncLlamaStackClient, query: str, vector_store_ids: Optional[list[str]] = None, - max_chunks: Optional[int] = None, ) -> tuple[list[RAGChunk], list[ReferencedDocument]]: """Fetch chunks and documents from BYOK RAG sources. @@ -453,15 +452,13 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals vector_store_ids: Optional list of vector store IDs to query. If provided, only these stores will be queried. If None, all stores (excluding Solr) will be queried. - max_chunks: Maximum number of chunks to return. If None, uses - constants.BYOK_RAG_MAX_CHUNKS. Returns: Tuple containing: - rag_chunks: RAG chunks from BYOK RAG - referenced_documents: Documents referenced in BYOK RAG results """ - limit = max_chunks if max_chunks is not None else constants.BYOK_RAG_MAX_CHUNKS + limit = configuration.rag.byok.max_chunks rag_chunks: list[RAGChunk] = [] referenced_documents: list[ReferencedDocument] = [] @@ -470,17 +467,17 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals # Per-request IDs are intersected with the config to prevent triggering inline RAG # for stores not explicitly configured for inline use. if vector_store_ids is None: - rag_ids_to_query = configuration.configuration.rag.inline + rag_ids_to_query = configuration.configuration.rag.retrieval.inline.sources else: rag_ids_to_query = [ v for v in vector_store_ids - if v in set(configuration.configuration.rag.inline) + if v in set(configuration.configuration.rag.retrieval.inline.sources) ] # Translate user-facing rag_ids to llama-stack ids vector_store_ids_to_query: list[str] = resolve_vector_store_ids( - rag_ids_to_query, configuration.configuration.byok_rag + rag_ids_to_query, configuration.configuration.rag.byok.stores ) # Request-level override: filter out Solr store, use the rest @@ -550,7 +547,7 @@ async def _fetch_byok_rag( # pylint: disable=too-many-locals return rag_chunks, referenced_documents -async def _fetch_solr_rag( # pylint: disable=too-many-locals +async def _fetch_okp_rag( # pylint: disable=too-many-locals client: AsyncLlamaStackClient, query: str, solr: Optional[SolrVectorSearchRequest] = None, @@ -561,8 +558,6 @@ async def _fetch_solr_rag( # pylint: disable=too-many-locals client: The AsyncLlamaStackClient to use for the request query: The user's query solr: Structured Solr inline RAG request from the API (optional). - max_chunks: Maximum number of chunks to return. If None, uses - constants.OKP_RAG_MAX_CHUNKS. Returns: Tuple containing: @@ -571,7 +566,7 @@ async def _fetch_solr_rag( # pylint: disable=too-many-locals """ rag_chunks: list[RAGChunk] = [] referenced_documents: list[ReferencedDocument] = [] - limit = constants.OKP_RAG_MAX_CHUNKS + limit = configuration.rag.okp.max_chunks if not _is_solr_enabled(): logger.info("OKP vector IO is disabled, skipping OKP search") @@ -655,13 +650,11 @@ async def build_rag_context( # pylint: disable=too-many-locals,too-many-branche if moderation_decision == "blocked": return RAGContext() - top_k = constants.INLINE_RAG_MAX_CHUNKS + top_k = configuration.rag.retrieval.inline.max_chunks # Fetch from each source using per-source limits for the reranking pool - byok_chunks_task = _fetch_byok_rag( - client, query, vector_store_ids, max_chunks=constants.BYOK_RAG_MAX_CHUNKS - ) - solr_chunks_task = _fetch_solr_rag(client, query, solr) + byok_chunks_task = _fetch_byok_rag(client, query, vector_store_ids) + solr_chunks_task = _fetch_okp_rag(client, query, solr) (byok_chunks, byok_documents), (solr_chunks, solr_documents) = await asyncio.gather( byok_chunks_task, solr_chunks_task diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml index 2c55ae440..66b3bd7f4 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-auth-noop-token.yaml @@ -27,15 +27,17 @@ inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - tool: - - e2e-test-docs + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + tool: + sources: + - e2e-test-docs diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-inline-rag.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-inline-rag.yaml index ffd744da6..ace9105f4 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack-inline-rag.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-inline-rag.yaml @@ -26,15 +26,17 @@ inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - inline: - - e2e-test-docs + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + inline: + sources: + - e2e-test-docs diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml index 6c032de89..00a76ded0 100644 --- a/tests/e2e/configuration/library-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/library-mode/lightspeed-stack.yaml @@ -20,15 +20,17 @@ authentication: inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - tool: - - e2e-test-docs + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + tool: + sources: + - e2e-test-docs diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml index 49ee71d59..42dfc8dc2 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-auth-noop-token.yaml @@ -33,15 +33,17 @@ inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - tool: - - e2e-test-docs + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + tool: + sources: + - e2e-test-docs diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-inline-rag.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-inline-rag.yaml index 1a2850162..fefea3b6e 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack-inline-rag.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-inline-rag.yaml @@ -27,15 +27,17 @@ inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - inline: - - e2e-test-docs + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + inline: + sources: + - e2e-test-docs diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml index de945bb2e..d355bdf0a 100644 --- a/tests/e2e/configuration/server-mode/lightspeed-stack.yaml +++ b/tests/e2e/configuration/server-mode/lightspeed-stack.yaml @@ -21,15 +21,17 @@ authentication: inference: default_provider: openai default_model: gpt-4o-mini -byok_rag: - - rag_id: e2e-test-docs - rag_type: inline::faiss - embedding_model: sentence-transformers/all-mpnet-base-v2 - embedding_dimension: 768 - vector_db_id: ${env.FAISS_VECTOR_STORE_ID} - db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} - score_multiplier: 1.0 - rag: - tool: - - e2e-test-docs \ No newline at end of file + byok: + stores: + - rag_id: e2e-test-docs + backend: faiss + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + vector_db_id: ${env.FAISS_VECTOR_STORE_ID} + db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db} + score_multiplier: 1.0 + retrieval: + tool: + sources: + - e2e-test-docs \ No newline at end of file diff --git a/tests/e2e/features/inline_rag.feature b/tests/e2e/features/inline_rag.feature index d22a02ce1..58e8a6367 100644 --- a/tests/e2e/features/inline_rag.feature +++ b/tests/e2e/features/inline_rag.feature @@ -22,6 +22,7 @@ Feature: Inline RAG (BYOK) support tests } """ + @flaky Scenario: Query with inline RAG returns relevant content When I use "query" to ask question with authorization header """ @@ -33,6 +34,7 @@ Feature: Inline RAG (BYOK) support tests | great work | And The response contains non-empty rag_chunks + @flaky Scenario: Inline RAG query includes referenced documents When I use "query" to ask question with authorization header """ @@ -41,6 +43,7 @@ Feature: Inline RAG (BYOK) support tests Then The status code of the response is 200 And The response contains non-empty referenced_documents + @flaky Scenario: Streaming query with inline RAG returns relevant content When I use "streaming_query" to ask question with authorization header """ @@ -52,6 +55,7 @@ Feature: Inline RAG (BYOK) support tests | Fragments in LLM response | | great work | + @flaky Scenario: Responses API with inline RAG returns relevant content When I use "responses" to ask question with authorization header """ @@ -62,6 +66,7 @@ Feature: Inline RAG (BYOK) support tests | Fragments in LLM response | | great work | + @flaky Scenario: Streaming Responses API with inline RAG returns relevant content When I use "responses" to ask question with authorization header """ diff --git a/tests/integration/endpoints/test_query_byok_integration.py b/tests/integration/endpoints/test_query_byok_integration.py index b2a659f19..26e9461d4 100644 --- a/tests/integration/endpoints/test_query_byok_integration.py +++ b/tests/integration/endpoints/test_query_byok_integration.py @@ -256,7 +256,7 @@ def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppCon byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", @@ -264,9 +264,9 @@ def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppCon "score_multiplier": 1.0, } - # Patch the loaded configuration's byok_rag and rag.inline - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = ["test-knowledge"] + # Patch the loaded configuration's rag.byok.stores and rag.retrieval.inline.sources + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] return test_config @@ -277,8 +277,8 @@ def byok_tool_config_fixture( ) -> AppConfig: """Load test config with BYOK RAG configured for tool-based (file_search) usage. - Sets rag.inline to empty and rag.tool to include the BYOK store, - so only tool-based RAG is active. + Sets rag.retrieval.inline.sources to empty and rag.retrieval.tool.sources + to include the BYOK store, so only tool-based RAG is active. """ byok_entry = mocker.MagicMock() byok_entry.rag_id = "test-knowledge" @@ -286,7 +286,7 @@ def byok_tool_config_fixture( byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", @@ -294,9 +294,9 @@ def byok_tool_config_fixture( "score_multiplier": 1.0, } - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = [] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = [] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] return test_config @@ -451,8 +451,8 @@ async def test_query_byok_inline_rag_with_request_vector_store_ids( entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a"] mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) @@ -522,8 +522,8 @@ async def test_query_byok_request_vector_store_ids_filters_configured_stores( entry_b.score_multiplier = 1.0 # Both sources are in config - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) @@ -782,9 +782,9 @@ async def test_query_byok_combined_inline_and_tool_rag( # pylint: disable=too-m byok_entry.rag_id = "test-knowledge" byok_entry.vector_db_id = "vs-byok-knowledge" byok_entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = ["test-knowledge"] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] # Mock Llama Stack client mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") @@ -911,8 +911,8 @@ async def test_query_byok_inline_rag_only_configured_rag_id_is_queried( entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a"] mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) @@ -995,8 +995,8 @@ async def test_query_byok_score_multiplier_shifts_chunk_priority( # pylint: dis entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 5.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) @@ -1092,8 +1092,8 @@ async def test_query_rag_content_limit_caps_retrieved_results( # pylint: disabl entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] # Disable reranker for this test since it's testing chunk capping, not reranking test_config.configuration.reranker.enabled = False @@ -1102,7 +1102,7 @@ async def test_query_rag_content_limit_caps_retrieved_results( # pylint: disabl mock_client = _build_base_mock_client(mocker) # Generate more chunks than INLINE_RAG_MAX_CHUNKS - num_chunks = constants.INLINE_RAG_MAX_CHUNKS + 1 + num_chunks = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + 1 chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) @@ -1141,7 +1141,7 @@ async def test_query_rag_content_limit_caps_retrieved_results( # pylint: disabl ) assert response.rag_chunks is not None - assert len(response.rag_chunks) == constants.INLINE_RAG_MAX_CHUNKS + assert len(response.rag_chunks) == constants.DEFAULT_INLINE_RAG_MAX_CHUNKS # Check that the score is computed properly for chunk in response.rag_chunks: @@ -1187,14 +1187,14 @@ async def test_query_rag_content_limit_caps_across_multiple_sources( # pylint: entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) # Overlapping score bands so top-k must pick from both sources - n = constants.INLINE_RAG_MAX_CHUNKS + n = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS resp_a = _make_vector_io_response( mocker, [ @@ -1246,7 +1246,7 @@ async def _side_effect(**kwargs: Any) -> Any: ) assert response.rag_chunks is not None - assert len(response.rag_chunks) == constants.INLINE_RAG_MAX_CHUNKS + assert len(response.rag_chunks) == constants.DEFAULT_INLINE_RAG_MAX_CHUNKS # Check that the score is computed properly for chunk in response.rag_chunks: @@ -1284,21 +1284,20 @@ async def test_query_rag_content_limit_caps_inline_rag( # pylint: disable=too-m - Number of inline RAG chunks equals the lowered INLINE_RAG_MAX_CHUNKS - Returned chunks are the top-scoring ones """ - mocker.patch("utils.vector_search.constants.INLINE_RAG_MAX_CHUNKS", 3) - entry = mocker.MagicMock() entry.rag_id = "big-source" entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] + test_config.configuration.rag.retrieval.inline.max_chunks = 3 test_config.configuration.reranker.enabled = False mock_holder_class = mocker.patch("app.endpoints.query.AsyncLlamaStackClientHolder") mock_client = _build_base_mock_client(mocker) - num_chunks = constants.BYOK_RAG_MAX_CHUNKS + num_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) diff --git a/tests/integration/endpoints/test_responses_byok_integration.py b/tests/integration/endpoints/test_responses_byok_integration.py index d316af6a6..0db227de4 100644 --- a/tests/integration/endpoints/test_responses_byok_integration.py +++ b/tests/integration/endpoints/test_responses_byok_integration.py @@ -116,8 +116,8 @@ async def test_responses_byok_inline_rag_injects_context( # pylint: disable=too entry.rag_id = "test-knowledge" entry.vector_db_id = "vs-byok-knowledge" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -169,8 +169,8 @@ async def test_responses_byok_inline_rag_error_is_handled_gracefully( # pylint: entry.rag_id = "test-knowledge" entry.vector_db_id = "vs-byok-knowledge" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -219,7 +219,7 @@ async def test_responses_byok_tool_rag_returns_tool_calls( # pylint: disable=to byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", @@ -227,9 +227,9 @@ async def test_responses_byok_tool_rag_returns_tool_calls( # pylint: disable=to "score_multiplier": 1.0, } - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = [] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = [] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -291,16 +291,16 @@ async def test_responses_byok_combined_inline_and_tool_rag( # pylint: disable=t byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", "db_path": "/tmp/test-db", "score_multiplier": 1.0, } - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = ["test-knowledge"] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -381,8 +381,8 @@ async def test_responses_byok_inline_rag_only_configured_rag_id_is_queried( # p entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -443,8 +443,8 @@ async def test_responses_byok_score_multiplier_shifts_chunk_priority( # pylint: entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 5.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) @@ -526,15 +526,15 @@ async def test_responses_rag_content_limit_caps_retrieved_results( # pylint: di entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] test_config.configuration.reranker.enabled = False mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) # Generate more chunks than INLINE_RAG_MAX_CHUNKS - num_chunks = constants.INLINE_RAG_MAX_CHUNKS + 1 + num_chunks = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + 1 chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) @@ -560,7 +560,9 @@ async def test_responses_rag_content_limit_caps_retrieved_results( # pylint: di create_call = mock_client.responses.create.call_args_list[0] input_text = create_call.kwargs.get("input", "") - expected_header = f"file_search found {constants.INLINE_RAG_MAX_CHUNKS} chunks:" + expected_header = ( + f"file_search found {constants.DEFAULT_INLINE_RAG_MAX_CHUNKS} chunks:" + ) assert expected_header in input_text # The highest-scored chunk should be present @@ -595,14 +597,14 @@ async def test_responses_rag_content_limit_caps_across_multiple_sources( # pyli entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) # Overlapping score bands so top-k must pick from both sources - n = constants.INLINE_RAG_MAX_CHUNKS + n = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS resp_a = _make_vector_io_response( mocker, [ @@ -642,7 +644,9 @@ async def _side_effect(**kwargs: Any) -> Any: create_call = mock_client.responses.create.call_args_list[0] input_text = create_call.kwargs.get("input", "") - expected_header = f"file_search found {constants.INLINE_RAG_MAX_CHUNKS} chunks:" + expected_header = ( + f"file_search found {constants.DEFAULT_INLINE_RAG_MAX_CHUNKS} chunks:" + ) assert expected_header in input_text # Both sources should survive the cap (high-scoring chunks from each) @@ -669,21 +673,20 @@ async def test_responses_rag_content_limit_caps_inline_rag( # pylint: disable=t - Context chunk count equals the lowered INLINE_RAG_MAX_CHUNKS - Only the highest-scored chunks appear in the context """ - mocker.patch("utils.vector_search.constants.INLINE_RAG_MAX_CHUNKS", 3) - entry = mocker.MagicMock() entry.rag_id = "big-source" entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] + test_config.configuration.rag.retrieval.inline.max_chunks = 3 test_config.configuration.reranker.enabled = False mock_client = _build_responses_mock_client(mocker) _patch_all_client_holders(mocker, mock_client) - num_chunks = constants.BYOK_RAG_MAX_CHUNKS + num_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) diff --git a/tests/integration/endpoints/test_streaming_query_byok_integration.py b/tests/integration/endpoints/test_streaming_query_byok_integration.py index c539d4294..4d7a9fa9c 100644 --- a/tests/integration/endpoints/test_streaming_query_byok_integration.py +++ b/tests/integration/endpoints/test_streaming_query_byok_integration.py @@ -238,7 +238,7 @@ def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppCon byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", @@ -246,8 +246,8 @@ def byok_config_fixture(test_config: AppConfig, mocker: MockerFixture) -> AppCon "score_multiplier": 1.0, } - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] return test_config @@ -263,7 +263,7 @@ def byok_tool_config_fixture( byok_entry.score_multiplier = 1.0 byok_entry.model_dump.return_value = { "rag_id": "test-knowledge", - "rag_type": "inline::faiss", + "backend": "faiss", "embedding_model": "sentence-transformers/all-mpnet-base-v2", "embedding_dimension": 768, "vector_db_id": "vs-byok-knowledge", @@ -271,9 +271,9 @@ def byok_tool_config_fixture( "score_multiplier": 1.0, } - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = [] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = [] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] return test_config @@ -345,8 +345,8 @@ async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids( entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -409,8 +409,8 @@ async def test_streaming_query_byok_request_vector_store_ids_filters_configured_ entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -689,9 +689,9 @@ async def test_streaming_query_byok_combined_inline_and_tool_rag( byok_entry.rag_id = "test-knowledge" byok_entry.vector_db_id = "vs-byok-knowledge" byok_entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [byok_entry] - test_config.configuration.rag.inline = ["test-knowledge"] - test_config.configuration.rag.tool = ["test-knowledge"] + test_config.configuration.rag.byok.stores = [byok_entry] + test_config.configuration.rag.retrieval.inline.sources = ["test-knowledge"] + test_config.configuration.rag.retrieval.tool.sources = ["test-knowledge"] # Mock Llama Stack client mock_holder_class = mocker.patch( @@ -772,8 +772,8 @@ async def test_streaming_query_byok_only_configured_rag_id_is_queried( entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -851,8 +851,8 @@ async def test_streaming_query_byok_score_multiplier_shifts_priority( # pylint: entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 5.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -933,8 +933,8 @@ async def test_streaming_query_rag_content_limit_caps_context( # pylint: disabl entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -942,7 +942,7 @@ async def test_streaming_query_rag_content_limit_caps_context( # pylint: disabl mock_client = _build_base_streaming_mock_client(mocker) # Generate more chunks than INLINE_RAG_MAX_CHUNKS - num_chunks = constants.INLINE_RAG_MAX_CHUNKS + 5 + num_chunks = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + 5 chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) @@ -973,7 +973,9 @@ async def test_streaming_query_rag_content_limit_caps_context( # pylint: disabl # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. create_call = mock_client.responses.create.call_args_list[0] input_text = create_call.kwargs["input"] - expected_header = f"file_search found {constants.INLINE_RAG_MAX_CHUNKS} chunks:" + expected_header = ( + f"file_search found {constants.DEFAULT_INLINE_RAG_MAX_CHUNKS} chunks:" + ) assert expected_header in input_text # The lowest-scoring chunk should NOT be in the context @@ -1009,8 +1011,8 @@ async def test_streaming_query_rag_content_limit_caps_across_multiple_sources( entry_b.vector_db_id = "vs-source-b" entry_b.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry_a, entry_b] - test_config.configuration.rag.inline = ["source-a", "source-b"] + test_config.configuration.rag.byok.stores = [entry_a, entry_b] + test_config.configuration.rag.retrieval.inline.sources = ["source-a", "source-b"] mock_holder_class = mocker.patch( "app.endpoints.streaming_query.AsyncLlamaStackClientHolder" @@ -1018,7 +1020,7 @@ async def test_streaming_query_rag_content_limit_caps_across_multiple_sources( mock_client = _build_base_streaming_mock_client(mocker) # Overlapping score bands so top-k must pick from both sources - n = constants.INLINE_RAG_MAX_CHUNKS + n = constants.DEFAULT_INLINE_RAG_MAX_CHUNKS resp_a = _make_vector_io_response( mocker, [ @@ -1062,7 +1064,9 @@ async def _side_effect(**kwargs: Any) -> Any: # .kwargs holds its keyword arguments, e.g. "input" is the full prompt text sent to the model. create_call = mock_client.responses.create.call_args_list[0] input_text = create_call.kwargs["input"] - expected_header = f"file_search found {constants.INLINE_RAG_MAX_CHUNKS} chunks:" + expected_header = ( + f"file_search found {constants.DEFAULT_INLINE_RAG_MAX_CHUNKS} chunks:" + ) assert expected_header in input_text # Both sources must appear in the context (overlapping scores guarantee this) @@ -1090,15 +1094,14 @@ async def test_streaming_query_rag_content_limit_caps_inline_rag( # pylint: dis - Context chunk count equals the lowered INLINE_RAG_MAX_CHUNKS - Only the highest-scored chunks appear in the context """ - mocker.patch("utils.vector_search.constants.INLINE_RAG_MAX_CHUNKS", 3) - entry = mocker.MagicMock() entry.rag_id = "big-source" entry.vector_db_id = "vs-big-source" entry.score_multiplier = 1.0 - test_config.configuration.byok_rag = [entry] - test_config.configuration.rag.inline = ["big-source"] + test_config.configuration.rag.byok.stores = [entry] + test_config.configuration.rag.retrieval.inline.sources = ["big-source"] + test_config.configuration.rag.retrieval.inline.max_chunks = 3 test_config.configuration.reranker.enabled = False mock_holder_class = mocker.patch( @@ -1106,7 +1109,7 @@ async def test_streaming_query_rag_content_limit_caps_inline_rag( # pylint: dis ) mock_client = _build_base_streaming_mock_client(mocker) - num_chunks = constants.BYOK_RAG_MAX_CHUNKS + num_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS chunks_data = [ (f"Chunk content {i}", f"chunk-{i}", round(0.50 + i * 0.03, 2)) for i in range(num_chunks) diff --git a/tests/unit/app/endpoints/test_rags.py b/tests/unit/app/endpoints/test_rags.py index 563c223fe..3134d864b 100644 --- a/tests/unit/app/endpoints/test_rags.py +++ b/tests/unit/app/endpoints/test_rags.py @@ -269,24 +269,28 @@ def _make_byok_config(tmp_path: Any) -> AppConfig: "user_data_collection": {}, "authentication": {"module": "noop"}, "authorization": {"access_rules": []}, - "byok_rag": [ - { - "rag_id": "ocp-4.18-docs", - "rag_type": "inline::faiss", - "embedding_model": "all-MiniLM-L6-v2", - "embedding_dimension": 384, - "vector_db_id": "vs_abc123", - "db_path": str(db_file), + "rag": { + "byok": { + "stores": [ + { + "rag_id": "ocp-4.18-docs", + "backend": "faiss", + "embedding_model": "all-MiniLM-L6-v2", + "embedding_dimension": 384, + "vector_db_id": "vs_abc123", + "db_path": str(db_file), + }, + { + "rag_id": "company-kb", + "backend": "faiss", + "embedding_model": "all-MiniLM-L6-v2", + "embedding_dimension": 384, + "vector_db_id": "vs_def456", + "db_path": str(db_file), + }, + ], }, - { - "rag_id": "company-kb", - "rag_type": "inline::faiss", - "embedding_model": "all-MiniLM-L6-v2", - "embedding_dimension": 384, - "vector_db_id": "vs_def456", - "db_path": str(db_file), - }, - ], + }, } ) return cfg @@ -379,7 +383,7 @@ def __init__(self) -> None: def test_resolve_rag_id_to_vector_db_id_with_mapping(tmp_path: Path) -> None: """Test that _resolve_rag_id_to_vector_db_id maps rag_id to vector_db_id.""" byok_config = _make_byok_config(str(tmp_path)) - byok_rags = byok_config.configuration.byok_rag + byok_rags = byok_config.configuration.rag.byok.stores assert _resolve_rag_id_to_vector_db_id("ocp-4.18-docs", byok_rags) == "vs_abc123" assert _resolve_rag_id_to_vector_db_id("company-kb", byok_rags) == "vs_def456" @@ -387,5 +391,5 @@ def test_resolve_rag_id_to_vector_db_id_with_mapping(tmp_path: Path) -> None: def test_resolve_rag_id_to_vector_db_id_passthrough(tmp_path: Path) -> None: """Test that unmapped IDs are passed through unchanged.""" byok_config = _make_byok_config(str(tmp_path)) - byok_rags = byok_config.configuration.byok_rag + byok_rags = byok_config.configuration.rag.byok.stores assert _resolve_rag_id_to_vector_db_id("vs_unknown", byok_rags) == "vs_unknown" diff --git a/tests/unit/models/config/test_byok_rag.py b/tests/unit/models/config/test_byok_rag.py index e80e749c3..1e0bfa221 100644 --- a/tests/unit/models/config/test_byok_rag.py +++ b/tests/unit/models/config/test_byok_rag.py @@ -1,4 +1,4 @@ -"""Unit tests for ByokRag model.""" +"""Unit tests for RagStore model.""" import pytest from pydantic import ValidationError @@ -6,76 +6,76 @@ from constants import ( DEFAULT_EMBEDDING_DIMENSION, DEFAULT_EMBEDDING_MODEL, - DEFAULT_RAG_TYPE, + DEFAULT_RAG_BACKEND, DEFAULT_SCORE_MULTIPLIER, ) -from models.config import ByokRag +from models.config import RagStore -def test_byok_rag_configuration_default_values() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_default_values() -> None: + """Test the RagStore constructor. - Verify that ByokRag initializes correctly when only required fields are provided. + Verify that RagStore initializes correctly when only required fields are provided. Asserts that the instance stores the given `rag_id`, `vector_db_id`, and `db_path`, and that unspecified fields use the module's default values for - `rag_type`, `embedding_model`, `embedding_dimension`, and + `backend`, `embedding_model`, `embedding_dimension`, and `score_multiplier`. """ - byok_rag = ByokRag( # pyright: ignore[reportCallIssue] + rag_store = RagStore( # pyright: ignore[reportCallIssue] rag_id="rag_id", vector_db_id="vector_db_id", db_path="tests/configuration/rag.txt", ) - assert byok_rag is not None - assert byok_rag.rag_id == "rag_id" - assert byok_rag.rag_type == DEFAULT_RAG_TYPE - assert byok_rag.embedding_model == DEFAULT_EMBEDDING_MODEL - assert byok_rag.embedding_dimension == DEFAULT_EMBEDDING_DIMENSION - assert byok_rag.vector_db_id == "vector_db_id" - assert byok_rag.db_path == "tests/configuration/rag.txt" - assert byok_rag.score_multiplier == DEFAULT_SCORE_MULTIPLIER + assert rag_store is not None + assert rag_store.rag_id == "rag_id" + assert rag_store.backend == DEFAULT_RAG_BACKEND + assert rag_store.embedding_model == DEFAULT_EMBEDDING_MODEL + assert rag_store.embedding_dimension == DEFAULT_EMBEDDING_DIMENSION + assert rag_store.vector_db_id == "vector_db_id" + assert rag_store.db_path == "tests/configuration/rag.txt" + assert rag_store.score_multiplier == DEFAULT_SCORE_MULTIPLIER -def test_byok_rag_configuration_nondefault_values() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_nondefault_values() -> None: + """Test the RagStore constructor. - Verify that ByokRag class accepts and stores non-default configuration values. + Verify that RagStore class accepts and stores non-default configuration values. - Asserts that rag_id, rag_type, embedding_model, embedding_dimension, and + Asserts that rag_id, backend, embedding_model, embedding_dimension, and vector_db_id match the provided inputs and that db_path is converted to a Path. """ - byok_rag = ByokRag( + rag_store = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", embedding_model="embedding_model", embedding_dimension=1024, vector_db_id="vector_db_id", db_path="tests/configuration/rag.txt", score_multiplier=1.0, ) - assert byok_rag is not None - assert byok_rag.rag_id == "rag_id" - assert byok_rag.rag_type == "rag_type" - assert byok_rag.embedding_model == "embedding_model" - assert byok_rag.embedding_dimension == 1024 - assert byok_rag.vector_db_id == "vector_db_id" - assert byok_rag.db_path == "tests/configuration/rag.txt" + assert rag_store is not None + assert rag_store.rag_id == "rag_id" + assert rag_store.backend == "faiss" + assert rag_store.embedding_model == "embedding_model" + assert rag_store.embedding_dimension == 1024 + assert rag_store.vector_db_id == "vector_db_id" + assert rag_store.db_path == "tests/configuration/rag.txt" -def test_byok_rag_configuration_wrong_dimension() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_wrong_dimension() -> None: + """Test the RagStore constructor. - Verify constructing ByokRag with embedding_dimension less than or equal to + Verify constructing RagStore with embedding_dimension less than or equal to zero raises a ValidationError. The raised ValidationError's message must contain "should be greater than 0". """ with pytest.raises(ValidationError, match="should be greater than 0"): - _ = ByokRag( + _ = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", embedding_model="embedding_model", embedding_dimension=-1024, vector_db_id="vector_db_id", @@ -84,10 +84,10 @@ def test_byok_rag_configuration_wrong_dimension() -> None: ) -def test_byok_rag_configuration_empty_rag_id() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_empty_rag_id() -> None: + """Test the RagStore constructor. - Validate that constructing a ByokRag with an empty `rag_id` raises a validation error. + Validate that constructing a RagStore with an empty `rag_id` raises a validation error. Expects a `pydantic.ValidationError` whose message contains "String should have at least 1 character". @@ -95,9 +95,9 @@ def test_byok_rag_configuration_empty_rag_id() -> None: with pytest.raises( ValidationError, match="String should have at least 1 character" ): - _ = ByokRag( + _ = RagStore( rag_id="", - rag_type="rag_type", + backend="faiss", embedding_model="embedding_model", embedding_dimension=1024, vector_db_id="vector_db_id", @@ -106,21 +106,21 @@ def test_byok_rag_configuration_empty_rag_id() -> None: ) -def test_byok_rag_configuration_empty_rag_type() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_empty_backend() -> None: + """Test the RagStore constructor. - Verify that constructing a ByokRag with an empty `rag_type` raises a validation error. + Verify that constructing a RagStore with an empty `backend` raises a validation error. Raises: - ValidationError: if `rag_type` is an empty string; error message + ValidationError: if `backend` is an empty string; error message includes "String should have at least 1 character". """ with pytest.raises( ValidationError, match="String should have at least 1 character" ): - _ = ByokRag( + _ = RagStore( rag_id="rag_id", - rag_type="", + backend="", embedding_model="embedding_model", embedding_dimension=1024, vector_db_id="vector_db_id", @@ -129,10 +129,23 @@ def test_byok_rag_configuration_empty_rag_type() -> None: ) -def test_byok_rag_configuration_empty_embedding_model() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_unsupported_backend() -> None: + """Test that unsupported backend values are rejected.""" + with pytest.raises(ValidationError, match="Unsupported RAG backend"): + _ = RagStore( + rag_id="rag_id", + backend="unsupported", + embedding_model="embedding_model", + embedding_dimension=1024, + vector_db_id="vector_db_id", + db_path="tests/configuration/rag.txt", + ) + + +def test_rag_store_configuration_empty_embedding_model() -> None: + """Test the RagStore constructor. - Verify that constructing a ByokRag with an empty `embedding_model` raises a validation error. + Verify that constructing a RagStore with an empty `embedding_model` raises a validation error. Expects a pydantic.ValidationError whose message contains "String should have at least 1 character". @@ -140,9 +153,9 @@ def test_byok_rag_configuration_empty_embedding_model() -> None: with pytest.raises( ValidationError, match="String should have at least 1 character" ): - _ = ByokRag( + _ = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", embedding_model="", embedding_dimension=1024, vector_db_id="vector_db_id", @@ -151,10 +164,10 @@ def test_byok_rag_configuration_empty_embedding_model() -> None: ) -def test_byok_rag_configuration_empty_vector_db_id() -> None: - """Test the ByokRag constructor. +def test_rag_store_configuration_empty_vector_db_id() -> None: + """Test the RagStore constructor. - Ensure constructing a ByokRag with an empty `vector_db_id` raises a ValidationError. + Ensure constructing a RagStore with an empty `vector_db_id` raises a ValidationError. Asserts that Pydantic validation fails with a message containing "String should have at least 1 character". @@ -162,9 +175,9 @@ def test_byok_rag_configuration_empty_vector_db_id() -> None: with pytest.raises( ValidationError, match="String should have at least 1 character" ): - _ = ByokRag( + _ = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", embedding_model="embedding_model", embedding_dimension=1024, vector_db_id="", @@ -173,26 +186,26 @@ def test_byok_rag_configuration_empty_vector_db_id() -> None: ) -def test_byok_rag_configuration_custom_score_multiplier() -> None: - """Test ByokRag with custom score_multiplier.""" - byok_rag = ByokRag( +def test_rag_store_configuration_custom_score_multiplier() -> None: + """Test RagStore with custom score_multiplier.""" + rag_store = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", vector_db_id="vector_db_id", embedding_model="embedding_model", embedding_dimension=1024, db_path="tests/configuration/rag.txt", score_multiplier=2.5, ) - assert byok_rag.score_multiplier == 2.5 + assert rag_store.score_multiplier == 2.5 -def test_byok_rag_configuration_score_multiplier_must_be_positive() -> None: +def test_rag_store_configuration_score_multiplier_must_be_positive() -> None: """Test that score_multiplier must be greater than 0.""" with pytest.raises(ValidationError, match="greater than 0"): - _ = ByokRag( + _ = RagStore( rag_id="rag_id", - rag_type="rag_type", + backend="faiss", vector_db_id="vector_db_id", embedding_model="embedding_model", embedding_dimension=1024, diff --git a/tests/unit/models/config/test_dump_configuration.py b/tests/unit/models/config/test_dump_configuration.py index 5f3aa5e1d..92a467576 100644 --- a/tests/unit/models/config/test_dump_configuration.py +++ b/tests/unit/models/config/test_dump_configuration.py @@ -11,7 +11,7 @@ import constants from models.config import ( - ByokRag, + ByokConfiguration, Configuration, CORSConfiguration, DatabaseConfiguration, @@ -22,6 +22,8 @@ QuotaHandlersConfiguration, QuotaLimiterConfiguration, QuotaSchedulerConfiguration, + RagConfiguration, + RagStore, ServiceConfiguration, SkillsConfiguration, TLSConfiguration, @@ -114,7 +116,7 @@ def test_dump_configuration(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content assert "azure_entra_id" in content assert "reranker" in content @@ -218,7 +220,6 @@ def test_dump_configuration(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -236,13 +237,20 @@ def test_dump_configuration(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -467,7 +475,7 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content assert "azure_entra_id" in content assert "reranker" in content @@ -571,7 +579,6 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -604,13 +611,20 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -721,7 +735,7 @@ def test_dump_configuration_with_quota_limiters_different_values( assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content # check the whole deserialized JSON file content @@ -823,7 +837,6 @@ def test_dump_configuration_with_quota_limiters_different_values( "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -856,13 +869,20 @@ def test_dump_configuration_with_quota_limiters_different_values( }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -925,13 +945,17 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: default_provider="default_provider", default_model="default_model", ), - byok_rag=[ - ByokRag( - rag_id="rag_id", - vector_db_id="vector_db_id", - db_path="tests/configuration/rag.txt", + rag=RagConfiguration( + byok=ByokConfiguration( + stores=[ + RagStore( + rag_id="rag_id", + vector_db_id="vector_db_id", + db_path="tests/configuration/rag.txt", + ), + ], ), - ], + ), ) assert cfg is not None dump_file = tmp_path / "test.json" @@ -953,7 +977,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content # check the whole deserialized JSON file content @@ -1055,17 +1079,6 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [ - { - "db_path": "tests/configuration/rag.txt", - "embedding_dimension": 768, - "embedding_model": "sentence-transformers/all-mpnet-base-v2", - "rag_id": "rag_id", - "rag_type": "inline::faiss", - "vector_db_id": "vector_db_id", - "score_multiplier": 1.0, - }, - ], "quota_handlers": { "sqlite": None, "postgres": None, @@ -1083,13 +1096,30 @@ def test_dump_configuration_byok(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [ + { + "db_path": "tests/configuration/rag.txt", + "embedding_dimension": 768, + "embedding_model": "sentence-transformers/all-mpnet-base-v2", + "rag_id": "rag_id", + "backend": "faiss", + "vector_db_id": "vector_db_id", + "score_multiplier": 1.0, + }, + ], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -1175,7 +1205,7 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content # check the whole deserialized JSON file content @@ -1277,7 +1307,6 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -1295,13 +1324,20 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -1470,7 +1506,7 @@ def test_dump_configuration_allow_degraded_mode(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content assert "azure_entra_id" in content assert "reranker" in content @@ -1574,7 +1610,6 @@ def test_dump_configuration_allow_degraded_mode(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -1592,13 +1627,20 @@ def test_dump_configuration_allow_degraded_mode(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -1688,7 +1730,7 @@ def test_dump_configuration_max_retries_settings(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content assert "azure_entra_id" in content assert "reranker" in content @@ -1792,7 +1834,6 @@ def test_dump_configuration_max_retries_settings(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -1810,13 +1851,20 @@ def test_dump_configuration_max_retries_settings(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, @@ -1906,7 +1954,7 @@ def test_dump_configuration_retry_count_settings(tmp_path: Path) -> None: assert "customization" in content assert "inference" in content assert "database" in content - assert "byok_rag" in content + assert "rag" in content assert "quota_handlers" in content assert "azure_entra_id" in content assert "reranker" in content @@ -2010,7 +2058,6 @@ def test_dump_configuration_retry_count_settings(tmp_path: Path) -> None: "buffer_max_ratio": 0.3, }, "approvals": _DEFAULT_APPROVALS_DUMP, - "byok_rag": [], "quota_handlers": { "sqlite": None, "postgres": None, @@ -2028,13 +2075,20 @@ def test_dump_configuration_retry_count_settings(tmp_path: Path) -> None: }, "azure_entra_id": None, "rag": { - "inline": [], - "tool": [], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": None, + "byok": { + "max_chunks": 10, + "stores": [], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": None, + "max_chunks": 5, + }, + "retrieval": { + "inline": {"sources": [], "max_chunks": 10}, + "tool": {"sources": [], "max_chunks": 10}, + }, }, "rlsapi_v1": { "allow_verbose_infer": False, diff --git a/tests/unit/models/config/test_rag_configuration.py b/tests/unit/models/config/test_rag_configuration.py index bc44ef154..9a72f9660 100644 --- a/tests/unit/models/config/test_rag_configuration.py +++ b/tests/unit/models/config/test_rag_configuration.py @@ -7,7 +7,79 @@ from pydantic import ValidationError import constants -from models.config import OkpConfiguration, RagConfiguration +from models.config import ( + ByokConfiguration, + OkpConfiguration, + RagConfiguration, + RagStore, + RetrievalConfiguration, + RetrievalStrategyConfiguration, +) + + +class TestRetrievalStrategyConfiguration: + """Tests for RetrievalStrategyConfiguration model.""" + + def test_default_values(self) -> None: + """Test default values.""" + config = RetrievalStrategyConfiguration() + assert config.sources == [] + assert config.max_chunks == constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + + def test_custom_values(self) -> None: + """Test custom sources and max_chunks.""" + config = RetrievalStrategyConfiguration( + sources=["store-1", "okp"], max_chunks=20 + ) + assert config.sources == ["store-1", "okp"] + assert config.max_chunks == 20 + + +class TestRetrievalConfiguration: + """Tests for RetrievalConfiguration model.""" + + def test_default_values(self) -> None: + """Test default inline and tool strategies.""" + config = RetrievalConfiguration() + assert config.inline.sources == [] + assert config.inline.max_chunks == constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + assert config.tool.sources == [] + assert config.tool.max_chunks == constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + + def test_custom_values(self) -> None: + """Test custom inline and tool strategies.""" + config = RetrievalConfiguration( + inline=RetrievalStrategyConfiguration( + sources=["store-1", "okp"], max_chunks=8 + ), + tool=RetrievalStrategyConfiguration(sources=["store-1"], max_chunks=12), + ) + assert config.inline.sources == ["store-1", "okp"] + assert config.inline.max_chunks == 8 + assert config.tool.sources == ["store-1"] + assert config.tool.max_chunks == 12 + + +class TestByokConfiguration: + """Tests for ByokConfiguration model.""" + + def test_default_values(self) -> None: + """Test default values.""" + config = ByokConfiguration() + assert config.stores == [] + assert config.max_chunks == constants.DEFAULT_BYOK_RAG_MAX_CHUNKS + + def test_with_stores(self) -> None: + """Test with store entries.""" + store = RagStore( + rag_id="test", + vector_db_id="vs_123", + db_path="/tmp/test.db", + ) + config = ByokConfiguration(stores=[store], max_chunks=15) + assert len(config.stores) == 1 + assert config.stores[0].rag_id == "test" + assert config.max_chunks == 15 class TestRagConfiguration: @@ -16,39 +88,92 @@ class TestRagConfiguration: def test_default_values(self) -> None: """Test that RagConfiguration has correct default values.""" config = RagConfiguration() - assert config.inline == [] - assert config.tool == [] + assert config.byok.stores == [] + assert config.byok.max_chunks == constants.DEFAULT_BYOK_RAG_MAX_CHUNKS + assert config.okp.offline is True + assert config.okp.max_chunks == constants.DEFAULT_OKP_RAG_MAX_CHUNKS + assert config.retrieval.inline.sources == [] + assert config.retrieval.tool.sources == [] def test_inline_with_byok_ids(self) -> None: - """Test inline list with BYOK rag IDs.""" - config = RagConfiguration(inline=["store-1", "store-2"]) - assert config.inline == ["store-1", "store-2"] - assert config.tool == [] + """Test inline sources with BYOK rag IDs.""" + stores = [ + RagStore(rag_id="store-1", vector_db_id="vs_1", db_path="/tmp/s1.db"), + RagStore(rag_id="store-2", vector_db_id="vs_2", db_path="/tmp/s2.db"), + ] + config = RagConfiguration( + byok=ByokConfiguration(stores=stores), + retrieval=RetrievalConfiguration( + inline=RetrievalStrategyConfiguration(sources=["store-1", "store-2"]), + ), + ) + assert config.retrieval.inline.sources == ["store-1", "store-2"] + assert config.retrieval.tool.sources == [] def test_inline_with_okp_rag(self) -> None: - """Test inline list including the special OKP ID.""" - config = RagConfiguration(inline=[constants.OKP_RAG_ID, "store-1"]) - assert constants.OKP_RAG_ID in config.inline - assert "store-1" in config.inline + """Test inline sources including the special OKP ID.""" + store = RagStore(rag_id="store-1", vector_db_id="vs_1", db_path="/tmp/s1.db") + config = RagConfiguration( + byok=ByokConfiguration(stores=[store]), + retrieval=RetrievalConfiguration( + inline=RetrievalStrategyConfiguration( + sources=[constants.OKP_RAG_ID, "store-1"] + ), + ), + ) + assert constants.OKP_RAG_ID in config.retrieval.inline.sources + assert "store-1" in config.retrieval.inline.sources def test_tool_with_okp_rag_and_byok(self) -> None: - """Test tool list with OKP and BYOK IDs.""" + """Test tool sources with OKP and BYOK IDs.""" + store = RagStore(rag_id="store-1", vector_db_id="vs_1", db_path="/tmp/s1.db") config = RagConfiguration( - inline=["store-1"], - tool=[constants.OKP_RAG_ID, "store-1"], + byok=ByokConfiguration(stores=[store]), + retrieval=RetrievalConfiguration( + inline=RetrievalStrategyConfiguration(sources=["store-1"]), + tool=RetrievalStrategyConfiguration( + sources=[constants.OKP_RAG_ID, "store-1"] + ), + ), ) - assert config.inline == ["store-1"] - assert config.tool == [constants.OKP_RAG_ID, "store-1"] + assert config.retrieval.inline.sources == ["store-1"] + assert config.retrieval.tool.sources == [constants.OKP_RAG_ID, "store-1"] def test_tool_empty_list(self) -> None: - """Test that an explicit empty tool list disables tool RAG.""" - config = RagConfiguration(tool=[]) - assert config.tool == [] + """Test that an explicit empty tool sources list disables tool RAG.""" + config = RagConfiguration( + retrieval=RetrievalConfiguration( + tool=RetrievalStrategyConfiguration(sources=[]), + ), + ) + assert config.retrieval.tool.sources == [] def test_tool_default_is_empty_list(self) -> None: - """Test that tool defaults to an empty list.""" + """Test that tool sources defaults to an empty list.""" config = RagConfiguration() - assert config.tool == [] + assert config.retrieval.tool.sources == [] + + def test_unknown_inline_source_rejected(self) -> None: + """Test that inline sources referencing undeclared rag_ids are rejected.""" + store = RagStore(rag_id="store-1", vector_db_id="vs_1", db_path="/tmp/s1.db") + with pytest.raises(ValidationError, match="unknown RAG IDs"): + RagConfiguration( + byok=ByokConfiguration(stores=[store]), + retrieval=RetrievalConfiguration( + inline=RetrievalStrategyConfiguration( + sources=["store-1", "nonexistent"] + ), + ), + ) + + def test_unknown_tool_source_rejected(self) -> None: + """Test that tool sources referencing undeclared rag_ids are rejected.""" + with pytest.raises(ValidationError, match="unknown RAG IDs"): + RagConfiguration( + retrieval=RetrievalConfiguration( + tool=RetrievalStrategyConfiguration(sources=["missing-store"]), + ), + ) def test_no_unknown_fields_allowed(self) -> None: """Test that RagConfiguration rejects unknown fields.""" @@ -57,13 +182,28 @@ def test_no_unknown_fields_allowed(self) -> None: def test_fully_custom_config(self) -> None: """Test RagConfiguration with all fields set.""" + store = RagStore( + rag_id="store-1", + vector_db_id="vs_123", + db_path="/tmp/test.db", + ) config = RagConfiguration( - inline=[constants.OKP_RAG_ID, "store-1"], - tool=["store-1"], + byok=ByokConfiguration(stores=[store], max_chunks=15), + okp=OkpConfiguration(offline=False, max_chunks=3), + retrieval=RetrievalConfiguration( + inline=RetrievalStrategyConfiguration( + sources=[constants.OKP_RAG_ID, "store-1"], max_chunks=8 + ), + tool=RetrievalStrategyConfiguration(sources=["store-1"], max_chunks=12), + ), ) - assert constants.OKP_RAG_ID in config.inline - assert "store-1" in config.inline - assert config.tool == ["store-1"] + assert constants.OKP_RAG_ID in config.retrieval.inline.sources + assert "store-1" in config.retrieval.inline.sources + assert config.retrieval.tool.sources == ["store-1"] + assert config.byok.max_chunks == 15 + assert config.okp.max_chunks == 3 + assert config.retrieval.inline.max_chunks == 8 + assert config.retrieval.tool.max_chunks == 12 class TestOkpConfiguration: @@ -74,6 +214,7 @@ def test_default_values(self) -> None: config = OkpConfiguration() assert config.offline is True assert config.chunk_filter_query is None + assert config.max_chunks == constants.DEFAULT_OKP_RAG_MAX_CHUNKS def test_offline_false(self) -> None: """Test offline can be set to False (online mode).""" @@ -85,6 +226,11 @@ def test_custom_chunk_filter_query(self) -> None: config = OkpConfiguration(chunk_filter_query="product:*openshift*") assert config.chunk_filter_query == "product:*openshift*" + def test_custom_max_chunks(self) -> None: + """Test that max_chunks can be customised.""" + config = OkpConfiguration(max_chunks=3) + assert config.max_chunks == 3 + def test_no_unknown_fields_allowed(self) -> None: """Test that OkpConfiguration rejects unknown fields.""" with pytest.raises(ValidationError, match="Extra inputs are not permitted"): diff --git a/tests/unit/telemetry/conftest.py b/tests/unit/telemetry/conftest.py index 6b2db6a82..f17cc5a28 100644 --- a/tests/unit/telemetry/conftest.py +++ b/tests/unit/telemetry/conftest.py @@ -12,6 +12,7 @@ Action, AuthenticationConfiguration, AuthorizationConfiguration, + ByokConfiguration, Configuration, CORSConfiguration, Customization, @@ -23,7 +24,11 @@ JwtRoleRule, LlamaStackConfiguration, ModelContextProtocolServer, + OkpConfiguration, PostgreSQLDatabaseConfiguration, + RagConfiguration, + RetrievalConfiguration, + RetrievalStrategyConfiguration, ServiceConfiguration, SQLiteDatabaseConfiguration, TLSConfiguration, @@ -287,13 +292,33 @@ def build_fully_populated_config() -> Configuration: ), ], conversation_cache=None, - byok_rag=[], + rag=RagConfiguration.model_construct( + byok=ByokConfiguration.model_construct( + max_chunks=10, + stores=[], + ), + okp=OkpConfiguration.model_construct( + rhokp_url=None, + offline=True, + chunk_filter_query=None, + max_chunks=5, + ), + retrieval=RetrievalConfiguration.model_construct( + inline=RetrievalStrategyConfiguration.model_construct( + sources=[], + max_chunks=10, + ), + tool=RetrievalStrategyConfiguration.model_construct( + sources=[], + max_chunks=10, + ), + ), + ), a2a_state=None, quota_handlers=None, azure_entra_id=None, splunk=None, deployment_environment="production", - solr=None, ) @@ -363,13 +388,33 @@ def build_minimal_config() -> Configuration: ), mcp_servers=[], conversation_cache=None, - byok_rag=[], + rag=RagConfiguration.model_construct( + byok=ByokConfiguration.model_construct( + max_chunks=10, + stores=[], + ), + okp=OkpConfiguration.model_construct( + rhokp_url=None, + offline=True, + chunk_filter_query=None, + max_chunks=5, + ), + retrieval=RetrievalConfiguration.model_construct( + inline=RetrievalStrategyConfiguration.model_construct( + sources=[], + max_chunks=10, + ), + tool=RetrievalStrategyConfiguration.model_construct( + sources=[], + max_chunks=10, + ), + ), + ), a2a_state=None, quota_handlers=None, azure_entra_id=None, splunk=None, deployment_environment="development", - solr=None, ) diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py index a6c844b45..9cc5328fd 100644 --- a/tests/unit/test_configuration.py +++ b/tests/unit/test_configuration.py @@ -1001,7 +1001,11 @@ def test_rag_id_mapping_includes_solr_when_okp_in_inline() -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "rag": {"inline": [constants.OKP_RAG_ID]}, + "rag": { + "retrieval": { + "inline": {"sources": [constants.OKP_RAG_ID]}, + }, + }, } ) assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping @@ -1025,7 +1029,11 @@ def test_rag_id_mapping_includes_solr_when_okp_in_tool() -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "rag": {"tool": [constants.OKP_RAG_ID]}, + "rag": { + "retrieval": { + "tool": {"sources": [constants.OKP_RAG_ID]}, + }, + }, } ) assert constants.SOLR_DEFAULT_VECTOR_STORE_ID in cfg.rag_id_mapping @@ -1051,13 +1059,17 @@ def test_rag_id_mapping_with_byok(tmp_path: Path) -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "byok_rag": [ - { - "rag_id": "my-kb", - "vector_db_id": "vs-001", - "db_path": str(db_file), + "rag": { + "byok": { + "stores": [ + { + "rag_id": "my-kb", + "vector_db_id": "vs-001", + "db_path": str(db_file), + }, + ], }, - ], + }, } ) assert cfg.rag_id_mapping == {"vs-001": "my-kb"} @@ -1079,14 +1091,20 @@ def test_rag_id_mapping_with_byok_and_okp(tmp_path: Path) -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "rag": {"inline": [constants.OKP_RAG_ID]}, - "byok_rag": [ - { - "rag_id": "my-kb", - "vector_db_id": "vs-001", - "db_path": str(db_file), + "rag": { + "retrieval": { + "inline": {"sources": [constants.OKP_RAG_ID]}, }, - ], + "byok": { + "stores": [ + { + "rag_id": "my-kb", + "vector_db_id": "vs-001", + "db_path": str(db_file), + }, + ], + }, + }, } ) assert "vs-001" in cfg.rag_id_mapping @@ -1138,13 +1156,17 @@ def test_score_multiplier_mapping_with_byok_defaults(tmp_path: Path) -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "byok_rag": [ - { - "rag_id": "my-kb", - "vector_db_id": "vs-001", - "db_path": str(db_file), + "rag": { + "byok": { + "stores": [ + { + "rag_id": "my-kb", + "vector_db_id": "vs-001", + "db_path": str(db_file), + }, + ], }, - ], + }, } ) assert cfg.score_multiplier_mapping == {"vs-001": 1.0} @@ -1168,20 +1190,24 @@ def test_score_multiplier_mapping_with_custom_values(tmp_path: Path) -> None: }, "user_data_collection": {}, "authentication": {"module": "noop"}, - "byok_rag": [ - { - "rag_id": "kb1", - "vector_db_id": "vs-001", - "db_path": str(db_file1), - "score_multiplier": 1.5, - }, - { - "rag_id": "kb2", - "vector_db_id": "vs-002", - "db_path": str(db_file2), - "score_multiplier": 0.75, + "rag": { + "byok": { + "stores": [ + { + "rag_id": "kb1", + "vector_db_id": "vs-001", + "db_path": str(db_file1), + "score_multiplier": 1.5, + }, + { + "rag_id": "kb2", + "vector_db_id": "vs-002", + "db_path": str(db_file2), + "score_multiplier": 0.75, + }, + ], }, - ], + }, } ) assert cfg.score_multiplier_mapping == {"vs-001": 1.5, "vs-002": 0.75} @@ -1328,17 +1354,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "ca_cert_path": "file", }, }, - "byok_rag": [ - { - "rag_id": "Weight message strong wind land bar.", - "rag_type": "Learn person tell increase dog even.", - "embedding_model": "By our television. Southern full a course.", - "embedding_dimension": 753, - "vector_db_id": "Indicate see door specific hard region one.", - "db_path": "A none owner visit wish medical cut Mrs. Later nig", - "score_multiplier": 388.45, - } - ], "a2a_state": {"sqlite": None, "postgres": None}, "quota_handlers": { "sqlite": {"db_path": "Experience five able citizen work member call cond"}, @@ -1397,21 +1412,40 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Second say body know music while.", "rag": { - "inline": [ - "Local authority pressure pretty. Travel something ", - "Watch meet able such.", - "Different apply size.", - ], - "tool": [ - "Full develop under his.", - "Black political father project become.", - "Once however son place.", - ], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": "Foreign space system.", + "byok": { + "stores": [ + { + "rag_id": "Weight message strong wind land bar.", + "backend": "Learn person tell increase dog even.", + "embedding_model": "By our television. Southern full a course.", + "embedding_dimension": 753, + "vector_db_id": "Indicate see door specific hard region one.", + "db_path": "A none owner visit wish medical cut Mrs. Later nig", + "score_multiplier": 388.45, + } + ], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": "Foreign space system.", + }, + "retrieval": { + "inline": { + "sources": [ + "Local authority pressure pretty. Travel something ", + "Watch meet able such.", + "Different apply size.", + ], + }, + "tool": { + "sources": [ + "Full develop under his.", + "Black political father project become.", + "Once however son place.", + ], + }, + }, }, }, { @@ -1604,26 +1638,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "ca_cert_path": "certs", }, }, - "byok_rag": [ - { - "rag_id": "Tonight relate there record.", - "rag_type": "Politics development real play main chair capital ", - "embedding_model": "Prepare memory outside.", - "embedding_dimension": 449, - "vector_db_id": "Political right gun law public group rock.", - "db_path": "Consider still recognize church. Area suggest noth", - "score_multiplier": 183.85, - }, - { - "rag_id": "One again under respond poor beyond.", - "rag_type": "Six base physical.", - "embedding_model": "Surface that choice.", - "embedding_dimension": 736, - "vector_db_id": "Forget level other agreement.", - "db_path": "Argue pull out race town.", - "score_multiplier": 225.21, - }, - ], "a2a_state": {"sqlite": None, "postgres": None}, "quota_handlers": { "sqlite": None, @@ -1686,14 +1700,48 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Vote mean answer simply turn project.", "rag": { - "inline": [ - "Billion job provide take other.", - "Eight total figure surface development include out", - "Which from cover not choice bring sister front.", - ], - "tool": ["Ground appear group institution."], + "byok": { + "stores": [ + { + "rag_id": "Tonight relate there record.", + "backend": "Politics development real play main chair capital ", + "embedding_model": "Prepare memory outside.", + "embedding_dimension": 449, + "vector_db_id": "Political right gun law public group rock.", + "db_path": "Consider still recognize church. Area suggest noth", + "score_multiplier": 183.85, + }, + { + "rag_id": "One again under respond poor beyond.", + "backend": "Six base physical.", + "embedding_model": "Surface that choice.", + "embedding_dimension": 736, + "vector_db_id": "Forget level other agreement.", + "db_path": "Argue pull out race town.", + "score_multiplier": 225.21, + }, + ], + }, + "okp": { + "rhokp_url": None, + "offline": False, + "chunk_filter_query": None, + }, + "retrieval": { + "inline": { + "sources": [ + "Billion job provide take other.", + "Eight total figure surface development include out", + "Which from cover not choice bring sister front.", + ], + }, + "tool": { + "sources": [ + "Ground appear group institution.", + ], + }, + }, }, - "okp": {"rhokp_url": None, "offline": False, "chunk_filter_query": None}, }, { "name": "Patricia Henderson", @@ -1801,17 +1849,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": None, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Something worker campaign war through.", - "rag_type": "Check simple since next then statement.", - "embedding_model": "Class third author series.", - "embedding_dimension": 211, - "vector_db_id": "Less put site alone amount.", - "db_path": "Live child most throughout.", - "score_multiplier": 252.41, - } - ], "a2a_state": {"sqlite": None, "postgres": None}, "quota_handlers": { "sqlite": None, @@ -1864,17 +1901,39 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Mouth view form.", "rag": { - "inline": [ - "Interesting during product himself attack Democrat", - "Decision I order particularly.", - "Couple reflect relate two agree local.", - ], - "tool": ["Her society move lay.", "Network material like."], - }, - "okp": { - "rhokp_url": "xyzzy", - "offline": False, - "chunk_filter_query": "Beautiful society within.", + "byok": { + "stores": [ + { + "rag_id": "Something worker campaign war through.", + "backend": "Check simple since next then statement.", + "embedding_model": "Class third author series.", + "embedding_dimension": 211, + "vector_db_id": "Less put site alone amount.", + "db_path": "Live child most throughout.", + "score_multiplier": 252.41, + } + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": False, + "chunk_filter_query": "Beautiful society within.", + }, + "retrieval": { + "inline": { + "sources": [ + "Interesting during product himself attack Democrat", + "Decision I order particularly.", + "Couple reflect relate two agree local.", + ], + }, + "tool": { + "sources": [ + "Her society move lay.", + "Network material like.", + ], + }, + }, }, }, { @@ -2002,35 +2061,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "ca_cert_path": None, }, }, - "byok_rag": [ - { - "rag_id": "Ever analysis three perhaps.", - "rag_type": "Ever truth skin.", - "embedding_model": "Type toward never hair relate before.", - "embedding_dimension": 619, - "vector_db_id": "Learn computer positive nor yet notice.", - "db_path": "Sort rule soldier relationship. Wife front kid cit", - "score_multiplier": 310.63, - }, - { - "rag_id": "Question to front often.", - "rag_type": "But catch hear happy.", - "embedding_model": "Hard message wait least focus left daughter reflec", - "embedding_dimension": 97, - "vector_db_id": "Create visit green. Throw more tend throw game.", - "db_path": "Rest could recent test door.", - "score_multiplier": 224.06, - }, - { - "rag_id": "Read hand over fight president feel letter. Over h", - "rag_type": "Set visit describe seat space play.", - "embedding_model": "Lawyer early term direction.", - "embedding_dimension": 119, - "vector_db_id": "Day store girl writer have would participant.", - "db_path": "Later research explain first lose probably.", - "score_multiplier": 627.97, - }, - ], "a2a_state": { "sqlite": {"db_path": "Write herself each generation finally attorney."}, "postgres": None, @@ -2089,16 +2119,55 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Want hair product.", "rag": { - "inline": [ - "Himself fear read here finally ask teacher.", - "Enjoy standard off.", - ], - "tool": ["Them author financial production."], - }, - "okp": { - "rhokp_url": "xyzzy", - "offline": False, - "chunk_filter_query": "Industry as appear us. Lead dream public compare.", + "byok": { + "stores": [ + { + "rag_id": "Ever analysis three perhaps.", + "backend": "Ever truth skin.", + "embedding_model": "Type toward never hair relate before.", + "embedding_dimension": 619, + "vector_db_id": "Learn computer positive nor yet notice.", + "db_path": "Sort rule soldier relationship. Wife front kid cit", + "score_multiplier": 310.63, + }, + { + "rag_id": "Question to front often.", + "backend": "But catch hear happy.", + "embedding_model": "Hard message wait least focus left daughter reflec", + "embedding_dimension": 97, + "vector_db_id": "Create visit green. Throw more tend throw game.", + "db_path": "Rest could recent test door.", + "score_multiplier": 224.06, + }, + { + "rag_id": "Read hand over fight president feel letter. Over h", + "backend": "Set visit describe seat space play.", + "embedding_model": "Lawyer early term direction.", + "embedding_dimension": 119, + "vector_db_id": "Day store girl writer have would participant.", + "db_path": "Later research explain first lose probably.", + "score_multiplier": 627.97, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": False, + "chunk_filter_query": "Industry as appear us. Lead dream public compare.", + }, + "retrieval": { + "inline": { + "sources": [ + "Himself fear read here finally ask teacher.", + "Enjoy standard off.", + ], + }, + "tool": { + "sources": [ + "Them author financial production.", + ], + }, + }, }, }, { @@ -2213,26 +2282,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": {"db_path": "Court size your eye choose."}, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Authority kind apply arm manager local reveal.", - "rag_type": "Seem authority miss.", - "embedding_model": "Have news quality.", - "embedding_dimension": 310, - "vector_db_id": "Education hot full her. Serve mention save executi", - "db_path": "Every popular bit.", - "score_multiplier": 918.43, - }, - { - "rag_id": "Avoid baby miss want education.", - "rag_type": "Sing answer rule soon.", - "embedding_model": "Year let example you paper develop tough.", - "embedding_dimension": 985, - "vector_db_id": "Operation conference phone.", - "db_path": "All effort True see.", - "score_multiplier": 788.57, - }, - ], "a2a_state": { "sqlite": {"db_path": "Green example walk become return front."}, "postgres": { @@ -2284,21 +2333,49 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Consumer center sign skin total.", "rag": { - "inline": [ - "True four lawyer sound. Light fund former art.", - "Perhaps theory remain. Marriage person put food.", - "Run behind single material else media.", - ], - "tool": [ - "Another Congress part seat bit.", - "Able main door under. Early consumer speech less c", - "Eat read shake three. Development cell mission.", - ], - }, - "okp": { - "rhokp_url": None, - "offline": True, - "chunk_filter_query": "And drug brother tell specific realize hit.", + "byok": { + "stores": [ + { + "rag_id": "Authority kind apply arm manager local reveal.", + "backend": "Seem authority miss.", + "embedding_model": "Have news quality.", + "embedding_dimension": 310, + "vector_db_id": "Education hot full her. Serve mention save executi", + "db_path": "Every popular bit.", + "score_multiplier": 918.43, + }, + { + "rag_id": "Avoid baby miss want education.", + "backend": "Sing answer rule soon.", + "embedding_model": "Year let example you paper develop tough.", + "embedding_dimension": 985, + "vector_db_id": "Operation conference phone.", + "db_path": "All effort True see.", + "score_multiplier": 788.57, + }, + ], + }, + "okp": { + "rhokp_url": None, + "offline": True, + "chunk_filter_query": "And drug brother tell specific realize hit.", + }, + "retrieval": { + "inline": { + "sources": [ + "True four lawyer sound. Light fund former art.", + "Perhaps theory remain. Marriage person put food.", + "Run behind single material else media.", + ], + }, + "tool": { + "sources": [ + "Another Congress part seat bit.", + "Able main door under. Early consumer speech less c", + "Eat read shake three. Development cell mission.", + ], + }, + }, }, }, { @@ -2470,35 +2547,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": None, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Nor reduce physical section serious. She still rep", - "rag_type": "Hospital political recognize operation tree.", - "embedding_model": "Drug concern old job discover firm imagine.", - "embedding_dimension": 192, - "vector_db_id": "Relationship training argue body market old per.", - "db_path": "Consumer while positive. Why because quite respons", - "score_multiplier": 283.58, - }, - { - "rag_id": "Past detail as star. Teacher spend sit push maybe ", - "rag_type": "After good nature. War option science approach.", - "embedding_model": "Air serve court measure most play item.", - "embedding_dimension": 491, - "vector_db_id": "Other open wonder.", - "db_path": "Car everybody during. Nor believe audience tax soo", - "score_multiplier": 159.31, - }, - { - "rag_id": "Fire feeling person real party game method.", - "rag_type": "Middle together second money need fly.", - "embedding_model": "Do item when politics.", - "embedding_dimension": 896, - "vector_db_id": "Reason decision region past research.", - "db_path": "Every any nice vote civil.", - "score_multiplier": 776.23, - }, - ], "a2a_state": { "sqlite": None, "postgres": { @@ -2557,13 +2605,56 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Successful cut arrive ever against maybe.", "rag": { - "inline": [ - "Themselves scene just.", - "Sport develop particular when. Task agreement walk", - ], - "tool": ["Anything visit late."], + "byok": { + "stores": [ + { + "rag_id": "Nor reduce physical section serious. She still rep", + "backend": "Hospital political recognize operation tree.", + "embedding_model": "Drug concern old job discover firm imagine.", + "embedding_dimension": 192, + "vector_db_id": "Relationship training argue body market old per.", + "db_path": "Consumer while positive. Why because quite respons", + "score_multiplier": 283.58, + }, + { + "rag_id": "Past detail as star. Teacher spend sit push maybe ", + "backend": "After good nature. War option science approach.", + "embedding_model": "Air serve court measure most play item.", + "embedding_dimension": 491, + "vector_db_id": "Other open wonder.", + "db_path": "Car everybody during. Nor believe audience tax soo", + "score_multiplier": 159.31, + }, + { + "rag_id": "Fire feeling person real party game method.", + "backend": "Middle together second money need fly.", + "embedding_model": "Do item when politics.", + "embedding_dimension": 896, + "vector_db_id": "Reason decision region past research.", + "db_path": "Every any nice vote civil.", + "score_multiplier": 776.23, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": True, + "chunk_filter_query": None, + }, + "retrieval": { + "inline": { + "sources": [ + "Themselves scene just.", + "Sport develop particular when. Task agreement walk", + ], + }, + "tool": { + "sources": [ + "Anything visit late.", + ], + }, + }, }, - "okp": {"rhokp_url": "xyzzy", "offline": True, "chunk_filter_query": None}, }, { "name": "Mr. Michael Wilson", @@ -2699,35 +2790,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": None, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Sometimes once win young bar right. Star keep cult", - "rag_type": "Produce energy skill art.", - "embedding_model": "Beautiful series message.", - "embedding_dimension": 739, - "vector_db_id": "Visit night city.", - "db_path": "Paper investment game.", - "score_multiplier": 962.12, - }, - { - "rag_id": "Standard might new national produce thank bill.", - "rag_type": "Bar else center dinner great. Wrong ability big.", - "embedding_model": "Building try left general.", - "embedding_dimension": 973, - "vector_db_id": "Issue never physical stuff edge fire research.", - "db_path": "Help hope our would discussion. Than plan task.", - "score_multiplier": 732.93, - }, - { - "rag_id": "Air culture explain child.", - "rag_type": "Reach must moment.", - "embedding_model": "Manage anyone police someone church.", - "embedding_dimension": 691, - "vector_db_id": "Far tough individual painting send minute.", - "db_path": "Head major down soon.", - "score_multiplier": 485.53, - }, - ], "a2a_state": { "sqlite": None, "postgres": { @@ -2788,14 +2850,57 @@ def test_score_multiplier_mapping_not_loaded() -> None: "splunk": None, "deployment_environment": "Must no land member.", "rag": { - "inline": [ - "Image police section carry. Order walk state commu", - "Society be night participant seat.", - "Minute skin again.", - ], - "tool": ["Use hotel often deal light teacher. Improve more m"], + "byok": { + "stores": [ + { + "rag_id": "Sometimes once win young bar right. Star keep cult", + "backend": "Produce energy skill art.", + "embedding_model": "Beautiful series message.", + "embedding_dimension": 739, + "vector_db_id": "Visit night city.", + "db_path": "Paper investment game.", + "score_multiplier": 962.12, + }, + { + "rag_id": "Standard might new national produce thank bill.", + "backend": "Bar else center dinner great. Wrong ability big.", + "embedding_model": "Building try left general.", + "embedding_dimension": 973, + "vector_db_id": "Issue never physical stuff edge fire research.", + "db_path": "Help hope our would discussion. Than plan task.", + "score_multiplier": 732.93, + }, + { + "rag_id": "Air culture explain child.", + "backend": "Reach must moment.", + "embedding_model": "Manage anyone police someone church.", + "embedding_dimension": 691, + "vector_db_id": "Far tough individual painting send minute.", + "db_path": "Head major down soon.", + "score_multiplier": 485.53, + }, + ], + }, + "okp": { + "rhokp_url": None, + "offline": False, + "chunk_filter_query": None, + }, + "retrieval": { + "inline": { + "sources": [ + "Image police section carry. Order walk state commu", + "Society be night participant seat.", + "Minute skin again.", + ], + }, + "tool": { + "sources": [ + "Use hotel often deal light teacher. Improve more m", + ], + }, + }, }, - "okp": {"rhokp_url": None, "offline": False, "chunk_filter_query": None}, }, { "name": "Ruth Davidson", @@ -2915,35 +3020,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "ca_cert_path": None, }, }, - "byok_rag": [ - { - "rag_id": "Raise real rather walk product against.", - "rag_type": "Whose mind serve public character letter.", - "embedding_model": "Miss act loss camera.", - "embedding_dimension": 276, - "vector_db_id": "Return generation beat.", - "db_path": "Discover professional really group.", - "score_multiplier": 546.8, - }, - { - "rag_id": "Those sit there reason.", - "rag_type": "Keep third nothing throw.", - "embedding_model": "Like movie lead since traditional for daughter. Re", - "embedding_dimension": 148, - "vector_db_id": "Sure statement only authority.", - "db_path": "Top social suggest she yourself heavy. Use low bud", - "score_multiplier": 623.44, - }, - { - "rag_id": "Ability who manager several.", - "rag_type": "About ago spend poor event.", - "embedding_model": "Be energy lead.", - "embedding_dimension": 14, - "vector_db_id": "Region behind law affect note.", - "db_path": "View within able over sit. Part eat among appear.", - "score_multiplier": 306.05, - }, - ], "a2a_state": { "sqlite": {"db_path": "Air pretty Democrat husband make travel statement."}, "postgres": { @@ -2989,17 +3065,56 @@ def test_score_multiplier_mapping_not_loaded() -> None: "splunk": None, "deployment_environment": "Second window action enter until very low provide.", "rag": { - "inline": [ - "Consider once budget author trade federal.", - "Knowledge the option positive. Court its effect me", - "Add these care drive want and.", - ], - "tool": ["Guess know picture."], - }, - "okp": { - "rhokp_url": "xyzzy", - "offline": False, - "chunk_filter_query": "Much when find smile try.", + "byok": { + "stores": [ + { + "rag_id": "Raise real rather walk product against.", + "backend": "Whose mind serve public character letter.", + "embedding_model": "Miss act loss camera.", + "embedding_dimension": 276, + "vector_db_id": "Return generation beat.", + "db_path": "Discover professional really group.", + "score_multiplier": 546.8, + }, + { + "rag_id": "Those sit there reason.", + "backend": "Keep third nothing throw.", + "embedding_model": "Like movie lead since traditional for daughter. Re", + "embedding_dimension": 148, + "vector_db_id": "Sure statement only authority.", + "db_path": "Top social suggest she yourself heavy. Use low bud", + "score_multiplier": 623.44, + }, + { + "rag_id": "Ability who manager several.", + "backend": "About ago spend poor event.", + "embedding_model": "Be energy lead.", + "embedding_dimension": 14, + "vector_db_id": "Region behind law affect note.", + "db_path": "View within able over sit. Part eat among appear.", + "score_multiplier": 306.05, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": False, + "chunk_filter_query": "Much when find smile try.", + }, + "retrieval": { + "inline": { + "sources": [ + "Consider once budget author trade federal.", + "Knowledge the option positive. Court its effect me", + "Add these care drive want and.", + ], + }, + "tool": { + "sources": [ + "Guess know picture.", + ], + }, + }, }, }, { @@ -3117,35 +3232,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": {"db_path": "Suggest gun standard fast note stay their."}, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Hope enough nature. Forward season agreement espec", - "rag_type": "Everyone finish task worry little we.", - "embedding_model": "Third choice enter blue baby behind its.", - "embedding_dimension": 514, - "vector_db_id": "Board how fight.", - "db_path": "Black can heavy write home.", - "score_multiplier": 817.0, - }, - { - "rag_id": "Fish medical really owner different carry.", - "rag_type": "Order window meeting feel.", - "embedding_model": "Occur international consumer.", - "embedding_dimension": 912, - "vector_db_id": "Full tell us century development network scene spe", - "db_path": "Today boy kind key center Mr. Contain reduce coach", - "score_multiplier": 233.12, - }, - { - "rag_id": "Note dog the audience work. We though name.", - "rag_type": "Bad career deep affect.", - "embedding_model": "Budget much see ask.", - "embedding_dimension": 939, - "vector_db_id": "South positive might film control peace seem.", - "db_path": "Go for can player camera.", - "score_multiplier": 268.06, - }, - ], "a2a_state": {"sqlite": None, "postgres": None}, "quota_handlers": { "sqlite": {"db_path": "Suffer best free prove quickly to degree."}, @@ -3193,17 +3279,58 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Maybe really go court.", "rag": { - "inline": [ - "Without rock staff have campaign.", - "Particular her six.", - "These where I product.", - ], - "tool": [ - "Kind ability hope way.", - "Mean hot pressure onto purpose however.", - ], + "byok": { + "stores": [ + { + "rag_id": "Hope enough nature. Forward season agreement espec", + "backend": "Everyone finish task worry little we.", + "embedding_model": "Third choice enter blue baby behind its.", + "embedding_dimension": 514, + "vector_db_id": "Board how fight.", + "db_path": "Black can heavy write home.", + "score_multiplier": 817.0, + }, + { + "rag_id": "Fish medical really owner different carry.", + "backend": "Order window meeting feel.", + "embedding_model": "Occur international consumer.", + "embedding_dimension": 912, + "vector_db_id": "Full tell us century development network scene spe", + "db_path": "Today boy kind key center Mr. Contain reduce coach", + "score_multiplier": 233.12, + }, + { + "rag_id": "Note dog the audience work. We though name.", + "backend": "Bad career deep affect.", + "embedding_model": "Budget much see ask.", + "embedding_dimension": 939, + "vector_db_id": "South positive might film control peace seem.", + "db_path": "Go for can player camera.", + "score_multiplier": 268.06, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": True, + "chunk_filter_query": None, + }, + "retrieval": { + "inline": { + "sources": [ + "Without rock staff have campaign.", + "Particular her six.", + "These where I product.", + ], + }, + "tool": { + "sources": [ + "Kind ability hope way.", + "Mean hot pressure onto purpose however.", + ], + }, + }, }, - "okp": {"rhokp_url": "xyzzy", "offline": True, "chunk_filter_query": None}, }, { "name": "William Riley", @@ -3319,26 +3446,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "sqlite": None, "postgres": None, }, - "byok_rag": [ - { - "rag_id": "Charge herself where impact say billion.", - "rag_type": "Blood thus member soldier.", - "embedding_model": "Sound hotel save.", - "embedding_dimension": 922, - "vector_db_id": "Down simple suffer civil. Modern service scene pas", - "db_path": "Ten fall fine firm.", - "score_multiplier": 671.28, - }, - { - "rag_id": "Include space evidence benefit loss skin.", - "rag_type": "Green anyone be.", - "embedding_model": "Focus clearly physical six.", - "embedding_dimension": 237, - "vector_db_id": "Company put eight.", - "db_path": "Step at let oil leave agreement this.", - "score_multiplier": 368.33, - }, - ], "a2a_state": { "sqlite": None, "postgres": { @@ -3409,14 +3516,48 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "Wonder though writer allow instead.", "rag": { - "inline": [ - "Onto political artist.", - "Trip writer half. Amount south give parent.", - "We thought American exist. Nearly cell case partic", - ], - "tool": ["School of book next man short responsibility able."], + "byok": { + "stores": [ + { + "rag_id": "Charge herself where impact say billion.", + "backend": "Blood thus member soldier.", + "embedding_model": "Sound hotel save.", + "embedding_dimension": 922, + "vector_db_id": "Down simple suffer civil. Modern service scene pas", + "db_path": "Ten fall fine firm.", + "score_multiplier": 671.28, + }, + { + "rag_id": "Include space evidence benefit loss skin.", + "backend": "Green anyone be.", + "embedding_model": "Focus clearly physical six.", + "embedding_dimension": 237, + "vector_db_id": "Company put eight.", + "db_path": "Step at let oil leave agreement this.", + "score_multiplier": 368.33, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": True, + "chunk_filter_query": None, + }, + "retrieval": { + "inline": { + "sources": [ + "Onto political artist.", + "Trip writer half. Amount south give parent.", + "We thought American exist. Nearly cell case partic", + ], + }, + "tool": { + "sources": [ + "School of book next man short responsibility able.", + ], + }, + }, }, - "okp": {"rhokp_url": "xyzzy", "offline": True, "chunk_filter_query": None}, }, { "name": "Rodney Scott", @@ -3552,26 +3693,6 @@ def test_score_multiplier_mapping_not_loaded() -> None: "ca_cert_path": "xyzzy", }, }, - "byok_rag": [ - { - "rag_id": "Stop choice sing prepare our both traditional.", - "rag_type": "Four account action. Herself measure speech full t", - "embedding_model": "Positive now since middle.", - "embedding_dimension": 799, - "vector_db_id": "Movie word mouth major identify law manage they.", - "db_path": "Finally hot investment role attorney meet husband.", - "score_multiplier": 515.98, - }, - { - "rag_id": "Throw two action station store respond among.", - "rag_type": "Accept exist also happy.", - "embedding_model": "Box structure arrive. Front suffer civil fund invo", - "embedding_dimension": 443, - "vector_db_id": "Begin born decade instead.", - "db_path": "Interest easy remember here fast win. Despite budg", - "score_multiplier": 2.92, - }, - ], "a2a_state": { "sqlite": {"db_path": "Trouble stop speech traditional."}, "postgres": { @@ -3625,19 +3746,47 @@ def test_score_multiplier_mapping_not_loaded() -> None: }, "deployment_environment": "West local subject clearly. Push question in.", "rag": { - "inline": [ - "Garden up certain success student others may.", - "Face can produce.", - ], - "tool": [ - "Though appear collection night message high.", - "Knowledge cup fact.", - ], - }, - "okp": { - "rhokp_url": "xyzzy", - "offline": False, - "chunk_filter_query": "Maybe assume region thus.", + "byok": { + "stores": [ + { + "rag_id": "Stop choice sing prepare our both traditional.", + "backend": "Four account action. Herself measure speech full t", + "embedding_model": "Positive now since middle.", + "embedding_dimension": 799, + "vector_db_id": "Movie word mouth major identify law manage they.", + "db_path": "Finally hot investment role attorney meet husband.", + "score_multiplier": 515.98, + }, + { + "rag_id": "Throw two action station store respond among.", + "backend": "Accept exist also happy.", + "embedding_model": "Box structure arrive. Front suffer civil fund invo", + "embedding_dimension": 443, + "vector_db_id": "Begin born decade instead.", + "db_path": "Interest easy remember here fast win. Despite budg", + "score_multiplier": 2.92, + }, + ], + }, + "okp": { + "rhokp_url": "xyzzy", + "offline": False, + "chunk_filter_query": "Maybe assume region thus.", + }, + "retrieval": { + "inline": { + "sources": [ + "Garden up certain success student others may.", + "Face can produce.", + ], + }, + "tool": { + "sources": [ + "Though appear collection night message high.", + "Knowledge cup fact.", + ], + }, + }, }, }, ] diff --git a/tests/unit/test_llama_stack_configuration.py b/tests/unit/test_llama_stack_configuration.py index aaa3bf53e..2fc08f928 100644 --- a/tests/unit/test_llama_stack_configuration.py +++ b/tests/unit/test_llama_stack_configuration.py @@ -262,7 +262,7 @@ def test_construct_vector_io_providers_section_adds_new() -> None: { "rag_id": "rag1", "vector_db_id": "store1", - "rag_type": "inline::faiss", + "backend": "faiss", }, ] output = construct_vector_io_providers_section(ls_config, byok_rag) @@ -279,7 +279,7 @@ def test_construct_vector_io_providers_section_idempotent_reenrichment() -> None { "rag_id": "rag1", "vector_db_id": "store1", - "rag_type": "inline::faiss", + "backend": "faiss", }, ] ls_config: dict[str, Any] = {"providers": {}} @@ -309,7 +309,7 @@ def test_construct_vector_io_providers_section_collapses_existing_duplicates() - { "rag_id": "rag1", "vector_db_id": "store1", - "rag_type": "inline::faiss", + "backend": "faiss", }, ] output = construct_vector_io_providers_section(ls_config, byok_rag) @@ -537,7 +537,7 @@ def test_generate_configuration_dedupes_vector_io_on_load(tmp_path: Path) -> Non def test_generate_configuration_with_dict(tmp_path: Path) -> None: """Test generate_configuration accepts dict.""" - config: dict[str, Any] = {"byok_rag": []} + config: dict[str, Any] = {"rag": {"byok": {"stores": []}}} outfile = tmp_path / "output.yaml" generate_configuration("tests/configuration/run.yaml", str(outfile), config) @@ -573,16 +573,20 @@ def test_generate_configuration_with_pydantic_model(tmp_path: Path) -> None: def test_generate_configuration_with_byok(tmp_path: Path) -> None: """Test generate_configuration adds BYOK entries.""" config = { - "byok_rag": [ - { - "rag_id": "rag1", - "vector_db_id": "store1", - "embedding_model": "test-model", - "embedding_dimension": 256, - "rag_type": "inline::faiss", - "db_path": "/tmp/store1.db", + "rag": { + "byok": { + "stores": [ + { + "rag_id": "rag1", + "vector_db_id": "store1", + "embedding_model": "test-model", + "embedding_dimension": 256, + "backend": "faiss", + "db_path": "/tmp/store1.db", + }, + ], }, - ], + }, } outfile = tmp_path / "output.yaml" diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py index 8666cfd93..299e7fc15 100644 --- a/tests/unit/utils/test_responses.py +++ b/tests/unit/utils/test_responses.py @@ -56,7 +56,7 @@ import constants from models.api.requests import QueryRequest from models.common.responses.types import InputTool, InputToolMCP -from models.config import ApprovalFilter, ByokRag, ModelContextProtocolServer +from models.config import ApprovalFilter, ModelContextProtocolServer, RagStore from utils.query import normalize_vertex_ai_model_id from utils.responses import ( _build_chunk_attributes, @@ -1706,13 +1706,13 @@ class TestResolveVectorStoreIds: """Tests for resolve_vector_store_ids function.""" @staticmethod - def _make_byok_rag(rag_id: str, vector_db_id: str) -> ByokRag: - """Create a ByokRag instance for testing.""" - return ByokRag( + def _make_byok_rag(rag_id: str, vector_db_id: str) -> RagStore: + """Create a RagStore instance for testing.""" + return RagStore( rag_id=rag_id, vector_db_id=vector_db_id, db_path="tests/configuration/rag.txt", - rag_type="rag", + backend="faiss", embedding_model="model", embedding_dimension=768, score_multiplier=1.0, @@ -1776,9 +1776,12 @@ async def test_translates_byok_ids_in_prepare_tools( mock_byok_rag.rag_id = "ocp_docs" mock_byok_rag.vector_db_id = "vs-001" mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [mock_byok_rag] - mock_config.configuration.rag.tool = [] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [mock_byok_rag] + mock_config.configuration.rag.retrieval.tool.sources = [] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, ["ocp_docs"], False, "token") @@ -1797,9 +1800,12 @@ async def test_passes_through_unknown_ids_in_prepare_tools( # Configure empty BYOK RAG mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] - mock_config.configuration.rag.tool = [] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [] + mock_config.configuration.rag.retrieval.tool.sources = [] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, ["raw-internal-id"], False, "token") @@ -1828,9 +1834,12 @@ async def test_does_not_translate_when_ids_fetched_from_llama_stack( mock_byok_rag.rag_id = "vs-internal" mock_byok_rag.vector_db_id = "vs-translated" mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [mock_byok_rag] - mock_config.configuration.rag.tool = [] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [mock_byok_rag] + mock_config.configuration.rag.retrieval.tool.sources = [] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, None, False, "token") @@ -1852,9 +1861,15 @@ async def test_uses_rag_tool_config_when_no_per_request_ids( mocker.patch("utils.responses.get_mcp_tools", return_value=None) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] - mock_config.configuration.rag.tool = ["rag-tool-id-1", "rag-tool-id-2"] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [] + mock_config.configuration.rag.retrieval.tool.sources = [ + "rag-tool-id-1", + "rag-tool-id-2", + ] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, None, False, "token") @@ -1877,9 +1892,12 @@ async def test_rag_tool_config_ids_are_translated( mock_byok_rag.rag_id = "ocp_docs" mock_byok_rag.vector_db_id = "vs-001" mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [mock_byok_rag] - mock_config.configuration.rag.tool = ["ocp_docs"] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [mock_byok_rag] + mock_config.configuration.rag.retrieval.tool.sources = ["ocp_docs"] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, None, False, "token") @@ -1896,9 +1914,9 @@ async def test_inline_rag_disables_tool_rag(self, mocker: MockerFixture) -> None mocker.patch("utils.responses.get_mcp_tools", return_value=None) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] - mock_config.configuration.rag.tool = [] - mock_config.configuration.rag.inline = [ + mock_config.configuration.rag.byok.stores = [] + mock_config.configuration.rag.retrieval.tool.sources = [] + mock_config.configuration.rag.retrieval.inline.sources = [ "inline-store-id" ] # inline is configured mocker.patch("utils.responses.configuration", mock_config) @@ -1918,9 +1936,12 @@ async def test_per_request_ids_override_rag_tool_config( mocker.patch("utils.responses.get_mcp_tools", return_value=None) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] - mock_config.configuration.rag.tool = ["config-id-1"] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [] + mock_config.configuration.rag.retrieval.tool.sources = ["config-id-1"] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, ["request-id-1"], False, "token") @@ -1944,9 +1965,12 @@ async def test_all_registered_dbs_used_when_neither_tool_nor_inline_configured( mocker.patch("utils.responses.get_mcp_tools", return_value=None) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] - mock_config.configuration.rag.tool = [] - mock_config.configuration.rag.inline = [] + mock_config.configuration.rag.byok.stores = [] + mock_config.configuration.rag.retrieval.tool.sources = [] + mock_config.rag.retrieval.tool.max_chunks = ( + constants.DEFAULT_TOOL_RAG_MAX_CHUNKS + ) + mock_config.configuration.rag.retrieval.inline.sources = [] mocker.patch("utils.responses.configuration", mock_config) result = await prepare_tools(mock_client, None, False, "token") @@ -3439,7 +3463,7 @@ async def test_client_tools_without_merge_header( return_value=mock_holder, ) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] + mock_config.configuration.rag.byok.stores = [] mock_config.mcp_servers = [] mocker.patch("utils.responses.configuration", mock_config) @@ -3464,7 +3488,7 @@ async def test_client_tools_with_merge_header(self, mocker: MockerFixture) -> No return_value=mock_holder, ) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] + mock_config.configuration.rag.byok.stores = [] mock_config.mcp_servers = [] mocker.patch("utils.responses.configuration", mock_config) @@ -3501,7 +3525,7 @@ async def test_merge_header_conflict_raises_409( return_value=mock_holder, ) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] + mock_config.configuration.rag.byok.stores = [] mock_config.mcp_servers = [] mocker.patch("utils.responses.configuration", mock_config) @@ -3560,7 +3584,7 @@ async def test_merge_header_no_server_tools_returns_client_only( return_value=mock_holder, ) mock_config = mocker.Mock() - mock_config.configuration.byok_rag = [] + mock_config.configuration.rag.byok.stores = [] mock_config.mcp_servers = [] mocker.patch("utils.responses.configuration", mock_config) mocker.patch( diff --git a/tests/unit/utils/test_vector_search.py b/tests/unit/utils/test_vector_search.py index e53be0148..4cbfb1981 100644 --- a/tests/unit/utils/test_vector_search.py +++ b/tests/unit/utils/test_vector_search.py @@ -22,7 +22,7 @@ _extract_byok_rag_chunks, _extract_solr_document_metadata, _fetch_byok_rag, - _fetch_solr_rag, + _fetch_okp_rag, _format_rag_context, _get_okp_base_url, _get_solr_vector_store_ids, @@ -518,8 +518,8 @@ class TestFetchByokRag: async def test_byok_no_inline_ids(self, mocker: MockerFixture) -> None: """Test when no inline BYOK sources are configured.""" config_mock = mocker.Mock(spec=AppConfig) - config_mock.configuration.rag.inline = [] - config_mock.configuration.byok_rag = [] + config_mock.configuration.rag.retrieval.inline.sources = [] + config_mock.configuration.rag.byok.stores = [] mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() @@ -537,8 +537,9 @@ async def test_byok_enabled_success(self, mocker: MockerFixture) -> None: byok_rag_mock = mocker.Mock() byok_rag_mock.rag_id = "rag_1" byok_rag_mock.vector_db_id = "vs_1" - config_mock.configuration.rag.inline = ["rag_1"] - config_mock.configuration.byok_rag = [byok_rag_mock] + config_mock.configuration.rag.retrieval.inline.sources = ["rag_1"] + config_mock.configuration.rag.byok.stores = [byok_rag_mock] + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.score_multiplier_mapping = {"vs_1": 1.5} config_mock.rag_id_mapping = {"vs_1": "rag_1"} mocker.patch("utils.vector_search.configuration", config_mock) @@ -576,8 +577,9 @@ async def test_user_facing_ids_translated_to_internal_ids( byok_rag_mock = mocker.Mock() byok_rag_mock.rag_id = "my-kb" byok_rag_mock.vector_db_id = "vs-internal-001" - config_mock.configuration.byok_rag = [byok_rag_mock] - config_mock.configuration.rag.inline = ["my-kb"] + config_mock.configuration.rag.byok.stores = [byok_rag_mock] + config_mock.configuration.rag.retrieval.inline.sources = ["my-kb"] + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.score_multiplier_mapping = {"vs-internal-001": 1.0} config_mock.rag_id_mapping = {"vs-internal-001": "my-kb"} mocker.patch("utils.vector_search.configuration", config_mock) @@ -601,7 +603,10 @@ async def test_user_facing_ids_translated_to_internal_ids( client_mock.vector_io.query.assert_called_once_with( vector_store_id="vs-internal-001", query="test query", - params={"max_chunks": constants.BYOK_RAG_MAX_CHUNKS, "mode": "vector"}, + params={ + "max_chunks": constants.DEFAULT_BYOK_RAG_MAX_CHUNKS, + "mode": "vector", + }, ) @pytest.mark.asyncio @@ -616,8 +621,12 @@ async def test_multiple_user_facing_ids_each_translated( byok_rag_2 = mocker.Mock() byok_rag_2.rag_id = "kb-part2" byok_rag_2.vector_db_id = "vs-bbb-222" - config_mock.configuration.byok_rag = [byok_rag_1, byok_rag_2] - config_mock.configuration.rag.inline = ["kb-part1", "kb-part2"] + config_mock.configuration.rag.byok.stores = [byok_rag_1, byok_rag_2] + config_mock.configuration.rag.retrieval.inline.sources = [ + "kb-part1", + "kb-part2", + ] + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.score_multiplier_mapping = {"vs-aaa-111": 1.0, "vs-bbb-222": 1.0} config_mock.rag_id_mapping = { "vs-aaa-111": "kb-part1", @@ -658,8 +667,8 @@ async def test_no_inline_rag_configured_skips_byok( ) -> None: """Test that BYOK inline RAG is skipped when rag.inline is empty.""" config_mock = mocker.Mock(spec=AppConfig) - config_mock.configuration.rag.inline = [] - config_mock.configuration.byok_rag = [] + config_mock.configuration.rag.retrieval.inline.sources = [] + config_mock.configuration.rag.byok.stores = [] mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() @@ -678,8 +687,8 @@ async def test_request_id_not_in_inline_config_skips_byok( ) -> None: """Test that a request vector_store_id not registered in rag.inline is filtered out.""" config_mock = mocker.Mock(spec=AppConfig) - config_mock.configuration.rag.inline = ["registered-id"] - config_mock.configuration.byok_rag = [] + config_mock.configuration.rag.retrieval.inline.sources = ["registered-id"] + config_mock.configuration.rag.byok.stores = [] mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() @@ -694,7 +703,7 @@ async def test_request_id_not_in_inline_config_skips_byok( class TestFetchSolrRag: - """Tests for _fetch_solr_rag async function.""" + """Tests for _fetch_okp_rag async function.""" @pytest.mark.asyncio async def test_solr_disabled(self, mocker: MockerFixture) -> None: @@ -704,7 +713,7 @@ async def test_solr_disabled(self, mocker: MockerFixture) -> None: mocker.patch("utils.vector_search.configuration", config_mock) client_mock = mocker.AsyncMock() - rag_chunks, referenced_docs = await _fetch_solr_rag(client_mock, "test query") + rag_chunks, referenced_docs = await _fetch_okp_rag(client_mock, "test query") assert rag_chunks == [] assert referenced_docs == [] @@ -718,6 +727,7 @@ async def test_solr_enabled_success(self, mocker: MockerFixture) -> None: config_mock.inline_solr_enabled = True config_mock.okp.offline = True config_mock.okp.rhokp_url = "https://okp.test" + config_mock.rag.okp.max_chunks = constants.DEFAULT_OKP_RAG_MAX_CHUNKS mocker.patch("utils.vector_search.configuration", config_mock) # Mock chunk @@ -735,7 +745,7 @@ async def test_solr_enabled_success(self, mocker: MockerFixture) -> None: client_mock = mocker.AsyncMock() client_mock.vector_io.query.return_value = query_response - rag_chunks, _referenced_docs = await _fetch_solr_rag(client_mock, "test query") + rag_chunks, _referenced_docs = await _fetch_okp_rag(client_mock, "test query") assert len(rag_chunks) > 0 assert rag_chunks[0].content == "Solr content" @@ -750,6 +760,7 @@ async def test_solr_enabled_passes_request_mode_to_vector_io( config_mock.inline_solr_enabled = True config_mock.okp.offline = True config_mock.okp.rhokp_url = "https://okp.test" + config_mock.rag.okp.max_chunks = constants.DEFAULT_OKP_RAG_MAX_CHUNKS mocker.patch("utils.vector_search.configuration", config_mock) chunk_mock = mocker.Mock() @@ -764,7 +775,7 @@ async def test_solr_enabled_passes_request_mode_to_vector_io( client_mock = mocker.AsyncMock() client_mock.vector_io.query.return_value = query_response - await _fetch_solr_rag( + await _fetch_okp_rag( client_mock, "test query", SolrVectorSearchRequest(mode="semantic", filters={"fq": ["x:y"]}), @@ -783,8 +794,12 @@ class TestBuildRagContext: async def test_both_sources_disabled(self, mocker: MockerFixture) -> None: """Test when both BYOK inline and Solr inline are not configured.""" config_mock = mocker.Mock(spec=AppConfig) - config_mock.configuration.rag.inline = [] - config_mock.configuration.byok_rag = [] + config_mock.configuration.rag.retrieval.inline.sources = [] + config_mock.configuration.rag.byok.stores = [] + config_mock.rag.retrieval.inline.max_chunks = ( + constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + ) + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.inline_solr_enabled = False mocker.patch("utils.vector_search.configuration", config_mock) @@ -803,8 +818,12 @@ async def test_byok_enabled_only(self, mocker: MockerFixture) -> None: byok_rag_mock = mocker.Mock() byok_rag_mock.rag_id = "rag_1" byok_rag_mock.vector_db_id = "vs_1" - config_mock.configuration.rag.inline = ["rag_1"] - config_mock.configuration.byok_rag = [byok_rag_mock] + config_mock.configuration.rag.retrieval.inline.sources = ["rag_1"] + config_mock.configuration.rag.byok.stores = [byok_rag_mock] + config_mock.rag.retrieval.inline.max_chunks = ( + constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + ) + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.inline_solr_enabled = False config_mock.score_multiplier_mapping = {"vs_1": 1.0} config_mock.rag_id_mapping = {"vs_1": "rag_1"} @@ -840,8 +859,12 @@ async def test_reranker_enabled_calls_cross_encoder( byok_rag_mock = mocker.Mock() byok_rag_mock.rag_id = "rag_1" byok_rag_mock.vector_db_id = "vs_1" - config_mock.configuration.rag.inline = ["rag_1"] - config_mock.configuration.byok_rag = [byok_rag_mock] + config_mock.configuration.rag.retrieval.inline.sources = ["rag_1"] + config_mock.configuration.rag.byok.stores = [byok_rag_mock] + config_mock.rag.retrieval.inline.max_chunks = ( + constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + ) + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.inline_solr_enabled = False config_mock.score_multiplier_mapping = {"vs_1": 1.0} config_mock.rag_id_mapping = {"vs_1": "rag_1"} @@ -891,8 +914,12 @@ async def test_reranker_disabled_skips_cross_encoder( byok_rag_mock = mocker.Mock() byok_rag_mock.rag_id = "rag_1" byok_rag_mock.vector_db_id = "vs_1" - config_mock.configuration.rag.inline = ["rag_1"] - config_mock.configuration.byok_rag = [byok_rag_mock] + config_mock.configuration.rag.retrieval.inline.sources = ["rag_1"] + config_mock.configuration.rag.byok.stores = [byok_rag_mock] + config_mock.rag.retrieval.inline.max_chunks = ( + constants.DEFAULT_INLINE_RAG_MAX_CHUNKS + ) + config_mock.rag.byok.max_chunks = constants.DEFAULT_BYOK_RAG_MAX_CHUNKS config_mock.inline_solr_enabled = False config_mock.score_multiplier_mapping = {"vs_1": 1.0} config_mock.rag_id_mapping = {"vs_1": "rag_1"}