From cb9135825e4a90e89ccdd8f5be84c6c17b936488 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Tue, 2 Jun 2026 15:39:03 +0530 Subject: [PATCH 1/9] fix(api): added the status code corresponding to the collection api and llm call api --- backend/app/crud/rag/open_ai.py | 63 +++++++++++++++++- backend/app/models/llm/response.py | 44 +++++++++++++ backend/app/services/llm/chain/types.py | 3 +- backend/app/services/llm/errors.py | 37 +++++++++++ backend/app/services/llm/jobs.py | 80 ++++++++++++++++++++++- backend/app/services/llm/providers/oai.py | 36 +++++++++- 6 files changed, 258 insertions(+), 5 deletions(-) create mode 100644 backend/app/services/llm/errors.py diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index 2ae36f4f1..f8b78e844 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -140,14 +140,73 @@ def update( f"[OpenAIVectorStoreCrud.update] File upload completed | {{'vector_store_id': '{vector_store_id}', 'completed_files': {req.file_counts.completed}, 'total_files': {req.file_counts.total}}}" ) if req.file_counts.completed != req.file_counts.total: - error_msg = f"OpenAI document processing error: {req.file_counts.completed}/{req.file_counts.total} files completed" + failure_detail = self._summarize_failed_files( + vector_store_id=vector_store_id, batch_id=req.id + ) + error_msg = ( + f"OpenAI document processing error: " + f"{req.file_counts.completed}/{req.file_counts.total} " + f"files completed" + ) + if failure_detail: + error_msg = f"{error_msg}. Failed files: {failure_detail}" logger.error( - f"[OpenAIVectorStoreCrud.update] Document processing error | {{'vector_store_id': '{vector_store_id}', 'completed_files': {req.file_counts.completed}, 'total_files': {req.file_counts.total}}}" + f"[OpenAIVectorStoreCrud.update] Document processing error | " + f"{{'vector_store_id': '{vector_store_id}', " + f"'completed_files': {req.file_counts.completed}, " + f"'total_files': {req.file_counts.total}, " + f"'failure_detail': '{failure_detail}'}}" ) raise InterruptedError(error_msg) yield from docs + def _summarize_failed_files( + self, + *, + vector_store_id: str, + batch_id: str, + max_files: int = 10, + max_message_chars: int = 600, + ) -> str: + """List failed files in a vector-store batch and join their errors. + + The OpenAI batch response only tells us how many files failed, not why. + This makes a follow-up call to pull per-file `last_error` so the upstream + cause (unsupported type, oversized, etc.) reaches the caller instead of + a bare ratio. Returns "" on lookup failure so the original count-based + message still surfaces. + """ + try: + page = self.client.vector_stores.file_batches.list_files( + batch_id=batch_id, + vector_store_id=vector_store_id, + filter="failed", + limit=max_files, + ) + except OpenAIError as err: + logger.warning( + f"[OpenAIVectorStoreCrud._summarize_failed_files] Could not list " + f"failed files | {{'vector_store_id': '{vector_store_id}', " + f"'batch_id': '{batch_id}', 'error': '{err}'}}" + ) + return "" + + parts: list[str] = [] + for f in page: + err = getattr(f, "last_error", None) + message = ( + getattr(err, "message", None) if err else None + ) or "Unknown error" + parts.append(f"{f.id} ({message})") + + summary = ", ".join(parts) + if getattr(page, "has_more", False): + summary = f"{summary}, ..." + if len(summary) > max_message_chars: + summary = summary[: max_message_chars - 3] + "..." + return summary + def delete(self, vector_store_id: str, retries: int = 3): if retries < 1: try: diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py index 439a6adfc..b192e9949 100644 --- a/backend/app/models/llm/response.py +++ b/backend/app/models/llm/response.py @@ -67,6 +67,50 @@ class LLMCallResponse(SQLModel): ) +class LLMCallErrorDetail(SQLModel): + """Structured failure context for an LLM call. + + Lives in the `data` field of an `APIResponse.failure_response` so callers + receive machine-readable error information alongside the human-readable + `error` string. The key field is `conversation_id`: without it, a client + that uses our returned `thread_id` to chain turns has its conversation + flow break on every failure. Populating it from the *request* (not the + upstream response, which doesn't exist on failure) keeps the chain + intact. + """ + + conversation_id: str | None = Field( + default=None, + description=( + "Conversation/thread ID this call belongs to, echoed back from " + "the request so the client can continue the conversation after " + "an error." + ), + ) + provider: str | None = Field( + default=None, description="LLM provider the call was routed to (if known)." + ) + provider_status_code: int | None = Field( + default=None, + description=( + "Upstream HTTP status from the provider (e.g. 429 for rate limit, " + "401 for auth). Null when the failure happens before the upstream " + "call or when the provider didn't surface a status." + ), + ) + error_type: str | None = Field( + default=None, + description=( + "Coarse category: 'invalid_request' | 'provider_error' | 'timeout' " + "| 'internal_error'. Use for branching client behaviour (retry vs. " + "surface to user vs. fail hard)." + ), + ) + message: str = Field( + ..., description="Human-readable error message (same as top-level `error`)." + ) + + class LLMChainResponse(SQLModel): """Response schema for an LLM chain execution.""" diff --git a/backend/app/services/llm/chain/types.py b/backend/app/services/llm/chain/types.py index 7fa0f39d8..f2b6f68b2 100644 --- a/backend/app/services/llm/chain/types.py +++ b/backend/app/services/llm/chain/types.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from uuid import UUID -from app.models.llm.response import LLMCallResponse, Usage +from app.models.llm.response import LLMCallErrorDetail, LLMCallResponse, Usage @dataclass @@ -12,6 +12,7 @@ class BlockResult: llm_call_id: UUID | None = None usage: Usage | None = None error: str | None = None + error_detail: LLMCallErrorDetail | None = None metadata: dict | None = None @property diff --git a/backend/app/services/llm/errors.py b/backend/app/services/llm/errors.py new file mode 100644 index 000000000..0735fb31c --- /dev/null +++ b/backend/app/services/llm/errors.py @@ -0,0 +1,37 @@ +"""Shared error-capture plumbing for LLM provider calls. + +Providers catch typed upstream exceptions (e.g. `openai.OpenAIError`) inside +their `execute` methods and only return a string back to the caller. The +caller has no way to recover the upstream HTTP status or error code from +that string. To avoid changing every provider's return signature, providers +populate this context-var at the catch site; `execute_llm_call` reads it +after the provider call and folds the metadata into `BlockResult.error_detail`. + +Providers that don't populate the var work fine — their `error_detail` will +just have the message + the request-side fields (`conversation_id`, `provider`). +""" + +from contextvars import ContextVar +from typing import TypedDict + + +class ProviderErrorMeta(TypedDict, total=False): + provider_status_code: int + error_type: str # rate_limit | authentication | invalid_request | timeout | provider_error + + +_provider_error_meta: ContextVar[ProviderErrorMeta | None] = ContextVar( + "kaapi_provider_error_meta", default=None +) + + +def set_provider_error_meta(meta: ProviderErrorMeta) -> None: + """Called by a provider's `execute` when it catches a typed upstream error.""" + _provider_error_meta.set(meta) + + +def consume_provider_error_meta() -> ProviderErrorMeta | None: + """Read and clear the meta set by a provider call. Returns None if nothing was set.""" + meta = _provider_error_meta.get() + _provider_error_meta.set(None) + return meta diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index d61dadba5..cfc8d59c6 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -54,6 +54,7 @@ from app.core.storage_utils import upload_audio_bytes_to_s3 from app.models.llm.response import ( AudioOutput, + LLMCallErrorDetail, LLMCallResponse, LLMResponse, TextOutput, @@ -1001,7 +1002,24 @@ def execute_llm_call( project_id=project_id, ) error_message = error or "Unknown error occurred" - return BlockResult(error=error_message, llm_call_id=llm_call_id) + # Pull provider-side meta (status code, classified error_type) set by + # the provider's catch block via set_provider_error_meta. Empty dict + # if the provider didn't populate it (Anthropic/Google/Sarvam/EL still + # to be wired up). + from app.services.llm.errors import consume_provider_error_meta + + provider_meta = consume_provider_error_meta() or {} + return BlockResult( + error=error_message, + llm_call_id=llm_call_id, + error_detail=LLMCallErrorDetail( + conversation_id=conversation_id, + provider=provider_name, + provider_status_code=provider_meta.get("provider_status_code"), + error_type=provider_meta.get("error_type", "provider_error"), + message=error_message, + ), + ) except (Timeout, SoftTimeLimitExceeded): raise @@ -1010,10 +1028,52 @@ def execute_llm_call( f"[execute_llm_call] Unexpected error: {e} | job_id={job_id}", exc_info=True, ) + # Unexpected errors may fire before conversation_id / provider_name + # are bound, so guard with locals() lookups; the detail still flows + # `message` and `error_type=internal_error` so the client can branch. return BlockResult( error="Unexpected error occurred", llm_call_id=llm_call_id, + error_detail=LLMCallErrorDetail( + conversation_id=locals().get("conversation_id"), + provider=locals().get("provider_name"), + error_type="internal_error", + message="Unexpected error occurred", + ), + ) + + +def _finalize_error_detail( + detail: LLMCallErrorDetail | None, + *, + request: LLMCallRequest, + fallback_message: str | None, + fallback_error_type: str, +) -> dict: + """Ensure the failure callback's `data` always carries useful context. + + `BlockResult.error_detail` may be None (e.g. timeout, very early + validation failures). When it is, build a minimal detail from request + fields so the client still gets back the `conversation_id` it needs to + continue its thread. Always returns a serialized dict suitable for + `APIResponse.failure_response(data=...)`. + """ + request_conversation_id = None + if request.query and request.query.conversation: + request_conversation_id = request.query.conversation.id + + if detail is None: + detail = LLMCallErrorDetail( + conversation_id=request_conversation_id, + error_type=fallback_error_type, + message=fallback_message or "Unknown error occurred", ) + elif detail.conversation_id is None and request_conversation_id is not None: + # `execute_llm_call` couldn't bind conversation_id locally (e.g. + # config-resolution failures fire before that point). Recover it + # from the request — it's the same value the client sent in. + detail = detail.model_copy(update={"conversation_id": request_conversation_id}) + return detail.model_dump() def execute_job( @@ -1136,6 +1196,12 @@ def execute_job( callback_response = APIResponse.failure_response( error=result.error or "Unknown error occurred", + data=_finalize_error_detail( + result.error_detail, + request=request, + fallback_message=result.error, + fallback_error_type="provider_error", + ), metadata=request.request_metadata, ) return handle_job_error( @@ -1152,6 +1218,12 @@ def execute_job( ) callback_response = APIResponse.failure_response( error="Task exceeded soft time limit", + data=_finalize_error_detail( + None, + request=request, + fallback_message="Task exceeded soft time limit", + fallback_error_type="timeout", + ), metadata=request.request_metadata, ) handle_job_error( @@ -1166,6 +1238,12 @@ def execute_job( except Exception as e: callback_response = APIResponse.failure_response( error="Unexpected error occurred", + data=_finalize_error_detail( + None, + request=request, + fallback_message="Unexpected error occurred", + fallback_error_type="internal_error", + ), metadata=request.request_metadata, ) logger.error( diff --git a/backend/app/services/llm/providers/oai.py b/backend/app/services/llm/providers/oai.py index a93bd76c7..a700865af 100644 --- a/backend/app/services/llm/providers/oai.py +++ b/backend/app/services/llm/providers/oai.py @@ -21,6 +21,26 @@ logger = logging.getLogger(__name__) +def _classify_openai_error(status_code: int | None, error_code: str | None) -> str: + """Map OpenAI status + code to a coarse error_type for `LLMCallErrorDetail`. + + Categories are the ones documented on the model so clients can branch + behaviour (retry on rate_limit/timeout, surface auth errors to the user, + etc.) without parsing the message string. + """ + if status_code == 429: + return "rate_limit" + if status_code in (401, 403): + return "authentication" + if status_code == 408 or (error_code and "timeout" in str(error_code).lower()): + return "timeout" + if status_code is not None and 400 <= status_code < 500: + return "invalid_request" + if status_code is not None and status_code >= 500: + return "provider_error" + return "provider_error" + + class OpenAIProvider(BaseProvider): def __init__(self, client: OpenAI): """Initialize OpenAI provider with client. @@ -138,17 +158,31 @@ def execute( except openai.OpenAIError as e: # imported here to avoid circular imports + from app.services.llm.errors import set_provider_error_meta from app.utils import handle_openai_error error_message = handle_openai_error(e) + status_code = getattr(e, "status_code", None) + error_code = getattr(e, "code", None) + set_provider_error_meta( + { + "provider_status_code": status_code, + "error_type": _classify_openai_error(status_code, error_code), + } + ) logger.warning( - f"[OpenAIProvider.execute] OpenAI API error: {error_message} | provider={completion_config.provider}", + f"[OpenAIProvider.execute] OpenAI API error: {error_message} | " + f"status_code={status_code}, code={error_code}, " + f"provider={completion_config.provider}", exc_info=True, ) return None, error_message except Exception as e: + from app.services.llm.errors import set_provider_error_meta + error_message = "Unexpected error occurred" + set_provider_error_meta({"error_type": "internal_error"}) logger.error( f"[OpenAIProvider.execute] {error_message}: {str(e)} | provider={completion_config.provider}", exc_info=True, From 7b41e2a787b1fab7f528a114c233ab3481313d9f Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:25:35 +0530 Subject: [PATCH 2/9] fix(api): handling erros and implement the pr comments --- backend/app/crud/model_config.py | 15 +-- backend/app/crud/rag/open_ai.py | 135 +++++++++++--------- backend/app/models/llm/response.py | 44 ------- backend/app/services/llm/chain/types.py | 3 +- backend/app/services/llm/errors.py | 37 ------ backend/app/services/llm/jobs.py | 80 +----------- backend/app/services/llm/providers/oai.py | 144 ++++++++++++++++------ 7 files changed, 186 insertions(+), 272 deletions(-) delete mode 100644 backend/app/services/llm/errors.py diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index c37da4ff2..639455afc 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -22,15 +22,6 @@ NATIVE_PROVIDER_SUFFIX = "-native" -def _normalize_provider(raw: str) -> str: - """Map NativeCompletionConfig providers (e.g. 'openai-native') to model_config provider names.""" - return ( - raw[: -len(NATIVE_PROVIDER_SUFFIX)] - if raw.endswith(NATIVE_PROVIDER_SUFFIX) - else raw - ) - - def list_active_model_configs( session: Session, provider: Provider | None = None, @@ -118,10 +109,12 @@ def validate_blob_model_or_raise(session: Session, blob: ConfigBlob) -> None: if raw_provider is None: return - if raw_provider.endswith("-native"): + if raw_provider.endswith(NATIVE_PROVIDER_SUFFIX): return - provider = _normalize_provider(raw_provider) + # raw_provider is guaranteed not to carry the "-native" suffix here + # because of the early return above. + provider = raw_provider model_name = (completion.params or {}).get("model") if not model_name: diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index f8b78e844..284a1834c 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -4,6 +4,7 @@ from io import BytesIO from typing import Iterable +import openai from openai import OpenAI, OpenAIError from pydantic import BaseModel @@ -130,83 +131,99 @@ def update( files.append(f_obj) logger.info( - f"[OpenAIVectorStoreCrud.update] Uploading files to vector store | {{'vector_store_id': '{vector_store_id}', 'file_count': {len(files)}}}" - ) - req = self.client.vector_stores.file_batches.upload_and_poll( - vector_store_id=vector_store_id, - files=files, + f"[OpenAIVectorStoreCrud.update] Uploading files to vector store | " + f"{{'vector_store_id': '{vector_store_id}', 'file_count': {len(files)}}}" ) + + try: + req = self.client.vector_stores.file_batches.upload_and_poll( + vector_store_id=vector_store_id, + files=files, + ) + except openai.RateLimitError as e: + raise InterruptedError(f"OpenAI rate limit exceeded: {e.message}") + except openai.AuthenticationError as e: + raise InterruptedError(f"OpenAI authentication failed: {e.message}") + except openai.PermissionDeniedError as e: + raise InterruptedError(f"OpenAI permission denied: {e.message}") + except openai.NotFoundError as e: + raise InterruptedError(f"OpenAI resource not found: {e.message}") + except openai.BadRequestError as e: + raise InterruptedError(f"OpenAI bad request: {e.message}") + except openai.UnprocessableEntityError as e: + raise InterruptedError(f"OpenAI unprocessable entity: {e.message}") + except openai.ConflictError as e: + raise InterruptedError(f"OpenAI conflict: {e.message}") + except openai.InternalServerError as e: + raise InterruptedError(f"OpenAI server error: {e.message}") + except openai.APITimeoutError as e: + raise InterruptedError(f"OpenAI request timed out: {e}") + except openai.APIConnectionError as e: + raise InterruptedError(f"OpenAI connection error: {e}") + except openai.APIStatusError as e: + raise InterruptedError( + f"OpenAI API status error ({e.status_code}): {e.message}" + ) + except openai.OpenAIError as e: + raise InterruptedError(f"OpenAI error: {e}") + logger.info( - f"[OpenAIVectorStoreCrud.update] File upload completed | {{'vector_store_id': '{vector_store_id}', 'completed_files': {req.file_counts.completed}, 'total_files': {req.file_counts.total}}}" + f"[OpenAIVectorStoreCrud.update] File upload completed | " + f"{{'vector_store_id': '{vector_store_id}', " + f"'completed_files': {req.file_counts.completed}, " + f"'total_files': {req.file_counts.total}}}" ) + if req.file_counts.completed != req.file_counts.total: - failure_detail = self._summarize_failed_files( - vector_store_id=vector_store_id, batch_id=req.id - ) + # Enrich the error string by listing each failed file's + # `last_error.message` from OpenAI. Fall back to the + # count-only message if the follow-up list_files call + # itself fails — we still want the primary failure signal + # to surface even if the secondary lookup is broken. + failed_summary = "" + try: + page = self.client.vector_stores.file_batches.list_files( + batch_id=req.id, + vector_store_id=vector_store_id, + filter="failed", + limit=10, + ) + parts = [] + for f in page: + f_err = getattr(f, "last_error", None) + f_msg = ( + getattr(f_err, "message", None) if f_err else None + ) or "Unknown error" + parts.append(f"{f.id} ({f_msg})") + failed_summary = ", ".join(parts) + if getattr(page, "has_more", False): + failed_summary = f"{failed_summary}, ..." + if len(failed_summary) > 600: + failed_summary = failed_summary[:597] + "..." + except OpenAIError as list_err: + logger.warning( + f"[OpenAIVectorStoreCrud.update] Could not list failed " + f"files | {{'vector_store_id': '{vector_store_id}', " + f"'batch_id': '{req.id}', 'error': '{list_err}'}}" + ) + error_msg = ( f"OpenAI document processing error: " f"{req.file_counts.completed}/{req.file_counts.total} " f"files completed" ) - if failure_detail: - error_msg = f"{error_msg}. Failed files: {failure_detail}" + if failed_summary: + error_msg = f"{error_msg}. Failed files: {failed_summary}" logger.error( f"[OpenAIVectorStoreCrud.update] Document processing error | " f"{{'vector_store_id': '{vector_store_id}', " f"'completed_files': {req.file_counts.completed}, " - f"'total_files': {req.file_counts.total}, " - f"'failure_detail': '{failure_detail}'}}" + f"'total_files': {req.file_counts.total}}}" ) raise InterruptedError(error_msg) yield from docs - def _summarize_failed_files( - self, - *, - vector_store_id: str, - batch_id: str, - max_files: int = 10, - max_message_chars: int = 600, - ) -> str: - """List failed files in a vector-store batch and join their errors. - - The OpenAI batch response only tells us how many files failed, not why. - This makes a follow-up call to pull per-file `last_error` so the upstream - cause (unsupported type, oversized, etc.) reaches the caller instead of - a bare ratio. Returns "" on lookup failure so the original count-based - message still surfaces. - """ - try: - page = self.client.vector_stores.file_batches.list_files( - batch_id=batch_id, - vector_store_id=vector_store_id, - filter="failed", - limit=max_files, - ) - except OpenAIError as err: - logger.warning( - f"[OpenAIVectorStoreCrud._summarize_failed_files] Could not list " - f"failed files | {{'vector_store_id': '{vector_store_id}', " - f"'batch_id': '{batch_id}', 'error': '{err}'}}" - ) - return "" - - parts: list[str] = [] - for f in page: - err = getattr(f, "last_error", None) - message = ( - getattr(err, "message", None) if err else None - ) or "Unknown error" - parts.append(f"{f.id} ({message})") - - summary = ", ".join(parts) - if getattr(page, "has_more", False): - summary = f"{summary}, ..." - if len(summary) > max_message_chars: - summary = summary[: max_message_chars - 3] + "..." - return summary - def delete(self, vector_store_id: str, retries: int = 3): if retries < 1: try: diff --git a/backend/app/models/llm/response.py b/backend/app/models/llm/response.py index b192e9949..439a6adfc 100644 --- a/backend/app/models/llm/response.py +++ b/backend/app/models/llm/response.py @@ -67,50 +67,6 @@ class LLMCallResponse(SQLModel): ) -class LLMCallErrorDetail(SQLModel): - """Structured failure context for an LLM call. - - Lives in the `data` field of an `APIResponse.failure_response` so callers - receive machine-readable error information alongside the human-readable - `error` string. The key field is `conversation_id`: without it, a client - that uses our returned `thread_id` to chain turns has its conversation - flow break on every failure. Populating it from the *request* (not the - upstream response, which doesn't exist on failure) keeps the chain - intact. - """ - - conversation_id: str | None = Field( - default=None, - description=( - "Conversation/thread ID this call belongs to, echoed back from " - "the request so the client can continue the conversation after " - "an error." - ), - ) - provider: str | None = Field( - default=None, description="LLM provider the call was routed to (if known)." - ) - provider_status_code: int | None = Field( - default=None, - description=( - "Upstream HTTP status from the provider (e.g. 429 for rate limit, " - "401 for auth). Null when the failure happens before the upstream " - "call or when the provider didn't surface a status." - ), - ) - error_type: str | None = Field( - default=None, - description=( - "Coarse category: 'invalid_request' | 'provider_error' | 'timeout' " - "| 'internal_error'. Use for branching client behaviour (retry vs. " - "surface to user vs. fail hard)." - ), - ) - message: str = Field( - ..., description="Human-readable error message (same as top-level `error`)." - ) - - class LLMChainResponse(SQLModel): """Response schema for an LLM chain execution.""" diff --git a/backend/app/services/llm/chain/types.py b/backend/app/services/llm/chain/types.py index f2b6f68b2..7fa0f39d8 100644 --- a/backend/app/services/llm/chain/types.py +++ b/backend/app/services/llm/chain/types.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from uuid import UUID -from app.models.llm.response import LLMCallErrorDetail, LLMCallResponse, Usage +from app.models.llm.response import LLMCallResponse, Usage @dataclass @@ -12,7 +12,6 @@ class BlockResult: llm_call_id: UUID | None = None usage: Usage | None = None error: str | None = None - error_detail: LLMCallErrorDetail | None = None metadata: dict | None = None @property diff --git a/backend/app/services/llm/errors.py b/backend/app/services/llm/errors.py deleted file mode 100644 index 0735fb31c..000000000 --- a/backend/app/services/llm/errors.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Shared error-capture plumbing for LLM provider calls. - -Providers catch typed upstream exceptions (e.g. `openai.OpenAIError`) inside -their `execute` methods and only return a string back to the caller. The -caller has no way to recover the upstream HTTP status or error code from -that string. To avoid changing every provider's return signature, providers -populate this context-var at the catch site; `execute_llm_call` reads it -after the provider call and folds the metadata into `BlockResult.error_detail`. - -Providers that don't populate the var work fine — their `error_detail` will -just have the message + the request-side fields (`conversation_id`, `provider`). -""" - -from contextvars import ContextVar -from typing import TypedDict - - -class ProviderErrorMeta(TypedDict, total=False): - provider_status_code: int - error_type: str # rate_limit | authentication | invalid_request | timeout | provider_error - - -_provider_error_meta: ContextVar[ProviderErrorMeta | None] = ContextVar( - "kaapi_provider_error_meta", default=None -) - - -def set_provider_error_meta(meta: ProviderErrorMeta) -> None: - """Called by a provider's `execute` when it catches a typed upstream error.""" - _provider_error_meta.set(meta) - - -def consume_provider_error_meta() -> ProviderErrorMeta | None: - """Read and clear the meta set by a provider call. Returns None if nothing was set.""" - meta = _provider_error_meta.get() - _provider_error_meta.set(None) - return meta diff --git a/backend/app/services/llm/jobs.py b/backend/app/services/llm/jobs.py index cfc8d59c6..d61dadba5 100644 --- a/backend/app/services/llm/jobs.py +++ b/backend/app/services/llm/jobs.py @@ -54,7 +54,6 @@ from app.core.storage_utils import upload_audio_bytes_to_s3 from app.models.llm.response import ( AudioOutput, - LLMCallErrorDetail, LLMCallResponse, LLMResponse, TextOutput, @@ -1002,24 +1001,7 @@ def execute_llm_call( project_id=project_id, ) error_message = error or "Unknown error occurred" - # Pull provider-side meta (status code, classified error_type) set by - # the provider's catch block via set_provider_error_meta. Empty dict - # if the provider didn't populate it (Anthropic/Google/Sarvam/EL still - # to be wired up). - from app.services.llm.errors import consume_provider_error_meta - - provider_meta = consume_provider_error_meta() or {} - return BlockResult( - error=error_message, - llm_call_id=llm_call_id, - error_detail=LLMCallErrorDetail( - conversation_id=conversation_id, - provider=provider_name, - provider_status_code=provider_meta.get("provider_status_code"), - error_type=provider_meta.get("error_type", "provider_error"), - message=error_message, - ), - ) + return BlockResult(error=error_message, llm_call_id=llm_call_id) except (Timeout, SoftTimeLimitExceeded): raise @@ -1028,52 +1010,10 @@ def execute_llm_call( f"[execute_llm_call] Unexpected error: {e} | job_id={job_id}", exc_info=True, ) - # Unexpected errors may fire before conversation_id / provider_name - # are bound, so guard with locals() lookups; the detail still flows - # `message` and `error_type=internal_error` so the client can branch. return BlockResult( error="Unexpected error occurred", llm_call_id=llm_call_id, - error_detail=LLMCallErrorDetail( - conversation_id=locals().get("conversation_id"), - provider=locals().get("provider_name"), - error_type="internal_error", - message="Unexpected error occurred", - ), - ) - - -def _finalize_error_detail( - detail: LLMCallErrorDetail | None, - *, - request: LLMCallRequest, - fallback_message: str | None, - fallback_error_type: str, -) -> dict: - """Ensure the failure callback's `data` always carries useful context. - - `BlockResult.error_detail` may be None (e.g. timeout, very early - validation failures). When it is, build a minimal detail from request - fields so the client still gets back the `conversation_id` it needs to - continue its thread. Always returns a serialized dict suitable for - `APIResponse.failure_response(data=...)`. - """ - request_conversation_id = None - if request.query and request.query.conversation: - request_conversation_id = request.query.conversation.id - - if detail is None: - detail = LLMCallErrorDetail( - conversation_id=request_conversation_id, - error_type=fallback_error_type, - message=fallback_message or "Unknown error occurred", ) - elif detail.conversation_id is None and request_conversation_id is not None: - # `execute_llm_call` couldn't bind conversation_id locally (e.g. - # config-resolution failures fire before that point). Recover it - # from the request — it's the same value the client sent in. - detail = detail.model_copy(update={"conversation_id": request_conversation_id}) - return detail.model_dump() def execute_job( @@ -1196,12 +1136,6 @@ def execute_job( callback_response = APIResponse.failure_response( error=result.error or "Unknown error occurred", - data=_finalize_error_detail( - result.error_detail, - request=request, - fallback_message=result.error, - fallback_error_type="provider_error", - ), metadata=request.request_metadata, ) return handle_job_error( @@ -1218,12 +1152,6 @@ def execute_job( ) callback_response = APIResponse.failure_response( error="Task exceeded soft time limit", - data=_finalize_error_detail( - None, - request=request, - fallback_message="Task exceeded soft time limit", - fallback_error_type="timeout", - ), metadata=request.request_metadata, ) handle_job_error( @@ -1238,12 +1166,6 @@ def execute_job( except Exception as e: callback_response = APIResponse.failure_response( error="Unexpected error occurred", - data=_finalize_error_detail( - None, - request=request, - fallback_message="Unexpected error occurred", - fallback_error_type="internal_error", - ), metadata=request.request_metadata, ) logger.error( diff --git a/backend/app/services/llm/providers/oai.py b/backend/app/services/llm/providers/oai.py index a700865af..1d4df7a33 100644 --- a/backend/app/services/llm/providers/oai.py +++ b/backend/app/services/llm/providers/oai.py @@ -21,26 +21,6 @@ logger = logging.getLogger(__name__) -def _classify_openai_error(status_code: int | None, error_code: str | None) -> str: - """Map OpenAI status + code to a coarse error_type for `LLMCallErrorDetail`. - - Categories are the ones documented on the model so clients can branch - behaviour (retry on rate_limit/timeout, surface auth errors to the user, - etc.) without parsing the message string. - """ - if status_code == 429: - return "rate_limit" - if status_code in (401, 403): - return "authentication" - if status_code == 408 or (error_code and "timeout" in str(error_code).lower()): - return "timeout" - if status_code is not None and 400 <= status_code < 500: - return "invalid_request" - if status_code is not None and status_code >= 500: - return "provider_error" - return "provider_error" - - class OpenAIProvider(BaseProvider): def __init__(self, client: OpenAI): """Initialize OpenAI provider with client. @@ -156,35 +136,119 @@ def execute( error_message = f"Invalid or unexpected parameter in Config: {str(e)}" return None, error_message - except openai.OpenAIError as e: - # imported here to avoid circular imports - from app.services.llm.errors import set_provider_error_meta - from app.utils import handle_openai_error - - error_message = handle_openai_error(e) - status_code = getattr(e, "status_code", None) - error_code = getattr(e, "code", None) - set_provider_error_meta( - { - "provider_status_code": status_code, - "error_type": _classify_openai_error(status_code, error_code), - } + except openai.RateLimitError as e: + error_message = f"OpenAI rate limit exceeded: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, ) + return None, error_message + + except openai.AuthenticationError as e: + error_message = f"OpenAI authentication failed: {e.message}" logger.warning( - f"[OpenAIProvider.execute] OpenAI API error: {error_message} | " - f"status_code={status_code}, code={error_code}, " + f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", exc_info=True, ) return None, error_message - except Exception as e: - from app.services.llm.errors import set_provider_error_meta + except openai.PermissionDeniedError as e: + error_message = f"OpenAI permission denied: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.NotFoundError as e: + error_message = f"OpenAI resource not found: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.BadRequestError as e: + error_message = f"OpenAI bad request: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.UnprocessableEntityError as e: + error_message = f"OpenAI unprocessable entity: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message - error_message = "Unexpected error occurred" - set_provider_error_meta({"error_type": "internal_error"}) + except openai.ConflictError as e: + error_message = f"OpenAI conflict: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.InternalServerError as e: + error_message = f"OpenAI server error: {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.APITimeoutError as e: + error_message = f"OpenAI request timed out: {e}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.APIConnectionError as e: + error_message = f"OpenAI connection error: {e}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.APIStatusError as e: + error_message = f"OpenAI API status error ({e.status_code}): {e.message}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except openai.OpenAIError as e: + error_message = f"OpenAI error: {e}" + logger.warning( + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", + exc_info=True, + ) + return None, error_message + + except Exception as e: + error_message = f"Unexpected error: {e}" logger.error( - f"[OpenAIProvider.execute] {error_message}: {str(e)} | provider={completion_config.provider}", + f"[OpenAIProvider.execute] {error_message} | " + f"provider={completion_config.provider}", exc_info=True, ) return None, error_message From a6d4b5fbe615a716e5791190a41a716b0c4b36a6 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:36:29 +0530 Subject: [PATCH 3/9] fix(api): revert the unwanted changes --- backend/app/crud/model_config.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/backend/app/crud/model_config.py b/backend/app/crud/model_config.py index 639455afc..9dd96850d 100644 --- a/backend/app/crud/model_config.py +++ b/backend/app/crud/model_config.py @@ -22,6 +22,15 @@ NATIVE_PROVIDER_SUFFIX = "-native" +def _normalize_provider(raw: str) -> str: + """Map NativeCompletionConfig providers (e.g. 'openai-native') to model_config provider names.""" + return ( + raw[: -len(NATIVE_PROVIDER_SUFFIX)] + if raw.endswith(NATIVE_PROVIDER_SUFFIX) + else raw + ) + + def list_active_model_configs( session: Session, provider: Provider | None = None, @@ -112,9 +121,7 @@ def validate_blob_model_or_raise(session: Session, blob: ConfigBlob) -> None: if raw_provider.endswith(NATIVE_PROVIDER_SUFFIX): return - # raw_provider is guaranteed not to carry the "-native" suffix here - # because of the early return above. - provider = raw_provider + provider = _normalize_provider(raw_provider) model_name = (completion.params or {}).get("model") if not model_name: From 996b7213dac5e5b028b41c110ac79245180154ee Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Wed, 3 Jun 2026 13:38:28 +0530 Subject: [PATCH 4/9] fix(api): remove the unwanted js comments --- backend/app/crud/rag/open_ai.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index 284a1834c..b9a6dac9a 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -178,8 +178,7 @@ def update( # Enrich the error string by listing each failed file's # `last_error.message` from OpenAI. Fall back to the # count-only message if the follow-up list_files call - # itself fails — we still want the primary failure signal - # to surface even if the secondary lookup is broken. + # itself fails. failed_summary = "" try: page = self.client.vector_stores.file_batches.list_files( From 326bb5b1d163528342fb858b2d9a0de6575e2592 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Wed, 3 Jun 2026 14:27:31 +0530 Subject: [PATCH 5/9] fix(api): update the test cases --- .../services/llm/providers/test_openai.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/backend/app/tests/services/llm/providers/test_openai.py b/backend/app/tests/services/llm/providers/test_openai.py index f86395f58..abc36cc04 100644 --- a/backend/app/tests/services/llm/providers/test_openai.py +++ b/backend/app/tests/services/llm/providers/test_openai.py @@ -3,7 +3,7 @@ """ import pytest -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import openai @@ -170,36 +170,35 @@ def test_execute_with_type_error( def test_execute_with_openai_api_error( self, provider, mock_client, completion_config, query_params ): - """Test handling of OpenAI API errors.""" + """Generic `openai.APIError` (no specific subclass) falls through to the + `except openai.OpenAIError` block, which prefixes with "OpenAI error:" + and uses the raw exception string. No wrapper helper is invoked.""" mock_client.responses.create.side_effect = openai.APIError( message="API request failed", request=MagicMock(), body=None, ) - with patch("app.utils.handle_openai_error") as mock_handler: - mock_handler.return_value = "API request failed: rate limit exceeded" - - result, error = provider.execute( - completion_config, query_params, "Test query" - ) + result, error = provider.execute(completion_config, query_params, "Test query") - assert result is None - assert error is not None - assert "API request failed" in error - mock_handler.assert_called_once() + assert result is None + assert error is not None + assert error.startswith("OpenAI error:") + assert "API request failed" in error def test_execute_with_generic_exception( self, provider, mock_client, completion_config, query_params ): - """Test handling of unexpected exceptions.""" + """Non-OpenAI exceptions land in the `except Exception` catch-all, + prefixed with "Unexpected error:" and carrying the exception text.""" mock_client.responses.create.side_effect = Exception("Timeout occurred") result, error = provider.execute(completion_config, query_params, "Test query") assert result is None assert error is not None - assert "Unexpected error occurred" in error + assert error.startswith("Unexpected error:") + assert "Timeout occurred" in error def test_execute_with_conversation_config_without_id_or_auto_create( self, provider, mock_client, completion_config, query_params From c65d61033afe17bf70591c04da42ed25f3852d73 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Wed, 3 Jun 2026 17:28:01 +0530 Subject: [PATCH 6/9] fix(api): fix the codecov report erro --- backend/app/tests/crud/rag/__init__.py | 0 backend/app/tests/crud/rag/test_open_ai.py | 359 ++++++++++++++++++ .../services/llm/providers/test_openai.py | 150 ++++++++ 3 files changed, 509 insertions(+) create mode 100644 backend/app/tests/crud/rag/__init__.py create mode 100644 backend/app/tests/crud/rag/test_open_ai.py diff --git a/backend/app/tests/crud/rag/__init__.py b/backend/app/tests/crud/rag/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/app/tests/crud/rag/test_open_ai.py b/backend/app/tests/crud/rag/test_open_ai.py new file mode 100644 index 000000000..580c1b4ff --- /dev/null +++ b/backend/app/tests/crud/rag/test_open_ai.py @@ -0,0 +1,359 @@ +""" +Tests for OpenAIVectorStoreCrud.update — focused on the failure-path error +enrichment added in this PR (per-file reasons + category-prefixed messages +for each specific OpenAI exception type). +""" + +from unittest.mock import MagicMock + +import openai +import pytest + +from app.crud.rag.open_ai import OpenAIVectorStoreCrud + + +@pytest.fixture +def mock_client(): + return MagicMock() + + +@pytest.fixture +def crud(mock_client): + return OpenAIVectorStoreCrud(client=mock_client) + + +@pytest.fixture +def mock_storage(): + storage = MagicMock() + storage.get.return_value = b"file content" + return storage + + +@pytest.fixture +def docs_batch(): + """One batch with two documents. update() loops `for docs in documents`.""" + doc1 = MagicMock(object_store_url="s3://bucket/file1.pdf", fname="file1.pdf") + doc2 = MagicMock(object_store_url="s3://bucket/file2.pdf", fname="file2.pdf") + return [[doc1, doc2]] + + +def _batch_result(*, completed: int, total: int, batch_id: str = "batch_abc"): + """Mock the return of vector_stores.file_batches.upload_and_poll.""" + counts = MagicMock(completed=completed, total=total) + return MagicMock(id=batch_id, file_counts=counts) + + +def _failed_file_page(files, has_more: bool = False): + """Mock the iterable returned by list_files(filter='failed').""" + page = MagicMock() + page.__iter__ = MagicMock(return_value=iter(files)) + page.has_more = has_more + return page + + +def _failed_file(file_id: str, error_message: str | None): + """Build one failed-file row. last_error=None means 'no reason recorded'.""" + f = MagicMock() + f.id = file_id + f.last_error = MagicMock(message=error_message) if error_message else None + return f + + +class TestOpenAIVectorStoreCrudUpdateSuccess: + def test_yields_docs_when_all_files_complete( + self, crud, mock_client, mock_storage, docs_batch + ): + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=2, total=2) + ) + + yielded = list(crud.update("vs_1", mock_storage, docs_batch)) + + # update yields from the inner docs list on success + assert len(yielded) == 2 + # list_files should not have been called on the happy path + mock_client.vector_stores.file_batches.list_files.assert_not_called() + + +class TestOpenAIVectorStoreCrudUpdatePartialFailure: + """Partial completion -> InterruptedError with enriched per-file reasons.""" + + def test_includes_failed_file_ids_and_messages( + self, crud, mock_client, mock_storage, docs_batch + ): + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=1, total=3) + ) + mock_client.vector_stores.file_batches.list_files.return_value = ( + _failed_file_page( + [ + _failed_file("file-abc", "Unsupported file type"), + _failed_file("file-xyz", "File too large"), + ] + ) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + + msg = str(exc_info.value) + assert "OpenAI document processing error" in msg + assert "1/3 files completed" in msg + assert "Failed files:" in msg + assert "file-abc (Unsupported file type)" in msg + assert "file-xyz (File too large)" in msg + + def test_reports_unknown_error_when_last_error_missing( + self, crud, mock_client, mock_storage, docs_batch + ): + """A failed file with no `last_error` shouldn't drop out of the + summary — it gets 'Unknown error' so the user sees that something + was wrong with that file even if OpenAI didn't tell us what.""" + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=0, total=1) + ) + mock_client.vector_stores.file_batches.list_files.return_value = ( + _failed_file_page([_failed_file("file-noerr", None)]) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + + assert "file-noerr (Unknown error)" in str(exc_info.value) + + def test_appends_ellipsis_when_has_more_results( + self, crud, mock_client, mock_storage, docs_batch + ): + """When OpenAI returns has_more=True we cap at the first 10 entries + and signal truncation with a trailing ', ...' so callers know more + failures exist beyond what's shown.""" + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=0, total=100) + ) + mock_client.vector_stores.file_batches.list_files.return_value = ( + _failed_file_page( + [_failed_file(f"file-{i}", "err") for i in range(10)], + has_more=True, + ) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + + assert str(exc_info.value).endswith(", ...") + + def test_truncates_summary_when_over_600_chars( + self, crud, mock_client, mock_storage, docs_batch + ): + """Long error blobs are truncated at 597 chars + '...' so callback + payloads stay bounded regardless of what OpenAI returns.""" + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=0, total=10) + ) + # Inflate per-file strings to push the joined summary past 600 chars. + mock_client.vector_stores.file_batches.list_files.return_value = ( + _failed_file_page( + [ + _failed_file(f"file-{'x' * 80}-{i}", "long error " * 10) + for i in range(10) + ] + ) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + + msg = str(exc_info.value) + marker = "Failed files: " + summary = msg[msg.index(marker) + len(marker) :] + assert len(summary) == 600 + assert summary.endswith("...") + + def test_falls_back_to_count_only_when_list_files_errors( + self, crud, mock_client, mock_storage, docs_batch + ): + """If the follow-up list_files lookup itself raises, the update + still raises InterruptedError but with the original count-only + message — no 'Failed files:' suffix, no transient list_files crash + masking the real upload problem.""" + mock_client.vector_stores.file_batches.upload_and_poll.return_value = ( + _batch_result(completed=0, total=3) + ) + mock_client.vector_stores.file_batches.list_files.side_effect = ( + openai.OpenAIError("list failed") + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + + msg = str(exc_info.value) + assert "0/3 files completed" in msg + assert "Failed files:" not in msg + + +class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: + """upload_and_poll raising each specific openai exception type maps to + InterruptedError with a category-prefixed message.""" + + @pytest.mark.parametrize( + "exception_factory, expected_message", + [ + ( + lambda: openai.RateLimitError( + message="quota exceeded", + response=MagicMock( + status_code=429, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI rate limit exceeded: quota exceeded", + ), + ( + lambda: openai.AuthenticationError( + message="bad api key", + response=MagicMock( + status_code=401, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI authentication failed: bad api key", + ), + ( + lambda: openai.PermissionDeniedError( + message="no access", + response=MagicMock( + status_code=403, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI permission denied: no access", + ), + ( + lambda: openai.NotFoundError( + message="missing resource", + response=MagicMock( + status_code=404, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI resource not found: missing resource", + ), + ( + lambda: openai.BadRequestError( + message="invalid file", + response=MagicMock( + status_code=400, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI bad request: invalid file", + ), + ( + lambda: openai.UnprocessableEntityError( + message="cannot process", + response=MagicMock( + status_code=422, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI unprocessable entity: cannot process", + ), + ( + lambda: openai.ConflictError( + message="conflict", + response=MagicMock( + status_code=409, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI conflict: conflict", + ), + ( + lambda: openai.InternalServerError( + message="upstream boom", + response=MagicMock( + status_code=500, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI server error: upstream boom", + ), + ], + ) + def test_specific_openai_exception_maps_to_category_prefix( + self, + crud, + mock_client, + mock_storage, + docs_batch, + exception_factory, + expected_message, + ): + mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( + exception_factory() + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + assert str(exc_info.value) == expected_message + + def test_api_timeout_error(self, crud, mock_client, mock_storage, docs_batch): + """APITimeoutError uses str(e) (not .message) for the suffix.""" + mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( + openai.APITimeoutError(request=MagicMock()) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + assert str(exc_info.value).startswith("OpenAI request timed out:") + + def test_api_connection_error(self, crud, mock_client, mock_storage, docs_batch): + mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( + openai.APIConnectionError(message="connection refused", request=MagicMock()) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + msg = str(exc_info.value) + assert msg.startswith("OpenAI connection error:") + assert "connection refused" in msg + + def test_api_status_error_includes_status_code( + self, crud, mock_client, mock_storage, docs_batch + ): + """A bare APIStatusError falls through to the APIStatusError handler + (after all the named subclasses) and includes the numeric status.""" + mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( + openai.APIStatusError( + "teapot", + response=MagicMock(status_code=418, request=MagicMock(), headers={}), + body=None, + ) + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + assert str(exc_info.value) == "OpenAI API status error (418): teapot" + + def test_generic_openai_error_falls_through( + self, crud, mock_client, mock_storage, docs_batch + ): + """A non-APIError OpenAIError still raises InterruptedError via the + bottom-most `except openai.OpenAIError` handler with a plain prefix.""" + mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( + openai.OpenAIError("something else") + ) + + with pytest.raises(InterruptedError) as exc_info: + list(crud.update("vs_1", mock_storage, docs_batch)) + msg = str(exc_info.value) + assert msg.startswith("OpenAI error:") + assert "something else" in msg + + +class TestOpenAIVectorStoreCrudInit: + """The base OpenAICrud init rejects a None client; subclasses inherit it.""" + + def test_none_client_raises(self): + with pytest.raises(ValueError): + OpenAIVectorStoreCrud(client=None) diff --git a/backend/app/tests/services/llm/providers/test_openai.py b/backend/app/tests/services/llm/providers/test_openai.py index abc36cc04..ebd5b4529 100644 --- a/backend/app/tests/services/llm/providers/test_openai.py +++ b/backend/app/tests/services/llm/providers/test_openai.py @@ -200,6 +200,156 @@ def test_execute_with_generic_exception( assert error.startswith("Unexpected error:") assert "Timeout occurred" in error + @pytest.mark.parametrize( + "exception_factory, expected_error", + [ + ( + lambda: openai.RateLimitError( + message="quota exceeded", + response=MagicMock( + status_code=429, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI rate limit exceeded: quota exceeded", + ), + ( + lambda: openai.AuthenticationError( + message="bad api key", + response=MagicMock( + status_code=401, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI authentication failed: bad api key", + ), + ( + lambda: openai.PermissionDeniedError( + message="no access to model", + response=MagicMock( + status_code=403, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI permission denied: no access to model", + ), + ( + lambda: openai.NotFoundError( + message="model not found", + response=MagicMock( + status_code=404, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI resource not found: model not found", + ), + ( + lambda: openai.BadRequestError( + message="invalid model param", + response=MagicMock( + status_code=400, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI bad request: invalid model param", + ), + ( + lambda: openai.UnprocessableEntityError( + message="cannot process", + response=MagicMock( + status_code=422, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI unprocessable entity: cannot process", + ), + ( + lambda: openai.ConflictError( + message="resource conflict", + response=MagicMock( + status_code=409, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI conflict: resource conflict", + ), + ( + lambda: openai.InternalServerError( + message="upstream boom", + response=MagicMock( + status_code=500, request=MagicMock(), headers={} + ), + body=None, + ), + "OpenAI server error: upstream boom", + ), + ], + ) + def test_execute_specific_openai_exceptions_use_category_prefix( + self, + provider, + mock_client, + completion_config, + query_params, + exception_factory, + expected_error, + ): + """Each specific OpenAI exception type maps to a distinct category- + prefixed error message instead of a single generic "OpenAI error" line. + """ + mock_client.responses.create.side_effect = exception_factory() + + result, error = provider.execute(completion_config, query_params, "Test query") + + assert result is None + assert error == expected_error + + def test_execute_with_api_timeout_error( + self, provider, mock_client, completion_config, query_params + ): + """APITimeoutError doesn't expose .message — handler interpolates str(e).""" + mock_client.responses.create.side_effect = openai.APITimeoutError( + request=MagicMock() + ) + + result, error = provider.execute(completion_config, query_params, "Test query") + + assert result is None + assert error is not None + assert error.startswith("OpenAI request timed out:") + + def test_execute_with_api_connection_error( + self, provider, mock_client, completion_config, query_params + ): + """APIConnectionError handler also uses str(e) rather than .message.""" + mock_client.responses.create.side_effect = openai.APIConnectionError( + message="connection refused", request=MagicMock() + ) + + result, error = provider.execute(completion_config, query_params, "Test query") + + assert result is None + assert error is not None + assert error.startswith("OpenAI connection error:") + assert "connection refused" in error + + def test_execute_with_api_status_error_includes_status_code( + self, provider, mock_client, completion_config, query_params + ): + """A bare APIStatusError (not one of the named subclasses) hits the + fall-through APIStatusError handler and includes the numeric status.""" + mock_client.responses.create.side_effect = openai.APIStatusError( + "teapot", + response=MagicMock(status_code=418, request=MagicMock(), headers={}), + body=None, + ) + + result, error = provider.execute(completion_config, query_params, "Test query") + + assert result is None + assert error is not None + assert error == "OpenAI API status error (418): teapot" + def test_execute_with_conversation_config_without_id_or_auto_create( self, provider, mock_client, completion_config, query_params ): From 6ec480ad5a1f22708beca1a89b07eebf5a155289 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Fri, 5 Jun 2026 09:34:51 +0530 Subject: [PATCH 7/9] fix(*): address the reviewer comments --- backend/app/crud/rag/open_ai.py | 51 +++++++++++----- backend/app/services/llm/providers/oai.py | 73 ++++++++++------------- 2 files changed, 67 insertions(+), 57 deletions(-) diff --git a/backend/app/crud/rag/open_ai.py b/backend/app/crud/rag/open_ai.py index b9a6dac9a..87dfc3aaa 100644 --- a/backend/app/crud/rag/open_ai.py +++ b/backend/app/crud/rag/open_ai.py @@ -141,31 +141,50 @@ def update( files=files, ) except openai.RateLimitError as e: - raise InterruptedError(f"OpenAI rate limit exceeded: {e.message}") + raise InterruptedError( + f"OpenAI rate limit exceeded (code: {e.status_code}): " + f"{e.message}. Try again in 1 minute. If issue persists, " + f"contact Kaapi." + ) except openai.AuthenticationError as e: - raise InterruptedError(f"OpenAI authentication failed: {e.message}") - except openai.PermissionDeniedError as e: - raise InterruptedError(f"OpenAI permission denied: {e.message}") + raise InterruptedError( + f"OpenAI authentication failed (code: {e.status_code}): " + f"{e.message}. Check your OpenAI API key is valid and " + f"has not expired." + ) except openai.NotFoundError as e: - raise InterruptedError(f"OpenAI resource not found: {e.message}") + raise InterruptedError( + f"OpenAI resource not found (code: {e.status_code}): " + f"{e.message}. Verify the vector store ID exists and " + f"hasn't been deleted." + ) except openai.BadRequestError as e: - raise InterruptedError(f"OpenAI bad request: {e.message}") + raise InterruptedError( + f"OpenAI bad request (code: {e.status_code}): {e.message}. " + f"Review the file payload and metadata; the request may " + f"be malformed." + ) except openai.UnprocessableEntityError as e: - raise InterruptedError(f"OpenAI unprocessable entity: {e.message}") - except openai.ConflictError as e: - raise InterruptedError(f"OpenAI conflict: {e.message}") + raise InterruptedError( + f"OpenAI unprocessable entity (code: {e.status_code}): " + f"{e.message}. The uploaded files may be in an " + f"unsupported format or exceed size limits." + ) except openai.InternalServerError as e: - raise InterruptedError(f"OpenAI server error: {e.message}") + raise InterruptedError( + f"OpenAI server error (code: {e.status_code}): {e.message}. " + f"This is usually transient — retry in a few seconds. If " + f"issue persists, contact Kaapi." + ) except openai.APITimeoutError as e: - raise InterruptedError(f"OpenAI request timed out: {e}") - except openai.APIConnectionError as e: - raise InterruptedError(f"OpenAI connection error: {e}") - except openai.APIStatusError as e: raise InterruptedError( - f"OpenAI API status error ({e.status_code}): {e.message}" + f"OpenAI request timed out: {e}. Retry the upload, or " + f"split the batch into smaller chunks." ) except openai.OpenAIError as e: - raise InterruptedError(f"OpenAI error: {e}") + raise InterruptedError( + f"OpenAI error: {e}. If this persists, contact Kaapi." + ) logger.info( f"[OpenAIVectorStoreCrud.update] File upload completed | " diff --git a/backend/app/services/llm/providers/oai.py b/backend/app/services/llm/providers/oai.py index d0d1a4b81..474a05eb6 100644 --- a/backend/app/services/llm/providers/oai.py +++ b/backend/app/services/llm/providers/oai.py @@ -139,7 +139,11 @@ def execute( return None, error_message except openai.RateLimitError as e: - error_message = f"OpenAI rate limit exceeded: {e.message}" + error_message = ( + f"OpenAI rate limit exceeded (code: {e.status_code}): " + f"{e.message}. Try again in 1 minute. If issue persists, " + f"contact Kaapi." + ) logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -148,16 +152,11 @@ def execute( return None, error_message except openai.AuthenticationError as e: - error_message = f"OpenAI authentication failed: {e.message}" - logger.warning( - f"[OpenAIProvider.execute] {error_message} | " - f"provider={completion_config.provider}", - exc_info=True, + error_message = ( + f"OpenAI authentication failed (code: {e.status_code}): " + f"{e.message}. Check your OpenAI API key is valid and has " + f"not expired." ) - return None, error_message - - except openai.PermissionDeniedError as e: - error_message = f"OpenAI permission denied: {e.message}" logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -166,7 +165,11 @@ def execute( return None, error_message except openai.NotFoundError as e: - error_message = f"OpenAI resource not found: {e.message}" + error_message = ( + f"OpenAI resource not found (code: {e.status_code}): " + f"{e.message}. Verify the model name and any referenced IDs " + f"in your config are correct." + ) logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -175,7 +178,11 @@ def execute( return None, error_message except openai.BadRequestError as e: - error_message = f"OpenAI bad request: {e.message}" + error_message = ( + f"OpenAI bad request (code: {e.status_code}): {e.message}. " + f"Review your config parameters; the request shape may be " + f"invalid." + ) logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -184,16 +191,11 @@ def execute( return None, error_message except openai.UnprocessableEntityError as e: - error_message = f"OpenAI unprocessable entity: {e.message}" - logger.warning( - f"[OpenAIProvider.execute] {error_message} | " - f"provider={completion_config.provider}", - exc_info=True, + error_message = ( + f"OpenAI unprocessable entity (code: {e.status_code}): " + f"{e.message}. The model rejected the request payload; " + f"check input format and limits." ) - return None, error_message - - except openai.ConflictError as e: - error_message = f"OpenAI conflict: {e.message}" logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -202,16 +204,11 @@ def execute( return None, error_message except openai.InternalServerError as e: - error_message = f"OpenAI server error: {e.message}" - logger.warning( - f"[OpenAIProvider.execute] {error_message} | " - f"provider={completion_config.provider}", - exc_info=True, + error_message = ( + f"OpenAI server error (code: {e.status_code}): {e.message}. " + f"This is usually transient — retry in a few seconds. If " + f"issue persists, contact Kaapi." ) - return None, error_message - - except openai.APITimeoutError as e: - error_message = f"OpenAI request timed out: {e}" logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -219,17 +216,11 @@ def execute( ) return None, error_message - except openai.APIConnectionError as e: - error_message = f"OpenAI connection error: {e}" - logger.warning( - f"[OpenAIProvider.execute] {error_message} | " - f"provider={completion_config.provider}", - exc_info=True, + except openai.APITimeoutError as e: + error_message = ( + f"OpenAI request timed out: {e}. Retry the request, or try " + f"with a smaller payload." ) - return None, error_message - - except openai.APIStatusError as e: - error_message = f"OpenAI API status error ({e.status_code}): {e.message}" logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", @@ -238,7 +229,7 @@ def execute( return None, error_message except openai.OpenAIError as e: - error_message = f"OpenAI error: {e}" + error_message = f"OpenAI error: {e}. If this persists, contact Kaapi." logger.warning( f"[OpenAIProvider.execute] {error_message} | " f"provider={completion_config.provider}", From 05f9da63da3513d608c1399bb1f0069e61cccbc9 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Fri, 5 Jun 2026 10:07:36 +0530 Subject: [PATCH 8/9] fix(api): error handling --- backend/app/tests/crud/rag/test_open_ai.py | 161 +++++++++++------- .../services/llm/providers/test_openai.py | 143 ++++++++++------ 2 files changed, 182 insertions(+), 122 deletions(-) diff --git a/backend/app/tests/crud/rag/test_open_ai.py b/backend/app/tests/crud/rag/test_open_ai.py index 580c1b4ff..75e00713e 100644 --- a/backend/app/tests/crud/rag/test_open_ai.py +++ b/backend/app/tests/crud/rag/test_open_ai.py @@ -192,11 +192,17 @@ def test_falls_back_to_count_only_when_list_files_errors( class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: - """upload_and_poll raising each specific openai exception type maps to - InterruptedError with a category-prefixed message.""" + """`upload_and_poll` raising each specific OpenAI exception type maps to + `InterruptedError` with a category-prefixed message that includes the + upstream status code and a remediation hint. + + Assertions are deliberately structural (prefix + code + original message) + rather than exact-string equality so future tweaks to the remediation + wording don't break the suite. + """ @pytest.mark.parametrize( - "exception_factory, expected_message", + "exception_factory, expected_prefix, expected_status, original_message", [ ( lambda: openai.RateLimitError( @@ -206,7 +212,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI rate limit exceeded: quota exceeded", + "OpenAI rate limit exceeded", + 429, + "quota exceeded", ), ( lambda: openai.AuthenticationError( @@ -216,17 +224,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI authentication failed: bad api key", - ), - ( - lambda: openai.PermissionDeniedError( - message="no access", - response=MagicMock( - status_code=403, request=MagicMock(), headers={} - ), - body=None, - ), - "OpenAI permission denied: no access", + "OpenAI authentication failed", + 401, + "bad api key", ), ( lambda: openai.NotFoundError( @@ -236,7 +236,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI resource not found: missing resource", + "OpenAI resource not found", + 404, + "missing resource", ), ( lambda: openai.BadRequestError( @@ -246,7 +248,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI bad request: invalid file", + "OpenAI bad request", + 400, + "invalid file", ), ( lambda: openai.UnprocessableEntityError( @@ -256,17 +260,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI unprocessable entity: cannot process", - ), - ( - lambda: openai.ConflictError( - message="conflict", - response=MagicMock( - status_code=409, request=MagicMock(), headers={} - ), - body=None, - ), - "OpenAI conflict: conflict", + "OpenAI unprocessable entity", + 422, + "cannot process", ), ( lambda: openai.InternalServerError( @@ -276,7 +272,9 @@ class TestOpenAIVectorStoreCrudUpdateOpenAIExceptions: ), body=None, ), - "OpenAI server error: upstream boom", + "OpenAI server error", + 500, + "upstream boom", ), ], ) @@ -287,7 +285,9 @@ def test_specific_openai_exception_maps_to_category_prefix( mock_storage, docs_batch, exception_factory, - expected_message, + expected_prefix, + expected_status, + original_message, ): mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( exception_factory() @@ -295,10 +295,13 @@ def test_specific_openai_exception_maps_to_category_prefix( with pytest.raises(InterruptedError) as exc_info: list(crud.update("vs_1", mock_storage, docs_batch)) - assert str(exc_info.value) == expected_message + msg = str(exc_info.value) + assert msg.startswith(expected_prefix), msg + assert f"code: {expected_status}" in msg, msg + assert original_message in msg, msg def test_api_timeout_error(self, crud, mock_client, mock_storage, docs_batch): - """APITimeoutError uses str(e) (not .message) for the suffix.""" + """APITimeoutError doesn't expose .message — handler interpolates str(e).""" mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( openai.APITimeoutError(request=MagicMock()) ) @@ -307,48 +310,74 @@ def test_api_timeout_error(self, crud, mock_client, mock_storage, docs_batch): list(crud.update("vs_1", mock_storage, docs_batch)) assert str(exc_info.value).startswith("OpenAI request timed out:") - def test_api_connection_error(self, crud, mock_client, mock_storage, docs_batch): - mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( - openai.APIConnectionError(message="connection refused", request=MagicMock()) - ) - - with pytest.raises(InterruptedError) as exc_info: - list(crud.update("vs_1", mock_storage, docs_batch)) - msg = str(exc_info.value) - assert msg.startswith("OpenAI connection error:") - assert "connection refused" in msg - - def test_api_status_error_includes_status_code( - self, crud, mock_client, mock_storage, docs_batch - ): - """A bare APIStatusError falls through to the APIStatusError handler - (after all the named subclasses) and includes the numeric status.""" - mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( - openai.APIStatusError( + @pytest.mark.parametrize( + "exception_factory, original_message", + [ + ( + lambda: openai.PermissionDeniedError( + message="no access", + response=MagicMock( + status_code=403, request=MagicMock(), headers={} + ), + body=None, + ), + "no access", + ), + ( + lambda: openai.ConflictError( + message="conflict", + response=MagicMock( + status_code=409, request=MagicMock(), headers={} + ), + body=None, + ), + "conflict", + ), + ( + lambda: openai.APIConnectionError( + message="connection refused", request=MagicMock() + ), + "connection refused", + ), + ( + lambda: openai.APIStatusError( + "teapot", + response=MagicMock( + status_code=418, request=MagicMock(), headers={} + ), + body=None, + ), "teapot", - response=MagicMock(status_code=418, request=MagicMock(), headers={}), - body=None, - ) - ) - - with pytest.raises(InterruptedError) as exc_info: - list(crud.update("vs_1", mock_storage, docs_batch)) - assert str(exc_info.value) == "OpenAI API status error (418): teapot" - - def test_generic_openai_error_falls_through( - self, crud, mock_client, mock_storage, docs_batch + ), + ( + lambda: openai.OpenAIError("something else"), + "something else", + ), + ], + ) + def test_unhandled_openai_subclasses_fall_through_to_catch_all( + self, + crud, + mock_client, + mock_storage, + docs_batch, + exception_factory, + original_message, ): - """A non-APIError OpenAIError still raises InterruptedError via the - bottom-most `except openai.OpenAIError` handler with a plain prefix.""" + """PermissionDenied, Conflict, APIConnection, APIStatus, and any other + OpenAIError subclass without a dedicated handler all land in the + bottom-most `except openai.OpenAIError` block — prefixed with the + generic "OpenAI error" tag but still carrying the original message. + """ mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( - openai.OpenAIError("something else") + exception_factory() ) with pytest.raises(InterruptedError) as exc_info: list(crud.update("vs_1", mock_storage, docs_batch)) msg = str(exc_info.value) - assert msg.startswith("OpenAI error:") - assert "something else" in msg + assert msg.startswith("OpenAI error:"), msg + assert original_message in msg, msg class TestOpenAIVectorStoreCrudInit: diff --git a/backend/app/tests/services/llm/providers/test_openai.py b/backend/app/tests/services/llm/providers/test_openai.py index ebd5b4529..9fbcc2094 100644 --- a/backend/app/tests/services/llm/providers/test_openai.py +++ b/backend/app/tests/services/llm/providers/test_openai.py @@ -201,7 +201,7 @@ def test_execute_with_generic_exception( assert "Timeout occurred" in error @pytest.mark.parametrize( - "exception_factory, expected_error", + "exception_factory, expected_prefix, expected_status, original_message", [ ( lambda: openai.RateLimitError( @@ -211,7 +211,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI rate limit exceeded: quota exceeded", + "OpenAI rate limit exceeded", + 429, + "quota exceeded", ), ( lambda: openai.AuthenticationError( @@ -221,17 +223,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI authentication failed: bad api key", - ), - ( - lambda: openai.PermissionDeniedError( - message="no access to model", - response=MagicMock( - status_code=403, request=MagicMock(), headers={} - ), - body=None, - ), - "OpenAI permission denied: no access to model", + "OpenAI authentication failed", + 401, + "bad api key", ), ( lambda: openai.NotFoundError( @@ -241,7 +235,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI resource not found: model not found", + "OpenAI resource not found", + 404, + "model not found", ), ( lambda: openai.BadRequestError( @@ -251,7 +247,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI bad request: invalid model param", + "OpenAI bad request", + 400, + "invalid model param", ), ( lambda: openai.UnprocessableEntityError( @@ -261,17 +259,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI unprocessable entity: cannot process", - ), - ( - lambda: openai.ConflictError( - message="resource conflict", - response=MagicMock( - status_code=409, request=MagicMock(), headers={} - ), - body=None, - ), - "OpenAI conflict: resource conflict", + "OpenAI unprocessable entity", + 422, + "cannot process", ), ( lambda: openai.InternalServerError( @@ -281,7 +271,9 @@ def test_execute_with_generic_exception( ), body=None, ), - "OpenAI server error: upstream boom", + "OpenAI server error", + 500, + "upstream boom", ), ], ) @@ -292,17 +284,24 @@ def test_execute_specific_openai_exceptions_use_category_prefix( completion_config, query_params, exception_factory, - expected_error, + expected_prefix, + expected_status, + original_message, ): - """Each specific OpenAI exception type maps to a distinct category- - prefixed error message instead of a single generic "OpenAI error" line. + """Each specific OpenAI exception with a dedicated handler emits a + category-prefixed message including the upstream status code and the + original error text. Structural assertions only — the remediation + suffix can evolve without breaking the suite. """ mock_client.responses.create.side_effect = exception_factory() result, error = provider.execute(completion_config, query_params, "Test query") assert result is None - assert error == expected_error + assert error is not None + assert error.startswith(expected_prefix), error + assert f"code: {expected_status}" in error, error + assert original_message in error, error def test_execute_with_api_timeout_error( self, provider, mock_client, completion_config, query_params @@ -318,37 +317,69 @@ def test_execute_with_api_timeout_error( assert error is not None assert error.startswith("OpenAI request timed out:") - def test_execute_with_api_connection_error( - self, provider, mock_client, completion_config, query_params - ): - """APIConnectionError handler also uses str(e) rather than .message.""" - mock_client.responses.create.side_effect = openai.APIConnectionError( - message="connection refused", request=MagicMock() - ) - - result, error = provider.execute(completion_config, query_params, "Test query") - - assert result is None - assert error is not None - assert error.startswith("OpenAI connection error:") - assert "connection refused" in error - - def test_execute_with_api_status_error_includes_status_code( - self, provider, mock_client, completion_config, query_params + @pytest.mark.parametrize( + "exception_factory, original_message", + [ + ( + lambda: openai.PermissionDeniedError( + message="no access to model", + response=MagicMock( + status_code=403, request=MagicMock(), headers={} + ), + body=None, + ), + "no access to model", + ), + ( + lambda: openai.ConflictError( + message="resource conflict", + response=MagicMock( + status_code=409, request=MagicMock(), headers={} + ), + body=None, + ), + "resource conflict", + ), + ( + lambda: openai.APIConnectionError( + message="connection refused", request=MagicMock() + ), + "connection refused", + ), + ( + lambda: openai.APIStatusError( + "teapot", + response=MagicMock( + status_code=418, request=MagicMock(), headers={} + ), + body=None, + ), + "teapot", + ), + ], + ) + def test_execute_unhandled_openai_subclasses_fall_through_to_catch_all( + self, + provider, + mock_client, + completion_config, + query_params, + exception_factory, + original_message, ): - """A bare APIStatusError (not one of the named subclasses) hits the - fall-through APIStatusError handler and includes the numeric status.""" - mock_client.responses.create.side_effect = openai.APIStatusError( - "teapot", - response=MagicMock(status_code=418, request=MagicMock(), headers={}), - body=None, - ) + """PermissionDenied, Conflict, APIConnection, and APIStatus no longer + have dedicated except blocks. They land in the bottom-most + `except openai.OpenAIError` handler — prefixed with the generic + "OpenAI error" tag but still surfacing the original message. + """ + mock_client.responses.create.side_effect = exception_factory() result, error = provider.execute(completion_config, query_params, "Test query") assert result is None assert error is not None - assert error == "OpenAI API status error (418): teapot" + assert error.startswith("OpenAI error:"), error + assert original_message in error, error def test_execute_with_conversation_config_without_id_or_auto_create( self, provider, mock_client, completion_config, query_params From 6d99f27ead88134e0a6923d3652382c8e3307809 Mon Sep 17 00:00:00 2001 From: Ayush8923 <80516839+Ayush8923@users.noreply.github.com> Date: Fri, 5 Jun 2026 10:28:10 +0530 Subject: [PATCH 9/9] fix(api): error handling --- backend/app/tests/crud/rag/test_open_ai.py | 62 ++---------------- .../services/llm/providers/test_openai.py | 64 ------------------- 2 files changed, 5 insertions(+), 121 deletions(-) diff --git a/backend/app/tests/crud/rag/test_open_ai.py b/backend/app/tests/crud/rag/test_open_ai.py index 75e00713e..b71b1835b 100644 --- a/backend/app/tests/crud/rag/test_open_ai.py +++ b/backend/app/tests/crud/rag/test_open_ai.py @@ -310,74 +310,22 @@ def test_api_timeout_error(self, crud, mock_client, mock_storage, docs_batch): list(crud.update("vs_1", mock_storage, docs_batch)) assert str(exc_info.value).startswith("OpenAI request timed out:") - @pytest.mark.parametrize( - "exception_factory, original_message", - [ - ( - lambda: openai.PermissionDeniedError( - message="no access", - response=MagicMock( - status_code=403, request=MagicMock(), headers={} - ), - body=None, - ), - "no access", - ), - ( - lambda: openai.ConflictError( - message="conflict", - response=MagicMock( - status_code=409, request=MagicMock(), headers={} - ), - body=None, - ), - "conflict", - ), - ( - lambda: openai.APIConnectionError( - message="connection refused", request=MagicMock() - ), - "connection refused", - ), - ( - lambda: openai.APIStatusError( - "teapot", - response=MagicMock( - status_code=418, request=MagicMock(), headers={} - ), - body=None, - ), - "teapot", - ), - ( - lambda: openai.OpenAIError("something else"), - "something else", - ), - ], - ) - def test_unhandled_openai_subclasses_fall_through_to_catch_all( - self, - crud, - mock_client, - mock_storage, - docs_batch, - exception_factory, - original_message, + def test_generic_openai_error_falls_through( + self, crud, mock_client, mock_storage, docs_batch ): - """PermissionDenied, Conflict, APIConnection, APIStatus, and any other - OpenAIError subclass without a dedicated handler all land in the + """Any OpenAIError subclass without a dedicated handler lands in the bottom-most `except openai.OpenAIError` block — prefixed with the generic "OpenAI error" tag but still carrying the original message. """ mock_client.vector_stores.file_batches.upload_and_poll.side_effect = ( - exception_factory() + openai.OpenAIError("something else") ) with pytest.raises(InterruptedError) as exc_info: list(crud.update("vs_1", mock_storage, docs_batch)) msg = str(exc_info.value) assert msg.startswith("OpenAI error:"), msg - assert original_message in msg, msg + assert "something else" in msg, msg class TestOpenAIVectorStoreCrudInit: diff --git a/backend/app/tests/services/llm/providers/test_openai.py b/backend/app/tests/services/llm/providers/test_openai.py index 9fbcc2094..f80bdfc1b 100644 --- a/backend/app/tests/services/llm/providers/test_openai.py +++ b/backend/app/tests/services/llm/providers/test_openai.py @@ -317,70 +317,6 @@ def test_execute_with_api_timeout_error( assert error is not None assert error.startswith("OpenAI request timed out:") - @pytest.mark.parametrize( - "exception_factory, original_message", - [ - ( - lambda: openai.PermissionDeniedError( - message="no access to model", - response=MagicMock( - status_code=403, request=MagicMock(), headers={} - ), - body=None, - ), - "no access to model", - ), - ( - lambda: openai.ConflictError( - message="resource conflict", - response=MagicMock( - status_code=409, request=MagicMock(), headers={} - ), - body=None, - ), - "resource conflict", - ), - ( - lambda: openai.APIConnectionError( - message="connection refused", request=MagicMock() - ), - "connection refused", - ), - ( - lambda: openai.APIStatusError( - "teapot", - response=MagicMock( - status_code=418, request=MagicMock(), headers={} - ), - body=None, - ), - "teapot", - ), - ], - ) - def test_execute_unhandled_openai_subclasses_fall_through_to_catch_all( - self, - provider, - mock_client, - completion_config, - query_params, - exception_factory, - original_message, - ): - """PermissionDenied, Conflict, APIConnection, and APIStatus no longer - have dedicated except blocks. They land in the bottom-most - `except openai.OpenAIError` handler — prefixed with the generic - "OpenAI error" tag but still surfacing the original message. - """ - mock_client.responses.create.side_effect = exception_factory() - - result, error = provider.execute(completion_config, query_params, "Test query") - - assert result is None - assert error is not None - assert error.startswith("OpenAI error:"), error - assert original_message in error, error - def test_execute_with_conversation_config_without_id_or_auto_create( self, provider, mock_client, completion_config, query_params ):