Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions docs/pipeline-rag.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
@startuml pipeline-rag

skinparam backgroundColor #FFFFFF
skinparam defaultFontName Arial
skinparam defaultFontSize 16
skinparam sequenceArrowThickness 2
skinparam SequenceBoxBackgroundColor #F8F9FF
skinparam SequenceBoxBorderColor #AABBDD
skinparam ParticipantBackgroundColor #EEF3FB
skinparam ParticipantBorderColor #5577AA
skinparam ParticipantFontColor #222222
skinparam DatabaseBackgroundColor #FFF8E7
skinparam DatabaseBorderColor #CC9900
skinparam ActorBackgroundColor #F0FFF0
skinparam ActorBorderColor #448844

title RAG Pipeline

box "Retrieval" #EEF6FF
actor "User" as user
participant "rag.py" as rag
participant "search.py" as search
participant "embed.py" as embed
end box

database "Chroma DB" as chroma
participant "OpenAI API" as openai

box "Generation" #FFF0F8
participant "generate.py" as generate
end box

== Retrieval ==

user -> rag : question\n(CLI arg or input())
rag -> search : search(question, top_k=5)
search -> embed : embed_query()
embed -> openai : embeddings.create()\ntext-embedding-3-small
openai --> embed : query vector (1536-dim)
embed --> search : query vector
search -> chroma : query(query_embeddings, n_results=5)
chroma --> search : top-5 nearest chunks\n(text + source + chunk_index + distance)
search --> rag : chunks list

== Generation ==

rag -> generate : generate_answer(question, chunks)
generate -> generate : _select_chunks_within_budget()\ntiktoken counts tokens per chunk\nskips any chunk that would push the running total\nover the token budget (default 2000)
generate -> generate : _build_context_block()\nnumbered + labelled context entries
generate -> openai : chat.completions.create()\ngpt-4o-mini · temperature=0
note right of openai
System prompt:
Answer ONLY from the provided context.
If context is insufficient, respond with:
"I don't know based on the provided documents."
end note
openai --> generate : answer text
generate --> rag : { answer, sources }

== Output ==

rag -> rag : answer == NO_ANSWER_PHRASE?
alt answer not in documents
rag -> user : "No answer found in the documents."
else answer found
rag -> user : Answer text\n+ source citations\n(source filename · chunk index · text preview)
end

' end
@enduml
90 changes: 90 additions & 0 deletions generate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from openai import OpenAI
from dotenv import load_dotenv
import tiktoken
import os

# Load environment variables via python-dotenv before the settings below are read.
load_dotenv()

# Module-wide OpenAI client, authenticated with the OPENAI_API_KEY environment variable.
client = OpenAI(api_key = os.getenv("OPENAI_API_KEY"))

# Chat model used to generate answers; override with the GENERATION_MODEL env var.
GENERATION_MODEL = os.getenv("GENERATION_MODEL", "gpt-4o-mini")
# Upper bound on the summed token count of chunk texts put into the prompt;
# override with the TOKEN_BUDGET env var (must parse as an int).
TOKEN_BUDGET = int(os.getenv("TOKEN_BUDGET", "2000"))
# Sentinel answer meaning "the context does not answer the question".
# Returned directly when no chunk fits the budget, and exported so callers can compare against it.
NO_ANSWER_PHRASE = "I don't know based on the provided documents."

# Absolute path of the "prompts" directory that sits next to this file.
_PROMPTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prompts")


def _load_prompt(filename: str) -> str:
    """Return the text of a prompt file with surrounding whitespace removed.

    Args:
        filename: Name of a file located inside the prompts directory
            (``_PROMPTS_DIR``).

    Returns:
        The file content, stripped of leading and trailing whitespace.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    path = os.path.join(_PROMPTS_DIR, filename)
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform's locale encoding, which can garble or reject non-ASCII
    # prompt text on some systems.
    with open(path, encoding="utf-8") as f:
        return f.read().strip()


# System prompt text, read once when this module is imported; importing fails
# if prompts/system_prompt.txt cannot be opened.
_SYSTEM_PROMPT = _load_prompt("system_prompt.txt")


def _count_tokens(text: str) -> int:
    """Return how many tokens *text* occupies for the generation model.

    Args:
        text: Arbitrary text (typically the body of a retrieved chunk).

    Returns:
        The number of tokens produced by the tokenizer.
    """
    try:
        # Use the tokenizer that belongs to GENERATION_MODEL so the budget
        # reflects what the model actually consumes (gpt-4o family models
        # use o200k_base rather than cl100k_base).
        enc = tiktoken.encoding_for_model(GENERATION_MODEL)
    except KeyError:
        # Model name unknown to tiktoken (custom or non-OpenAI model):
        # fall back to the previously hard-coded encoding.
        enc = tiktoken.get_encoding("cl100k_base")

    # disallowed_special=() makes strings such as "<|endoftext|>" that happen
    # to appear inside a document be encoded as ordinary text; by default
    # encode() raises ValueError when it meets them.
    return len(enc.encode(text, disallowed_special=()))


def _select_chunks_within_budget(chunks: list[dict], budget: int) -> list[dict]:
    """Pick, in their original order, the chunks that fit within *budget* tokens.

    Each chunk's cost is the token count of its ``"text"`` value. A chunk that
    would push the running total past *budget* is passed over, but the chunks
    after it are still considered.

    Args:
        chunks: Candidate chunks, each a dict with a ``"text"`` key.
        budget: Maximum total number of tokens allowed.

    Returns:
        The list of accepted chunks (possibly empty).
    """
    kept: list[dict] = []
    running_total = 0

    for candidate in chunks:
        cost = _count_tokens(candidate["text"])
        if running_total + cost <= budget:
            kept.append(candidate)
            running_total += cost

    return kept


def _build_context_block(chunks: list[dict]) -> str:
lines = []
for i, chunk in enumerate(chunks, start = 1):
lines.append(f"[{i}] Source: {chunk['source']} (chunk {chunk['chunk_index']})")
lines.append(chunk["text"])
lines.append("")
return "\n".join(lines)


def generate_answer(question: str, chunks: list[dict]) -> dict:
    """Answer *question* using only the retrieved *chunks* as context.

    Args:
        question: The user's question.
        chunks: Retrieved chunks; each dict must provide ``"text"``,
            ``"source"`` and ``"chunk_index"``.

    Returns:
        A dict with two keys:

        * ``"answer"``: the model's reply with surrounding whitespace
          removed, or ``NO_ANSWER_PHRASE`` when no chunk fits the token
          budget or the model returned no text.
        * ``"sources"``: one dict per chunk sent to the model, with keys
          ``"index"`` (1-based, matching the numbering in the prompt),
          ``"source"``, ``"chunk_index"`` and ``"text"``. Empty when no
          request was made.
    """
    selected_chunks = _select_chunks_within_budget(chunks, TOKEN_BUDGET)

    # Nothing to ground an answer on: skip the API call entirely.
    if not selected_chunks:
        return {
            "answer": NO_ANSWER_PHRASE,
            "sources": [],
        }

    context_block = _build_context_block(selected_chunks)
    user_message = f"Context:\n{context_block}\nQuestion: {question}"

    response = client.chat.completions.create(
        model=GENERATION_MODEL,
        messages=[
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
        temperature=0.0,
    )

    # message.content is nullable in the Chat Completions API (e.g. refusals
    # or filtered output), so guard before calling .strip(). A missing or
    # blank reply is reported as "no answer" rather than crashing or
    # returning an empty string.
    content = response.choices[0].message.content
    answer = content.strip() if content else ""
    if not answer:
        answer = NO_ANSWER_PHRASE

    sources = [
        {
            "index": position,
            "source": chunk["source"],
            "chunk_index": chunk["chunk_index"],
            "text": chunk["text"],
        }
        for position, chunk in enumerate(selected_chunks, start=1)
    ]

    return {
        "answer": answer,
        "sources": sources,
    }
8 changes: 8 additions & 0 deletions prompts/system_prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
You are a precise document assistant. Answer the user's question using ONLY the information provided in the context below.

Rules:
- Base your answer strictly on the provided context. Do not use prior knowledge.
- If the context does not contain enough information to answer the question, respond with exactly:
I don't know based on the provided documents.
- Do not add caveats, speculation, or information not present in the context.
- Be concise and factual.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ dependencies = [
"openai>=1.0.0",
"python-dotenv>=1.0.0",
"numpy>=1.26.0",
"chromadb>=1.5.9"
"chromadb>=1.5.9",
"tiktoken>=0.7.0"
]

[tool.setuptools]
Expand Down
49 changes: 49 additions & 0 deletions rag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import sys
from search import search
from generate import generate_answer, NO_ANSWER_PHRASE

# Number of chunks requested from search() for each question.
TOP_K = 5


def _print_result(result: dict) -> None:
print("\nAnswer:")
print(result["answer"])

if result["sources"]:
print("\nSources:")
for source in result["sources"]:
preview = source["text"][:120].replace("\n", " ")
print(f" [{source['index']}] {source['source']} (chunk {source['chunk_index']}): \"{preview}...\"")


def run(question: str) -> None:
    """Run retrieval and generation for *question* and print the outcome.

    Prints a hint to run ingestion when the search returns nothing, a
    "no answer" notice when the generated answer equals ``NO_ANSWER_PHRASE``,
    and otherwise the answer together with its sources.
    """
    retrieved = search(question, top_k = TOP_K)

    if retrieved:
        outcome = generate_answer(question, retrieved)
        if outcome["answer"].strip() != NO_ANSWER_PHRASE:
            _print_result(outcome)
        else:
            print("\nNo answer found in the documents.")
    else:
        print("No documents found in the collection. Run ingest.py first.")


def main() -> None:
    """CLI entry point: obtain a question and run the RAG pipeline on it.

    The question is taken from the command-line arguments (joined with
    spaces) or, when none are given, read interactively from stdin.

    Raises:
        SystemExit: With exit code 1 when the question is empty or consists
            only of whitespace.
    """
    if len(sys.argv) > 1:
        raw_question = " ".join(sys.argv[1:])
    else:
        try:
            raw_question = input("Enter your question: ")
        except EOFError:
            # stdin is closed or exhausted: treat it like an empty question
            # so the user gets the regular error instead of a traceback.
            raw_question = ""

    # Strip in both branches so a whitespace-only CLI argument is rejected
    # just like a blank interactive answer.
    question = raw_question.strip()

    if not question:
        print("Error: question cannot be empty.", file=sys.stderr)
        sys.exit(1)

    run(question)


if __name__ == "__main__":
    main()
Loading