Augmented generation
The generation step in RAG: injecting retrieved document chunks into the LLM's context alongside the user query to ground the response in retrieved facts.
Syntax
rag
prompt = f"Context:\n{retrieved_docs}\n\nQuestion: {query}\nAnswer:"

Example
rag
# Full RAG generation:
def rag_answer(query, retrieved_chunks, llm_client):
    """Answer *query* using retrieved context (the "G" in RAG).

    Joins the retrieved chunks into a single context string, embeds it in a
    grounding prompt alongside the question, and asks the LLM to answer using
    only that context.

    Args:
        query: The user's question, as a plain string.
        retrieved_chunks: Iterable of document-chunk strings from retrieval.
        llm_client: Client whose ``complete(prompt)`` returns a response
            object with a ``.text`` attribute (assumed — confirm with caller).

    Returns:
        The LLM's answer text.
    """
    # Separate chunks with a blank line so chunk boundaries stay visible
    # to the model.
    context = "\n\n".join(retrieved_chunks)
    # The instruction to answer "only" from the context (and to admit when
    # the answer is absent) is what grounds the response and curbs
    # hallucination.
    prompt = f"""Use the following context to answer the question.
Context:
{context}
Question: {query}
Answer based only on the provided context. If the answer is not in the context, say so.
Answer:"""
    response = llm_client.complete(prompt)
    return response.text