Having spent some time studying RAG for a project, I summarize 8 RAG architectures in this post: a brief introduction to each, plus reference code implemented with LangChain.
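Before the examples, a note on setup. All snippets below share roughly the same dependencies; here is a minimal sketch of the environment I assume (package names inferred from the imports used later; pin versions as needed):

# Assumed dependencies (inferred from the imports in the reference code):
#   pip install langchain langchain-openai langchain-community langchain-huggingface \
#       lancedb sentence-transformers transformers torch networkx neo4j tavily-python
import os

# API keys are read from the environment by the respective clients.
os.environ["OPENAI_API_KEY"] = "sk-..."    # for ChatOpenAI
os.environ["TAVILY_API_KEY"] = "tvly-..."  # only needed for the Corrective RAG web search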
Overview:
Naive RAG is the most basic retrieval-augmented generation architecture, following the classic index-retrieve-generate pipeline.
Architecture:

Implementation steps:
1. Index: embed the documents and store the vectors (LanceDB here).
2. Retrieve: fetch the top-k chunks most similar to the question.
3. Generate: stuff the retrieved context into a prompt and have the LLM answer.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import lancedb

class NaiveRAG:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-5", temperature=0)
        # Lightweight all-MiniLM-L6-v2 model, only ~80 MB
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        self.db = lancedb.connect("/tmp/lancedb_naive_rag")
        self.vectorstore = None

    def build_index(self, documents: list):
        """Build the vector index."""
        docs = [Document(page_content=d) for d in documents]
        self.vectorstore = LanceDB.from_documents(
            docs,
            self.embeddings,
            connection=self.db,
            table_name="naive_rag_docs"
        )

    def query(self, question: str) -> str:
        """Retrieve and generate an answer."""
        # Build the retrieval chain
        retriever = self.vectorstore.as_retriever(search_kwargs={"k": 3})
        prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""Answer the question based on the following context:
Context: {context}
Question: {question}
Answer:"""
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=retriever,
            chain_type_kwargs={"prompt": prompt_template}
        )
        return qa_chain.invoke({"query": question})["result"]

# Usage example
naive_rag = NaiveRAG()
naive_rag.build_index(["Content of document 1...", "Content of document 2...", "Content of document 3..."])
answer = naive_rag.query("What is the issue date of the lease?")
print(answer)
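Note that RetrievalQA is treated as a legacy chain in recent LangChain releases; the same flow can be written directly in LCEL. A minimal sketch reusing the naive_rag instance built above (the format_docs helper is my own):

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate

lcel_prompt = PromptTemplate.from_template(
    """Answer the question based on the following context:
Context: {context}
Question: {question}
Answer:"""
)
retriever = naive_rag.vectorstore.as_retriever(search_kwargs={"k": 3})

def format_docs(docs):
    # Concatenate retrieved chunks into a single context string
    return "\n\n".join(d.page_content for d in docs)

lcel_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | lcel_prompt
    | naive_rag.llm
    | StrOutputParser()
)
print(lcel_chain.invoke("What is the issue date of the lease?"))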
Overview:
Multi-Head RAG borrows the multi-head attention mechanism from the Transformer: the diverse semantic features captured by the model's different attention heads drive parallel retrieval.
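The core trick is simple slicing arithmetic: BERT-base has a 768-dimensional hidden state and 12 attention heads, so each head corresponds to a disjoint 64-dimensional slice of the embedding, and each slice gets its own index. A quick illustration of the arithmetic used in the code below:

hidden_size, num_heads = 768, 12
head_dim = hidden_size // num_heads  # 64 dimensions per head
# Head i owns the slice [i * head_dim, (i + 1) * head_dim) of the CLS embedding:
slices = [(i * head_dim, (i + 1) * head_dim) for i in range(num_heads)]
print(slices[0], slices[-1])  # (0, 64) (704, 768)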
Architecture:

Implementation steps:
1. Define a custom Embeddings class that slices one attention head's span out of the BERT CLS embedding.
2. Build a separate LanceDB vector store per head over the same documents.
3. At query time, search every head's store and fuse (deduplicate) the results.
4. Generate the answer from the fused context.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import LanceDB
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate
from langchain.embeddings.base import Embeddings
from transformers import AutoModel, AutoTokenizer
import torch
import lancedb
from typing import List

class MultiHeadEmbeddings(Embeddings):
    """Custom multi-head embeddings, subclassing LangChain's Embeddings base class."""
    def __init__(self, model_name="bert-base-uncased", head_index=0, num_heads=12):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name, output_hidden_states=True)
        self.head_index = head_index
        self.num_heads = num_heads
        self.head_dim = 768 // num_heads  # BERT hidden size / num_heads

    def _get_head_embedding(self, texts: List[str]) -> List[List[float]]:
        """Get the embedding slice belonging to the configured head."""
        inputs = self.tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            outputs = self.model(**inputs, output_hidden_states=True)
        hidden_states = outputs.hidden_states[-2]  # second-to-last layer
        start = self.head_index * self.head_dim
        end = (self.head_index + 1) * self.head_dim
        head_emb = hidden_states[:, 0, start:end].numpy()  # CLS token, sliced per head
        return head_emb.tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return self._get_head_embedding(texts)

    def embed_query(self, text: str) -> List[float]:
        return self._get_head_embedding([text])[0]

class MultiHeadRAG:
    def __init__(self, num_heads=12):
        self.llm = ChatOpenAI(model="gpt-5", temperature=0)
        self.num_heads = num_heads
        self.db = lancedb.connect("/tmp/lancedb_multihead_rag")
        self.vectorstores = []  # one vector store per head
        self.documents = []

    def build_index(self, documents: List[str]):
        """Build an independent LanceDB vector store for each head."""
        self.documents = documents
        docs = [Document(page_content=d) for d in documents]
        for head_idx in range(self.num_heads):
            embeddings = MultiHeadEmbeddings(head_index=head_idx, num_heads=self.num_heads)
            vectorstore = LanceDB.from_documents(
                docs,
                embeddings,
                connection=self.db,
                table_name=f"head_{head_idx}_docs"
            )
            self.vectorstores.append(vectorstore)

    def search(self, query: str, top_k: int = 3) -> List[str]:
        """Retrieve from all heads and fuse the results."""
        all_results = set()
        for vectorstore in self.vectorstores:
            docs = vectorstore.similarity_search(query, k=top_k)
            for doc in docs:
                all_results.add(doc.page_content)
        return list(all_results)

    def query(self, question: str) -> str:
        """Retrieve and generate an answer."""
        retrieved_docs = self.search(question)
        context = "\n\n".join(retrieved_docs)
        prompt = ChatPromptTemplate.from_template(
            """Answer the question based on the following multi-view retrieved context:
Context: {context}
Question: {question}
Answer:"""
        )
        chain = prompt | self.llm
        response = chain.invoke({"context": context, "question": question})
        return response.content

# Usage example
mrag = MultiHeadRAG(num_heads=12)
documents = ["Content of document 1...", "Content of document 2...", "Content of document 3..."]
mrag.build_index(documents)
answer = mrag.query("Your question here")
print(answer)
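The search method above fuses results with a plain set union, so a chunk retrieved by eleven heads counts the same as one retrieved by a single head. A small variant (my own sketch, not part of the original) ranks chunks by how many heads voted for them:

from collections import Counter
from typing import List

def search_with_votes(mrag: MultiHeadRAG, query: str, top_k: int = 3) -> List[str]:
    """Rank retrieved chunks by the number of heads that returned them."""
    votes = Counter()
    for vectorstore in mrag.vectorstores:
        for doc in vectorstore.similarity_search(query, k=top_k):
            votes[doc.page_content] += 1
    # Most widely agreed-upon chunks first
    return [content for content, _ in votes.most_common(top_k)]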
Overview:
Corrective RAG adds document-quality assessment and self-correction on top of traditional RAG. Each retrieved document is graded for relevance (Correct / Incorrect / Ambiguous); when retrieval quality is insufficient, external knowledge sources are searched to fill the gap.
Architecture:

Implementation steps:
1. Retrieve top-k documents from the local vector store.
2. Have the LLM grade each document as CORRECT / INCORRECT / AMBIGUOUS.
3. Keep CORRECT documents as-is; distill AMBIGUOUS ones down to the query-relevant parts.
4. If nothing graded CORRECT (or too few documents survive), fall back to web search via Tavily.
5. Generate the answer from the corrected context.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain_community.tools import TavilySearchResults
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import lancedb
from typing import List

class CorrectiveRAG:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-5", temperature=0)
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        self.db = lancedb.connect("/tmp/lancedb_corrective_rag")
        self.vectorstore = None
        # Tavily for web search (requires TAVILY_API_KEY)
        self.web_search = TavilySearchResults(max_results=3)

    def build_index(self, documents: List[str]):
        """Build the vector index."""
        docs = [Document(page_content=d) for d in documents]
        self.vectorstore = LanceDB.from_documents(
            docs,
            self.embeddings,
            connection=self.db,
            table_name="corrective_rag_docs"
        )

    def evaluate_relevance(self, query: str, document: str) -> str:
        """Grade how relevant a document is to the query."""
        prompt = ChatPromptTemplate.from_template(
            """Evaluate the relevance of the following document to the query.
Query: {query}
Document: {document}
Answer with exactly one word: CORRECT (relevant), INCORRECT (irrelevant), or AMBIGUOUS (unclear)."""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"query": query, "document": document})
        return response.strip().upper()

    def search_web(self, query: str) -> List[str]:
        """Fall back to web search when local documents are insufficient."""
        try:
            results = self.web_search.invoke(query)
            return [r["content"] for r in results if "content" in r]
        except Exception:
            return []

    def retrieve_and_correct(self, query: str, top_k: int = 5) -> List[str]:
        """Retrieve documents, then correct them."""
        # 1. Initial retrieval
        retriever = self.vectorstore.as_retriever(search_kwargs={"k": top_k})
        docs = retriever.invoke(query)
        # 2. Grade each document's relevance
        correct_docs = []
        need_web_search = True
        for doc in docs:
            relevance = self.evaluate_relevance(query, doc.page_content)
            if relevance == "CORRECT":
                correct_docs.append(doc.page_content)
                need_web_search = False
            elif relevance == "AMBIGUOUS":
                # Refine ambiguous documents down to the relevant parts
                refined = self.refine_document(query, doc.page_content)
                correct_docs.append(refined)
        # 3. Supplement with web search when necessary
        if need_web_search or len(correct_docs) < 2:
            web_results = self.search_web(query)
            correct_docs.extend(web_results)
        return correct_docs

    def refine_document(self, query: str, document: str) -> str:
        """Distill a document down to the parts relevant to the query."""
        prompt = ChatPromptTemplate.from_template(
            """Extract the information most relevant to the query from the following document:
Query: {query}
Document: {document}
Return only the refined, relevant content:"""
        )
        chain = prompt | self.llm | StrOutputParser()
        return chain.invoke({"query": query, "document": document})

    def query(self, question: str) -> str:
        """Generate the final answer."""
        corrected_docs = self.retrieve_and_correct(question)
        context = "\n\n".join(corrected_docs)
        prompt = ChatPromptTemplate.from_template(
            """Answer the question based on the following corrected context:
Context: {context}
Question: {question}
Answer:"""
        )
        chain = prompt | self.llm | StrOutputParser()
        return chain.invoke({"context": context, "question": question})

# Usage example
crag = CorrectiveRAG()
crag.build_index(["Document 1...", "Document 2...", "Document 3..."])
answer = crag.query("Your question here")
print(answer)
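One practical note: retrieve_and_correct grades documents one LLM call at a time. Since the grading calls are independent, LangChain's Runnable.batch can run them concurrently. A sketch of that variant (the helper name is my own; the prompt matches evaluate_relevance above):

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from typing import List

def evaluate_relevance_batch(crag: CorrectiveRAG, query: str, documents: List[str]) -> List[str]:
    """Grade all documents concurrently instead of sequentially."""
    prompt = ChatPromptTemplate.from_template(
        """Evaluate the relevance of the following document to the query.
Query: {query}
Document: {document}
Answer with exactly one word: CORRECT, INCORRECT, or AMBIGUOUS."""
    )
    chain = prompt | crag.llm | StrOutputParser()
    # .batch() runs the calls in parallel, bounded by max_concurrency
    responses = chain.batch(
        [{"query": query, "document": d} for d in documents],
        config={"max_concurrency": 5},
    )
    return [r.strip().upper() for r in responses]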
Overview:
Agentic RAG combines the planning and reasoning abilities of AI agents with RAG.
The agent analyzes the query on its own, devises a retrieval strategy, chooses the appropriate tools (semantic search, keyword search, a calculator, and so on), and iterates based on intermediate results.
Architecture:

Implementation steps:
1. Build the vector index as before.
2. Wrap each capability (semantic search, keyword search, calculator) as a tool with the @tool decorator.
3. Create a tool-calling agent whose system prompt describes the available tools.
4. Let the AgentExecutor plan, call tools (possibly several times), and produce the final answer.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain.schema import Document
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools import tool
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import lancedb
from typing import List

class AgenticRAG:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4", temperature=0)
        # Lightweight all-MiniLM-L6-v2 model, only ~80 MB
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        self.db = lancedb.connect("/tmp/lancedb_agentic_rag")
        self.vectorstore = None
        self.agent_executor = None

    def build_index(self, documents: List[str]):
        """Build the vector index."""
        docs = [Document(page_content=d) for d in documents]
        self.vectorstore = LanceDB.from_documents(
            docs,
            self.embeddings,
            connection=self.db,
            table_name="agentic_rag_docs"
        )

    def setup_agent(self):
        """Configure the agent and its tools."""
        vectorstore = self.vectorstore  # captured by the tool closures

        @tool
        def semantic_search(query: str) -> str:
            """Semantic search: use when the question's meaning must be understood to find relevant documents."""
            docs = vectorstore.similarity_search(query, k=3)
            return "\n".join([d.page_content for d in docs])

        @tool
        def keyword_search(query: str) -> str:
            """Keyword search: use when specific terms must be matched exactly."""
            # Demo shortcut: this reuses vector search; a real implementation
            # would use a lexical index such as BM25 or full-text search.
            docs = vectorstore.similarity_search(query, k=2)
            return "\n".join([d.page_content for d in docs])

        @tool
        def calculator(expression: str) -> str:
            """Math calculator: input a math expression."""
            try:
                # Demo only -- eval() is unsafe on untrusted input;
                # see the AST-based sketch after this example.
                return str(eval(expression))
            except Exception:
                return "Calculation error"

        tools = [semantic_search, keyword_search, calculator]
        # Prompt template for the tool-calling agent
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an intelligent assistant that can use tools to answer questions.
Available tools:
- semantic_search: semantic search over the document collection
- keyword_search: exact keyword matching
- calculator: math calculations
Pick the appropriate tool for each question; you may call tools multiple times to gather complete information."""),
            ("human", "{input}"),
            MessagesPlaceholder(variable_name="agent_scratchpad")
        ])
        # Create the tool-calling agent
        agent = create_tool_calling_agent(self.llm, tools, prompt)
        self.agent_executor = AgentExecutor(
            agent=agent,
            tools=tools,
            verbose=True,
            max_iterations=5,
            handle_parsing_errors=True
        )

    def query(self, question: str) -> str:
        """Run a query through the agent."""
        if not self.agent_executor:
            self.setup_agent()
        result = self.agent_executor.invoke({"input": question})
        return result["output"]

# Usage example
arag = AgenticRAG()
arag.build_index(["Product A costs 100 yuan...", "Product B costs 200 yuan...", "Discount policy..."])
answer = arag.query("What is the total price of product A and product B? Are there any discounts?")
print(answer)
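As flagged in the comment above, eval() will happily execute arbitrary Python, which matters once tool inputs come from an LLM. A safer sketch (my own, not from the original) that only evaluates an AST of whitelisted arithmetic operators:

import ast
import operator

# Whitelisted arithmetic operators only
_OPS = {
    ast.Add: operator.add, ast.Sub: operator.sub,
    ast.Mult: operator.mul, ast.Div: operator.truediv,
    ast.Pow: operator.pow, ast.USub: operator.neg,
}

def safe_eval(expression: str) -> float:
    """Evaluate a pure-arithmetic expression without calling eval()."""
    def walk(node):
        if isinstance(node, ast.Expression):
            return walk(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.left), walk(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](walk(node.operand))
        raise ValueError(f"Disallowed expression: {expression}")
    return walk(ast.parse(expression, mode="eval"))

print(safe_eval("100 + 200"))  # 300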
Overview:
Graph RAG combines knowledge-graph techniques with RAG: entities and relations are extracted from the documents to build a knowledge graph, followed by community detection and summary generation.
Architecture:

Implementation steps:
1. Use the LLM to extract entities and relations from each document as JSON.
2. Write them to a graph store (Neo4j here) and mirror them in a NetworkX graph.
3. Run community detection (Louvain) on the graph and generate a summary per community.
4. At query time, extract the question's entities, pull the surrounding subgraph from Neo4j plus the community summaries, and generate the answer.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_community.graphs import Neo4jGraph
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import networkx as nx
from typing import List, Dict
import json

class GraphRAG:
    def __init__(self, neo4j_uri="bolt://localhost:7687", neo4j_user="neo4j", neo4j_password="password"):
        self.llm = ChatOpenAI(model="gpt-5", temperature=0)
        # LangChain's Neo4j integration
        self.graph_db = Neo4jGraph(
            url=neo4j_uri,
            username=neo4j_user,
            password=neo4j_password
        )
        self.nx_graph = nx.Graph()

    def extract_entities_and_relations(self, text: str) -> Dict:
        """Use the LLM to extract entities and relations."""
        prompt = ChatPromptTemplate.from_template(
            """Extract entities and relations from the following text and return JSON:
Text: {text}
Return format (JSON only):
{{
  "entities": ["entity1", "entity2", ...],
  "relations": [["entity1", "relation", "entity2"], ...]
}}"""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"text": text})
        try:
            return json.loads(response)
        except Exception:
            return {"entities": [], "relations": []}

    def build_knowledge_graph(self, documents: List[str]):
        """Build the knowledge graph."""
        for doc in documents:
            extracted = self.extract_entities_and_relations(doc)
            # Add to the NetworkX graph
            for entity in extracted["entities"]:
                self.nx_graph.add_node(entity)
            for rel in extracted["relations"]:
                if len(rel) == 3:
                    self.nx_graph.add_edge(rel[0], rel[2], relation=rel[1])
            # Persist to Neo4j
            for entity in extracted["entities"]:
                self.graph_db.query(
                    "MERGE (e:Entity {name: $name})",
                    {"name": entity}
                )
            for rel in extracted["relations"]:
                if len(rel) == 3:
                    self.graph_db.query(
                        """MATCH (a:Entity {name: $from})
                        MATCH (b:Entity {name: $to})
                        MERGE (a)-[r:RELATED {type: $rel}]->(b)""",
                        {"from": rel[0], "to": rel[2], "rel": rel[1]}
                    )

    def detect_communities(self) -> List[List[str]]:
        """Community detection (Louvain)."""
        from networkx.algorithms import community
        if len(self.nx_graph.nodes()) == 0:
            return []
        communities = community.louvain_communities(self.nx_graph)
        return [list(c) for c in communities]

    def generate_community_summaries(self, communities: List[List[str]]) -> List[Dict]:
        """Generate a summary for each community."""
        summaries = []
        for i, comm in enumerate(communities):
            subgraph = self.nx_graph.subgraph(comm)
            edges_info = [(u, v, d.get('relation', '')) for u, v, d in subgraph.edges(data=True)]
            prompt = ChatPromptTemplate.from_template(
                """Write a short summary for the following group of entities:
Entities: {entities}
Relations: {relations}
Summary:"""
            )
            chain = prompt | self.llm | StrOutputParser()
            summary = chain.invoke({"entities": comm, "relations": edges_info})
            summaries.append({"community": i, "entities": comm, "summary": summary})
        return summaries

    def query(self, question: str) -> str:
        """Graph-based retrieval and answering."""
        # 1. Extract the key entities from the question
        entities = self.extract_entities_and_relations(question)["entities"]
        # 2. Look up the relevant subgraph in Neo4j
        graph_context = self.graph_db.query(
            """MATCH (e:Entity)-[r]-(related)
            WHERE e.name IN $entities
            RETURN e.name AS entity, type(r) AS rel_type, r.type AS relation, related.name AS related_entity
            LIMIT 20""",
            {"entities": entities}
        )
        # 3. Fetch community summaries
        communities = self.detect_communities()
        summaries = self.generate_community_summaries(communities[:3])
        # 4. Generate the answer
        context = f"Graph relations: {graph_context}\nCommunity summaries: {summaries}"
        prompt = ChatPromptTemplate.from_template(
            """Answer the question based on the following knowledge-graph information:
{context}
Question: {question}
Answer:"""
        )
        chain = prompt | self.llm | StrOutputParser()
        return chain.invoke({"context": context, "question": question})

# Usage example
grag = GraphRAG()
grag.build_knowledge_graph([
    "Zhang San is the CEO of ABC Company, which is based in Beijing",
    "Li Si is the CTO of ABC Company and was Zhang San's university classmate",
    "ABC Company developed Product X, a market-share leader"
])
answer = grag.query("Who is in ABC Company's leadership?")
print(answer)
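The extraction step parses the raw completion with json.loads, which breaks whenever the model wraps its answer in a markdown ```json fence. LangChain's JsonOutputParser tolerates that. A sketch of the variant (the helper name and the parser swap are my own):

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from typing import Dict

def extract_with_parser(grag: GraphRAG, text: str) -> Dict:
    """Same extraction prompt, but parsed with JsonOutputParser."""
    prompt = ChatPromptTemplate.from_template(
        """Extract entities and relations from the following text and return JSON
with the keys "entities" and "relations":
Text: {text}"""
    )
    # JsonOutputParser strips markdown fences and parses the JSON payload
    chain = prompt | grag.llm | JsonOutputParser()
    try:
        return chain.invoke({"text": text})
    except Exception:
        return {"entities": [], "relations": []}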
Overview:
Self-RAG gives the model the ability to evaluate itself and make decisions. Four reflection tokens (Retrieve / ISREL / ISSUP / ISUSE) judge whether retrieval is needed, whether a document is relevant, whether the answer is supported, and whether the answer is useful. The model generates multiple candidate answers, scores them, and returns the best one.
Architecture:

Implementation steps:
1. Retrieval decision (Retrieve): ask the LLM whether the question needs external knowledge at all; if not, answer directly.
2. Otherwise retrieve the top-k documents.
3. For each document, check relevance (ISREL), generate a candidate answer, then score support (ISSUP) and usefulness (ISUSE).
4. Combine the scores and return the best candidate.
Reference code:
from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import LanceDB
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import lancedb
from typing import List, Tuple

class SelfRAG:
    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4", temperature=0)
        # Lightweight all-MiniLM-L6-v2 model, only ~80 MB
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True}
        )
        self.db = lancedb.connect("/tmp/lancedb_self_rag")
        self.vectorstore = None

    def build_index(self, documents: List[str]):
        """Build the vector index."""
        docs = [Document(page_content=d) for d in documents]
        self.vectorstore = LanceDB.from_documents(
            docs,
            self.embeddings,
            connection=self.db,
            table_name="self_rag_docs"
        )

    def should_retrieve(self, query: str) -> bool:
        """Decide whether retrieval is needed (the Retrieve token)."""
        prompt = ChatPromptTemplate.from_template(
            """Decide whether the following question requires retrieving external knowledge to answer.
Question: {query}
If the question needs factual knowledge, up-to-date information, or domain-specific knowledge, answer YES.
If it is a general or pure-reasoning question, answer NO.
Answer only YES or NO:"""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"query": query}).strip().upper()
        return "YES" in response

    def evaluate_relevance(self, query: str, document: str) -> Tuple[bool, float]:
        """Score document relevance (the ISREL token)."""
        prompt = ChatPromptTemplate.from_template(
            """Rate the relevance of the document to the question on a 1-5 scale.
Question: {query}
Document: {document}
Return format: score|reason
Example: 4|The document directly answers the core of the question"""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"query": query, "document": document})
        try:
            score = int(response.split("|")[0].strip())
            return score >= 3, score / 5.0
        except Exception:
            return True, 0.6

    def evaluate_support(self, document: str, answer: str) -> Tuple[bool, float]:
        """Score whether the answer is supported by the document (the ISSUP token)."""
        prompt = ChatPromptTemplate.from_template(
            """Rate how well the answer is supported by the document on a 1-5 scale.
Document: {document}
Answer: {answer}
Return format: score|reason"""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"document": document, "answer": answer})
        try:
            score = int(response.split("|")[0].strip())
            return score >= 3, score / 5.0
        except Exception:
            return True, 0.6

    def evaluate_usefulness(self, query: str, answer: str) -> Tuple[bool, float]:
        """Score answer usefulness (the ISUSE token)."""
        prompt = ChatPromptTemplate.from_template(
            """Rate how useful the answer is for the user's question on a 1-5 scale.
Question: {query}
Answer: {answer}
Return format: score|reason"""
        )
        chain = prompt | self.llm | StrOutputParser()
        response = chain.invoke({"query": query, "answer": answer})
        try:
            score = int(response.split("|")[0].strip())
            return score >= 3, score / 5.0
        except Exception:
            return True, 0.6

    def generate_with_context(self, query: str, context: str) -> str:
        """Generate an answer from the given context."""
        prompt = ChatPromptTemplate.from_template(
            """Answer the question based on the following context. If the context is insufficient, say so.
Context: {context}
Question: {query}
Answer:"""
        )
        chain = prompt | self.llm | StrOutputParser()
        return chain.invoke({"context": context, "query": query})

    def generate_without_context(self, query: str) -> str:
        """Generate directly, without retrieval."""
        prompt = ChatPromptTemplate.from_template("Please answer the following question: {query}")
        chain = prompt | self.llm | StrOutputParser()
        return chain.invoke({"query": query})

    def query(self, question: str) -> str:
        """The main Self-RAG flow."""
        # 1. Retrieval decision
        need_retrieval = self.should_retrieve(question)
        if not need_retrieval:
            # Answer directly (with an optional ISUSE check on the result)
            answer = self.generate_without_context(question)
            _, usefulness = self.evaluate_usefulness(question, answer)
            return answer
        # 2. Retrieve documents
        retriever = self.vectorstore.as_retriever(search_kwargs={"k": 3})
        docs = retriever.invoke(question)
        # 3. Generate a candidate answer per document and score it.
        #    (The source cuts off here; the rest is reconstructed from the
        #    overview above: ISREL -> generate -> ISSUP/ISUSE -> pick the best.)
        candidates = []
        for doc in docs:
            is_relevant, rel_score = self.evaluate_relevance(question, doc.page_content)  # ISREL
            if not is_relevant:
                continue
            answer = self.generate_with_context(question, doc.page_content)
            _, sup_score = self.evaluate_support(doc.page_content, answer)  # ISSUP
            _, use_score = self.evaluate_usefulness(question, answer)       # ISUSE
            # Equal weighting of the three scores is an illustrative choice
            candidates.append((answer, (rel_score + sup_score + use_score) / 3))
        if not candidates:
            return self.generate_without_context(question)
        # 4. Return the highest-scoring candidate
        return max(candidates, key=lambda c: c[1])[0]
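A usage example, mirroring the pattern of the earlier sections:

# Usage example
self_rag = SelfRAG()
self_rag.build_index(["Document 1...", "Document 2...", "Document 3..."])
answer = self_rag.query("Your question here")
print(answer)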
---
Original post: https://juejin.cn/post/7585390679399333894