赞
踩
将本地embedding数据存储到Chroma,然后使用LangChain调用openai api完成本地知识库问答
"""Store local embeddings in Chroma, then answer questions over a local
knowledge base via LangChain and the OpenAI API.

Pipeline: load a text file -> split into chunks -> embed locally with a
SentenceTransformer model -> persist the vectors to Chroma -> reload and
answer a query with a retrieval QA chain.
"""
from langchain.vectorstores import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
import os

# NOTE(review): never commit a real API key; load it from the environment or
# a secrets manager instead of hard-coding it here.
os.environ["OPENAI_API_KEY"] = 'sk-xxx'

# Load the raw text file and convert it to LangChain Document objects.
loader = TextLoader('state_of_the_union.txt')
documents = loader.load()

# Split the documents into ~1000-character chunks with no overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks with a local SentenceTransformer model and store the
# vectors under the `db/` directory.
persist_directory = 'db'
embedding = SentenceTransformerEmbeddings()
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Flush the index to disk, then drop the in-memory handle.
vectordb.persist()
vectordb = None

# Reload the persisted index from disk.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

# Build the QA chain. RetrievalQA replaces the deprecated VectorDBQA chain,
# and ChatOpenAI (not the completion-style OpenAI wrapper) is the correct
# class for the chat model "gpt-3.5-turbo".
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-3.5-turbo"),
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
)

# Run a query against the knowledge base and show the answer (the original
# discarded the result of qa.run).
query = "What did the president say about Ketanji Brown Jackson"
print(qa.run(query))
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。