赞
踩
步骤如下:
文档加载器:它用于将数据加载为文档。
文档转换器:它将文档分成更小的块。
嵌入:它将块转换为向量表示,即嵌入。
嵌入向量存储:用于将上述块向量存储在矢量数据库中。
检索器:它用于检索一组向量,这些向量以嵌入在相同Latent空间中的向量的形式与查询最相似。
生成Prompt
LLM 检索
代码如下:
"""Streamlit app: conversational Q&A over an uploaded text document.

Pipeline: load the uploaded .txt -> split it into chunks -> embed each
chunk with LlamaCpp embeddings -> store vectors in Chroma -> retrieve the
chunk most similar to the user's question -> answer with an LLMChain.
"""
import os

import streamlit as st
from langchain.llms import LlamaCpp
from langchain.embeddings import LlamaCppEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Customize the layout
st.set_page_config(page_title="DOCAI", page_icon=" ", layout="wide", )
st.markdown(f""" <style> .stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80"); background-attachment: fixed; background-size: cover}} </style> """, unsafe_allow_html=True)


# function for writing uploaded file in temp
def write_text_file(content, file_path):
    """Write ``content`` (str) to ``file_path``, creating parent dirs.

    Returns True on success, False on any I/O error. Best-effort by
    design: errors are printed, not raised, so the UI keeps running.
    """
    try:
        # Bug fix: the original assumed the target directory ("temp/")
        # already existed and raised FileNotFoundError on a fresh setup.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)
        # Explicit UTF-8 so the upload (decoded as UTF-8 below) survives
        # round-tripping on platforms whose default encoding differs.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(content)
        return True
    except Exception as e:
        print(f"Error occurred while writing the file: {e}")
        return False


# set prompt template
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. 
{context} Question: {question} Answer:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# initialize the LLM & Embeddings
llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
llm_chain = LLMChain(llm=llm, prompt=prompt)

st.title(" Document Conversation ")
uploaded_file = st.file_uploader("Upload an article", type="txt")

if uploaded_file is not None:
    content = uploaded_file.read().decode('utf-8')
    # st.write(content)
    file_path = "temp/file.txt"
    write_text_file(content, file_path)

    # Index the document: load -> chunk (100 chars, no overlap) -> embed.
    loader = TextLoader(file_path)
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
    texts = text_splitter.split_documents(docs)
    db = Chroma.from_documents(texts, embeddings)
    st.success("File Loaded Successfully!!")

    # Query through LLM
    question = st.text_input("Ask something from the file", placeholder="Find something similar to: ....this.... in the text?", disabled=not uploaded_file,)
    if question:
        # Retrieve only the single most similar chunk as the context.
        similar_doc = db.similarity_search(question, k=1)
        context = similar_doc[0].page_content
        # Bug fix: reuse the chain built at startup instead of
        # constructing a duplicate LLMChain on every question.
        response = llm_chain.run({"context": context, "question": question})
        st.write(response)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。