# Install the third-party packages used by the examples below (shell commands).
pip install langchain
pip install langchain_community
# The embedding backend is published as "sentence-transformers"; pip treats
# "_" and "-" as equivalent, so this single line is the correct install.
# NOTE(review): the singular "sentence-transformer" line was dropped as an
# apparent misspelling of this package -- confirm.
pip install sentence_transformers
pip install chromadb
# The examples import AutoModelForCausalLM, AutoTokenizer and snapshot_download
# from modelscope, so it has to be installed as well.
pip install modelscope
from langchain.llms.base import LLM
from typing import Any, List, Optional

from langchain.llms.base import LLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from pydantic import BaseModel, Field
from modelscope import AutoModelForCausalLM, AutoTokenizer


class QwenLLM(LLM):
    """LangChain ``LLM`` wrapper around a locally loaded Qwen2 instruct model.

    The constructor loads the ``qwen/Qwen2-7B-Instruct`` tokenizer and model
    through modelscope (cached under ``models``) and places the model on
    ``cuda``.  ``_call`` turns one prompt string into one completion string.
    """

    # Original author's note: these field declarations are required.
    # (Presumably because the LangChain base class is a pydantic model and
    # only declared fields can be assigned in __init__ -- confirm.)
    model: Any = Field(description="Qwen2-Model")
    tokenizer: Any = Field(description="Qwen2-Tokenizer")

    def __init__(self):
        """Load the tokenizer and the model weights."""
        super().__init__()
        model_name = "qwen/Qwen2-7B-Instruct"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir="models")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            cache_dir="models",
            torch_dtype="auto",
            device_map="cuda",
        )

    @property
    def _llm_type(self) -> str:
        """Return the identifier LangChain uses for this LLM type."""
        return "QwenLLM"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: The text sent to the model.
            stop: Optional stop strings; the completion is cut at the first
                occurrence of any of them.
            run_manager: Unused; accepted for LangChain compatibility.
            **kwargs: Unused; accepted for LangChain compatibility.

        Returns:
            The decoded completion, without the prompt tokens.
        """
        model_inputs = self.tokenizer([prompt], return_tensors="pt").to("cuda")
        generated_ids = self.model.generate(
            model_inputs.input_ids,
            max_new_tokens=512,
            temperature=0.01,
        )
        # generate() returns prompt + completion; keep only the new tokens.
        completion_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = self.tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

        # Honour the caller's stop strings instead of silently ignoring them.
        for stop_text in stop or []:
            cut = response.find(stop_text) if stop_text else -1
            if cut != -1:
                response = response[:cut]
        return response


# The class defined above is QwenLLM; the original `Qwen()` raised NameError.
llm = QwenLLM()
## Creating a PromptTemplate
# Import from langchain.prompts, the same path the later sections of this file use.
from langchain.prompts import PromptTemplate

template = "请翻译下面这段话:{english}。"
prompt_template = PromptTemplate(input_variables=["english"], template=template)
word = "hello world!"

## Using the PromptTemplate
# Render the template once (the original formatted the same prompt twice),
# then send the resulting string to the model.
prompt = prompt_template.format(english=word)
# Use invoke(), matching how the model is called in the parser example.
response = llm.invoke(prompt)
下面简单介绍如何使用StructuredOutputParser, ResponseSchema返回并解析一个Json
# Using an output parser: ask the model for JSON and parse it into a dict.
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# Declare the fields expected in the answer.
response_schemas = [
    ResponseSchema(name="age", description="The age of the writer"),
    ResponseSchema(name="sex", description="Whether the writer is male or female"),
    ResponseSchema(name="hair", description="The color of the writer's hair"),
]

# Build the parser from the declared response structure.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

# Formatting instructions that tell the model how to lay out its answer.
# Fetched once; the original called get_format_instructions() twice and never
# used the first result.
format_instruction = output_parser.get_format_instructions()

passage = "I am a young lady, I am twenty years old, I have long red hair"

# Template describing the extraction task.
# NOTE(review): line breaks inside this literal were lost in the source and
# are reconstructed here -- confirm against the original.
passage_template = '''For the following text, extract the following information:

age: How old is the writter?
sex: Is he a man or women?
hair: How about his hair color?

text:{text}

{format_instruction}
'''

# Create the prompt template.
prompt_template = PromptTemplate(
    input_variables=["text", "format_instruction"],
    template=passage_template,
)

# Fill in the template and run inference.
prompt = prompt_template.format(text=passage, format_instruction=format_instruction)
response = llm.invoke(prompt)

# At this point the response is only a JSON-looking string.
print(response)

# Parse it into an actual structured object.
ans_json = output_parser.parse(response)
print(ans_json)
### Memory
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate

# Two memory types are available:
#   - ConversationBufferWindowMemory lets you set how many turns are remembered
#   - ConversationBufferMemory simply remembers everything
# memory = ConversationBufferWindowMemory(k=10)
memory = ConversationBufferMemory()

# The memory is attached through a conversation chain.
# verbose=True shows prompt-related details.
conversation = ConversationChain(llm=llm, memory=memory, verbose=True)

template = "问题:{question} ,只针对问题回答,不要发散问题!"
question_template = PromptTemplate(input_variables=["question"], template=template)

# Three consecutive turns; the last one goes in without the template.
user_turns = (
    question_template.format(question="你好,我是徐!请介绍下你自己!"),
    question_template.format(question="请问1+1等于几"),
    "我的名字是什么?",
)
for turn_index, user_turn in enumerate(user_turns):
    if turn_index > 0:
        # Separator between consecutive answers.
        print("------------")
    print(conversation.predict(input=user_turn))
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Prompt with a single {question} placeholder.
# The original call was missing `input_variables` and its closing parenthesis,
# which made the rest of the file a syntax error.
question_prompt = PromptTemplate(
    input_variables=["question"],
    template="Question: {question}\nAnswer:",
)
# Create the LLMChain
qa_chain = LLMChain(llm=llm, prompt=question_prompt)
# Run the chain
answer = qa_chain.run(question="What is the capital of France?")
from langchain.chains import SimpleSequentialChain
# First step: translate the English sentence into Chinese.
# NOTE(review): the original `PromptTemplate(` call was truncated in the
# source; the arguments below are reconstructed from the surrounding
# comments (English in, Chinese out) -- confirm the exact wording.
translate_prompt = PromptTemplate(
    input_variables=["english_text"],
    template="请将下面这句英文翻译成中文:{english_text}",
)
# explain_prompt is written to accept the translated Chinese sentence.
explain_prompt = ChatPromptTemplate.from_template("请告诉我更多关于这句话的信息:{translated_text}")
# Create the LLMChain instances; chain_two's input variable is translated_text.
chain_one = LLMChain(llm=llm, prompt=translate_prompt)
chain_two = LLMChain(llm=llm, prompt=explain_prompt)
# Create the SimpleSequentialChain; the order of `chains` is the run order.
over_all_chain = SimpleSequentialChain(chains=[chain_one, chain_two], verbose=True)
# Run the whole chain, passing in the original English sentence.
result = over_all_chain.run("Hello World!")
## Sequential Chain
## SequentialChain allows a freer definition: every step names its output,
## and later steps can consume any earlier output or the original input.
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema

# The template's only placeholder is {Input}, so that is the variable that
# must be declared.  The original declared "english", which does not match
# the template or the chain's "Input" key.
first_prompt = PromptTemplate(
    input_variables=["Input"],
    template="Please just translate the following sentence in to English.\n\nsentence:{Input}",
)
second_prompt = ChatPromptTemplate.from_template(
    "Please tell what {English_output} is used for in computer science."
)
third_prompt = ChatPromptTemplate.from_template(
    "Please tell me the language name of following sentence. \n\nsentence:{Input} \nLanguage_name:"
)
# NOTE(review): line breaks inside this literal were lost in the source and
# are reconstructed here -- confirm against the original.
forth_prompt = ChatPromptTemplate.from_template(
    '''You are a translate expert, you can translate english to any language.
Translate following whole sentence into Chinese, don't cut it off.

sentence:{Explain_output}
translated:'''
)

# Each step publishes its result under `output_key` for later steps to use.
chain_one = LLMChain(llm=llm, prompt=first_prompt, output_key="English_output")
chain_two = LLMChain(llm=llm, prompt=second_prompt, output_key="Explain_output")
chain_three = LLMChain(llm=llm, prompt=third_prompt, output_key="language")
chain_four = LLMChain(llm=llm, prompt=forth_prompt, output_key="follow_message")

## Declare the inputs and outputs and their keys.
over_all_chain = SequentialChain(
    chains=[chain_one, chain_two, chain_three, chain_four],
    input_variables=["Input"],
    output_variables=["English_output", "Explain_output", "language", "follow_message"],
    verbose=True,
)

## This chain is run differently: it is called directly and the result is
## indexed by output key.
answer = over_all_chain("你好,世界!")
print(answer["Explain_output"])
print(answer["follow_message"])
## Router Chain
## A router chain forwards the input to one of several sub-chains.
## LLMRouterChain uses the language model to choose between the sub-chains.
## RouterOutputParser parses the model output to find the chosen sub-chain.
from langchain.chains.router import MultiPromptChain
from langchain.chains.router.llm_router import LLMRouterChain, RouterOutputParser
from langchain.prompts import ChatPromptTemplate

# NOTE(review): the line breaks inside the three long literals below were lost
# in the source and are reconstructed here -- confirm against the original.
physics_template = """You are a very smart physics professor. \
You are great at answering questions about physics in a concise\
and easy to understand manner. \
When you don't know the answer to a question you admit\
that you don't know.

Here is a question:
{input}
Assistant:"""

computerscience_template = """ You are a successful computer scientist.\
You have a passion for creativity, collaboration,\
forward-thinking, confidence, strong problem-solving capabilities,\
understanding of theories and algorithms, and excellent communication \
skills. You are great at answering coding questions. \
You are so good because you know how to solve a problem by \
describing the solution in imperative steps \
that a machine can easily interpret and you know how to \
choose a solution that has a good balance between \
time complexity and space complexity. 

Here is a question:
{input}
Assistant:"""

## The description decides which prompt gets used.
prompt_info = [
    {
        "name": "physics",
        "description": "Good for answering questions about physics",
        "prompt_template": physics_template,
    },
    {
        "name": "computer science",
        "description": "Good for answering questions about computer science",
        "prompt_template": computerscience_template,
    },
]

## Build the collection of sub-chains the router can choose from,
## keyed by prompt name.
destination_chains = {
    entry["name"]: LLMChain(
        llm=llm,
        prompt=ChatPromptTemplate.from_template(entry["prompt_template"]),
    )
    for entry in prompt_info
}

# One "name: description" line per candidate, shown to the router model.
destinations_str = "\n".join(
    f"{entry['name']}: {entry['description']}" for entry in prompt_info
)

# Fallback used when no candidate prompt fits: pass the input through as-is.
default_chain = LLMChain(llm=llm, prompt=ChatPromptTemplate.from_template("{input}"))

# Braces are escaped twice: once for str.format below, once for PromptTemplate.
MULTI_PROMPT_ROUTER_TEMPLATE = """Given a raw text input to a \
language model select the model prompt best suited for the input. \
You will be given the names of the available prompts and a \
description of what the prompt is best suited for. \
You may also revise the original input if you think that revising\
it will ultimately lead to a better response from the language model.

<< FORMATTING >>
Return a markdown code snippet with a JSON object formatted to look like:
```json
{{{{
    "destination": string \\ name of the prompt to use or "DEFAULT"
    "next_inputs": string \\ a potentially modified version of the original input
}}}}
```

REMEMBER: "destination" MUST be one of the candidate prompt \
names specified below OR it can be "DEFAULT" if the input is not\
well suited for any of the candidate prompts.
REMEMBER: "next_inputs" can just be the original input \
if you don't think any modifications are needed. 

<< CANDIDATE PROMPTS >>
{destinations}

<< INPUT >>
{{input}}

<< OUTPUT (remember to include the ```json)>>"""

# Insert the candidate list; {{input}} becomes {input} for the PromptTemplate.
router_template = MULTI_PROMPT_ROUTER_TEMPLATE.format(destinations=destinations_str)
router_prompt = PromptTemplate(
    template=router_template,
    input_variables=["input"],
    output_parser=RouterOutputParser(),
)

### The LLM-backed chain that does the routing.
router_chain = LLMRouterChain.from_llm(llm, router_prompt)

## Wire together the router chain, the destination chains and the default chain.
chain = MultiPromptChain(
    router_chain=router_chain,
    destination_chains=destination_chains,
    default_chain=default_chain,
    verbose=True,
)

answer = chain.run('什么是print("helloworld")?')
print(answer)
## Build the knowledge base
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import CSVLoader, TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from modelscope import snapshot_download

## Replace with your own document.
file = '0b46f7a2d67b5b59ad67cafffa0e12a9f0837790.txt'
loader = TextLoader(file_path=file, encoding='utf-8')
doc = loader.load()

# Split the document into overlapping chunks before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=100)
docs = text_splitter.split_documents(doc)

# Initialise the HuggingFace embeddings (the original comment said "OpenAI",
# but no OpenAI embedding is used here).
# Load the model from the directory snapshot_download returns, rather than a
# hard-coded "./.cache/..." path that only resolves from one working directory.
embedding_model_dir = snapshot_download("iic/nlp_gte_sentence-embedding_chinese-base")
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_dir)

# Create the vector database.
db = Chroma.from_documents(docs, embeddings, persist_directory="db")
# Optional: save the vector database.
db.persist()
# Question answering over the knowledge base
from langchain.chains import RetrievalQA
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="./.cache/modelscope/hub/iic/nlp_gte_sentence-embedding_chinese-base")
db = Chroma(persist_directory="db", embedding_function=embeddings)

## Set how many documents are recalled per query.
retriever = db.as_retriever(search_kwargs={"k": 10})

# Pass the configured retriever.  The original built a second, default
# `db.as_retriever()` here, so the k=10 setting above was never applied.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=False,
)

# Answer a question.
# query = "文件中的公司名字是什么?"
# result = qa_chain({"query": query})
# print(result['result'])

# With return_source_documents=True the chain has to be called via invoke:
# result = qa_chain.invoke("这个公司的主要营业内容是什么?")
# print(result['result'])
# print(result['source_documents'])

result = qa_chain.run("公司的主要资产情况如何?")
print(result)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。