Model I/O encapsulation
Data connection encapsulation
Memory encapsulation
Architecture encapsulation
Chain: implements a single capability, or a sequential combination of capabilities
Agent: given the user's input, automatically plans the execution steps, automatically chooses the tool each step needs, and finally accomplishes the task the user specified
Tools: functions that invoke external capabilities, e.g. calling Google search, file I/O, a Linux shell, etc.
Toolkits: a set of tools for operating a specific piece of software, e.g. working with a DB, working with Gmail, etc.
LangChain official site: https://python.langchain.com/docs/get_started
pip install --upgrade langchain
pip install --upgrade langchain-openai
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4")  # defaults to gpt-3.5-turbo
response = llm.invoke("你的能力是什么")
print(response.content)
A multi-turn conversation is passed as a list of typed messages:

from langchain_openai import ChatOpenAI
from langchain.schema import (
    AIMessage,      # equivalent to the assistant role in the OpenAI API
    HumanMessage,   # equivalent to the user role in the OpenAI API
    SystemMessage   # equivalent to the system role in the OpenAI API
)
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv())

llm = ChatOpenAI(model="gpt-4")  # defaults to gpt-3.5-turbo
messages = [
    SystemMessage(content="你是我的情感博主。"),
    HumanMessage(content="我是你的粉丝,我叫zgg。"),
    AIMessage(content="欢迎!"),
    HumanMessage(content="我是谁")
]
ret = llm.invoke(messages)
print(ret)
content='你是zgg,我的忠实粉丝。' response_metadata={'finish_reason': 'stop', 'logprobs': None}
A PromptTemplate lets you define custom variables inside a template.
from langchain.prompts import PromptTemplate
template = PromptTemplate.from_template("给我讲个关于{subject}的故事")
print("===Template===")
print(template)
print("===Prompt===")
print(template.format(subject='zgg'))
===Template===
input_variables=['subject'] template='给我讲个关于{subject}的故事'
===Prompt===
给我讲个关于zgg的故事
ChatPromptTemplate: a conversation context expressed as a template.
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain_openai import ChatOpenAI

template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template("你是{product}的专家。你的名字叫{name}"),
        HumanMessagePromptTemplate.from_template("{query}"),
    ]
)

llm = ChatOpenAI()
prompt = template.format_messages(
    product="情感课堂",
    name="薇薇",
    query="你是谁"
)
ret = llm.invoke(prompt)
print(ret.content)
Output
我是情感课堂的专家,名字叫微微。有什么可以帮助你的吗?
MessagesPlaceholder: turns a multi-turn conversation into a template.
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI

human_prompt = "Translate your answer to {language}."
human_message_template = HumanMessagePromptTemplate.from_template(human_prompt)

chat_prompt = ChatPromptTemplate.from_messages(
    # variable_name is the placeholder's variable name in the template,
    # used when assigning a value to it
    [MessagesPlaceholder(variable_name="conversation"), human_message_template]
)

human_message = HumanMessage(content="Who is Elon Musk?")
ai_message = AIMessage(
    content="Elon Musk is a billionaire entrepreneur, inventor, and industrial designer"
)

messages = chat_prompt.format_prompt(
    # assign values to "conversation" and "language"
    conversation=[human_message, ai_message],
    language="中文"
)
print(messages.to_messages())

llm = ChatOpenAI()  # or reuse the llm instance created above
result = llm.invoke(messages)
print(result.content)
Output
[HumanMessage(content='Who is Elon Musk?'), AIMessage(content='Elon Musk is a billionaire entrepreneur, inventor, and industrial designer'), HumanMessage(content='Translate your answer to 中文.')]
埃隆·马斯克是一位亿万富翁企业家、发明家和工业设计师。
Loading a prompt template from a file
Contents of example_prompt_template.txt:
给我讲一个关于{topic}的童话故事
from langchain.prompts import PromptTemplate
template = PromptTemplate.from_file("example_prompt_template.txt")
print("===Template===")
print(template)
print("===Prompt===")
print(template.format(topic='九色鹿'))
===Template===
input_variables=['topic'] template='给我讲一个关于{topic}的童话故事'
===Prompt===
给我讲一个关于九色鹿的童话故事
Output encapsulation: OutputParser
Automatically parses the string returned by the LLM into a specified structure.
LangChain's built-in OutputParsers include (a small sketch of the list parser follows this list):
ListParser
DatetimeParser
EnumParser
JsonOutputParser
PydanticParser
XMLParser
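For example, the comma-separated list parser both tells the model how to format its reply and parses that reply into a Python list. A minimal sketch (the input string is made up):

from langchain_core.output_parsers import CommaSeparatedListOutputParser

list_parser = CommaSeparatedListOutputParser()
# Format instructions to embed in a prompt, e.g.
# "Your response should be a list of comma separated values, ..."
print(list_parser.get_format_instructions())
# Parse a raw model reply into a Python list
print(list_parser.parse("red, green, blue"))  # -> ['red', 'green', 'blue']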
Pydantic (JSON) Parser
Automatically generates output format instructions from the definition of a Pydantic class.
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from typing import List, Dict

# Define your output object
class Date(BaseModel):
    year: int = Field(description="Year")
    month: int = Field(description="Month")
    day: int = Field(description="Day")
    era: str = Field(description="BC or AD")

    # ----- optional mechanism --------
    # You can add custom validation logic
    @validator('month')
    def valid_month(cls, field):
        if field <= 0 or field > 12:
            raise ValueError("月份必须在1-12之间")
        return field

    @validator('day')
    def valid_day(cls, field):
        if field <= 0 or field > 31:
            raise ValueError("日期必须在1-31日之间")
        return field

    @validator('day', pre=True, always=True)
    def valid_date(cls, day, values):
        year = values.get('year')
        month = values.get('month')

        # Make sure year and month have both been provided
        if year is None or month is None:
            return day  # cannot validate the date without year and month

        # Check whether the date is valid
        if month == 2:
            if cls.is_leap_year(year) and day > 29:
                raise ValueError("闰年2月最多有29天")
            elif not cls.is_leap_year(year) and day > 28:
                raise ValueError("非闰年2月最多有28天")
        elif month in [4, 6, 9, 11] and day > 30:
            raise ValueError(f"{month}月最多有30天")
        return day

    @staticmethod
    def is_leap_year(year):
        if year % 400 == 0 or (year % 4 == 0 and year % 100 != 0):
            return True
        return False
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import PydanticOutputParser

model_name = 'gpt-4'
temperature = 0
model = ChatOpenAI(model_name=model_name, temperature=temperature)

# Build an OutputParser from the Pydantic object's definition
parser = PydanticOutputParser(pydantic_object=Date)

template = """提取用户输入中的日期。
{format_instructions}
用户输入:
{query}"""

prompt = PromptTemplate(
    template=template,
    input_variables=["query"],
    # Take the format description straight from the OutputParser
    # and pre-assign it to the template variable
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

print("====Format Instruction=====")
print(parser.get_format_instructions())

query = "2023年四月6日天气晴..."
model_input = prompt.format_prompt(query=query)

print("====Prompt=====")
print(model_input.to_string())

output = model.invoke(model_input.to_messages())
print("====模型原始输出=====")
print(output.content)

print("====Parse后的输出=====")
date = parser.parse(output.content)
print(date)
====Format Instruction=====
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Here is the output schema:
{"properties": {"year": {"title": "Year", "description": "Year", "type": "integer"}, "month": {"title": "Month", "description": "Month", "type": "integer"}, "day": {"title": "Day", "description": "Day", "type": "integer"}, "era": {"title": "Era", "description": "BC or AD", "type": "string"}}, "required": ["year", "month", "day", "era"]}
====Prompt=====
提取用户输入中的日期。
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Here is the output schema:
{"properties": {"year": {"title": "Year", "description": "Year", "type": "integer"}, "month": {"title": "Month", "description": "Month", "type": "integer"}, "day": {"title": "Day", "description": "Day", "type": "integer"}, "era": {"title": "Era", "description": "BC or AD", "type": "string"}}, "required": ["year", "month", "day", "era"]}
用户输入:
2023年四月6日天气晴...
====模型原始输出=====
{"year": 2023, "month": 4, "day": 6, "era": "AD"}
====Parse后的输出=====
year=2023 month=4 day=6 era='AD'
Document Loaders
pip install pypdf
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("xxxx.pdf")
pages = loader.load_and_split()
print(pages[0].page_content)
Document processing: TextSplitter
pip install --upgrade langchain-text-splitters
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=100,  # think about it: why overlap? (answered in the note below)
    length_function=len,
    add_start_index=True,
)
paragraphs = text_splitter.create_documents([pages[0].page_content])
for para in paragraphs:
    print(para.page_content)
    print('-------')
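Why overlap: without it, a sentence cut at a chunk boundary is lost from both neighboring chunks; with overlap, the tail of each chunk is repeated at the head of the next, so text spanning a boundary survives intact in at least one chunk. A minimal sketch that makes the overlap visible (the sample string is made up):

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Toy example: with chunk_overlap, adjacent chunks share their boundary words
demo_splitter = RecursiveCharacterTextSplitter(
    chunk_size=20,
    chunk_overlap=10,
    length_function=len,
)
for chunk in demo_splitter.split_text("one two three four five six seven eight"):
    print(repr(chunk))  # tail words of one chunk reappear at the head of the next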
Vector stores and vector retrieval
Replace the document loaded here with your own.
pip install chromadb
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader

# Load the document
loader = PyPDFLoader("llama2.pdf")
pages = loader.load_and_split()

# Split the document
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
texts = text_splitter.create_documents(
    [pages[2].page_content, pages[3].page_content]
)

# Index into the vector store
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# Retrieve the top-1 result
retriever = db.as_retriever(search_kwargs={"k": 1})
docs = retriever.get_relevant_documents("llama 2有多少参数?")
print(docs[0].page_content)
Output
but are not releasing.§
2.Llama 2-Chat , a fine-tuned version of Llama 2 that is optimized for dialogue use cases. We release
variants of this model with 7B, 13B, and 70B parameters as well.
WebelievethattheopenreleaseofLLMs,whendonesafely,willbeanetbenefittosociety. LikeallLLMs,
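Besides wrapping the store in a retriever, the vector store can also be queried directly. A minimal sketch reusing the db object built above:

# Equivalent direct query against the Chroma store itself
docs = db.similarity_search("llama 2有多少参数?", k=1)
print(docs[0].page_content)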
Chain and the LangChain Expression Language (LCEL)
The LangChain Expression Language (LCEL) is a declarative language that makes it easy to compose different invocation steps into a Chain. LCEL was designed from day one to support moving prototypes into production without code changes, from the simplest "prompt + LLM" chain to the most complex ones (users have successfully run LCEL chains with hundreds of steps in production).
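As a warm-up, the simplest "prompt + LLM" chain really is just those two pieces (plus an output parser) joined with the | operator. A minimal sketch, reusing the story-telling template from earlier:

from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# prompt | model | parser, composed with the pipe operator
simple_chain = (
    ChatPromptTemplate.from_template("给我讲个关于{subject}的故事")
    | ChatOpenAI()
    | StrOutputParser()
)
print(simple_chain.invoke({"subject": "zgg"}))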
Pipeline-style composition of PromptTemplate, LLM, and OutputParser
from enum import Enum
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Output structure
class SortEnum(str, Enum):
    data = 'data'
    price = 'price'

class OrderingEnum(str, Enum):
    ascend = 'ascend'
    descend = 'descend'

class Semantics(BaseModel):
    name: Optional[str] = Field(description="流量包名称", default=None)
    price_lower: Optional[int] = Field(description="价格下限", default=None)
    price_upper: Optional[int] = Field(description="价格上限", default=None)
    data_lower: Optional[int] = Field(description="流量下限", default=None)
    data_upper: Optional[int] = Field(description="流量上限", default=None)
    sort_by: Optional[SortEnum] = Field(description="按价格或流量排序", default=None)
    ordering: Optional[OrderingEnum] = Field(description="升序或降序排列", default=None)

# OutputParser
parser = PydanticOutputParser(pydantic_object=Semantics)

# Prompt template
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "将用户的输入解析成JSON表示。输出格式如下:\n{format_instructions}\n不要输出未提及的字段。",
        ),
        ("human", "{text}"),
    ]
).partial(format_instructions=parser.get_format_instructions())

# Model
model = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)

# LCEL expression
runnable = (
    {"text": RunnablePassthrough()} | prompt | model | parser
)

# Run
ret = runnable.invoke("不超过100元的流量大的套餐有哪些")
print(ret.json())
{"name": null, "price_lower": null, "price_upper": 100, "data_lower": null, "data_upper": null, "sort_by": "data", "ordering": "descend"}
Note: in the current docs, the object produced by LCEL is called either a "runnable" or a "chain", and the two names are often used interchangeably. In essence it is simply a custom invocation pipeline.
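Because the result is a Runnable, it exposes the standard Runnable interface beyond .invoke(), such as .batch() and .stream(). A minimal sketch using the runnable built above (the second query is made up):

# .batch() runs several inputs through the same chain in one call
results = runnable.batch([
    "不超过100元的流量大的套餐有哪些",
    "100G以上的套餐,按价格升序",  # hypothetical second query
])
for r in results:
    print(r.json())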
Implementing RAG with LCEL
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Load the document
loader = PyPDFLoader("llama2.pdf")
pages = loader.load_and_split()

# Split the document
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
texts = text_splitter.create_documents(
    [pages[2].page_content, pages[3].page_content]
)

# Index into the vector store
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# Retrieve the top-1 result
retriever = db.as_retriever(search_kwargs={"k": 1})

# Prompt template
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chain (same model as the previous section)
model = ChatOpenAI(model="gpt-4-0125-preview", temperature=0)
rag_chain = (
    {"question": RunnablePassthrough(), "context": retriever}
    | prompt
    | model
    | StrOutputParser()
)
rag_chain.invoke("Llama 2有多少参数")
Implementing Function Calling with LCEL
from langchain_core.tools import tool
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import JsonOutputToolsParser
from langchain_openai import ChatOpenAI

@tool
def multiply(first_int: int, second_int: int) -> int:
    """两个整数相乘"""
    return first_int * second_int

@tool
def add(first_int: int, second_int: int) -> int:
    "Add two integers."
    return first_int + second_int

@tool
def exponentiate(base: int, exponent: int) -> int:
    "Exponentiate the base to the exponent power."
    return base**exponent

tools = [multiply, add, exponentiate]
llm = ChatOpenAI(temperature=0)  # or reuse the llm instance created earlier

# LCEL with a branch: parse tool calls and plain text in parallel
llm_with_tools = llm.bind_tools(tools) | {
    "functions": JsonOutputToolsParser(),
    "text": StrOutputParser()
}

result = llm_with_tools.invoke("1024的16倍是多少")
print(result)
{'functions': [{'type': 'multiply', 'args': {'first_int': 1024, 'second_int': 16}}], 'text': ''}
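Note that JsonOutputToolsParser only extracts the tool name and arguments; nothing is executed. A minimal sketch (not part of the original) that dispatches the parsed calls to the actual tool functions, assuming the result shape shown above:

# Map tool names to the @tool-decorated functions, then execute each parsed call
tool_map = {t.name: t for t in tools}
for call in result["functions"]:
    selected = tool_map[call["type"]]      # "type" holds the tool name
    print(selected.invoke(call["args"]))   # multiply(1024, 16) -> 16384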