赞
踩
1、模型I/O封装
#安装最新版本
!pip install langchain==0.1.0
!pip install langchain-openai # v0.1.0新增的底包
from langchain_openai import ChatOpenAI
# Create the chat model client, explicitly selecting gpt-4.
llm = ChatOpenAI(model="gpt-4") # the default is gpt-3.5-turbo
# Send a single plain-string prompt and print only the text of the reply.
response = llm.invoke("你是谁")
print(response.content)
答复:
我是OpenAI的人工智能助手。我被设计出来是为了帮助解答问题、提供信息和帮助用户完成各种任务。
from langchain.schema import (
AIMessage, # equivalent to the assistant role in the OpenAI API
HumanMessage, # equivalent to the user role in the OpenAI API
SystemMessage # equivalent to the system role in the OpenAI API
)
# A multi-turn conversation expressed as typed messages: a system
# instruction, one finished user/assistant exchange, then the new question.
messages = [
SystemMessage(content="你是AGIClass的课程助理。"),
HumanMessage(content="我是学员,我叫王卓然。"),
AIMessage(content="欢迎!"),
HumanMessage(content="我是谁")
]
# Pass the whole message list to the `llm` client created earlier in the file.
llm.invoke(messages)
答复:
AIMessage(content='您是学员王卓然。')
# Wrappers for other models live in the langchain_community package
from langchain_community.chat_models import ErnieBotChat
from langchain.schema import HumanMessage
# No arguments are passed to the constructor here.
# NOTE(review): presumably credentials are read from the environment — confirm.
ernie = ErnieBotChat()
# A single-turn conversation containing only the user question.
messages = [
HumanMessage(content="你是谁")
]
ernie.invoke(messages)
答复:
AIMessage(content='您好,我是百度研发的知识增强大语言模型,中文名是文心一言,英文名是ERNIE Bot。我能够与人对话互动,回答问题,协助创作,高效便捷地帮助人们获取信息、知识和灵感。\n\n如果您有任何问题,请随时告诉我。')
①主题格式化封装
from langchain.prompts import PromptTemplate
# Build a prompt template from a format string; {subject} is the placeholder.
template = PromptTemplate.from_template("给我讲个关于{subject}的笑话")
# Print the template object itself, then the prompt text with subject filled in.
print(template)
print(template.format(subject='小明'))
答复:
input_variables=['subject'] template='给我讲个关于{subject}的笑话'
给我讲个关于小明的笑话
②更加复杂的主题应用
from langchain.prompts import ChatPromptTemplate
from langchain.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
# ChatOpenAI is imported from langchain_openai (the package installed at the
# top of this file) rather than the deprecated langchain.chat_models shim.
from langchain_openai import ChatOpenAI

# A chat prompt made of a parameterised system message and a user message
# that is filled entirely from the {query} variable.
template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template("你是{product}的客服助手。你的名字叫{name}"),
        HumanMessagePromptTemplate.from_template("{query}"),
    ]
)

llm = ChatOpenAI()

# Fill in every template variable to obtain the concrete message list.
prompt = template.format_messages(
    product="AGI课堂",
    name="瓜瓜",
    query="你是谁"
)

llm.invoke(prompt)
AIMessage(content='我是AGI课堂的客服助手,名字叫瓜瓜。我可以回答关于AGI课堂的问题,提供帮助和支持。有什么我可以帮助你的吗?')
①Yaml格式
_type: prompt
input_variables:
    ["adjective", "content"]
template:
    Tell me a {adjective} joke about {content}.
②JSON格式
{
"_type": "prompt",
"input_variables": ["adjective", "content"],
"template": "Tell me a {adjective} joke about {content}."
}
from langchain.prompts import load_prompt
# Load a prompt template definition from a file (the YAML variant here).
prompt = load_prompt("simple_prompt.yaml")
# OR
# prompt = load_prompt("simple_prompt.json")
# Fill both template variables and print the resulting prompt text.
print(prompt.format(adjective="funny", content="Xiao Ming"))
回复:
Tell me a funny joke about Xiao Ming.
自动把 LLM 输出的字符串按指定格式加载。
LangChain 内置的 OutputParser 包括:
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from typing import List, Dict


# Define the output object: a calendar date with an era marker.
# NOTE: documented with comments rather than a class docstring on purpose —
# pydantic v1 copies a model's docstring into its JSON schema, which would
# change the format instructions generated from this class.
class Date(BaseModel):
    year: int = Field(description="Year")
    month: int = Field(description="Month")
    day: int = Field(description="Day")
    era: str = Field(description="BC or AD")

    # ----- Optional mechanism -----
    # Custom validators can be added on top of the type checks.
    @validator('month')
    def valid_month(cls, field):
        """Reject months outside 1-12."""
        if field <= 0 or field > 12:
            raise ValueError("月份必须在1-12之间")
        return field

    @validator('day')
    def valid_day(cls, field):
        """Reject days outside 1-31 (month-independent range check)."""
        if field <= 0 or field > 31:
            raise ValueError("日期必须在1-31日之间")
        return field

    # Runs after type coercion (no pre=True): `day` is compared against ints
    # below, so it must already be an int. As a pre-validator a raw value such
    # as "30" would fail with a str/int comparison error instead of being
    # checked against the month length.
    @validator('day')
    def valid_date(cls, day, values):
        """Check `day` against the length of the already-validated month.

        `values` holds the fields validated before `day`; `year` or `month`
        is absent from it when that field failed its own validation.
        """
        year = values.get('year')
        month = values.get('month')

        # Without both year and month the day cannot be cross-checked.
        if year is None or month is None:
            return day

        if month == 2:
            if cls.is_leap_year(year) and day > 29:
                raise ValueError("闰年2月最多有29天")
            elif not cls.is_leap_year(year) and day > 28:
                raise ValueError("非闰年2月最多有28天")
        elif month in [4, 6, 9, 11] and day > 30:
            raise ValueError(f"{month}月最多有30天")

        return day

    @staticmethod
    def is_leap_year(year):
        """Return True when `year` is a leap year under the Gregorian rule."""
        return year % 400 == 0 or (year % 4 == 0 and year % 100 != 0)
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser

model_name = 'gpt-4'
temperature = 0
model = ChatOpenAI(model_name=model_name, temperature=temperature)

# Build an OutputParser from the definition of the Pydantic object.
parser = PydanticOutputParser(pydantic_object=Date)

template = """提取用户输入中的日期。
{format_instructions}
用户输入:
{query}"""

prompt = PromptTemplate(
    template=template,
    input_variables=["query"],  # the query changes with every user request
    # Take the output description straight from the OutputParser and bind it
    # up front, so format_instructions is already filled in for every call.
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

print("====Format Instruction=====")
print(parser.get_format_instructions())

query = "2023年四月6日天气晴..."
model_input = prompt.format_prompt(query=query)

print("====Prompt=====")
print(model_input.to_string())

# Call the model through invoke(), as the other cells in this file do,
# instead of the legacy direct-call form model(...).
output = model.invoke(model_input.to_messages())
print("====模型原始输出=====")
print(output)

print("====Parse后的输出=====")
date = parser.parse(output.content)
print(date)
回复:
====Format Instruction=====
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Here is the output schema:
{"properties": {"year": {"title": "Year", "description": "Year", "type": "integer"}, "month": {"title": "Month", "description": "Month", "type": "integer"}, "day": {"title": "Day", "description": "Day", "type": "integer"}, "era": {"title": "Era", "description": "BC or AD", "type": "string"}}, "required": ["year", "month", "day", "era"]}
====Prompt=====
提取用户输入中的日期。
The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
Here is the output schema:
{"properties": {"year": {"title": "Year", "description": "Year", "type": "integer"}, "month": {"title": "Month", "description": "Month", "type": "integer"}, "day": {"title": "Day", "description": "Day", "type": "integer"}, "era": {"title": "Era", "description": "BC or AD", "type": "string"}}, "required": ["year", "month", "day", "era"]}
用户输入:
2023年四月6日天气晴...
====模型原始输出=====
content='{"year": 2023, "month": 4, "day": 6, "era": "AD"}'
====Parse后的输出=====
year=2023 month=4 day=6 era='AD'
from langchain.output_parsers import OutputFixingParser

# Wrap the existing parser together with an LLM in an OutputFixingParser.
new_parser = OutputFixingParser.from_llm(
    llm=ChatOpenAI(model="gpt-4"),
    parser=parser,
)

# Deliberately corrupt the earlier model output: every "4" in its text
# becomes "四月", which leaves the JSON invalid.
output = output.content.replace("4", "四月")
print("===格式错误的Output===", output, sep="\n")

# The plain parser is expected to fail on the corrupted text; show the error.
try:
    date = parser.parse(output)
except Exception as err:
    print("===出现异常===", err, sep="\n")

# Parse the same text again through the OutputFixingParser.
date = new_parser.parse(output)
print("===重新解析结果===", date, sep="\n")
答复:
===格式错误的Output===
{"year": 2023, "month": 四月, "day": 6, "era": "AD"}
===出现异常===
Failed to parse Date from completion {"year": 2023, "month": 四月, "day": 6, "era": "AD"}. Got: Expecting value: line 1 column 25 (char 24)
===重新解析结果===
year=2023 month=4 day=6 era='AD'
!pip install pypdf
# PyPDFLoader is imported from langchain_community, where the integration
# lives, rather than the deprecated langchain.document_loaders shim.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("llama2.pdf")
# Load the PDF and split it into a list of Document chunks.
pages = loader.load_and_split()

# Show the text of the first chunk.
print(pages[0].page_content)
代码拆分段落
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for 200-character chunks with 100 characters of
# overlap, measured with len(); the start index of each chunk is recorded.
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    length_function=len,
    chunk_overlap=100,  # food for thought: why overlap the chunks?
    chunk_size=200,
)

# Split only the first loaded page, then print each chunk followed by a
# separator line.
paragraphs = text_splitter.create_documents([pages[0].page_content])
for chunk in paragraphs:
    print(chunk.page_content, '-------', sep='\n')
!pip install chromadb
# Loaders and vector stores are imported from langchain_community, where the
# integrations live, rather than the deprecated langchain.document_loaders /
# langchain.vectorstores shims.
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader

# Load the document
loader = PyPDFLoader("llama2.pdf")
pages = loader.load_and_split()

# Split the document into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)

# Only the third and fourth loaded pages are indexed in this demo.
texts = text_splitter.create_documents([pages[2].page_content, pages[3].page_content])

# Populate the vector store
embeddings = OpenAIEmbeddings()
db = Chroma.from_documents(texts, embeddings)

# LangChain's built-in RAG implementation
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(temperature=0),
    retriever=db.as_retriever()
)

query = "llama 2有多少参数?"
response = qa_chain.invoke(query)

# The answer text is stored under the "result" key of the response.
print(response["result"])
答复:
Llama 2有7B、13B和70B参数的变体。
1、这部分能力 LangChain 的实现非常粗糙;
2、实际生产中,建议自己实现,不建议用 LangChain 的工具。
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

# Record two exchanges one at a time, printing the accumulated memory after
# each so the growth of the buffer is visible.
history = ConversationBufferMemory()
for user_text, ai_text in [("你好啊", "你也好啊"), ("你再好啊", "你又好啊")]:
    history.save_context({"input": user_text}, {"output": ai_text})
    print(history.load_memory_variables({}))
回复:
{'history': 'Human: 你好啊\nAI: 你也好啊'}
{'history': 'Human: 你好啊\nAI: 你也好啊\nHuman: 你再好啊\nAI: 你又好啊'}
from langchain.memory import ConversationBufferWindowMemory

# Window memory created with k=1; three rounds are saved and the memory is
# printed once at the end.
window = ConversationBufferWindowMemory(k=1)
for question, answer in [
    ("第一轮问", "第一轮答"),
    ("第二轮问", "第二轮答"),
    ("第三轮问", "第三轮答"),
]:
    window.save_context({"input": question}, {"output": answer})
print(window.load_memory_variables({}))
回复:
{'history': 'Human: 第三轮问\nAI: 第三轮答'}
from langchain.memory import ConversationTokenBufferMemory
from langchain_openai import ChatOpenAI

# Token-limited memory: constructed with a chat model and a limit of 40.
memory = ConversationTokenBufferMemory(max_token_limit=40, llm=ChatOpenAI())

# Save two exchanges in order, then print what the memory still holds.
for user_text, ai_text in [
    ("你好啊", "你好,我是你的AI助手。"),
    ("你会干什么", "我什么都会"),
]:
    memory.save_context({"input": user_text}, {"output": ai_text})
print(memory.load_memory_variables({}))
回复:
{'history': 'Human: 你会干什么\nAI: 我什么都会'}
1、ConversationSummaryMemory: 对上下文做摘要
https://python.langchain.com/docs/modules/memory/types/summary
2、ConversationSummaryBufferMemory: 保存 Token 数限制内的上下文,对更早的做摘要
https://python.langchain.com/docs/modules/memory/types/summary_buffer
3、VectorStoreRetrieverMemory: 将 Memory 存储在向量数据库中,根据用户输入检索回最相关的部分
https://python.langchain.com/docs/modules/memory/types/vectorstore_retriever_memory
4、LangChain 的 Memory 管理机制属于可用的部分,尤其是简单情况如按轮数或按 Token 数管理;对于复杂情况,它不一定是最优的实现,例如检索向量库方式,建议根据实际情况和效果评估;但是它对内存的各种维护方法的思路在实际生产中可以借鉴
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。