赞
踩
官方网站:https://langfuse.com/
创建自己的项目获得API Key
项目地址:https://github.com/langfuse/langfuse
git clone https://github.com/langfuse/langfuse.git
cd langfuse
通过docker compose up -d安装
# 安装 langfuse SDK
!pip install --upgrade langfuse
'''
将相应的KEY写到.env文件中,然后加载
LANGFUSE_SECRET_KEY="sk-lf-"
LANGFUSE_PUBLIC_KEY="pk-lf-"
LANGFUSE_HOST="https://us.cloud.langfuse.com"
'''
# Load the LANGFUSE_* keys listed above from a .env file into the process
# environment so the Langfuse SDK can authenticate implicitly.
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())  # True if a .env file was found and loaded
from langfuse.openai import openai  # drop-in wrapper that logs OpenAI calls
from langfuse import Langfuse

# Create a root trace; the completion below is attached to it via trace_id.
trace = Langfuse().trace(
    name="hello-world",
    user_id="naive frank",
    release="v0.0.1"
)
completion = openai.chat.completions.create(
    name="hello-world",          # generation name shown in the Langfuse UI
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "你能为我做什么?"}
    ],
    temperature=0,
    trace_id=trace.id,           # link this call to the trace created above
)
print(completion.choices[0].message.content)
from langfuse.callback import CallbackHandler
# LangChain callback handler: forwards chain/LLM events to Langfuse under
# a trace named "SayHello" attributed to user "naive frank".
handler = CallbackHandler(
trace_name="SayHello",
user_id="naive frank",
)
from langchain_openai import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts.chat import HumanMessagePromptTemplate
from langchain.prompts import ChatPromptTemplate

model = ChatOpenAI(model="gpt-3.5-turbo-0613")
prompt = ChatPromptTemplate.from_messages([
    HumanMessagePromptTemplate.from_template("Say hello to {input}!")
])
# Output parser: reduce the chat message to a plain string.
parser = StrOutputParser()
chain = (
    {"input": RunnablePassthrough()}  # feed the raw input into the template
    | prompt
    | model
    | parser
)
# Passing the Langfuse handler in `config` records this invocation as a trace.
chain.invoke(input="Naive Frank", config={"callbacks": [handler]})
构建 PromptTemplate
from langchain.prompts import PromptTemplate

# Router prompt: decide whether the student's input is a question the
# teacher should answer (reply Y) or not (reply N).
need_answer = PromptTemplate.from_template("""
*********
你是人工智能机器学习算法的大咖,你的工作是回答有关机器学习算法的问题。
机器学习算法:
{outlines}
*********
用户输入:
{user_input}
*********
如果这是一个需要老师答疑的问题,回复Y,否则回复N。
只回复Y或N,不要回复其他内容。""")

# Dedup prompt: check whether the new question duplicates an existing one
# (Y = duplicate exists, N = genuinely new).
check_duplicated = PromptTemplate.from_template("""
*********
已有问题列表:
[
{question_list}
]
*********
新问题:
{user_input}
*********
已有提问列表是否有和新提问类似的问题?
回复Y或N, Y表示有,N表示没有。
只回复Y或N,不要回复其他内容。""")
初始化大纲和问题列表
# Course outline: the list of topics the {outlines} prompt variable is
# filled with when routing student questions.
outlines = """
线性回归
逻辑回归
决策树
随机森林
支持向量机
神经网络
"""

# Questions already asked, used by the duplicate-detection prompt.
question_list = [
    "谢谢老师",
    "什么是线性回归",
]
创建 chain
# Deterministic routing model: temperature 0 plus a fixed sampling seed.
model = ChatOpenAI(temperature=0,model_kwargs={"seed":50})
parser = StrOutputParser()
# chain1: does the input need a teacher's answer? Expected output: 'Y'/'N'.
chain1 = (
need_answer
| model
| parser
)
# chain2: is the input a duplicate of a known question? Expected output: 'Y'/'N'.
chain2 = (
check_duplicated
| model
| parser
)
import uuid
from langfuse.client import Langfuse


def create_trace(user_id):
    """Create a fresh Langfuse trace (random UUID id) for one interaction."""
    langfuse = Langfuse()
    trace_id = str(uuid.uuid4())
    trace = langfuse.trace(
        name="assistant",
        id=trace_id,
        user_id=user_id
    )
    return trace


def verify_question(
    question: str,
    outlines: str,
    question_list: list,
    user_id: str,
) -> bool:
    """Return True iff `question` needs an answer and is not a duplicate.

    Side effect: accepted questions are appended to `question_list`.
    Both LLM calls are recorded as spans of a single new trace.
    """
    trace = create_trace(user_id)
    # Handler bound to this trace so both chain invocations land in it.
    handler = trace.get_langchain_handler()
    # Step 1: does this input need a teacher's answer at all?
    if chain1.invoke(
        {"user_input": question, "outlines": outlines},
        config={"callbacks": [handler]}
    ) == 'Y':
        # Step 2: is it a duplicate of an already-asked question?
        if chain2.invoke(
            {"user_input": question, "question_list": "\n".join(question_list)},
            config={"callbacks": [handler]}
        ) == 'N':
            question_list.append(question)
            return True
    return False
实际调用,判断是否为新问题,需要回答
# Expected True: "什么是随机森林" is on the outline and not yet in question_list.
ret = verify_question(
"什么是随机森林",
outlines,
question_list,
user_id="naive frank",
)
print(ret)
Trace
import uuid
from langchain_openai import ChatOpenAI
from langchain.schema import (
    AIMessage,     # equivalent to the OpenAI "assistant" role
    HumanMessage,  # equivalent to the OpenAI "user" role
    SystemMessage  # equivalent to the OpenAI "system" role
)

llm = ChatOpenAI()
messages = [
    SystemMessage(content="你是人工智能机器学习算法的大咖。"),
]
# One handler per chat; session_id groups every turn into one Langfuse session.
handler = CallbackHandler(
    user_id="naive frank",
    trace_name="naive_chat",
    session_id=str(uuid.uuid4())
)
# Simple REPL: empty input ends the conversation; full history is resent
# each turn so the model keeps context.
while True:
    user_input = input("User: ")
    if user_input.strip() == "":
        break
    messages.append(HumanMessage(content=user_input))
    response = llm.invoke(messages, config={"callbacks": [handler]})
    print("AI: " + response.content)
    messages.append(response)
创建数据集
数据标注
标注结果:
解析JSON数据
import json

# Parse the annotation file: one JSON object per line (JSONL), each carrying
# the prompt variables ("outlines", "user_input") and the human label.
data = []
with open('my_annotations.jsonl', 'r', encoding='utf-8') as fp:
    for line in fp:
        example = json.loads(line.strip())
        item = {
            "input": {
                "outlines": example["outlines"],
                "user_input": example["user_input"]
            },
            "expected_output": example["label"]
        }
        data.append(item)
将解析数据上传至创建的数据集my_dataset
from langfuse import Langfuse
from langfuse.model import CreateDatasetRequest, CreateDatasetItemRequest
from tqdm import tqdm

# init — the client reads LANGFUSE_* credentials from the environment
langfuse = Langfuse()

# For demo speed, only upload the first 50 parsed items to "my_dataset".
for item in tqdm(data[:50]):
    langfuse.create_dataset_item(
        dataset_name="my_dataset",
        input=item["input"],
        expected_output=item["expected_output"]
    )
def simple_evaluation(output, expected_output):
    """Exact-match metric: True iff the model output equals the label."""
    return output == expected_output


# Build the v1 PromptTemplate (simple Y/N router, no chain of thought).
from langchain.prompts import PromptTemplate

need_answer = PromptTemplate.from_template("""
*********
你是人工智能机器学习算法的大咖,你的工作是回答有关机器学习算法的问题。
机器学习算法:
{outlines}
*********
用户输入:
{user_input}
*********
如果这是一个需要老师答疑的问题,回复Y,否则回复N。
只回复Y或N,不要回复其他内容。""")

# Deterministic model for evaluation: temperature 0 plus a fixed seed.
model = ChatOpenAI(temperature=0, model_kwargs={"seed": 50})
parser = StrOutputParser()
chain_v1 = (
    need_answer
    | model
    | parser
)
from concurrent.futures import ThreadPoolExecutor
from functools import partial
from langfuse import Langfuse

langfuse = Langfuse()


def run_evaluation(chain, dataset_name, run_name):
    """Run `chain` over every item of a Langfuse dataset and score accuracy.

    Each item gets its own LangChain handler (via `run_name`) so results are
    linked to the dataset item in Langfuse; items are processed concurrently.
    """
    dataset = langfuse.get_dataset(dataset_name)

    def process_item(item):
        handler = item.get_langchain_handler(run_name=run_name)
        # chain.invoke is synchronous; the thread pool supplies concurrency.
        output = chain.invoke(item.input, config={"callbacks": [handler]})
        # Attach an exact-match accuracy score to this item's root span.
        handler.root_span.score(
            name="accuracy",
            value=simple_evaluation(output, item.expected_output)
        )
        print('.', end='', flush=True)  # lightweight progress indicator

    # Fan out over the dataset with at most 4 worker threads.
    with ThreadPoolExecutor(max_workers=4) as executor:
        executor.map(process_item, dataset.items)
# Evaluate the v1 (simple prompt) chain; the run name gets a random suffix
# so repeated executions show up as separate runs in Langfuse.
run_evaluation(chain_v1, "my_dataset", "v1-"+str(uuid.uuid4())[:8])
from langchain.prompts import PromptTemplate

# v2 router prompt: adds explicit criteria and asks the model to reason
# step by step (chain of thought) before emitting the final Y/N line.
need_answer = PromptTemplate.from_template("""
*********
你是人工智能机器学习算法的大咖,你的工作是回答有关机器学习算法的问题。。
你的选择需要遵循以下原则:
1 需要老师回答的问题是指与课程内容或AI/LLM相关的技术问题;
2 评论性的观点、闲聊、表达模糊不清的句子,不需要老师回答;
3 学生输入不构成疑问句的,不需要老师回答;
4 学生问题中如果用“这”、“那”等代词指代,不算表达模糊不清,请根据问题内容判断是否需要老师回答。
课程内容:
{outlines}
*********
学员输入:
{user_input}
*********
Analyse the student's input according to the lecture's contents and your criteria.
Output your analysis process step by step.
Finally, output a single letter Y or N in a separate line.
Y means that the input needs to be answered by the teacher.
N means that the input does not needs to be answered by the teacher.""")
from langchain_core.output_parsers import BaseOutputParser
import re


class MyOutputParser(BaseOutputParser):
    """Custom parser: pull the final Y/N verdict out of a chain-of-thought answer."""

    def parse(self, text: str) -> str:
        # The model reasons step by step and is told to end with a single
        # Y or N, so take the LAST occurrence; default to 'N' (don't answer)
        # when no verdict letter is present at all.
        matches = re.findall(r'[YN]', text)
        return matches[-1] if matches else 'N'
# chain_v2: same router with the chain-of-thought prompt, plus the custom
# parser that strips the reasoning down to the final Y/N letter.
chain_v2 = (
need_answer
| model
| MyOutputParser()
)
# Evaluate v2 against the same dataset for comparison with the v1 run.
run_evaluation(chain_v2, "my_dataset", "cot-"+str(uuid.uuid4())[:8])
添加Prompt
加载prompt测试
# Load a managed prompt by name (latest deployed version)
prompt = langfuse.get_prompt("my_prompt")
# Load by name and an explicit version number
prompt = langfuse.get_prompt("my_prompt", version=3)
# Fill in the template variables
# NOTE(review): user_input/outlines values look swapped here ("SVM是什么?"
# reads like user input) — confirm against the prompt's variable names.
compiled_prompt = prompt.compile(user_input="你好",outlines="SVM是什么?")
print(compiled_prompt)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。