赞
踩
查看本地 python 版本,在控制台输入 python -V
建议升级 python 到 3.0 以上。如果已经安装了 Homebrew,直接通过命令 brew install python3 安装即可(注:该方法会比较慢,如果一直卡在 Updating Homebrew...,按 control + c 可仅取消 Homebrew 本次更新操作)
安装或更新完成后,控制台会打印出安装所在路径,copy 路径
把安装路径设置环境变量中,输入open ~/.bash_profile
打开文件,把安装路径新增到文件中
新增 alias python="/usr/local/bin/python3"
(注:路径为各自控制台输出的安装路径)保存文件后,在控制台执行 source ~/.bash_profile 使配置生效,再次执行 python -V,可以看到本地 python 版本已经升级
查看本地 pip 版本,执行命令 pip -V
如果没有安装,执行命令python3 -m pip install --upgrade pip
安装 pip 包
然后设置环境变量:执行 open ~/.bash_profile,新增 export HNSWLIB_NO_NATIVE=1,再执行 source ~/.bash_profile 更新环境
直接使用 pip 安装,执行pip install langchain
(注:安装比较慢,如遇下载超时情况,重新执行命令即可) 执行pip install chromadb
(注:安装比较慢,如遇下载超时情况,重新执行命令即可)
直接使用 pip 安装,一般执行 pip install xxx,比如安装 openAI:pip install openai
示例:输入命令 python3 进入 python 交互环境,依次输入以下命令
# Import the package.
import chromadb

# Create the client.
chroma_client = chromadb.Client()

# Create a collection (the "table") and give it a name.
collection = chroma_client.create_collection(name="my_collection")

# Insert two documents together with their metadata and ids.
collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"],
)

# Query the collection with a text and ask for the two closest results.
results = collection.query(
    query_texts=["This is a query document"],
    n_results=2,
)

# Print the query results.
print(results)
退出 python 执行 exit()
-
"""Store a small collection with Chroma, persist it to disk, then reload and query it."""

# Imports
import chromadb
from chromadb.config import Settings

# Directory where the data is saved.
persist_directory = "/Users/baixiaobai/SHUIDI/shuidi/chroma_db"
collection_name = "peristed_collection"


def new_persistent_client():
    """Return a Chroma client configured with persistence under ``persist_directory``."""
    settings = Settings(
        persist_directory=persist_directory,
        chroma_db_impl="duckdb+parquet",
    )
    return chromadb.Client(settings)


# Create a new Chroma client with persistence enabled.
client = new_persistent_client()

# Reset the database.
client.reset()

# Create the collection.
collection = client.create_collection(name=collection_name)

# Sample data: eight items numbered 1..8. Odd-numbered items share one
# embedding, even-numbered items share the other; only item 2 uses "style2".
item_numbers = range(1, 9)
sample_embeddings = [
    [1.1, 2.3, 3.2] if number % 2 else [4.5, 6.9, 4.4]
    for number in item_numbers
]
sample_metadatas = [
    {"uri": f"img{number}.png", "style": "style2" if number == 2 else "style1"}
    for number in item_numbers
]
sample_documents = [f"doc{number}" for number in item_numbers]
sample_ids = [f"id{number}" for number in item_numbers]

# Add the data to the collection.
collection.add(
    embeddings=sample_embeddings,
    metadatas=sample_metadatas,
    documents=sample_documents,
    ids=sample_ids,
)

# Explicitly save the data to disk.
client.persist()

# Create a new client using the same settings.
client = new_persistent_client()

# Load the collection.
collection = client.get_collection(collection_name)

# Query the collection for the single closest item.
results = collection.query(
    query_embeddings=[1.1, 2.2, 3.2],
    n_results=1,
    include=["embeddings", "documents", "metadatas", "distances"],
)

# Print the result.
print(results)
-
-
"""Create a Chroma collection whose documents are embedded with OpenAI, then query it."""

import os

import chromadb
from chromadb.utils import embedding_functions

client = chromadb.Client()

# Using OpenAI embeddings. This assumes you have the openai package installed.
# The key is read from the OPENAI_API_KEY environment variable so that a real
# credential never has to be written into the source; the masked placeholder
# is kept as the fallback when the variable is not set.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ.get("OPENAI_API_KEY", "*********************************"),
    model_name="text-embedding-ada-002",
)

# Create a collection that uses the OpenAI embedding function.
openai_collection = client.create_collection(
    name="openai_embeddings",
    embedding_function=openai_ef,
)

# Insert two documents together with their metadata and ids.
openai_collection.add(
    documents=["This is a document", "This is another document"],
    metadatas=[{"source": "my_source"}, {"source": "my_source"}],
    ids=["id1", "id2"],
)

# Query by text and ask for the two closest results, including every
# available field in the response.
results = openai_collection.query(
    query_texts=["This is a query document"],
    n_results=2,
    include=["embeddings", "documents", "metadatas", "distances"],
)
print(results)
-
-
"""Answer a question from a local text file using LangChain, Chroma and OpenAI.

Steps: load a text file, split it into chunks, embed the chunks into a
persisted Chroma store, retrieve the chunk closest to a query, and pass that
chunk to an OpenAI model as background for the question.
"""

import os

from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# API key: read from the OPENAI_API_KEY environment variable so that a real
# credential never has to be written into the source; the masked placeholder
# is kept as the fallback when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", '********')

# Load the file.
loader = TextLoader('/Users/baixiaobai/SHUIDI/shuidi/11.txt')
documents = loader.load()

# Split the text into chunks, using the Chinese full stop as the separator.
text_splitter = CharacterTextSplitter(chunk_size=20, chunk_overlap=0, separator='。')
split_docs = text_splitter.split_documents(documents)

# Embedding client used to vectorise the chunks.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# Directory the vector store is persisted to.
persist_directory = 'chroma_storage'

# Embed the chunks and build the vector store.
vectorstore = Chroma.from_documents(split_docs, embeddings, persist_directory=persist_directory)

# Persist the store to local disk.
vectorstore.persist()

# Load the data back from local disk.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)

# Retrieve the single closest chunk for the query.
query = "小黄今天干什么?"
docs = vectordb.similarity_search(query, k=1)

# An empty result would make docs[0] raise IndexError; fall back to an empty
# background so the prompt's own "cannot answer" instruction applies instead.
background = str(docs[0].page_content) if docs else ""

# Build the prompt template; the retrieved chunk is bound up front and only
# the question is left to be filled in.
prompt = PromptTemplate(
    template="已知信息:{background}。 根据上述已知信息,以客服的语气来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}",
    input_variables=["question"],
    partial_variables={"background": background},
)

# Initialise the model.
llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)

# NOTE(review): the retrieval query above asks about "今天" while the question
# sent to the model asks about "明天" — confirm this difference is intended.
prompt_text = prompt.format(question="小黄明天干什么?")

# Call OpenAI and print the answer.
output = llm(prompt_text)
print(output)
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。