赞
踩
通过调用百度语音识别的API接口进行语音识别,然后将识别到的内容发送给ChatGPT以及文心一言的API接口进行交互,从而实现语音交互功能。
要实现这个功能,首先需要申请百度语音识别的API接口,并进行相关配置。然后编写代码,利用百度语音识别的API接口对语音进行识别,获取识别到的文本内容。
接着,需要调用ChatGPT和文心一言的API接口,将识别到的文本内容发送给这两个接口进行交互。这样就可以实现将语音内容转换成文本后,再通过人工智能模型进行进一步处理和生成相关回复。
在具体实现过程中,需要注意处理API接口的请求和响应,并确保数据的安全性和准确性。
总的来说,通过调用百度语音识别的API接口进行语音识别,并与ChatGPT和文心一言的API接口进行交互,可以实现一个智能化的语音交互系统,提供更加便捷和个性化的服务体验。
1、首先去百度AI开放平台-全球领先的人工智能服务平台申请语音识别的API_ID以及API_SECRET。
2、根据官方的API调用示例,调取语音识别的接口。
- import requests
- import json
- from urllib.request import urlopen
- from urllib.request import Request
- from urllib.error import URLError
- from urllib.parse import urlencode
-
API_KEY = "自己的"  # placeholder: replace with your own Baidu API key
SECRET_KEY = "自己的"  # placeholder: replace with your own Baidu secret key


# Fetch the access token used by the Baidu speech-recognition calls
def get_access_token():
    """
    Exchange the API key / secret key pair (AK, SK) for an access token.

    :return: the ``access_token`` field of the JSON reply passed through
        ``str()``; when the field is absent this is the literal string
        "None", not the ``None`` object
    """
    token_endpoint = "https://aip.baidubce.com/oauth/2.0/token"
    credentials = {
        "grant_type": "client_credentials",
        "client_id": API_KEY,
        "client_secret": SECRET_KEY,
    }
    reply = requests.post(token_endpoint, params=credentials)
    return str(reply.json().get("access_token"))
# Baidu speech-to-text
def speech_to_text_baidu(audio_path='/home/lxj/chat_robot/audio.wav'):
    """
    Send a WAV file to the Baidu ASR endpoint and return the recognised text.

    For live transcription, record the microphone to ``audio_path`` before
    calling this function.

    :param audio_path: path of the WAV file to upload; defaults to the
        location used by the original script
    :return: the first entry of the ``result`` list in the JSON reply, or
        ``None`` when the request fails or the reply carries no result
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.error import HTTPError

    ASR_URL = 'http://vop.baidu.com/server_api'

    with open(audio_path, 'rb') as audio_file:
        audio_data = audio_file.read()

    # dev_pid 1537 selects Mandarin recognition; see the official docs for
    # the other values.
    params = {
        'cuid': "eAfx7sGxI092FxvCuADljopHxiJjruPX",
        'token': get_access_token(),
        'dev_pid': 1537,
    }
    headers = {
        'Content-Type': 'audio/' + 'wav' + '; rate=' + str(16000),
    }
    # Query parameters travel in the URL, the raw audio in the request body.
    req = Request(ASR_URL + "?" + urlencode(params), audio_data, headers)

    try:
        # The context manager closes the HTTP response once it has been read.
        with urlopen(req) as response:
            payload = json.loads(response.read())
    except HTTPError as err:
        # Only HTTPError carries a status code and a readable body.
        print('asr http response http code : ' + str(err.code))
        print(err.read())
        return None
    except URLError as err:
        # Plain URLError (DNS failure, refused connection, ...) has only .reason.
        print('asr request failed : ' + str(err.reason))
        return None
    except ValueError as err:
        # json.loads raises a ValueError subclass on a non-JSON body.
        print('asr response is not valid json : ' + str(err))
        return None

    results = payload.get("result")
    if not results:
        # No "result" entry in the reply: show the whole payload for diagnosis.
        print('asr response has no result : ' + str(payload))
        return None
    return results[0]
1、首先到OpenAI官网注册账号登陆,申请api_key,一个月5美元的免费额度(够用)。
2、安装openai的Python库
pip install openai
3、实现OpenAI的调用(很简单)
- import openai
-
# Only the `openai` module itself is imported above, so the client class must
# be reached through the module; a bare `OpenAI(...)` would raise NameError.
client = openai.OpenAI(api_key=" ")  # replace with your own API key
-
# ChatGPT
def gpt(data):
    """
    Send one user message to the chat-completions API and print the reply.

    Relies on a module-level ``client`` object being defined before the call.

    :param data: text of the user message
    :return: the reply text, or an empty string when the request raises
    """
    try:
        # Send the request to GPT and wait for the completion.
        chat_completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": data}  # the received user message
            ]
        )
    except Exception as e:
        print(f"发生错误: {e}")
        return ""

    # Extract the text of the first choice.
    gpt_reply = chat_completion.choices[0].message.content
    # print() does not apply %-formatting; the old call printed a literal "%s".
    print(f"GPT Reply: {gpt_reply}")
    return gpt_reply
1、到百度AI开放平台-全球领先的人工智能服务平台去申请千帆大模型的API_ID以及API_SECRET。
2、根据官方的API调用示例,调取千帆大模型(文心一言)的接口。
- import requests
- import json
- from urllib.request import urlopen
- from urllib.request import Request
- from urllib.error import URLError
- from urllib.parse import urlencode
-
wx_API_KEY = "自己的"  # placeholder: replace with your own API key
wx_SECRET_KEY = "自己的"  # placeholder: replace with your own secret key


# Fetch the access token used by the Wenxin Yiyan (ERNIE Bot) calls
def get_access_token_wx():
    """
    Exchange the API key / secret key pair (AK, SK) for an access token.

    :return: the ``access_token`` field of the JSON reply passed through
        ``str()``; when the field is absent this is the literal string
        "None", not the ``None`` object
    """
    token_endpoint = "https://aip.baidubce.com/oauth/2.0/token"
    credentials = {
        "grant_type": "client_credentials",
        "client_id": wx_API_KEY,
        "client_secret": wx_SECRET_KEY,
    }
    reply = requests.post(token_endpoint, params=credentials)
    return str(reply.json().get("access_token"))
-
def wxyy(data):
    """
    Send one user message to the Wenxin chat-completions endpoint and print
    the reply.

    :param data: text of the user message
    :return: the ``result`` field of the JSON reply, or ``None`` when the
        reply has no such field
    """
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + get_access_token_wx()

    payload = json.dumps({
        "messages": [{
            "role": "user",
            "content": data
        }]
    })
    headers = {
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    response_json = response.json()  # parse the response body as JSON
    result = response_json.get("result")  # pull out the "result" field
    # print() does not apply %-formatting; the old call printed a literal "%s".
    print(f"WX Reply: {result}")
    return result
加入了键盘监控事件,当按下键盘enter键时,开始交互。
-
- from pyaudio import PyAudio,paInt16 #导入音频处理库Pyaudio,没有的话要pip 安装
- from openai import OpenAI
- import threading
- import requests
- import json
- from urllib.request import urlopen
- from urllib.request import Request
- from urllib.error import URLError
- from urllib.parse import urlencode
- from pynput.keyboard import Listener, Key
- import speech_recognition as sr
-
# Module-level lock object
lock = threading.Lock()

# ---------------- Baidu speech recognition credentials ----------------
API_KEY, SECRET_KEY = "", ""
# ---------------- Wenxin Yiyan (ERNIE Bot) credentials ----------------
wx_API_KEY, wx_SECRET_KEY = "", ""
# -----------------------------------------------------------------------
-
# ChatGPT
def gpt(data):
    """
    Send one user message to the chat-completions API and print the reply.

    Relies on a module-level ``client`` object being defined before the call.

    :param data: text of the user message
    :return: the reply text, or an empty string when the request raises
    """
    try:
        # Send the request to GPT and wait for the completion.
        chat_completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": data}  # the received user message
            ]
        )
    except Exception as e:
        print(f"发生错误: {e}")
        return ""

    # Extract the text of the first choice.
    gpt_reply = chat_completion.choices[0].message.content
    # print() does not apply %-formatting; the old call printed a literal "%s".
    print(f"GPT Reply: {gpt_reply}")
    return gpt_reply
-
# Fetch the access token used by the Baidu speech-recognition calls
def get_access_token():
    """
    Exchange the API key / secret key pair (AK, SK) for an access token.

    :return: the ``access_token`` field of the JSON reply passed through
        ``str()``; when the field is absent this is the literal string
        "None", not the ``None`` object
    """
    token_endpoint = "https://aip.baidubce.com/oauth/2.0/token"
    credentials = {
        "grant_type": "client_credentials",
        "client_id": API_KEY,
        "client_secret": SECRET_KEY,
    }
    reply = requests.post(token_endpoint, params=credentials)
    return str(reply.json().get("access_token"))
-
# Fetch the access token used by the Wenxin Yiyan (ERNIE Bot) calls
def get_access_token_wx():
    """
    Exchange the API key / secret key pair (AK, SK) for an access token.

    :return: the ``access_token`` field of the JSON reply passed through
        ``str()``; when the field is absent this is the literal string
        "None", not the ``None`` object
    """
    token_endpoint = "https://aip.baidubce.com/oauth/2.0/token"
    credentials = {
        "grant_type": "client_credentials",
        "client_id": wx_API_KEY,
        "client_secret": wx_SECRET_KEY,
    }
    reply = requests.post(token_endpoint, params=credentials)
    return str(reply.json().get("access_token"))
-
def wx(data):
    """
    Send one user message to the Wenxin chat-completions endpoint and print
    the reply.

    :param data: text of the user message
    :return: the ``result`` field of the JSON reply, or ``None`` when the
        reply has no such field
    """
    url = "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions?access_token=" + get_access_token_wx()

    payload = json.dumps({
        "messages": [{
            "role": "user",
            "content": data
        }]
    })
    headers = {
        'Content-Type': 'application/json'
    }

    response = requests.request("POST", url, headers=headers, data=payload)
    response_json = response.json()  # parse the response body as JSON
    result = response_json.get("result")  # pull out the "result" field
    # print() does not apply %-formatting; the old call printed a literal "%s".
    print(f"WX Reply: {result}")
    return result
-
# Baidu speech-to-text
def speech_to_text_baidu():
    """
    Capture audio from the microphone and return the text recognised by the
    Baidu ASR endpoint.

    :return: the first entry of the ``result`` list in the JSON reply, or
        ``None`` when the request fails or the reply carries no result
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.error import HTTPError

    ASR_URL = 'http://vop.baidu.com/server_api'

    # One recognizer is enough; the original constructed it twice.
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("请说话...")
        audio = recognizer.listen(source)
    # Convert to 16 kHz / 2-byte samples to match the rate declared in the
    # Content-Type header below.
    audio_data = audio.get_wav_data(convert_rate=16000, convert_width=2)

    params = {
        'cuid': "eAfx7sGxI092FxvCuADljopHxiJjruPX",
        'token': get_access_token(),
        'dev_pid': 1537,
    }
    headers = {
        'Content-Type': 'audio/' + 'wav' + '; rate=' + str(16000),
    }
    # Query parameters travel in the URL, the raw audio in the request body.
    req = Request(ASR_URL + "?" + urlencode(params), audio_data, headers)

    try:
        # The context manager closes the HTTP response once it has been read.
        with urlopen(req) as response:
            payload = json.loads(response.read())
    except HTTPError as err:
        # Only HTTPError carries a status code and a readable body.
        print('asr http response http code : ' + str(err.code))
        print(err.read())
        return None
    except URLError as err:
        # Plain URLError (DNS failure, refused connection, ...) has only .reason.
        print('asr request failed : ' + str(err.reason))
        return None
    except ValueError as err:
        # json.loads raises a ValueError subclass on a non-JSON body.
        print('asr response is not valid json : ' + str(err))
        return None

    results = payload.get("result")
    if not results:
        # No "result" entry in the reply: show the whole payload for diagnosis.
        print('asr response has no result : ' + str(payload))
        return None
    return results[0]
-
# Callback invoked by the keyboard listener on every key press
def on_press(key):
    """
    Handle one key press: Enter runs a speech -> chat round, Esc stops.

    :param key: the key object delivered by the pynput listener
    :return: ``False`` when Esc is pressed, which tells the listener to
        stop; ``None`` otherwise so listening continues
    """
    global user_input

    if key == Key.enter:
        user_input = speech_to_text_baidu()
        if user_input is None:
            # Recognition failed; concatenating None below would raise TypeError.
            print("语音识别失败,请重试")
            return None
        print("语音转换结果:" + user_input)

        # gpt(user_input)  # enable to query ChatGPT as well
        wx(user_input)
    elif key == Key.esc:
        print("按了ESC键")
        # exit() here would only end the listener thread, not the program;
        # returning False is the listener's stop signal.
        return False
    return None


if __name__ == '__main__':
    # Initialise the OpenAI client used by gpt()
    client = OpenAI(
        api_key="自己的",  # replace with your own API key
    )
    print("chat启动!")
    # Run a single listener until on_press asks it to stop. The old
    # `while True` wrapper restarted the listener after every stop, so the
    # program could never be quit from the keyboard.
    with Listener(on_press=on_press) as listener:
        listener.join()
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。