Download the files for each LLM version from Hugging Face and put them into a qwen folder. Each subfolder under qwen corresponds to one model version, and the model weights and related files are stored in that version's subfolder.
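You can fetch the files by clicking through the Hugging Face page, or script the download. A minimal sketch using huggingface_hub (the repo_id and local_dir below are example values, not the blogger's exact paths; swap in whichever Qwen1.5 version you actually want):

from huggingface_hub import snapshot_download

# Download one model version into its own subfolder under ./qwen
# (repo_id and local_dir are example values; adjust them to your setup)
snapshot_download(
    repo_id="Qwen/Qwen1.5-0.5B-Chat",
    local_dir="./qwen/Qwen1.5-0.5B-Chat"
)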
XXX-GPTQ-Int4 means the model has been quantized to Int4 precision. In my tests it slightly reduces generation speed, but it significantly lowers GPU memory usage, which makes it a good fit for researchers with limited GPU resources (after all, the small Qwen1.5 models are all about a low VRAM footprint).
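To see the memory saving for yourself, one quick check is to load an Int4 variant and print the allocated VRAM. This is just a sketch of mine, assuming the repo name Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4 and that the auto-gptq and optimum packages are installed:

import torch
from transformers import AutoModelForCausalLM

# Assumes the GPTQ-Int4 variant of the model plus auto-gptq/optimum are available
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",
    torch_dtype="auto",
    device_map="auto"
)
print(f"allocated VRAM: {torch.cuda.memory_allocated() / 1024**3:.2f} GiB")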
My Python version is 3.8 and my CUDA (cudatoolkit) version is 11.8 (versions that are too old will raise errors at runtime).
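A quick sanity check of your own Python/CUDA setup before installing anything (just a helper snippet, not part of the original tutorial):

import sys
import torch

print(sys.version)                 # should report 3.8.x here
print(torch.version.cuda)          # CUDA version PyTorch was built against, e.g. 11.8
print(torch.cuda.is_available())   # True if the GPU is usable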
pip install transformers==4.37.0
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-14B-Chat-AWQ",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-14B-Chat-AWQ")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
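The snippet above only returns the whole answer at once. If you want token-by-token output (which pairs nicely with the streaming web UI later on), transformers provides TextIteratorStreamer; a minimal sketch reusing the model, tokenizer, and model_inputs from above:

from threading import Thread
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
    input_ids=model_inputs.input_ids,
    max_new_tokens=512,
    streamer=streamer
)
# generate() blocks, so run it in a background thread and consume the streamer here
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()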
pip install gradio==4.19.2
For the web front end I used the popular Gradio framework, i.e. the chatbot interface it officially recommends:
import gradio as gr
import time

def echo(message, history, system_prompt, tokens):
    response = f"System prompt: {system_prompt}\n Message: {message}."
    for i in range(min(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i + 1]

demo = gr.ChatInterface(
    echo,
    additional_inputs=[
        gr.Textbox("You are helpful AI.", label="System Prompt"),
        gr.Slider(10, 100)
    ]
)

if __name__ == "__main__":
    demo.queue().launch()
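Note how the extra widgets listed in additional_inputs are passed to echo as additional positional arguments after message and history; the larger script further below relies on the same mechanism to feed the system prompt, token limit, model choice, and uploaded file into the chat function.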
Public access: add share=True inside demo.queue().launch(); when the app starts it will generate a random public URL (make sure no VPN is on).
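If you only need LAN access rather than a public share link, launch() can bind to all interfaces instead; these are standard Gradio arguments, shown here just for reference:

# Serve on the local network without the public share link
demo.queue().launch(server_name="0.0.0.0", server_port=7860)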
pip install PyPDF2 python-docx

Note: the "from docx import Document" import used below comes from the python-docx package, not from a package literally named docx.
import warnings, sys, os, time
import transformers, PyPDF2
import gradio as gr
from docx import Document

warnings.filterwarnings('ignore')
transformers.logging.set_verbosity_error()


def txt_to_str(file_path):
    """
    txt, html, json, py, md, java, c, cpp, h, js, css, xml, yml, yaml, sql, sh, bat
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        return f"Error while reading the file: {e}"


def pdf_to_str(file_path):
    """
    pdf
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Read every page and concatenate the extracted text into one string
            content = ""
            for page_num in range(len(pdf_reader.pages)):
                page_obj = pdf_reader.pages[page_num]
                content += page_obj.extract_text()
            # Return the extracted text
            return content
    except Exception as e:
        return f"Error while reading the file: {e}"


def docx_to_str(file_path):
    """
    docx
    """
    try:
        # Open the docx file
        doc = Document(file_path)
        # Join all paragraphs into one string
        content = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
        # Return the extracted text
        return content
    except Exception as e:
        return f"Error while reading the docx file: {e}"


def file_to_str(file_path):
    file_types = ['.txt', '.html', '.json', '.py', '.md', '.java', '.c', '.cpp', '.h',
                  '.js', '.css', '.xml', '.yml', '.yaml', '.sql', '.sh', '.bat'] + ['.pdf'] + ['.docx']
    if not file_path.endswith(tuple(file_types)):
        raise TypeError("Unsupported file type")
    if file_path.endswith('.pdf'):
        return pdf_to_str(file_path)
    elif file_path.endswith('.docx'):
        return docx_to_str(file_path)
    else:
        return txt_to_str(file_path)


def list_to_str(histories):
    histories_str = ""
    for user_prompt, response in histories:
        histories_str += f"I: {user_prompt}\nYou: {response}\n\n"
    return histories_str


def qwen(user_prompt="", histories=list(), system_prompt="", max_length=512, model_choice="Qwen1.5-0.5B-Chat"):
    if histories:
        user_prompt = user_prompt + "\n\nHere is our previous chat history:\n{}".format(list_to_str(histories))
    device = "cuda"  # the device to load the model onto
    # Note: the model and tokenizer are reloaded on every call; a caching sketch follows after this script
    model = transformers.AutoModelForCausalLM.from_pretrained(
        model_choice,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_choice)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=max_length
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Append this turn to the most recent history file
    with open("./history/" + find_max("./history"), "a", encoding='utf-8') as f:
        f.write("User: " + user_prompt + "\nResponse: " + response + "\n\n")
    return response


def echo(user_prompt, histories, system_prompt, max_length, model_choice, uploaded_file_path=None):
    if uploaded_file_path:
        file_content = file_to_str(uploaded_file_path)
        # print(f"Uploaded file, first part of its content:\n{file_content[:70]}...")
        user_prompt += "\n\nHere is the file content:\n" + file_content
    response = qwen(user_prompt, histories, system_prompt, max_length, model_choice)
    # Stream the finished answer character by character for a typing effect
    for i in range(len(response)):
        time.sleep(0.05)
        yield response[: i + 1]


def find_max(folder_path):
    # List every entry in the folder
    all_entries = os.listdir(folder_path)
    # Collect file names only
    file_names = []
    # Keep entries that are files rather than subdirectories
    for entry in all_entries:
        full_path = os.path.join(folder_path, entry)
        if os.path.isfile(full_path):
            file_names.append(entry)
    # The lexicographically largest name is the newest history file (the timestamped names sort correctly)
    return max(file_names)


def main():
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    os.makedirs("./history", exist_ok=True)  # make sure the history folder exists
    with open("./history/history_{}.txt".format(current_time), "w", encoding='utf-8') as f:
        f.write("")
    demo = gr.ChatInterface(
        echo,
        additional_inputs=[
            gr.Textbox("You are a helpful assistant.", label="System Prompt"),
            gr.Slider(128, 2048, label="The max length of tokens", interactive=True),
            # Adjust the list to the model folders you actually downloaded
            gr.Radio([f"Qwen1.5-{i}B-Chat" for i in [0.5, 1.8, 4, 7]], label="Model Choice"),
            gr.UploadButton(label="Upload a file", type="filepath", file_count="single",
                            file_types=["file"], interactive=True, size="sm")
        ]
    )
    # demo.queue().launch()
    demo.queue().launch(share=True)  # pip install gevent==22.10.2


if __name__ == "__main__":
    main()
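One obvious cost in qwen() is that the checkpoint and tokenizer are reloaded from disk on every message. A small caching sketch (my suggestion, not part of the original script) that keeps each selected model in memory:

_model_cache = {}

def load_model(model_choice):
    # Load each model/tokenizer pair once and reuse it on later turns
    if model_choice not in _model_cache:
        model = transformers.AutoModelForCausalLM.from_pretrained(
            model_choice, torch_dtype="auto", device_map="auto"
        )
        tokenizer = transformers.AutoTokenizer.from_pretrained(model_choice)
        _model_cache[model_choice] = (model, tokenizer)
    return _model_cache[model_choice]

qwen() could then call load_model(model_choice) instead of calling from_pretrained directly, at the cost of keeping the loaded models resident in GPU memory.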