1. About ChatGLM2-6B
ChatGLM2-6B is an open bilingual (Chinese-English) dialogue model released by Zhipu AI and Tsinghua University's KEG Lab.
2. Download ChatGLM2-6B
https://github.com/THUDM/ChatGLM2-6B
3. Set Up the Environment
3.1 Create a conda environment
conda create -n ChatGLM2B python=3.10
3.2 Enter the ChatGLM2-6B directory
cd D:\workspace\opensource\openai\ChatGLM2-6B
3.3 Install the dependencies
pip install -r requirements.txt
3.4 Download the model
Project page on Hugging Face: https://huggingface.co/THUDM/chatglm2-6b
Some of the model files have to be downloaded from there. This walkthrough uses the quantized chatglm2-6b-int4 variant; the commented-out lines in the demos below show the full-precision chatglm2-6b as an alternative.
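If you prefer scripting the download, here is a minimal sketch using huggingface_hub (the snapshot_download call and the target directory are illustrative assumptions; the path simply matches the local path used later in the demos):
- from huggingface_hub import snapshot_download
-
- # Hypothetical download helper; adjust repo_id/local_dir as needed.
- snapshot_download(repo_id="THUDM/chatglm2-6b-int4",
-                   local_dir="D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4")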
3.5 Install tdm64-gcc-5.1.0-2 (a GCC toolchain is needed on Windows to compile the int4 quantization kernels; see the compiler output in 5.3)
3.6 Install PyTorch
...
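The demos below load the model with .float(), i.e. pure CPU inference, so a CPU-only PyTorch build is sufficient. A quick way to confirm the installation works:
- import torch
-
- print(torch.__version__)          # installed PyTorch version
- print(torch.cuda.is_available())  # False is fine for the CPU-only setup used here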
4. Modify Parts of the Code
cli_demo.py (changed so the model loads from the local chatglm2-6b-int4 directory and with .float(), i.e. CPU inference, instead of .cuda()):
- import os
- import platform
- import signal
- from transformers import AutoTokenizer, AutoModel
- import readline  # not available on Windows; remove or guard this import (see 5.2)
-
- tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
- #model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).cuda()
- model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
- # Multi-GPU support: use the two lines below instead of the line above, and set num_gpus to the number of GPUs you actually have
- # from utils import load_model_on_gpus
- # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
- model = model.eval()
-
- os_name = platform.system()
- clear_command = 'cls' if os_name == 'Windows' else 'clear'
- stop_stream = False
-
-
- def build_prompt(history):
-     prompt = "欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"
-     for query, response in history:
-         prompt += f"\n\n用户:{query}"
-         prompt += f"\n\nChatGLM2-6B:{response}"
-     return prompt
-
-
- def signal_handler(signal, frame):
-     global stop_stream
-     stop_stream = True
-
-
- def main():
-     past_key_values, history = None, []
-     global stop_stream
-     print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
-     while True:
-         query = input("\n用户:")
-         if query.strip() == "stop":
-             break
-         if query.strip() == "clear":
-             past_key_values, history = None, []
-             os.system(clear_command)
-             print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
-             continue
-         print("\nChatGLM:", end="")
-         current_length = 0
-         for response, history, past_key_values in model.stream_chat(tokenizer, query, history=history,
-                                                                      past_key_values=past_key_values,
-                                                                      return_past_key_values=True):
-             if stop_stream:
-                 stop_stream = False
-                 break
-             else:
-                 print(response[current_length:], end="", flush=True)
-                 current_length = len(response)
-         print("")
-
-
- if __name__ == "__main__":
-     main()
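Before moving on to the web demos, a quick sanity check that the tokenizer and model load from the local path (a sketch reusing the same directory as above; chat is the non-streaming counterpart of stream_chat):
- from transformers import AutoTokenizer, AutoModel
-
- model_path = "D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4"
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
- model = AutoModel.from_pretrained(model_path, trust_remote_code=True).float().eval()
-
- # Single-turn call; returns the reply text and the updated history.
- response, history = model.chat(tokenizer, "你好", history=[])
- print(response)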
web_demo.py (same local-path and CPU changes; in addition stream_chat is called with return_past_key_values=False, the deprecated Textbox .style() call is dropped, and launch() binds to 0.0.0.0 so the demo is reachable from other machines):
- from transformers import AutoModel, AutoTokenizer
- import gradio as gr
- import mdtex2html
- from utils import load_model_on_gpus
-
- tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
- model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
-
- #tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True)
- #model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True).float()
- # from utils import load_model_on_gpus
- # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
- model = model.eval()
-
- """Override Chatbot.postprocess"""
-
-
- def postprocess(self, y):
-     if y is None:
-         return []
-     for i, (message, response) in enumerate(y):
-         y[i] = (
-             None if message is None else mdtex2html.convert((message)),
-             None if response is None else mdtex2html.convert(response),
-         )
-     return y
-
-
- gr.Chatbot.postprocess = postprocess
-
-
- def parse_text(text):
-     """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
-     lines = text.split("\n")
-     lines = [line for line in lines if line != ""]
-     count = 0
-     for i, line in enumerate(lines):
-         if "```" in line:
-             count += 1
-             items = line.split('`')
-             if count % 2 == 1:
-                 lines[i] = f"<pre><code class='language-{items[-1]}'>"
-             else:
-                 lines[i] = f"<br></code></pre>"
-         else:
-             if i > 0:
-                 if count % 2 == 1:
-                     line = line.replace("`", "\`")
-                     line = line.replace("<", "&lt;")
-                     line = line.replace(">", "&gt;")
-                     line = line.replace(" ", "&nbsp;")
-                     line = line.replace("*", "&ast;")
-                     line = line.replace("_", "&lowbar;")
-                     line = line.replace("-", "&#45;")
-                     line = line.replace(".", "&#46;")
-                     line = line.replace("!", "&#33;")
-                     line = line.replace("(", "&#40;")
-                     line = line.replace(")", "&#41;")
-                     line = line.replace("$", "&#36;")
-                 lines[i] = "<br>"+line
-     text = "".join(lines)
-     return text
-
-
- def predict(input, chatbot, max_length, top_p, temperature, history, past_key_values):
-     chatbot.append((parse_text(input), ""))
-     #for response, history, past_key_values in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
-     #                                                            return_past_key_values=True,
-     #                                                            max_length=max_length, top_p=top_p,
-     #                                                            temperature=temperature):
-     for response, history in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
-                                                 return_past_key_values=False,
-                                                 max_length=max_length, top_p=top_p,
-                                                 temperature=temperature):
-         chatbot[-1] = (parse_text(input), parse_text(response))
-
-         yield chatbot, history, past_key_values
-
-
- def reset_user_input():
-     return gr.update(value='')
-
-
- def reset_state():
-     return [], [], None
-
-
- with gr.Blocks() as demo:
-     gr.HTML("""<h1 align="center">ChatGLM2-6B</h1>""")
-
-     chatbot = gr.Chatbot()
-     with gr.Row():
-         with gr.Column(scale=4):
-             with gr.Column(scale=12):
-                 #user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
-                 user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10)
-             with gr.Column(min_width=32, scale=1):
-                 submitBtn = gr.Button("Submit", variant="primary")
-         with gr.Column(scale=1):
-             emptyBtn = gr.Button("Clear History")
-             max_length = gr.Slider(0, 32768, value=8192, step=1.0, label="Maximum length", interactive=True)
-             top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
-             temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
-
-     history = gr.State([])
-     past_key_values = gr.State(None)
-
-     submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history, past_key_values],
-                     [chatbot, history, past_key_values], show_progress=True)
-     submitBtn.click(reset_user_input, [], [user_input])
-
-     emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)
-
- #demo.queue().launch(share=False, inbrowser=True)
- demo.queue().launch(share=False, inbrowser=True, server_name='0.0.0.0')
web_demo2.py (the same local-path and CPU loading changes, applied inside get_model()):
- from transformers import AutoModel, AutoTokenizer
- import streamlit as st
-
-
- st.set_page_config(
-     page_title="ChatGLM2-6b",
-     page_icon=":robot:",
-     layout='wide'
- )
-
-
- @st.cache_resource
- def get_model():
-     #tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
-     #model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
-     tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
-     model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
-     # from utils import load_model_on_gpus
-     # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
-     model = model.eval()
-     return tokenizer, model
-
-
- tokenizer, model = get_model()
-
- st.title("ChatGLM2-6B")
-
- max_length = st.sidebar.slider(
-     'max_length', 0, 32768, 8192, step=1
- )
- top_p = st.sidebar.slider(
-     'top_p', 0.0, 1.0, 0.8, step=0.01
- )
- temperature = st.sidebar.slider(
-     'temperature', 0.0, 1.0, 0.8, step=0.01
- )
-
- if 'history' not in st.session_state:
-     st.session_state.history = []
-
- if 'past_key_values' not in st.session_state:
-     st.session_state.past_key_values = None
-
- for i, (query, response) in enumerate(st.session_state.history):
-     #for i, (query, response) in enumerate(st.session_state):
-     with st.chat_message(name="user", avatar="user"):
-         st.markdown(query)
-     with st.chat_message(name="assistant", avatar="assistant"):
-         st.markdown(response)
- with st.chat_message(name="user", avatar="user"):
-     input_placeholder = st.empty()
- with st.chat_message(name="assistant", avatar="assistant"):
-     message_placeholder = st.empty()
-
- prompt_text = st.text_area(label="user input",
-                            height=100,
-                            placeholder="please")
-
- button = st.button("send", key="predict")
-
- if button:
-     input_placeholder.markdown(prompt_text)
-     history, past_key_values = st.session_state.history, st.session_state.past_key_values
-     for response, history, past_key_values in model.stream_chat(tokenizer, prompt_text, history,
-                                                                  past_key_values=past_key_values,
-                                                                  max_length=max_length, top_p=top_p,
-                                                                  temperature=temperature,
-                                                                  return_past_key_values=True):
-         message_placeholder.markdown(response)
-
-     st.session_state.history = history
-     st.session_state.past_key_values = past_key_values
5. Run the Code
5.1 Activate the environment
conda activate ChatGLM2B
5.2 python openai_api.py
- (ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python openai_api.py
- Traceback (most recent call last):
- File "openai_api.py", line 16, in <module>
- from sse_starlette.sse import ServerSentEvent, EventSourceResponse
- ModuleNotFoundError: No module named 'sse_starlette'
pip install sse_starlette -i https://pypi.douban.com/simple/
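Once sse_starlette is installed, openai_api.py starts an OpenAI-compatible HTTP service. A minimal client sketch (the port 8000 and the model name chatglm2-6b are assumptions based on the script's defaults; check the console output for the actual values):
- import requests
-
- # Hypothetical request against the locally started OpenAI-style endpoint.
- resp = requests.post(
-     "http://localhost:8000/v1/chat/completions",
-     json={
-         "model": "chatglm2-6b",
-         "messages": [{"role": "user", "content": "你好"}],
-     },
- )
- print(resp.json()["choices"][0]["message"]["content"])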
readline cannot be installed on Windows; the attempt below fails. Either drop or guard the import readline line in cli_demo.py (see the sketch after the error log), or install a Windows substitute such as pyreadline3.
pip install readline -i https://pypi.douban.com/simple/
- (ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>pip install readline -i https://pypi.douban.com/simple/
- Looking in indexes: https://pypi.douban.com/simple/
- Collecting readline
- Downloading https://mirrors.cloud.tencent.com/pypi/packages/f4/01/2cf081af8d880b44939a5f1b446551a7f8d59eae414277fd0c303757ff1b/readline-6.2.4.1.tar.gz (2.3 MB)
- ---------------------------------------- 2.3/2.3 MB 3.1 MB/s eta 0:00:00
- Preparing metadata (setup.py) ... error
- error: subprocess-exited-with-error
-
- × python setup.py egg_info did not run successfully.
- │ exit code: 1
- ╰─> [1 lines of output]
- error: this module is not meant to work on Windows
- [end of output]
-
- note: This error originates from a subprocess, and is likely not a problem with pip.
- error: metadata-generation-failed
-
- × Encountered error while generating package metadata.
- ╰─> See above for output.
-
- note: This is an issue with the package mentioned above, not pip.
- hint: See above for details.
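A small guard in cli_demo.py sidesteps the problem entirely; this is a hypothetical edit, not part of the original script:
- # In cli_demo.py, replace the bare `import readline` with:
- try:
-     import readline  # line editing on Linux/macOS
- except ImportError:
-     pass  # readline is unavailable on Windows; the demo works without it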
5.3 python web_demo.py
- (ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python web_demo.py
- Failed to load cpm_kernels:No module named 'cpm_kernels'
- C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c:1:0: warning: -fPIC ignored for target (all code is position independent)
- #include <omp.h>
- ^
- gcc: error: libgomp.spec: No such file or directory
- Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c -shared -o C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so failed.
- C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels.c:1:0: warning: -fPIC ignored for target (all code is position independent)
- void compress_int4_weight(void *weight, void *out, int n, int m)
- ^
- Running on local URL: http://0.0.0.0:7860
5.4 streamlit run web_demo2.py
- (ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>streamlit run web_demo2.py
-
- You can now view your Streamlit app in your browser.
-
- Local URL: http://localhost:8501
- Network URL: http://192.168.1.103:8501
With that, the large language model is up and running locally.