
Deploying ChatGLM2-6B Locally on Windows (CPU)

1. About ChatGLM2-6B

ChatGLM2-6B is an open bilingual (Chinese-English) dialogue model released by Zhipu AI and Tsinghua University's KEG lab.

2. Download ChatGLM2-6B

https://github.com/THUDM/ChatGLM2-6B
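To fetch the repository, a standard git clone does the job:

git clone https://github.com/THUDM/ChatGLM2-6B.git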

3. Set Up the Environment

3.1 Create a conda environment

conda create -n ChatGLM2B

3.2 Enter the ChatGLM2-6B directory

cd D:\workspace\opensource\openai\ChatGLM2-6B

3.3 Install the dependencies

pip install -r requirements.txt

3.4 Download the model

Project page on Hugging Face: https://huggingface.co/THUDM/chatglm2-6b

The model files must be downloaded from there; for CPU deployment this post uses the quantized chatglm2-6b-int4 variant.

Alternatively, download from the Tsinghua University cloud drive (tsinghua.edu.cn).
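One scripted way to fetch the weights (a minimal sketch, assuming huggingface_hub is installed; the local_dir path matches the one hard-coded in the demo scripts below):

# Sketch: download the int4 weights into the directory the demos expect.
# Assumes: pip install huggingface_hub
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="THUDM/chatglm2-6b-int4",
    local_dir="D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4",
)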

3.5 Install tdm64-gcc-5.1.0-2

TDM-GCC supplies the gcc toolchain used to compile the int4 model's CPU quantization kernels on Windows. When installing, make sure the OpenMP component is selected, or gcc will fail with the libgomp error shown in section 5.3.

3.6 Install PyTorch

...
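For a CPU-only machine, a typical way to install PyTorch (an example using the official CPU wheel index; not spelled out in the original steps) is:

pip install torch --index-url https://download.pytorch.org/whl/cpu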

4. Modify the Code

The key change for CPU inference is loading the model with .float() instead of .cuda() and pointing from_pretrained at the local model directory. Three demo scripts are modified below.

cli_demo.py

import os
import platform
import signal
from transformers import AutoTokenizer, AutoModel
import readline

tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
# model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).cuda()
model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
# For multi-GPU support, use the two lines below instead of the line above,
# and set num_gpus to the number of GPUs you actually have.
# from utils import load_model_on_gpus
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
model = model.eval()

os_name = platform.system()
clear_command = 'cls' if os_name == 'Windows' else 'clear'
stop_stream = False


def build_prompt(history):
    prompt = "欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序"
    for query, response in history:
        prompt += f"\n\n用户:{query}"
        prompt += f"\n\nChatGLM2-6B:{response}"
    return prompt


def signal_handler(signal, frame):
    global stop_stream
    stop_stream = True


def main():
    past_key_values, history = None, []
    global stop_stream
    print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
    while True:
        query = input("\n用户:")
        if query.strip() == "stop":
            break
        if query.strip() == "clear":
            past_key_values, history = None, []
            os.system(clear_command)
            print("欢迎使用 ChatGLM2-6B 模型,输入内容即可进行对话,clear 清空对话历史,stop 终止程序")
            continue
        print("\nChatGLM:", end="")
        current_length = 0
        for response, history, past_key_values in model.stream_chat(tokenizer, query, history=history,
                                                                     past_key_values=past_key_values,
                                                                     return_past_key_values=True):
            if stop_stream:
                stop_stream = False
                break
            else:
                print(response[current_length:], end="", flush=True)
                current_length = len(response)
        print("")


if __name__ == "__main__":
    main()
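Note that the import readline at the top will fail on a stock Windows Python, because readline cannot be pip-installed there; see section 5.2 for the error and a workaround.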

web_demo.py

from transformers import AutoModel, AutoTokenizer
import gradio as gr
import mdtex2html
from utils import load_model_on_gpus

tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
# tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True)
# model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b", trust_remote_code=True).float()
# from utils import load_model_on_gpus
# model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
model = model.eval()

"""Override Chatbot.postprocess"""


def postprocess(self, y):
    if y is None:
        return []
    for i, (message, response) in enumerate(y):
        y[i] = (
            None if message is None else mdtex2html.convert((message)),
            None if response is None else mdtex2html.convert(response),
        )
    return y


gr.Chatbot.postprocess = postprocess


def parse_text(text):
    """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
    lines = text.split("\n")
    lines = [line for line in lines if line != ""]
    count = 0
    for i, line in enumerate(lines):
        if "```" in line:
            count += 1
            items = line.split('`')
            if count % 2 == 1:
                lines[i] = f"<pre><code class='language-{items[-1]}'>"
            else:
                lines[i] = f"<br></code></pre>"
        else:
            if i > 0:
                if count % 2 == 1:
                    line = line.replace("`", "\`")
                    line = line.replace("<", "&lt;")
                    line = line.replace(">", "&gt;")
                    line = line.replace(" ", "&nbsp;")
                    line = line.replace("*", "&ast;")
                    line = line.replace("_", "&lowbar;")
                    line = line.replace("-", "&#45;")
                    line = line.replace(".", "&#46;")
                    line = line.replace("!", "&#33;")
                    line = line.replace("(", "&#40;")
                    line = line.replace(")", "&#41;")
                    line = line.replace("$", "&#36;")
                lines[i] = "<br>" + line
    text = "".join(lines)
    return text


def predict(input, chatbot, max_length, top_p, temperature, history, past_key_values):
    chatbot.append((parse_text(input), ""))
    # for response, history, past_key_values in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
    #                                                             return_past_key_values=True,
    #                                                             max_length=max_length, top_p=top_p,
    #                                                             temperature=temperature):
    for response, history in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
                                               return_past_key_values=False,
                                               max_length=max_length, top_p=top_p,
                                               temperature=temperature):
        chatbot[-1] = (parse_text(input), parse_text(response))
        yield chatbot, history, past_key_values


def reset_user_input():
    return gr.update(value='')


def reset_state():
    return [], [], None


with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">ChatGLM2-6B</h1>""")

    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            with gr.Column(scale=12):
                # user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10)
            with gr.Column(min_width=32, scale=1):
                submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            emptyBtn = gr.Button("Clear History")
            max_length = gr.Slider(0, 32768, value=8192, step=1.0, label="Maximum length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.8, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)

    history = gr.State([])
    past_key_values = gr.State(None)

    submitBtn.click(predict, [user_input, chatbot, max_length, top_p, temperature, history, past_key_values],
                    [chatbot, history, past_key_values], show_progress=True)
    submitBtn.click(reset_user_input, [], [user_input])

    emptyBtn.click(reset_state, outputs=[chatbot, history, past_key_values], show_progress=True)

# demo.queue().launch(share=False, inbrowser=True)
demo.queue().launch(share=False, inbrowser=True, server_name='0.0.0.0')
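The launch call now passes server_name='0.0.0.0', which makes the Gradio demo reachable from other machines on the network instead of only from localhost.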

web_demo2.py

from transformers import AutoModel, AutoTokenizer
import streamlit as st

st.set_page_config(
    page_title="ChatGLM2-6b",
    page_icon=":robot:",
    layout='wide'
)


@st.cache_resource
def get_model():
    # tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
    # model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
    tokenizer = AutoTokenizer.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True)
    model = AutoModel.from_pretrained("D:/workspace/opensource/openai/ChatGLM2-6B/model/chatglm2-6b-int4", trust_remote_code=True).float()
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
    model = model.eval()
    return tokenizer, model


tokenizer, model = get_model()

st.title("ChatGLM2-6B")

max_length = st.sidebar.slider(
    'max_length', 0, 32768, 8192, step=1
)
top_p = st.sidebar.slider(
    'top_p', 0.0, 1.0, 0.8, step=0.01
)
temperature = st.sidebar.slider(
    'temperature', 0.0, 1.0, 0.8, step=0.01
)

if 'history' not in st.session_state:
    st.session_state.history = []

if 'past_key_values' not in st.session_state:
    st.session_state.past_key_values = None

for i, (query, response) in enumerate(st.session_state.history):
    # for i, (query, response) in enumerate(st.session_state):
    with st.chat_message(name="user", avatar="user"):
        st.markdown(query)
    with st.chat_message(name="assistant", avatar="assistant"):
        st.markdown(response)

with st.chat_message(name="user", avatar="user"):
    input_placeholder = st.empty()
with st.chat_message(name="assistant", avatar="assistant"):
    message_placeholder = st.empty()

prompt_text = st.text_area(label="user input",
                           height=100,
                           placeholder="please")

button = st.button("send", key="predict")

if button:
    input_placeholder.markdown(prompt_text)
    history, past_key_values = st.session_state.history, st.session_state.past_key_values
    for response, history, past_key_values in model.stream_chat(tokenizer, prompt_text, history,
                                                                past_key_values=past_key_values,
                                                                max_length=max_length, top_p=top_p,
                                                                temperature=temperature,
                                                                return_past_key_values=True):
        message_placeholder.markdown(response)

    st.session_state.history = history
    st.session_state.past_key_values = past_key_values
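Unlike the other demos, web_demo2.py is launched through Streamlit (streamlit run web_demo2.py, section 5.4) rather than run with python directly.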

5. Run the Code

5.1 Activate the environment

conda activate ChatGLM2B
5.2 python openai_api.py

Running the API server first fails with a missing dependency:

(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python openai_api.py
Traceback (most recent call last):
  File "openai_api.py", line 16, in <module>
    from sse_starlette.sse import ServerSentEvent, EventSourceResponse
ModuleNotFoundError: No module named 'sse_starlette'
Install the missing module:

pip install sse_starlette -i https://pypi.douban.com/simple/

readline, however, cannot be installed on Windows; the attempt fails:

(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>pip install readline -i https://pypi.douban.com/simple/
Looking in indexes: https://pypi.douban.com/simple/
Collecting readline
  Downloading https://mirrors.cloud.tencent.com/pypi/packages/f4/01/2cf081af8d880b44939a5f1b446551a7f8d59eae414277fd0c303757ff1b/readline-6.2.4.1.tar.gz (2.3 MB)
     ---------------------------------------- 2.3/2.3 MB 3.1 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... error
  error: subprocess-exited-with-error

  × python setup.py egg_info did not run successfully.
  │ exit code: 1
  ╰─> [1 lines of output]
      error: this module is not meant to work on Windows
      [end of output]

  note: This error originates from a subprocess, and is likely not a problem with pip.
error: metadata-generation-failed

× Encountered error while generating package metadata.
╰─> See above for output.

note: This is an issue with the package mentioned above, not pip.
hint: See above for details.
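A common workaround (my suggestion, not from the original post) is to install pyreadline3, which provides a readline module on Windows, or to make the import in cli_demo.py optional:

pip install pyreadline3 -i https://pypi.douban.com/simple/

# Sketch for the top of cli_demo.py: degrade gracefully when readline is
# unavailable (it only adds line editing and history to the CLI prompt).
try:
    import readline
except ImportError:
    readline = None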
5.3 python web_demo.py

(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>python web_demo.py
Failed to load cpm_kernels:No module named 'cpm_kernels'
C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c:1:0: warning: -fPIC ignored for target (all code is position independent)
 #include <omp.h>
 ^
gcc: error: libgomp.spec: No such file or directory
Compile parallel cpu kernel gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.c -shared -o C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels_parallel.so failed.
C:\Users\xgr\.cache\huggingface\modules\transformers_modules\chatglm2-6b-int4\quantization_kernels.c:1:0: warning: -fPIC ignored for target (all code is position independent)
 void compress_int4_weight(void *weight, void *out, int n, int m)
 ^
Running on local URL: http://0.0.0.0:7860

The libgomp.spec error means gcc cannot find its OpenMP runtime, which usually indicates that TDM-GCC was installed without the OpenMP component (see section 3.5). The parallel quantization kernel fails to build, but the serial kernel compiles, so the demo still comes up.

5.4 streamlit run web_demo2.py

(ChatGLM2B) D:\workspace\opensource\openai\ChatGLM2-6B>streamlit run web_demo2.py

You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://192.168.1.103:8501

With that, the LLM is up and running locally.
