According to the official ChatGLM-6B tutorial on Bilibili ("[官方教程] ChatGLM-6B 微调: P-Tuning, LoRA, Full parameter"), full-parameter fine-tuning tends to cause catastrophic forgetting: after training on the later data, the model forgets what it learned earlier.
```bash
export PRE_SEQ_LEN=128
export LR=2e-2
export NUM_GPUS=1
# export WANDB_MODE=dryrun
export WANDB_DISABLED=true
torchrun --standalone --nnodes=1 --nproc-per-node=$NUM_GPUS ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train.json \
    --validation_file data/AdvertiseGen/dev.json \
    --preprocessing_num_workers 4 \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path /home/geekplusa/ai/models/bigmodels/prepare_models/chatglm/chatglm2-6b-32k-int4 \
    --output_dir /home/geekplusa/ai/models/bigmodels/train_models/chatglm/chatglm2/adgen-chatglm2-6b-pt-$PRE_SEQ_LEN-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 256 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 2000 \
    --logging_steps 10 \
    --save_steps 500 \
    --learning_rate $LR \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4
```
```bash
export PRE_SEQ_LEN=128
export LR=2e-2
export NUM_GPUS=4
# export WANDB_MODE=dryrun
export WANDB_DISABLED=true
nohup torchrun --standalone --nnodes=1 --nproc-per-node=$NUM_GPUS ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train_min.json \
    --validation_file data/AdvertiseGen/dev_min.json \
    --preprocessing_num_workers 4 \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path models/chatglm2-6b-int4 \
    --output_dir models/adgen-chatglm2-6b-int4-pt-128-2e-e \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 256 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 100 \
    --logging_steps 10 \
    --save_steps 50 \
    --learning_rate $LR \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4 \
    > log 2>&1 &
```
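Because this run is backgrounded with nohup, training progress is written to the `log` file from the redirect above rather than to the terminal. A quick way to follow it:

```bash
# Follow the training output written by the nohup command above
tail -f log

# Check that the torchrun worker processes are still alive
ps -ef | grep ptuning/main.py
```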
```bash
export WANDB_DISABLED=true
export PRE_SEQ_LEN=128
export LR=2e-2
CUDA_VISIBLE_DEVICES=0 python ptuning/main.py \
    --do_train \
    --train_file data/AdvertiseGen/train_min.json \
    --validation_file data/AdvertiseGen/val_min.json \
    --prompt_column content \
    --response_column summary \
    --overwrite_cache \
    --model_name_or_path /home/geekplusa/ai/models/bigmodels/prepare_models/chatglm/chatglm2-6b-int4 \
    --output_dir /home/geekplusa/ai/models/bigmodels/train_models/chatglm/chatglm2/adgen-chatglm2-6b-pt-医院1-$PRE_SEQ_LEN-$LR \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 1000 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --predict_with_generate \
    --max_steps 1 \
    --logging_steps 10 \
    --save_steps 1 \
    --learning_rate 2e-2 \
    --pre_seq_len 128 \
    --quantization_bit 4
```
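After training, the prefix checkpoint can be evaluated on a held-out file before deployment. The sketch below is adapted from the `ptuning/evaluate.sh` script shipped with the ChatGLM2-6B repository; it is not part of the original post, and the `CHECKPOINT` path and `STEP` value are placeholders that must be replaced with the `--output_dir` and `--save_steps` checkpoint actually produced by one of the training runs above.

```bash
export PRE_SEQ_LEN=128
# Placeholders: point these at your own training output
CHECKPOINT=models/adgen-chatglm2-6b-int4-pt-128-2e-e
STEP=50

torchrun --standalone --nnodes=1 --nproc-per-node=1 ptuning/main.py \
    --do_predict \
    --validation_file data/AdvertiseGen/dev_min.json \
    --test_file data/AdvertiseGen/dev_min.json \
    --overwrite_cache \
    --prompt_column content \
    --response_column summary \
    --model_name_or_path models/chatglm2-6b-int4 \
    --ptuning_checkpoint $CHECKPOINT/checkpoint-$STEP \
    --output_dir $CHECKPOINT \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 256 \
    --per_device_eval_batch_size 1 \
    --predict_with_generate \
    --pre_seq_len $PRE_SEQ_LEN \
    --quantization_bit 4
```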
This part covers four deployment modes: the base model on a single GPU, the base model on multiple GPUs, the P-Tuning model on a single GPU, and the P-Tuning model on multiple GPUs.
See the script web_demo2.py below.
```python
from transformers import AutoModel, AutoTokenizer
from transformers import AutoConfig
import streamlit as st
import os

st.set_page_config(
    page_title="ChatGLM2-6b 演示",
    page_icon=":robot:",
    layout='wide'
)

@st.cache_resource
def get_model_onegpu():
    # Base model, single GPU
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    model = AutoModel.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True).cuda()
    return tokenizer, model

@st.cache_resource
def get_model_mitugpu():
    # Base model, multiple GPUs
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    from utils import load_model_on_gpus
    model = load_model_on_gpus("models/chatglm2-6b-int4", num_gpus=4)
    return tokenizer, model

@st.cache_resource
def get_model_ptuning_onegpu():
    # P-Tuning model, single GPU
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    import torch
    CHECKPOINT_PATH = "models/gukai/checkpoint-500/"
    config = AutoConfig.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True, pre_seq_len=128)
    model = AutoModel.from_pretrained("models/chatglm2-6b-int4", config=config, trust_remote_code=True)
    prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith("transformer.prefix_encoder."):
            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
    model = model.quantize(4)
    model = model.cuda()
    return tokenizer, model

@st.cache_resource
def get_model_ptuning_mutigpu():
    # P-Tuning model, multiple GPUs
    tokenizer = AutoTokenizer.from_pretrained("models/chatglm2-6b-int4", trust_remote_code=True)
    import torch
    from utils import load_model_on_gpus
    CHECKPOINT_PATH = "models/gukai/checkpoint-500/"
    prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"))
    # prefix_state_dict = torch.load(os.path.join(CHECKPOINT_PATH, "pytorch_model.bin"), map_location=lambda storage, loc: storage.cuda(1))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith("transformer.prefix_encoder."):
            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
    model = load_model_on_gpus("models/chatglm2-6b-int4", num_gpus=4, pre_seq_len=128)
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
    model.transformer.prefix_encoder.float()
    model = model.quantize(4)
    # model = model.cuda()
    return tokenizer, model

@st.cache_resource
def get_model():
    tokenizer = AutoTokenizer.from_pretrained("models/gukai/checkpoint-500", trust_remote_code=True)
    model = AutoModel.from_pretrained("models/gukai/checkpoint-500", trust_remote_code=True).cuda()
    # For multi-GPU support, replace the line above with the two lines below,
    # and set num_gpus to the number of GPUs you actually have
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("THUDM/chatglm2-6b", num_gpus=2)
    model = model.eval()
    return tokenizer, model

# tokenizer, model = get_model()
tokenizer, model = get_model_ptuning_mutigpu()

st.title("ChatGLM2-6B")

max_length = st.sidebar.slider('max_length', 0, 32768, 8192, step=1)
top_p = st.sidebar.slider('top_p', 0.0, 1.0, 0.8, step=0.01)
temperature = st.sidebar.slider('temperature', 0.0, 1.0, 0.8, step=0.01)

if 'history' not in st.session_state:
    st.session_state.history = []
if 'past_key_values' not in st.session_state:
    st.session_state.past_key_values = None

for i, (query, response) in enumerate(st.session_state.history):
    with st.chat_message(name="user", avatar="user"):
        st.markdown(query)
    with st.chat_message(name="assistant", avatar="assistant"):
        st.markdown(response)

with st.chat_message(name="user", avatar="user"):
    input_placeholder = st.empty()
with st.chat_message(name="assistant", avatar="assistant"):
    message_placeholder = st.empty()

prompt_text = st.text_area(label="用户命令输入", height=100, placeholder="请在这儿输入您的命令")
button = st.button("发送", key="predict")

if button:
    input_placeholder.markdown(prompt_text)
    history, past_key_values = st.session_state.history, st.session_state.past_key_values
    for response, history, past_key_values in model.stream_chat(
            tokenizer, prompt_text, history,
            past_key_values=past_key_values,
            max_length=max_length, top_p=top_p, temperature=temperature,
            return_past_key_values=True):
        message_placeholder.markdown(response)
    st.session_state.history = history
    st.session_state.past_key_values = past_key_values
```
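The script is a Streamlit app, so it is launched with `streamlit run` rather than with `python`. A minimal launch command (the port number here is an arbitrary choice):

```bash
# Serve the web demo; pick any free port
streamlit run web_demo2.py --server.port 8501
```

To switch between the four deployment modes, change which `get_model_*` function is called near the top of the script (the post uses `get_model_ptuning_mutigpu()`).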