当前的Alpaca模型是基于7B LLaMA模型,使用Self-Instruct论文中的技术生成的52K条指令数据微调而来,并进行了一些修改。
运行环境:A10 GPU(显存22G),CUDA 11.7(cu117),驱动版本470.103.01
- !wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/alpaca/stanford_alpaca.tgz
- !tar -xvf stanford_alpaca.tgz
- !cd stanford_alpaca && echo y | pip uninstall torch && echo y | pip uninstall torchvision && pip install -r requirements.txt && pip install gradio
- !git clone https://github.com/huggingface/transformers.git && \
- cd transformers && \
- git checkout 165dd6dc916a43ed9b6ce8c1ed62c3fe8c28b6ef && \
- pip install -e .
- 数据格式如下,如需使用自己的数据进行微调可以转化成如下形式:
- "instruction":用于描述模型应该执行的任务
- "input" : 任务的可选上下文或输入。例如,当指令是“总结以下文章”时,输入就是文章。
- "output" :需要模型输出的答案
- 格式如下
- [
- {
- "instruction": "Give three tips for staying healthy.",
- "input": "",
- "output": "1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule."
- }
- ]
- # 下载数据集,如有重名文件,先将文件夹中的重名文件重命名。
- !wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/alpaca/alpaca_data.json
4.1 准备权重
!wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/alpaca/llama-7b-hf.tar.gz && tar -xvf llama-7b-hf.tar.gz
4.2 参数调节
- {
- "architectures": ["LLaMAForCausalLM"],
- "bos_token_id": 0,
- "eos_token_id": 1,
- "hidden_act": "silu",
- "hidden_size": 4096,
- "intermediate_size": 11008,
- "initializer_range": 0.02,
- "max_sequence_length": 4,
- "model_type": "llama",
- "num_attention_heads": 32,
- "num_hidden_layers": 4,
- "pad_token_id": -1,
- "rms_norm_eps": 1e-06,
- "torch_dtype": "float16",
- "transformers_version": "4.27.0.dev0",
- "use_cache": true,
- "vocab_size": 32000
- }
4.3 训练
- import os
- os.environ["WANDB_DISABLED"] = "true"
- # 执行训练指令
- !torchrun --nproc_per_node=1 --master_port=29588 ./stanford_alpaca/train.py \
- --model_name_or_path "./llama-7b-hf" \
- --data_path ./alpaca_data.json \
- --bf16 False \
- --output_dir ./models/alpaca-2 \
- --num_train_epochs 1 \
- --per_device_train_batch_size 1 \
- --per_device_eval_batch_size 1 \
- --gradient_accumulation_steps 8 \
- --evaluation_strategy "no" \
- --save_strategy "steps" \
- --save_steps 20 \
- --save_total_limit 1 \
- --learning_rate 2e-5 \
- --model_max_length 4 \
- --weight_decay 0. \
- --warmup_ratio 0.03 \
- --lr_scheduler_type "cosine" \
- --logging_steps 1 \
- --fsdp "full_shard auto_wrap" \
- --fsdp_transformer_layer_cls_to_wrap 'LlamaDecoderLayer' \
- --tf32 False
import transformers

# Load the tokenizer and model from the directory the training step wrote to.
tokenizers = transformers.LlamaTokenizer.from_pretrained("./models/alpaca-2")
model = transformers.LlamaForCausalLM.from_pretrained("./models/alpaca-2").cuda()
model.eval()


def gen(req):
    """Sample a completion for the prompt ``req`` and print the decoded text.

    The prompt is tokenized without special tokens, moved to the GPU, and
    passed to ``model.generate`` with nucleus sampling. Nothing is returned;
    the decoded first sequence is written to stdout.
    """
    encoded = tokenizers(req, return_tensors="pt", add_special_tokens=False)
    on_gpu = {name: tensor.cuda() for name, tensor in encoded.items()}

    # Sampling configuration, kept separate from the model inputs.
    sampling_options = dict(
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_beams=1,
        max_new_tokens=600,
        eos_token_id=tokenizers.eos_token_id,
        pad_token_id=tokenizers.pad_token_id,
    )
    sequences = model.generate(
        inputs=on_gpu["input_ids"],
        attention_mask=on_gpu["attention_mask"],
        **sampling_options,
    )
    print(tokenizers.decode(sequences[0]))


gen("List all Canadian provinces in alphabetical order.")
!wget https://atp-modelzoo-sh.oss-cn-shanghai.aliyuncs.com/release/tutorials/alpaca/gen.py
import gradio as gr
import requests
import json
import transformers

# Load the tokenizer and model from the directory the training step wrote to.
tokenizers = transformers.LlamaTokenizer.from_pretrained("./models/alpaca-2")
model = transformers.LlamaForCausalLM.from_pretrained("./models/alpaca-2").cuda()
model.eval()


def inference(text):
    """Sample a completion for ``text``, print it, and return it.

    Args:
        text: Prompt string taken from the input textbox.

    Returns:
        The decoded string for the first generated sequence.
    """
    batch = tokenizers(text, return_tensors="pt", add_special_tokens=False)
    batch = {k: v.cuda() for k, v in batch.items()}
    full_completion = model.generate(
        inputs=batch["input_ids"],
        attention_mask=batch["attention_mask"],
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        num_beams=1,
        max_new_tokens=600,
        eos_token_id=tokenizers.eos_token_id,
        pad_token_id=tokenizers.pad_token_id,
    )
    # Decode once and reuse the string for both the log line and the result.
    completion_text = tokenizers.decode(full_completion[0])
    print(completion_text)
    return completion_text


demo = gr.Blocks()
with demo:
    input_prompt = gr.Textbox(
        label="请输入需求",
        value="帮我写一篇安全检查的新闻稿件。",
        lines=6,
    )
    generated_txt = gr.Textbox(lines=6)
    b1 = gr.Button("发送")
    b1.click(inference, inputs=[input_prompt], outputs=generated_txt)

# `launch(enable_queue=True)` was deprecated in Gradio 3.x and the keyword is
# rejected by Gradio 4+; enabling the queue through `queue()` works on both.
demo.queue()
demo.launch(share=True)
