On Google Colab, we use the unsloth and trl libraries to fine-tune the llama3-8b model on a training dataset loaded from Google Drive, then run inference to verify the effect of the fine-tuning.
The model is saved back to Google Drive, from where it can be downloaded to a local machine for other uses.
Preparation: upload the training dataset to the root directory of your Google Drive, with the file name train.json.
The data inside train.json is formatted like this:
[
{
"instruction": "你好",
"output": "你好,我是智能助手胖胖"
},
{
"instruction": "hello",
"output": "Hello! I am 智能助手胖胖, an AI assistant developed by 丹宇码农. How can I assist you ?"
}
......
]
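If you need to produce train.json programmatically, here is a minimal sketch (not part of the original steps) that writes the two example records above using Python's json module; upload the resulting file to the root of your Google Drive:

import json

# Sketch: write the sample records shown above to train.json (UTF-8, human-readable).
examples = [
    {"instruction": "你好", "output": "你好,我是智能助手胖胖"},
    {"instruction": "hello", "output": "Hello! I am 智能助手胖胖, an AI assistant developed by 丹宇码农. How can I assist you ?"},
]
with open("train.json", "w", encoding="utf-8") as f:
    json.dump(examples, f, ensure_ascii=False, indent=2)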
The walkthrough uses the unsloth, trl, and transformers libraries.
The full code:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.26" trl peft accelerate bitsandbytes

from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    "unsloth/llama-2-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
    "unsloth/gemma-7b-it-bnb-4bit", # Instruct version of Gemma 7b
    "unsloth/gemma-2b-bnb-4bit",
    "unsloth/gemma-2b-it-bnb-4bit", # Instruct version of Gemma 2b
    "unsloth/llama-3-8b-bnb-4bit", # [NEW] 15 Trillion token Llama-3
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none", # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False, # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    outputs = examples["output"]
    texts = []
    for instruction, output in zip(instructions, outputs):
        input = ""
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset
#dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
#dataset = dataset.map(formatting_prompts_func, batched = True,)
from google.colab import drive
# Mount Google Drive; once it succeeds, a /content/drive/MyDrive/ directory appears in the file tree on the left
drive.mount('/content/drive')


# Load the local dataset:
# each example has instruction and output; input is an empty string
import os
from datasets import load_dataset

data_home = r"/content/drive/MyDrive/"
data_dict = {
    "train": os.path.join(data_home, "train.json"),
    #"validation": os.path.join(data_home, "dev.json"),
}
dataset = load_dataset("json", data_files=data_dict, split = "train")
print(dataset[0])
dataset = dataset.map(formatting_prompts_func, batched = True,)
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

# Start the fine-tuning run
trainer_stats = trainer.train()
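After training finishes, the returned object is a standard transformers TrainOutput, so the run can be inspected like this (a small sketch, not in the original code):

# Sketch: inspect the stats returned by trainer.train().
print(trainer_stats.training_loss)              # average training loss over the run
print(trainer_stats.metrics["train_runtime"])   # wall-clock training time in seconds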
# Inference
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "你是谁?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)
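Optionally, tokens can be streamed to the console as they are generated instead of being decoded at the end; this sketch uses transformers' TextStreamer and is not part of the original post:

# Sketch: stream generated tokens to stdout as they are produced.
from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 64, use_cache = True)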
# The answer printed here clearly comes from our own training data rather than the base model's original output, which shows the fine-tuning took effect


# Save the model; change the path to the mounted Drive directory to save it into your personal Google Drive, then open Drive and download it locally
model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
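To keep the adapters after the Colab runtime is recycled, they can be written straight to the mounted Drive, as the comment above suggests; a sketch with an assumed target directory:

# Sketch: save the LoRA adapters into the mounted Google Drive (directory name is an assumption).
drive_dir = "/content/drive/MyDrive/lora_model"
model.save_pretrained(drive_dir)
tokenizer.save_pretrained(drive_dir)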
# Merge to 16bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
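In a later session, the saved LoRA adapters can be loaded back for inference with the same from_pretrained call used earlier; a sketch where the directory name matches the local save above:

# Sketch: reload the saved LoRA adapters for inference in a new session.
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model", # directory produced by save_pretrained above
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model)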
In fact, you can simply upload the .ipynb file to your personal Google Drive; double-clicking it opens Colab, where you run the cells in order. Cells can be added, deleted, or edited at any time, and GPU and CPU resources are available for free, which makes Colab a great choice for AI enthusiasts.