
How to fine-tune Llama 3.1 8B

https://github.com/unslothai/unsloth

This walkthrough fine-tunes the model with Unsloth (repository above).

1) Prepare the environment

%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
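Before loading anything, it helps to confirm which GPU the runtime has, since the dtype choice in the next step depends on it. A minimal sketch, assuming a CUDA runtime such as Colab:

import torch

# Print the GPU name and whether bfloat16 is supported (Ampere or newer).
print(torch.cuda.get_device_name(0))
print("bfloat16 supported:", torch.cuda.is_bf16_supported())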

2) Load the model

Remember to run the following first, otherwise you will hit an error:

!pip install pyarrow==8.0.0

from unsloth import FastLanguageModel
import torch

# Configuration
max_seq_length = 2048 # Any length works! RoPE scaling is handled automatically internally.
dtype = None          # None = auto-detect. Float16 for Tesla T4/V100, Bfloat16 for Ampere+.
load_in_4bit = True   # Use 4-bit quantization to reduce memory usage. Can be set to False.

# Supported 4-bit pre-quantized models: 4x faster downloads and no OOM (out-of-memory) issues.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1, trained on 15 trillion tokens, 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded a 4-bit version of the 405B model!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12B, 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v0.3, 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",          # Phi-3, 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2, 2x faster!
] # More models at https://huggingface.co/unsloth

# Load the model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # Add a token if using gated models such as meta-llama/Llama-2-7b-hf
)
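To confirm that the 4-bit load actually fits the GPU, you can print memory stats right after loading. A small sketch using PyTorch's CUDA API (the numbers are illustrative, not from the original post):

import torch

gpu = torch.cuda.get_device_properties(0)
reserved_gb = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
total_gb = round(gpu.total_memory / 1024**3, 3)
print(f"GPU = {gpu.name}. Total memory = {total_gb} GB.")
print(f"{reserved_gb} GB reserved after loading the 4-bit model.")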

3) Apply LoRA to reduce memory usage during training and inference while preserving model quality.

model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Any number > 0; suggested values are 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,  # Any value is supported, but 0 is optimized
    bias = "none",     # Any value is supported, but "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # Rank-stabilized LoRA is supported
    loftq_config = None, # And LoftQ
)
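A quick way to see how little LoRA actually trains is to print the trainable-parameter count of the wrapped model. A minimal sketch, assuming the PEFT-wrapped model exposes PEFT's usual helper:

# With r=16 on the seven projection modules above, only a small fraction
# of the 8B parameters should be reported as trainable.
model.print_trainable_parameters()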

4) Load a dataset and normalize its format

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

from datasets import load_dataset
dataset = load_dataset("leo009/mytest", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
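It is worth eyeballing one mapped record to make sure the template and the EOS token landed where you expect. A small sketch (the actual text depends on the leo009/mytest dataset):

# Print the first formatted training example.
print(dataset[0]["text"])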

5) Initialize an SFTTrainer instance to train the language model. SFTTrainer is the trl library's trainer class for supervised fine-tuning; the code below configures the training arguments, dataset, and model, and defines the key settings for the training run.

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)
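Note that the effective batch size is the product of the per-device batch size and the gradient accumulation steps. A tiny sketch making that relationship explicit, with the values from the config above:

per_device_train_batch_size = 2
gradient_accumulation_steps = 4

# Gradients are accumulated over 4 micro-batches of 2, so each optimizer
# step effectively sees 8 examples.
effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps
print(effective_batch_size)  # 8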

6) Start training

trainer_stats = trainer.train()
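trainer.train() returns a TrainOutput object whose metrics dictionary includes timing and loss information, which is handy for comparing runs. A small sketch (key names follow the transformers Trainer convention; exact values depend on your GPU):

# Runtime and final loss for the 60-step run.
print(trainer_stats.metrics.get("train_runtime"), "seconds")
print(trainer_stats.metrics.get("train_loss"))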

7) Run inference with FastLanguageModel to generate a response for a given instruction.

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
    [
        alpaca_prompt.format(
            "介绍AI超元域频道.", # instruction
            "", # input
            "", # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

8) Save the model

model.save_pretrained("lora_model") # Local saving

tokenizer.save_pretrained("lora_model")

9) Push the model to Hugging Face

model.push_to_hub("fengn/llama3.1-lora", token = "") # Online saving
tokenizer.push_to_hub("fengn/llama3.1-lora", token = "") # Online saving
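Once pushed, the adapters can be pulled back from the Hub by repo name instead of the local folder. A hedged sketch, assuming the repo above is accessible (pass a token if it is private) and that Unsloth resolves the adapter repo to its base model:

from unsloth import FastLanguageModel

# Load the base model plus the LoRA adapters directly from the Hub repo.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "fengn/llama3.1-lora",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)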

10) Load the locally saved model for inference

if True:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""

inputs = tokenizer(
    [
        alpaca_prompt.format(
            "AI超元域是谁?", # instruction
            "", # input
            "", # output - leave this blank for generation!
        )
    ], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)
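If Unsloth is not available at inference time, the same lora_model folder can also be loaded with plain PEFT/transformers. A hedged sketch (slower than Unsloth's path; assumes peft and bitsandbytes are installed):

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Loads the base model referenced in the adapter config, then applies the LoRA weights.
model = AutoPeftModelForCausalLM.from_pretrained("lora_model", load_in_4bit = True)
tokenizer = AutoTokenizer.from_pretrained("lora_model")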

11) Save merged weights or LoRA adapters only

  • Merged 16-bit and 4-bit saving: save the model in 16-bit or 4-bit format to optimize storage and compute efficiency.
  • LoRA-adapter-only saving: save just the LoRA adapters instead of the whole model, which is useful for sharing or deploying adapters fine-tuned for a specific task.
# Merge to 16bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",)
if True: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "")

# Merge to 4bit
if True: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",)
if True: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "")

# Just LoRA adapters
if True: model.save_pretrained_merged("model", tokenizer, save_method = "lora",)
if True: model.push_to_hub_merged("hf/model", tokenizer, save_method = "lora", token = "")
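A merged 16-bit checkpoint is a standard Hugging Face model, so it can be served without Unsloth or PEFT at all. A minimal sketch, assuming the local "model" folder produced by save_pretrained_merged above:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Loads the merged full-precision weights; no adapter files are needed.
model = AutoModelForCausalLM.from_pretrained("model", torch_dtype = torch.bfloat16, device_map = "auto")
tokenizer = AutoTokenizer.from_pretrained("model")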

12) Quantize and export to GGUF

# Save to 8bit Q8_0
if True: model.save_pretrained_gguf("model", tokenizer,)
# Remember to go to https://huggingface.co/settings/tokens for a token!
# And change hf to your username!
if True: model.push_to_hub_gguf("hf/model", tokenizer, token = "")

# Save to 16bit GGUF
if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
if True: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "f16", token = "")

# Save to q4_k_m GGUF
if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
if True: model.push_to_hub_gguf("hf/model", tokenizer, quantization_method = "q4_k_m", token = "")

# Save to multiple GGUF options - much faster if you want multiple!
if True:
    model.push_to_hub_gguf(
        "hf/model", # Change hf to your username!
        tokenizer,
        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
        token = "", # Get a token at https://huggingface.co/settings/tokens
    )
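The exported GGUF file can then be run locally with llama.cpp-based tooling such as llama-cpp-python or Ollama. A hedged sketch using llama-cpp-python; the model filename is an assumption, so check the actual name Unsloth writes into the "model" folder:

from llama_cpp import Llama

# Load the quantized GGUF model (path is hypothetical - adjust to the file Unsloth produced).
llm = Llama(model_path = "model/unsloth.Q4_K_M.gguf", n_ctx = 2048)

prompt = "### Instruction:\n介绍AI超元域频道.\n### Input:\n\n### Response:\n"
out = llm(prompt, max_tokens = 64)
print(out["choices"][0]["text"])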
