seed: 0
output_dir: './output' # path to save checkpoint/strategy
load_checkpoint: '/mnt/dataset/mindformers/research/pretrain/ckpt'
src_strategy_path_or_dir: ''
auto_trans_ckpt: True  # If true, auto transform load_checkpoint to load in distributed model
only_save_strategy: False
resume_training: False
use_parallel: True
run_mode: 'finetune'

# trainer config
trainer:
  type: CausalLanguageModelingTrainer
  model_name: 'qwen2_7b'

# dataset
train_dataset: &train_dataset
  data_loader:
    type: MindDataset
    dataset_dir: "/mnt/dataset/alpaca_ds/mindrecord/alpaca-messages.mindrecord"
    shuffle: True
  input_columns: ["input_ids", "target_ids", "attention_mask"]
  num_parallel_workers: 8
  python_multiprocessing: False
  drop_remainder: True
  batch_size: 2
  repeat: 1
  numa_enable: False
  prefetch_size: 1
train_dataset_task:
  type: CausalLanguageModelDataset
  dataset_config: *train_dataset

# runner config
runner_config:
  epochs: 5
  batch_size: 2
  sink_mode: True
  sink_size: 2
runner_wrapper:
  type: MFTrainOneStepCell
  scale_sense:
    type: DynamicLossScaleUpdateCell
    loss_scale_value: 65536
    scale_factor: 2
    scale_window: 1000
  use_clip_grad: True

# optimizer
optimizer:
  type: FP32StateAdamWeightDecay
  beta1: 0.9
  beta2: 0.95
  eps: 1.e-6
  weight_decay: 0.1

# lr schedule
lr_schedule:
  type: CosineWithWarmUpLR
  learning_rate: 8.e-4
  lr_end: 3.e-5
  warmup_ratio: 0.01
  total_steps: -1  # -1 means it will load the total steps of the dataset

# callbacks
callbacks:
  - type: MFLossMonitor
  - type: CheckpointMointor
    prefix: "qwen2"
    save_checkpoint_steps: 10000
    keep_checkpoint_max: 3
    integrated_save: False
    async_save: False
  - type: ObsMonitor

# default parallel of device num = 8 for Atlas 800T A2
parallel_config:
  data_parallel: 2
  model_parallel: 4
  pipeline_stage: 1
  micro_batch_num: 4
  vocab_emb_dp: True
  gradient_aggregation_group: 4
# when model parallel is greater than 1, we can set micro_batch_interleave_num=2, that may accelerate the train process.
micro_batch_interleave_num: 1

# recompute config
recompute_config:
  recompute: False
  select_recompute: False
  parallel_optimizer_comm_recompute: False
  mp_comm_recompute: False
  recompute_slice_activation: False

model:
  model_config:
    type: LlamaConfig
    batch_size: 1
    seq_length: 2048
    hidden_size: 4096
    num_layers: 32
    num_heads: 32
    vocab_size: 151936
    intermediate_size: 11008
    qkv_has_bias: True
    rms_norm_eps: 1.0e-6
    theta: 1000000.0
    emb_dropout_prob: 0.0
    eos_token_id: 151643
    pad_token_id: 151643
    bos_token_id: 151643
    compute_dtype: "float16"
    layernorm_compute_type: "float16"
    softmax_compute_type: "float16"
    rotary_dtype: "float16"
    param_init_type: "float16"
    use_past: True
    use_flash_attention: False
    use_paged_attention: False
    # block_size: 16
    # num_blocks: 512
    use_past_shard: False
    offset: 0
    checkpoint_name_or_path: ""
    repetition_penalty: 1
    max_decode_length: 512
    top_k: 0
    top_p: 0.8
    do_sample: False
    pet_config:
      pet_type: lora
      # configuration of lora
      lora_rank: 64
      lora_alpha: 16
      lora_dropout: 0.05
      target_modules: '.*wq|.*wk|.*wv|.*wo|.*w1|.*w2|.*w3'
      freeze_exclude: ["*wte*", "*lm_head*"]
    # configuration items copied from Qwen
    rotary_pct: 1.0
    rotary_emb_base: 10000
    kv_channels: 128
  arch:
    type: LlamaForCausalLM

processor:
  return_tensors: ms
  tokenizer:
    model_max_length: 32768
    vocab_file: "/mnt/dataset/mindformers/research/pretrain/vocab.json"
    merges_file: "/mnt/dataset/mindformers/research/pretrain/merges.txt"
    unk_token: "<|endoftext|>"
    eos_token: "<|endoftext|>"
    pad_token: "<|endoftext|>"
    type: Qwen2Tokenizer
  type: Qwen2Processor

# mindspore context init config
context:
  mode: 0  # 0--Graph Mode; 1--Pynative Mode
  device_target: "Ascend"
  enable_graph_kernel: False
  graph_kernel_flags: "--disable_expand_ops=Softmax,Dropout --enable_parallel_fusion=true --reduce_fuse_depth=8 --enable_auto_tensor_inplace=true"
  ascend_config:
    precision_mode: "must_keep_origin_dtype"
  max_call_depth: 10000
  max_device_memory: "31GB"
  save_graphs: False
  save_graphs_path: "./graph"
  device_id: 0

# parallel context config
parallel:
  parallel_mode: 1  # 0-data parallel, 1-semi-auto parallel, 2-auto parallel, 3-hybrid parallel
  gradients_mean: False
  enable_alltoall: False
  full_batch: True
  search_mode: "sharding_propagation"
  enable_parallel_optimizer: True
  strategy_ckpt_config:
    save_file: "./ckpt_strategy.ckpt"
    only_trainable_params: False
  parallel_optimizer_config:
    gradient_accumulation_shard: False
    parallel_optimizer_threshold: 64
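As a side note, the parallel settings above have to tile the device count of the launch exactly. A minimal standalone sanity check (plain Python, not a MindFormers API), assuming the 8-card node the config comment mentions:

# Values copied from parallel_config above; device_num assumes one 8-card node.
data_parallel = 2
model_parallel = 4
pipeline_stage = 1
device_num = 8

# dp * mp * pp must equal the number of NPUs, or strategy building fails.
assert data_parallel * model_parallel * pipeline_stage == device_num

# With pipeline_stage == 1, micro_batch_num is unused and the global batch
# per optimizer step is roughly batch_size * data_parallel.
batch_size = 2
print("global batch size:", batch_size * data_parallel)  # -> 4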
With the configuration above, LoRA fine-tuning of qwen1_5 7b on a 910A with MindFormers 1.0 fails during graph compilation with "Cast failed, original value: 0.0883883, type: FP32Imm", as follows:
File "/home/ma-user/anaconda3/envs/MindSpore/lib/python3.9/site-packages/mindspore/common/api.py", line 1547, in compile result = self._graph_executor.compile(obj, args, kwargs, phase, self._use_vm_mode()) RuntimeError: Cast failed, original value: 0.0883883, type: FP32Imm ---------------------------------------------------- - Framework Unexpected Exception Raised: ---------------------------------------------------- This exception is caused by framework's unexpected error. Please create an issue at https://gitee.com/mindspore/mindspore/issues to get help. ---------------------------------------------------- - C++ Call Stack: (For framework developers) ---------------------------------------------------- mindspore/core/ir/anf.h:910 GetValue [WARNING] MD(67780,ffffb77d0010,python):2024-05-11-03:04:21.629.364 [mindspore/ccsrc/minddata/dataset/engine/datasetops/data_queue_op.cc:163] ~DataQueueOp] preprocess_batch: 100; batch_queue: 1, 1, 1, 1, 1, 1, 1, 1, 1, 1;
The fix is to modify the following items in the configuration file (under model.model_config):
use_flash_attention: False
use_paged_attention: False
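A likely explanation: FlashAttention (and PagedAttention) in MindFormers targets newer Ascend hardware such as Atlas 800T A2 / 910B-class devices, and the required operators are not available on a 910A, so graph compilation fails while handling the FP32 softmax-scale constant shown in the error. With both flags set to False, attention falls back to the standard implementation and compilation succeeds.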