赞
踩
1、首先在启动文件train.py中将resume设置为True，其他参数可按需设置。
- from ultralytics import YOLO
-
- model = YOLO('runs/detect/train/weights/last.pt')
- results = model.train(save=True, resume=True)
2、修改ultralytics/engine/trainer.py文件
在ultralytics/engine/trainer.py中找到def check_resume(self)，将resume = self.args.resume替换为resume = 'runs/detect/train/weights/last.pt'。这个路径是你想要继续训练的权重文件（last.pt），请根据自己的训练输出目录修改。
-
- def check_resume(self):
- ###### 修改处 ###############
- # resume = self.args.resume
- resume = 'runs/detect/train/weights/last.pt';
- ######################################
再找到def resume_training(self, ckpt)，在其函数体的第一行添加：
ckpt = torch.load('runs/detect/train/weights/last.pt')
将start_epoch = ckpt['epoch'] + 1中的start_epoch修改为上次已经训练完成的epoch数即可。比如上次训练了100个epoch、这次想再继续训练50个epoch，就改为start_epoch = 100。
- def resume_training(self, ckpt):
- """Resume YOLO training from given epoch and best fitness."""
- ###### 修改处 ###############
- ckpt = torch.load('runs/detect/train/weights/last.pt')
- ######################################
- if ckpt is None:
- return
- best_fitness = 0.0
- ###### 修改处 ###############
- #start_epoch = ckpt['epoch'] + 1
- start_epoch = 100
- ######################################
找到BaseTrainer下的
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
将self.epochs = self.args.epochs修改为训练的总epoch数（上次已训练的epoch数 + 本次要继续训练的epoch数）。比如上次训练了100个epoch、这次想再继续训练50个epoch，就改为self.epochs = 150。
- def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
- """
- Initializes the BaseTrainer class.
- Args:
- cfg (str, optional): Path to a configuration file. Defaults to DEFAULT_CFG.
- overrides (dict, optional): Configuration overrides. Defaults to None.
- """
- self.args = get_cfg(cfg, overrides)
- self.check_resume(overrides)
- self.device = select_device(self.args.device, self.args.batch)
- self.validator = None
- self.model = None
- self.metrics = None
- self.plots = {}
- init_seeds(self.args.seed + 1 + RANK, deterministic=self.args.deterministic)
-
- # Dirs
- self.save_dir = get_save_dir(self.args)
- self.wdir = self.save_dir / 'weights' # weights dir
- if RANK in (-1, 0):
- self.wdir.mkdir(parents=True, exist_ok=True) # make dir
- self.args.save_dir = str(self.save_dir)
- yaml_save(self.save_dir / 'args.yaml', vars(self.args)) # save run args
- self.last, self.best = self.wdir / 'last.pt', self.wdir / 'best.pt' # checkpoint paths
- self.save_period = self.args.save_period
-
- self.batch_size = self.args.batch
- ###### 修改处 ###############
- #self.epochs = self.args.epochs
- self.epochs = 150
- ######################################
- self.start_epoch = 0

3、修改ultralytics/engine/model.py文件
在model.py文件中找到def train(self, trainer=None, **kwargs):进行如下修改
- def train(self, trainer=None, **kwargs):
- """
- Trains the model on a given dataset.
- Args:
- trainer (BaseTrainer, optional): Customized trainer.
- **kwargs (Any): Any number of arguments representing the training configuration.
- """
- self._check_is_pytorch_model()
- if self.session: # Ultralytics HUB session
- if any(kwargs):
- LOGGER.warning('WARNING ⚠️ using HUB training arguments, ignoring local training arguments.')
- kwargs = self.session.train_args
- check_pip_update_available()
-
- overrides = yaml_load(check_yaml(kwargs['cfg'])) if kwargs.get('cfg') else self.overrides
- custom = {'data': TASK2DATA[self.task]} # method defaults
- args = {**overrides, **custom, **kwargs, 'mode': 'train'} # highest priority args on the right
- if args.get('resume'):
- args['resume'] = self.ckpt_path
-
- self.trainer = (trainer or self.smart_load('trainer'))(overrides=args, _callbacks=self.callbacks)
- if not args.get('resume'): # manually set model only if not resuming
- ###### 修改处 ###############
- # self.trainer.model = self.trainer.get_model(weights=self.model if self.ckpt else None, cfg=self.model.yaml)
- # self.model = self.trainer.model
-
- self.trainer.model = self.model
- ######################################
- self.trainer.hub_session = self.session # attach optional HUB session
- self.trainer.train()
- # Update model and cfg after training
- if RANK in (-1, 0):
- ckpt = self.trainer.best if self.trainer.best.exists() else self.trainer.last
- self.model, _ = attempt_load_one_weight(ckpt)
- self.overrides = self.model.args
- self.metrics = getattr(self.trainer.validator, 'metrics', None) # TODO: no metrics returned by DDP
- return self.metrics

最后运行train.py即可
注意：训练完成后请把以上修改的代码改回去！
4、如果上次训练因为patience设置太小而提前停止（早停），想要继续训练到指定的epoch数，只需再把patience修改为一个较大的值即可，这里我设置为300。
ultralytics/engine/trainer.py:
- def _setup_train(self, world_size):
- """
- Builds dataloaders and optimizer on correct rank process.
- """
-
- # Model
- self.run_callbacks('on_pretrain_routine_start')
- ckpt = self.setup_model()
- self.model = self.model.to(self.device)
- self.set_model_attributes()
-
- # Freeze layers
- freeze_list = self.args.freeze if isinstance(
- self.args.freeze, list) else range(self.args.freeze) if isinstance(self.args.freeze, int) else []
- always_freeze_names = ['.dfl'] # always freeze these layers
- freeze_layer_names = [f'model.{x}.' for x in freeze_list] + always_freeze_names
- for k, v in self.model.named_parameters():
- # v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
- if any(x in k for x in freeze_layer_names):
- LOGGER.info(f"Freezing layer '{k}'")
- v.requires_grad = False
- elif not v.requires_grad:
- LOGGER.info(f"WARNING ⚠️ setting 'requires_grad=True' for frozen layer '{k}'. "
- 'See ultralytics.engine.trainer for customization of frozen layers.')
- v.requires_grad = True
-
- # Check AMP
- self.amp = torch.tensor(self.args.amp).to(self.device) # True or False
- if self.amp and RANK in (-1, 0): # Single-GPU and DDP
- callbacks_backup = callbacks.default_callbacks.copy() # backup callbacks as check_amp() resets them
- self.amp = torch.tensor(check_amp(self.model), device=self.device)
- callbacks.default_callbacks = callbacks_backup # restore callbacks
- if RANK > -1 and world_size > 1: # DDP
- dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None)
- self.amp = bool(self.amp) # as boolean
- self.scaler = amp.GradScaler(enabled=self.amp)
- if world_size > 1:
- self.model = DDP(self.model, device_ids=[RANK])
-
- # Check imgsz
- gs = max(int(self.model.stride.max() if hasattr(self.model, 'stride') else 32), 32) # grid size (max stride)
- self.args.imgsz = check_imgsz(self.args.imgsz, stride=gs, floor=gs, max_dim=1)
-
- # Batch size
- if self.batch_size == -1 and RANK == -1: # single-GPU only, estimate best batch size
- self.args.batch = self.batch_size = check_train_batch_size(self.model, self.args.imgsz, self.amp)
-
- # Dataloaders
- batch_size = self.batch_size // max(world_size, 1)
- self.train_loader = self.get_dataloader(self.trainset, batch_size=batch_size, rank=RANK, mode='train')
- if RANK in (-1, 0):
- self.test_loader = self.get_dataloader(self.testset, batch_size=batch_size * 2, rank=-1, mode='val')
- self.validator = self.get_validator()
- metric_keys = self.validator.metrics.keys + self.label_loss_items(prefix='val')
- self.metrics = dict(zip(metric_keys, [0] * len(metric_keys)))
- self.ema = ModelEMA(self.model)
- if self.args.plots:
- self.plot_training_labels()
-
- # Optimizer
- self.accumulate = max(round(self.args.nbs / self.batch_size), 1) # accumulate loss before optimizing
- weight_decay = self.args.weight_decay * self.batch_size * self.accumulate / self.args.nbs # scale weight_decay
- iterations = math.ceil(len(self.train_loader.dataset) / max(self.batch_size, self.args.nbs)) * self.epochs
- self.optimizer = self.build_optimizer(model=self.model,
- name=self.args.optimizer,
- lr=self.args.lr0,
- momentum=self.args.momentum,
- decay=weight_decay,
- iterations=iterations)
- # Scheduler
- if self.args.cos_lr:
- self.lf = one_cycle(1, self.args.lrf, self.epochs) # cosine 1->hyp['lrf']
- else:
- self.lf = lambda x: (1 - x / self.epochs) * (1.0 - self.args.lrf) + self.args.lrf # linear
- self.scheduler = optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=self.lf)
- ###### 修改处 ###############
- #self.stopper, self.stop = EarlyStopping(patience=self.args.patience), False
- self.stopper, self.stop = EarlyStopping(patience=300), False
- ######################################
- self.resume_training(ckpt)
- self.scheduler.last_epoch = self.start_epoch - 1 # do not move
- self.run_callbacks('on_pretrain_routine_end')

最后运行train.py即可
注意：训练完成后请把以上修改的代码改回去！
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。