赞
踩
pyskl官方代码是在linux环境下运行的,在issues里作者也没有提供windows的运行修改,但是给出了pytorch分布式参考修改方法,本文记录下修改过程:
PYSKL Release v0.2:https://github.com/kennymckormick/pyskl/releases/tag/v0.2
按官方说明安装pyskl:
- git clone https://github.com/kennymckormick/pyskl.git
- cd pyskl
- # This command runs well with conda 22.9.0, if you are running an early conda version and got some errors, try to update your conda first
- conda env create -f pyskl.yaml
- conda activate pyskl
- pip install -e .
测试成功的安装包版本如下:
在 config 参数前面加 '--',并为其设置默认的配置文件路径,这样脚本无需命令行参数即可运行:
def parse_args():
    """Parse command-line options for the training script.

    Windows/single-machine adaptation: ``--config`` carries a default path so
    the script can be launched without arguments (e.g. from an IDE), and the
    ``local_rank`` plumbing used by ``torch.distributed.launch`` is commented
    out because no process group is created.
    """
    parser = argparse.ArgumentParser(description='Train a recognizer')
    parser.add_argument('--config',default='../configs/posec3d/c3d_light_gym/joint.py', help='train config file path')
    parser.add_argument(
        '--validate',
        action='store_true',
        help='whether to evaluate the checkpoint during training')
    parser.add_argument(
        '--test-last',
        action='store_true',
        help='whether to test the checkpoint after training')
    parser.add_argument(
        '--test-best',
        action='store_true',
        help='whether to test the best checkpoint (if applicable) after training')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--launcher',
        choices=['pytorch', 'slurm'],
        default='pytorch',
        help='job launcher')
    # parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    # if 'LOCAL_RANK' not in os.environ:
    #     os.environ['LOCAL_RANK'] = str(args.local_rank)

    return args
修改注释的代码:
- # 第47行,注释
- parser.add_argument('--local_rank', type=int, default=0)
-
- # 第49、50行,注释
- if 'LOCAL_RANK' not in os.environ:
- os.environ['LOCAL_RANK'] = str(args.local_rank)
-
- # 第70-74行,注释
- if not hasattr(cfg, 'dist_params'):
- cfg.dist_params = dict(backend='nccl')
-
- init_dist(args.launcher, **cfg.dist_params)
- rank, world_size = get_dist_info()
-
- # 修改第75行
- # cfg.gpu_ids = range(world_size)
- cfg.gpu_ids =[0]
-
- #修改第134行
- # if rank == 0 and memcached:
- if memcached:
-
- #修改第153行
- # if rank == 0 and memcached:
- if memcached:
- # 注释所有dist.barrier(),148行,151行
下面是修改完成后的代码,替换pyskl中的train.py即可
- # Copyright (c) OpenMMLab. All rights reserved.
- # flake8: noqa: E722
- import argparse
- import os
- import os.path as osp
- import time
-
- import mmcv
- import torch
- import torch.distributed as dist
- from mmcv import Config
- from mmcv.runner import get_dist_info, init_dist, set_random_seed
- from mmcv.utils import get_git_hash
-
- from pyskl import __version__
- from pyskl.apis import init_random_seed, train_model
- from pyskl.datasets import build_dataset
- from pyskl.models import build_model
- from pyskl.utils import collect_env, get_root_logger, mc_off, mc_on, test_port
-
-
def parse_args():
    """Build and parse the command-line options for training.

    The boolean switches share the same shape (``store_true`` + help text),
    so they are registered from a small table. ``--config`` defaults to a
    concrete config file so the script runs without arguments on Windows;
    the distributed ``local_rank`` argument stays disabled.
    """
    parser = argparse.ArgumentParser(description='Train a recognizer')
    parser.add_argument('--config',default='../configs/posec3d/c3d_light_gym/joint.py', help='train config file path')
    flag_specs = (
        ('--validate', 'whether to evaluate the checkpoint during training'),
        ('--test-last', 'whether to test the checkpoint after training'),
        ('--test-best',
         'whether to test the best checkpoint (if applicable) after training'),
    )
    for flag, msg in flag_specs:
        parser.add_argument(flag, action='store_true', help=msg)
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--launcher',
        choices=['pytorch', 'slurm'],
        default='pytorch',
        help='job launcher')
    # parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()
-
-
def main():
    """Entry point: load config, build model/datasets and start training.

    Windows/single-GPU adaptation of the original pyskl ``train.py``:
    the distributed initialisation (``init_dist`` / ``get_dist_info``) is
    commented out, a single GPU is hard-coded via ``cfg.gpu_ids = [0]``,
    the ``rank == 0`` guards around the memcached setup are dropped, and
    all ``dist.barrier()`` calls are removed because no process group
    exists in this setup.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority:
    # config file > default (base filename)
    if cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0])

    # Distributed setup disabled for single-machine Windows runs; the NCCL
    # backend is not available on Windows.
    # if not hasattr(cfg, 'dist_params'):
    #     cfg.dist_params = dict(backend='nccl')
    #
    # init_dist(args.launcher, **cfg.dist_params)
    # rank, world_size = get_dist_info()
    # Single GPU instead of ``range(world_size)``.
    cfg.gpu_ids =[0]

    auto_resume = cfg.get('auto_resume', True)
    if auto_resume and cfg.get('resume_from', None) is None:
        # Resume from the latest checkpoint in work_dir when present.
        resume_pth = osp.join(cfg.work_dir, 'latest.pth')
        if osp.exists(resume_pth):
            cfg.resume_from = resume_pth

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # dump config
    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
    # init logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' +
                dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Config: {cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    logger.info(f'Set random seed to {seed}, deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)

    cfg.seed = seed
    meta['seed'] = seed
    meta['config_name'] = osp.basename(args.config)
    meta['work_dir'] = osp.basename(cfg.work_dir.rstrip('/\\'))

    model = build_model(cfg.model)

    datasets = [build_dataset(cfg.data.train)]

    cfg.workflow = cfg.get('workflow', [('train', 1)])
    assert len(cfg.workflow) == 1
    if cfg.checkpoint_config is not None:
        # save pyskl version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            pyskl_version=__version__ + get_git_hash(digits=7),
            config=cfg.pretty_text)

    test_option = dict(test_last=args.test_last, test_best=args.test_best)

    default_mc_cfg = ('localhost', 22077)
    memcached = cfg.get('memcached', False)

    # Memcached is launched unconditionally (original code guarded this with
    # ``rank == 0``; there is only one process here).
    # if rank == 0 and memcached:
    if memcached:
        # mc_list is a list of pickle files you want to cache in memory.
        # Basically, each pickle file is a dictionary.
        mc_cfg = cfg.get('mc_cfg', default_mc_cfg)
        assert isinstance(mc_cfg, tuple) and mc_cfg[0] == 'localhost'
        if not test_port(mc_cfg[0], mc_cfg[1]):
            mc_on(port=mc_cfg[1], launcher=args.launcher)
        # Poll until the memcached port answers, up to 3 retries.
        retry = 3
        while not test_port(mc_cfg[0], mc_cfg[1]) and retry > 0:
            time.sleep(5)
            retry -= 1
        assert retry >= 0, 'Failed to launch memcached. '

    # dist.barrier() removed: no process group is initialised in this setup.
    # dist.barrier()

    train_model(model, datasets, cfg, validate=args.validate, test=test_option, timestamp=timestamp, meta=meta)
    # dist.barrier()

    # if rank == 0 and memcached:
    if memcached:
        mc_off()


if __name__ == '__main__':
    main()
以上只是 train.py 的入口代码;pyskl 的训练流程(apis 中的 train 函数)还使用了 mmcv 的分布式训练代码,同样需要替换:
- # 第10行注释,替换
- # from mmcv.parallel import MMDistributedDataParallel
- from mmcv.parallel import MMDistributedDataParallel, MMDataParallel
-
- # 第94-98注释,替换
- # model = MMDistributedDataParallel(
- # model.cuda(),
- # device_ids=[torch.cuda.current_device()],
- # broadcast_buffers=False,
- # find_unused_parameters=fin
- model = MMDataParallel(model.cuda())
- # 注释第147行dist.barrier()
- # dist.barrier()
完整代码:
- # Copyright (c) OpenMMLab. All rights reserved.
- import warnings
-
- import numpy as np
-
- from ..builder import PIPELINES
-
-
@PIPELINES.register_module()
class UniformSampleFrames:
    """Uniformly sample frames from the video.

    To sample an n-frame clip from the video, UniformSampleFrames basically
    divides the video into n segments of equal length and randomly samples
    one frame from each segment. To make the testing results reproducible, a
    random seed is set during testing, to make the sampling results
    deterministic.

    Required keys are "total_frames", "start_index"; added or modified keys
    are "frame_inds", "clip_len", "frame_interval" and "num_clips".

    Args:
        clip_len (int): Frames of each sampled output clip.
        num_clips (int): Number of clips to be sampled. Default: 1.
        test_mode (bool): Store True when building test or validation dataset.
            Default: False.
        float_ok (bool): Whether non-integer (float32) frame indices are
            allowed. When True, no modulo loop is applied to the indices.
            Default: False.
        p_interval (float | tuple[float, float]): Proportion (or range of
            proportions) of the video span to sample from. Default: 1.
        seed (int): The random seed used during test time. Default: 255.
    """

    def __init__(self,
                 clip_len,
                 num_clips=1,
                 test_mode=False,
                 float_ok=False,
                 p_interval=1,
                 seed=255):

        self.clip_len = clip_len
        self.num_clips = num_clips
        self.test_mode = test_mode
        self.float_ok = float_ok
        self.seed = seed
        # Normalize a scalar proportion to a (low, high) range.
        self.p_interval = p_interval
        if not isinstance(p_interval, tuple):
            self.p_interval = (p_interval, p_interval)

        if self.float_ok:
            warnings.warn('When float_ok == True, there will be no loop.')

    def _get_train_clips(self, num_frames, clip_len):
        """Uniformly sample indices for training clips.

        Args:
            num_frames (int): The number of frames.
            clip_len (int): The length of the clip.

        Returns:
            np.ndarray: Concatenated indices for all ``num_clips`` clips.
        """
        allinds = []
        for clip_idx in range(self.num_clips):
            # Randomly shrink the usable span to a ratio in p_interval and
            # pick a random offset for that span.
            old_num_frames = num_frames
            pi = self.p_interval
            ratio = np.random.rand() * (pi[1] - pi[0]) + pi[0]
            num_frames = int(ratio * num_frames)
            off = np.random.randint(old_num_frames - num_frames + 1)

            if self.float_ok:
                # Float indices: one random position per equal-length segment.
                interval = (num_frames - 1) / clip_len
                offsets = np.arange(clip_len) * interval
                inds = np.random.rand(clip_len) * interval + offsets
                inds = inds.astype(np.float32)
            elif num_frames < clip_len:
                # Too few frames: take a contiguous window (wrapped later by
                # the modulo in __call__).
                start = np.random.randint(0, num_frames)
                inds = np.arange(start, start + clip_len)
            elif clip_len <= num_frames < 2 * clip_len:
                # Between 1x and 2x clip_len: duplicate some positions by
                # distributing the surplus as cumulative offsets.
                basic = np.arange(clip_len)
                inds = np.random.choice(
                    clip_len + 1, num_frames - clip_len, replace=False)
                offset = np.zeros(clip_len + 1, dtype=np.int64)
                offset[inds] = 1
                offset = np.cumsum(offset)
                inds = basic + offset[:-1]
            else:
                # Plenty of frames: one random frame per segment.
                bids = np.array(
                    [i * num_frames // clip_len for i in range(clip_len + 1)])
                bsize = np.diff(bids)
                bst = bids[:clip_len]
                offset = np.random.randint(bsize)
                inds = bst + offset

            inds = inds + off
            num_frames = old_num_frames

            allinds.append(inds)

        return np.concatenate(allinds)

    def _get_test_clips(self, num_frames, clip_len):
        """Uniformly sample indices for testing clips.

        Args:
            num_frames (int): The number of frames.
            clip_len (int): The length of the clip.

        Returns:
            np.ndarray: Concatenated indices for all ``num_clips`` clips.
        """
        # Fixed seed so test-time sampling is reproducible.
        np.random.seed(self.seed)
        if self.float_ok:
            # NOTE(review): this float_ok result is never returned and is
            # overwritten by the loop below; upstream pyskl may have intended
            # an early return here — confirm against the upstream repository.
            interval = (num_frames - 1) / clip_len
            offsets = np.arange(clip_len) * interval
            inds = np.concatenate([
                np.random.rand(clip_len) * interval + offsets
                for i in range(self.num_clips)
            ]).astype(np.float32)

        all_inds = []

        for i in range(self.num_clips):

            old_num_frames = num_frames
            pi = self.p_interval
            ratio = np.random.rand() * (pi[1] - pi[0]) + pi[0]
            num_frames = int(ratio * num_frames)
            off = np.random.randint(old_num_frames - num_frames + 1)

            if num_frames < clip_len:
                start_ind = i if num_frames < self.num_clips else i * num_frames // self.num_clips
                inds = np.arange(start_ind, start_ind + clip_len)
            elif clip_len <= num_frames < clip_len * 2:
                basic = np.arange(clip_len)
                inds = np.random.choice(clip_len + 1, num_frames - clip_len, replace=False)
                # Fix: the original used bare ``int64`` here, which raises a
                # NameError whenever this branch runs; use ``np.int64``.
                offset = np.zeros(clip_len + 1, dtype=np.int64)
                offset[inds] = 1
                offset = np.cumsum(offset)
                inds = basic + offset[:-1]
            else:
                bids = np.array([i * num_frames // clip_len for i in range(clip_len + 1)])
                bsize = np.diff(bids)
                bst = bids[:clip_len]
                offset = np.random.randint(bsize)
                inds = bst + offset

            all_inds.append(inds + off)
            num_frames = old_num_frames

        return np.concatenate(all_inds)

    def __call__(self, results):
        """Sample frame indices and write them into ``results``."""
        num_frames = results['total_frames']

        if self.test_mode:
            inds = self._get_test_clips(num_frames, self.clip_len)
        else:
            inds = self._get_train_clips(num_frames, self.clip_len)

        # Wrap out-of-range indices and shift by the dataset's start index.
        inds = np.mod(inds, num_frames)
        start_index = results['start_index']
        inds = inds + start_index

        if 'keypoint' in results:
            kp = results['keypoint']
            assert num_frames == kp.shape[1]
            num_person = kp.shape[0]
            # Count, per frame, how many trailing persons are all-zero
            # (padding) so transitions in person count can be detected.
            num_persons = [num_person] * num_frames
            for i in range(num_frames):
                j = num_person - 1
                while j >= 0 and np.all(np.abs(kp[j, i]) < 1e-5):
                    j -= 1
                num_persons[i] = j + 1
            transitional = [False] * num_frames
            for i in range(1, num_frames - 1):
                if num_persons[i] != num_persons[i - 1]:
                    transitional[i] = transitional[i - 1] = True
                if num_persons[i] != num_persons[i + 1]:
                    transitional[i] = transitional[i + 1] = True
            # Snap indices that land on transitional frames to integers.
            inds_int = inds.astype(int)
            coeff = np.array([transitional[i] for i in inds_int])
            inds = (coeff * inds_int + (1 - coeff) * inds).astype(np.float32)

        results['frame_inds'] = inds if self.float_ok else inds.astype(int)
        results['clip_len'] = self.clip_len
        results['frame_interval'] = None
        results['num_clips'] = self.num_clips
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'clip_len={self.clip_len}, '
                    f'num_clips={self.num_clips}, '
                    f'test_mode={self.test_mode}, '
                    f'seed={self.seed})')
        return repr_str
-
-
@PIPELINES.register_module()
# Alias registration: exposes the same sampler under the shorter name
# ``UniformSample`` so configs may use either name for this pipeline step.
class UniformSample(UniformSampleFrames):
    pass
-
-
@PIPELINES.register_module()
class SampleFrames:
    """Sample frames from the video.

    Required keys are "total_frames", "start_index"; added or modified keys
    are "frame_inds", "frame_interval" and "num_clips".

    Args:
        clip_len (int): Frames of each sampled output clip.
        frame_interval (int): Temporal interval of adjacent sampled frames.
            Default: 1.
        num_clips (int): Number of clips to be sampled. Default: 1.
        temporal_jitter (bool): Whether to apply temporal jittering.
            Default: False.
        twice_sample (bool): Whether to use twice sample when testing.
            If set to True, it will sample frames with and without fixed shift,
            which is commonly used for testing in TSM model. Default: False.
        out_of_bound_opt (str): The way to deal with out of bounds frame
            indexes. Available options are 'loop', 'repeat_last'.
            Default: 'loop'.
        test_mode (bool): Store True when building test or validation dataset.
            Default: False.
        start_index (None): This argument is deprecated and moved to dataset
            class (``BaseDataset``, ``VideoDatset``, ``RawframeDataset``, etc),
            see this: https://github.com/open-mmlab/mmaction2/pull/89.
        keep_tail_frames (bool): Whether to keep tail frames when sampling.
            Default: False.
    """

    def __init__(self,
                 clip_len,
                 frame_interval=1,
                 num_clips=1,
                 temporal_jitter=False,
                 twice_sample=False,
                 out_of_bound_opt='loop',
                 test_mode=False,
                 start_index=None,
                 keep_tail_frames=False):

        self.clip_len = clip_len
        self.frame_interval = frame_interval
        self.num_clips = num_clips
        self.temporal_jitter = temporal_jitter
        self.twice_sample = twice_sample
        self.out_of_bound_opt = out_of_bound_opt
        self.test_mode = test_mode
        self.keep_tail_frames = keep_tail_frames
        assert self.out_of_bound_opt in ['loop', 'repeat_last']

        # start_index is accepted only to keep old configs working; it has
        # no effect here.
        if start_index is not None:
            warnings.warn('No longer support "start_index" in "SampleFrames", '
                          'it should be set in dataset class, see this pr: '
                          'https://github.com/open-mmlab/mmaction2/pull/89')

    def _get_train_clips(self, num_frames):
        """Get clip offsets in train mode.

        It will calculate the average interval for selected frames,
        and randomly shift them within offsets between [0, avg_interval].
        If the total number of frames is smaller than clips num or origin
        frames length, it will return all zero indices.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: Sampled frame indices in train mode.
        """
        # Span one clip occupies in the original video.
        ori_clip_len = self.clip_len * self.frame_interval

        if self.keep_tail_frames:
            # Spread clips across the whole video (including the tail) and
            # jitter each clip uniformly within its slot.
            avg_interval = (num_frames - ori_clip_len + 1) / float(
                self.num_clips)
            if num_frames > ori_clip_len - 1:
                base_offsets = np.arange(self.num_clips) * avg_interval
                clip_offsets = (base_offsets + np.random.uniform(
                    0, avg_interval, self.num_clips)).astype(int)
            else:
                clip_offsets = np.zeros((self.num_clips, ), dtype=int)
        else:
            avg_interval = (num_frames - ori_clip_len + 1) // self.num_clips

            if avg_interval > 0:
                # Regular case: evenly spaced slots with random jitter.
                base_offsets = np.arange(self.num_clips) * avg_interval
                clip_offsets = base_offsets + np.random.randint(
                    avg_interval, size=self.num_clips)
            elif num_frames > max(self.num_clips, ori_clip_len):
                # Slots overlap: draw sorted random offsets instead.
                clip_offsets = np.sort(
                    np.random.randint(
                        num_frames - ori_clip_len + 1, size=self.num_clips))
            elif avg_interval == 0:
                # Barely enough frames: deterministic fractional spacing.
                # NOTE(review): np.around returns a float array here, unlike
                # the int arrays of the other branches — matches upstream.
                ratio = (num_frames - ori_clip_len + 1.0) / self.num_clips
                clip_offsets = np.around(np.arange(self.num_clips) * ratio)
            else:
                clip_offsets = np.zeros((self.num_clips, ), dtype=int)

        return clip_offsets

    def _get_test_clips(self, num_frames):
        """Get clip offsets in test mode.

        Calculate the average interval for selected frames, and shift them
        fixedly by avg_interval/2. If set twice_sample True, it will sample
        frames together without fixed shift. If the total number of frames is
        not enough, it will return all zero indices.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: Sampled frame indices in test mode.
        """
        ori_clip_len = self.clip_len * self.frame_interval
        avg_interval = (num_frames - ori_clip_len + 1) / float(self.num_clips)
        if num_frames > ori_clip_len - 1:
            # Deterministic: centre of each slot (plus the un-shifted set
            # when twice_sample is on, as used by TSM testing).
            base_offsets = np.arange(self.num_clips) * avg_interval
            clip_offsets = (base_offsets + avg_interval / 2.0).astype(int)
            if self.twice_sample:
                clip_offsets = np.concatenate([clip_offsets, base_offsets])
        else:
            clip_offsets = np.zeros((self.num_clips, ), dtype=int)
        return clip_offsets

    def _sample_clips(self, num_frames):
        """Choose clip offsets for the video in a given mode.

        Args:
            num_frames (int): Total number of frame in the video.

        Returns:
            np.ndarray: Sampled frame indices.
        """
        if self.test_mode:
            clip_offsets = self._get_test_clips(num_frames)
        else:
            clip_offsets = self._get_train_clips(num_frames)

        return clip_offsets

    def __call__(self, results):
        """Perform the SampleFrames loading.

        Args:
            results (dict): The resulting dict to be modified and passed
                to the next transform in pipeline.
        """
        total_frames = results['total_frames']

        # Expand each clip offset into clip_len indices spaced by
        # frame_interval, then flatten.
        clip_offsets = self._sample_clips(total_frames)
        frame_inds = clip_offsets[:, None] + np.arange(
            self.clip_len)[None, :] * self.frame_interval
        frame_inds = np.concatenate(frame_inds)

        if self.temporal_jitter:
            # Per-frame random shift within one frame_interval.
            perframe_offsets = np.random.randint(
                self.frame_interval, size=len(frame_inds))
            frame_inds += perframe_offsets

        frame_inds = frame_inds.reshape((-1, self.clip_len))
        if self.out_of_bound_opt == 'loop':
            # Wrap indices past the end back to the start.
            frame_inds = np.mod(frame_inds, total_frames)
        elif self.out_of_bound_opt == 'repeat_last':
            # Clamp out-of-range indices to the last valid index per clip.
            safe_inds = frame_inds < total_frames
            unsafe_inds = 1 - safe_inds
            last_ind = np.max(safe_inds * frame_inds, axis=1)
            new_inds = (safe_inds * frame_inds + (unsafe_inds.T * last_ind).T)
            frame_inds = new_inds
        else:
            raise ValueError('Illegal out_of_bound option.')

        start_index = results['start_index']
        frame_inds = np.concatenate(frame_inds) + start_index
        results['frame_inds'] = frame_inds.astype(int)
        results['clip_len'] = self.clip_len
        results['frame_interval'] = self.frame_interval
        results['num_clips'] = self.num_clips
        return results

    def __repr__(self):
        repr_str = (f'{self.__class__.__name__}('
                    f'clip_len={self.clip_len}, '
                    f'frame_interval={self.frame_interval}, '
                    f'num_clips={self.num_clips}, '
                    f'temporal_jitter={self.temporal_jitter}, '
                    f'twice_sample={self.twice_sample}, '
                    f'out_of_bound_opt={self.out_of_bound_opt}, '
                    f'test_mode={self.test_mode})')
        return repr_str
此时运行train.py可能会遇到
TypeError: FormatCode() got an unexpected keyword argument 'verify'
这是因为yapf包版本太新了,降低为0.40.1
pip uninstall yapf
pip install yapf==0.40.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
安装mmcv-full1.5.0版本运行过程会遇到如下问题
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd3 in position 0: invalid continuation byte
这是因为编码格式的问题,修改C:\ProgramData\Anaconda3\envs\pyskl\lib\site-packages\mmcv\utils\env.py文件
- # 第91行env_info['MSVC'] = cc.decode(encoding).partition('\n')[0].strip()修改为
- env_info['MSVC'] = cc.decode(encoding, 'ignore').partition('\n')[0].strip()
由于numpy的原因,会存在版本冲突:np.int在最新的numpy版本中已被移除(应改用内置的int),而降低numpy版本又会导致scipy冲突,因此建议将sampling.py中所有的np.int替换为int,同时np.int64无需更改。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。