main(parser.parse_args()) File "my_train.py", line 75, in main backbone = torch.nn.parallel.DistributedDataParallel( File "/roo_raise child">
赞
踩
在进行分布式训练时遇到了下面这个错误，暂时不知道怎么解决，先记录一下：
Traceback (most recent call last):
  File "my_train.py", line 194, in <module>
    main(parser.parse_args())
  File "my_train.py", line 75, in main
    backbone = torch.nn.parallel.DistributedDataParallel(
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/nn/parallel/distributed.py", line 578, in __init__
    dist._verify_model_across_ranks(self.process_group, parameters)
RuntimeError: NCCL error in: ../torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:957, unhandled system error, NCCL version 21.0.3
ncclSystemError: System call (socket, malloc, munmap, etc) failed.
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: 1) local_rank: 0 (pid: 3299404) of binary: /root/.virtualenvs/torchenv/bin/python3
Traceback (most recent call last):
  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/launch.py", line 193, in <module>
    main()
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/launch.py", line 189, in main
    launch(args)
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/launch.py", line 174, in launch
    run(args)
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/run.py", line 710, in run
    elastic_launch(
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 131, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
  File "/root/.virtualenvs/torchenv/lib/python3.8/site-packages/torch/distributed/launcher/api.py", line 259, in launch_agent
    raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。