赞
踩
使用自己的数据转化成与QM9数据集类似的数据结构。
读取CSV文件,创建DGL图和标签列表。
将这些图和标签列表组合成一个单一的DGL数据集对象。
from graphormer.data import register_dataset
from dgl.data import DGLDataset
from sklearn.model_selection import train_test_split
import dgl
import pandas as pd
from rdkit import Chem
import torch
import numpy as np


class Custom:
    # Implementation omitted in the original post; this is only a placeholder
    # and must be filled in before the factory below can run.
    # NOTE(review): presumably a DGLDataset subclass that turns each row of
    # the DataFrame into a (DGL graph, label) pair -- confirm against the
    # real implementation.
    ...


@register_dataset("customized_qm9_dataset")
def create_customized_dataset():
    """Build the customized dataset and its train/valid/test index split.

    The three CSV files are read from the current working directory, merged
    into a single DataFrame and wrapped in ``Custom``. The split stored in
    the files themselves is NOT kept: the merged samples are re-split at
    random with a fixed seed.

    Returns:
        dict: ``"dataset"`` (the ``Custom`` instance), ``"train_idx"``,
        ``"valid_idx"``, ``"test_idx"`` (integer index arrays into the
        dataset) and ``"source"`` (the string ``"dgl"``).
    """
    train_df = pd.read_csv('train.csv')
    val_df = pd.read_csv('valid.csv')
    test_df = pd.read_csv('test.csv')

    # ignore_index=True gives the merged frame a fresh 0..N-1 index. Without
    # it every file contributes its own 0..k labels, so label-based lookups
    # would match several rows at once.
    data = pd.concat([train_df, val_df, test_df], ignore_index=True)

    dataset = Custom(data)
    num_graphs = len(dataset)

    # Hold out num_graphs // 10 samples for testing ...
    train_valid_idx, test_idx = train_test_split(
        np.arange(num_graphs), test_size=num_graphs // 10, random_state=0
    )
    # ... then num_graphs // 5 of the remainder for validation; what is left
    # is the training set.
    train_idx, valid_idx = train_test_split(
        train_valid_idx, test_size=num_graphs // 5, random_state=0
    )

    return {
        "dataset": dataset,
        "train_idx": train_idx,
        "valid_idx": valid_idx,
        "test_idx": test_idx,
        "source": "dgl",
    }
https://docs.dgl.ai/en/0.6.x/api/python/dgl.data.html#qm9-dataset
2023-09-03 14:54:58 | WARNING | root | The OGB package is out of date. Your version is 1.3.2, while the latest version is 1.3.6. Using backend: pytorch Traceback (most recent call last): File "/root/miniconda3/bin/fairseq-train", line 8, in <module> sys.exit(cli_main()) File "/root/miniconda3/lib/python3.8/site-packages/fairseq_cli/train.py", line 512, in cli_main parser = options.get_training_parser() File "/root/miniconda3/lib/python3.8/site-packages/fairseq/options.py", line 38, in get_training_parser parser = get_parser("Trainer", default_task) File "/root/miniconda3/lib/python3.8/site-packages/fairseq/options.py", line 234, in get_parser utils.import_user_module(usr_args) File "/root/miniconda3/lib/python3.8/site-packages/fairseq/utils.py", line 497, in import_user_module import_tasks(tasks_path, f"{module_name}.tasks") File "/root/miniconda3/lib/python3.8/site-packages/fairseq/tasks/__init__.py", line 117, in import_tasks importlib.import_module(namespace + "." + task_name) File "/root/miniconda3/lib/python3.8/importlib/__init__.py", line 127, in import_module return _bootstrap._gcd_import(name[level:], package, level) File "<frozen importlib._bootstrap>", line 1014, in _gcd_import File "<frozen importlib._bootstrap>", line 991, in _find_and_load File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked File "<frozen importlib._bootstrap>", line 671, in _load_unlocked File "<frozen importlib._bootstrap_external>", line 848, in exec_module File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed File "/root/autodl-tmp/Graphormer/graphormer/tasks/is2re.py", line 25, in <module> class LMDBDataset: File "/root/autodl-tmp/Graphormer/graphormer/tasks/is2re.py", line 43, in LMDBDataset def __getitem__(self, idx: int) -> dict[str, Union[Tensor, float]]: TypeError: 'type' object is not subscriptable
原因:Python 版本过低。`dict[str, ...]` 这种对内置类型直接加下标的类型注解写法需要 Python 3.9 及以上,而当前环境是 Python 3.8,因此需要把 Python 升级到 3.9。
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001
/usr/bin/ld: warning: /root/miniconda3/envs/Graphormer/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002
building 'fairseq.data.data_utils_fast' extension
error: unknown file type '.pyx' (from 'fairseq/data/data_utils_fast.pyx')
pip 版本太低,需要先升级 pip:pip install --upgrade pip
不行的话用这个:
python -m pip install --upgrade "pip==21.1"
ERROR: Directory '.' is not installable. Neither 'setup.py' nor 'pyproject.toml' found.
python: can't open file 'setup.py': [Errno 2] No such file or directory
出现上面两个错误是因为当前目录不是 fairseq 的源码目录(目录下没有 setup.py)。先进入 fairseq 目录,然后继续执行下面的命令:
cd fairseq
pip install . --use-feature=in-tree-build
python setup.py build_ext --inplace
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0. If you cannot immediately regenerate your protos, some other possible workarounds are: 1. Downgrade the protobuf package to 3.20.x or lower. 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).
解决办法
pip install protobuf==3.20.*
AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
解决办法:
pip install numpy==1.23.5
Downloading: "https://ml2md.blob.core.windows.net/graphormer-ckpts/checkpoint_best_pcqm4mv1.pt" to /root/.cache/torch/hub/checkpoints/checkpoint_best_pcqm4mv1.pt Traceback (most recent call last): File "/root/miniconda3/envs/Graphormer/bin/fairseq-train", line 8, in <module> sys.exit(cli_main()) File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/fairseq_cli/train.py", line 528, in cli_main distributed_utils.call_main(cfg, main) File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/fairseq/distributed/utils.py", line 369, in call_main main(cfg, **kwargs) File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/fairseq_cli/train.py", line 94, in main model = task.build_model(cfg.model) File "/root/autodl-tmp/Graphormer/graphormer/tasks/graph_prediction.py", line 229, in build_model model = models.build_model(cfg, self) File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/fairseq/models/__init__.py", line 105, in build_model return model.build_model(cfg, task) File "/root/autodl-tmp/Graphormer/graphormer/models/graphormer.py", line 149, in build_model return cls(args, encoder) File "/root/autodl-tmp/Graphormer/graphormer/models/graphormer.py", line 43, in __init__ self.load_state_dict(load_pretrained_model(args.pretrained_model_name)) File "/root/autodl-tmp/Graphormer/graphormer/pretrain/__init__.py", line 15, in load_pretrained_model return load_state_dict_from_url(PRETRAINED_MODEL_URLS[pretrained_model_name], progress=True)["model"] File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/torch/hub.py", line 571, in load_state_dict_from_url download_url_to_file(url, cached_file, hash_prefix, progress=progress) File "/root/miniconda3/envs/Graphormer/lib/python3.9/site-packages/torch/hub.py", line 437, in download_url_to_file u = urlopen(req) File "/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 214, in urlopen return opener.open(url, data, timeout) File 
"/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 523, in open response = meth(req, response) File "/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 632, in http_response response = self.parent.error( File "/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 561, in error return self._call_chain(*args) File "/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 494, in _call_chain result = func(*args) File "/root/miniconda3/envs/Graphormer/lib/python3.9/urllib/request.py", line 641, in http_error_default raise HTTPError(req.full_url, code, msg, hdrs, fp) urllib.error.HTTPError: HTTP Error 409: Conflict
解决办法
wget https://ml2md.blob.core.windows.net/graphormer-ckpts/checkpoint_best_pcqm4mv1.pt
mv checkpoint_best_pcqm4mv1.pt /root/.cache/torch/hub/checkpoints/
不过手动下载并放到缓存目录之后,预训练模型仍然无法正常使用,所以最后没有使用预训练模型。
File "/root/autodl-tmp/Graphormer/graphormer/data/dgl_datasets/dgl_dataset.py", line 155, in __getitem__
graph, y = self.dataset[idx]
TypeError: 'DiskDataset' object is not subscriptable
这里记得要修改数据集注册函数的返回值,把 "source" 从 "dgl" 改成 "smiles":
return {
"dataset": dataset,
"train_idx": train_idx,
"valid_idx": valid_idx,
"test_idx": test_idx,
"source": "smiles"
}
File "/root/autodl-tmp/Graphormer/graphormer/data/dgl_datasets/dgl_dataset.py", line 156, in __getitem__
return self.__preprocess_dgl_graph(graph, y, idx)
File "/root/autodl-tmp/Graphormer/graphormer/data/dgl_datasets/dgl_dataset.py", line 144, in __preprocess_dgl_graph
if y.dim() == 0:
AttributeError: 'int' object has no attribute 'dim'
在报错的 `if y.dim() == 0:` 这一行前面加上下面这句,先把标签 y 转成张量就行了:
y = torch.tensor(y)
File "/root/autodl-tmp/Graphormer/graphormer/criterions/binary_logloss.py", line 103, in forward
logits_flatten[mask].float(), targets_flatten[mask].float(), reduction="sum"
IndexError: The shape of the mask [16] at index 0 does not match the shape of the indexed tensor [32] at index 0
Exception in thread Thread-5:
出现这个错误时,把 num_class 设为 2 即可。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。