This is a homework assignment for an Information Systems Modeling course: apply four different node representation methods to two datasets with 3k+ nodes each.
Environment: PyTorch, CUDA 11.1, dgl-0.6.1
(For setting up the CUDA environment, see my first post.)
An overview of the network modules available in the DGL library:
https://zhuanlan.zhihu.com/p/161853672
Datasets: CiteSeer and PubMed
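Before training, it can be useful to print the basic statistics of both graphs. The following is a minimal sketch (it assumes the datasets can be downloaded or are already cached by DGL):

from dgl.data import citation_graph as citegrh

for load in (citegrh.load_citeseer, citegrh.load_pubmed):
    dataset = load()
    g = dataset[0]
    print(load.__name__,
          'nodes:', g.number_of_nodes(),
          'edges:', g.number_of_edges(),
          'feature dim:', g.ndata['feat'].shape[1],
          'classes:', dataset.num_classes)

Both graphs store their node features, labels and train/val/test masks in g.ndata, which all four scripts below rely on.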
The first method is a two-layer GCN (graph convolutional network), trained on CiteSeer:

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import citation_graph as citegrh
from dgl.nn.pytorch.conv.graphconv import GraphConv

# Load the dataset; for the other dataset use citegrh.load_pubmed()
dataset = citegrh.load_citeseer()
print('Number of categories:', dataset.num_classes)

# The dataset is list-like and contains a single graph; g is a DGLGraph
g = dataset[0]
# Move the graph to the GPU
g = g.to('cuda')

# Build the network
class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)      # map the input features to the hidden dimension
        self.conv2 = GraphConv(h_feats, num_classes)   # map the hidden features to the node classes

    def forward(self, g, in_feat):
        # g: the graph (GraphConv applies the normalized adjacency matrix internally)
        # in_feat: the initial node features
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

# Instantiate the GCN model on the GPU; the hidden size is 16,
# the input and output dimensions are determined by the dataset.
model = GCN(g.ndata['feat'].shape[1], 16, dataset.num_classes).to('cuda')

# Train the network
def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    # .ndata is a dict holding the feature, label and mask tensors
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    for e in range(100):
        # Forward
        logits = model(g, features)

        # Compute prediction: the class with the largest logit for each node
        pred = logits.argmax(1)

        # Compute loss on the training nodes only
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))

train(g, model)
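For reference, each GraphConv layer applies the GCN propagation rule of Kipf and Welling, H' = D^{-1/2} A D^{-1/2} H W, where A is the graph's adjacency matrix (including self-loops if the graph has them) and the normalization is handled inside the layer. A toy dense-matrix sketch of one such step on a hypothetical 3-node graph, not on CiteSeer:

# Toy, dense-matrix sketch of one GCN propagation step
# (hypothetical small graph; the real GraphConv layer does this sparsely).
import torch

A = torch.tensor([[0., 1., 0.],
                  [1., 0., 1.],
                  [0., 1., 0.]])          # toy adjacency matrix
H = torch.randn(3, 4)                     # toy node features (3 nodes, 4 features)
W = torch.randn(4, 2)                     # layer weight (4 -> 2)

A_hat = A + torch.eye(3)                  # add self-loops
d_inv_sqrt = A_hat.sum(1).pow(-0.5)       # D^{-1/2}
A_norm = d_inv_sqrt[:, None] * A_hat * d_inv_sqrt[None, :]

H_next = torch.relu(A_norm @ H @ W)       # one propagation step
print(H_next.shape)                       # torch.Size([3, 2])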
The second method is a GAT (graph attention network) with hand-written attention layers, trained on PubMed:

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import citation_graph as citegrh
import time

def load_data():
    data = citegrh.load_pubmed()   # for the other dataset use citegrh.load_citeseer()
    g = data[0]                    # single graph
    g = g.to('cuda')               # move it to the GPU
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    test_mask = g.ndata['test_mask']
    val_mask = g.ndata['val_mask']
    return g, features, labels, train_mask, test_mask, val_mask, data.num_classes

# A single graph-attention layer
class GATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim):
        super(GATLayer, self).__init__()
        self.g = g
        # step 1: linear transformation of the lower-layer embeddings
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # step 2: compute unnormalized attention scores for neighbor pairs
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)

    def edge_attention(self, edges):
        # edge UDF for step 2
        z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        a = self.attn_fc(z2)
        return {'e': F.leaky_relu(a)}

    def message_func(self, edges):
        # message UDF for steps 3 & 4
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        # reduce UDF for steps 3 & 4
        # step 3: normalize the attention scores over each node's incoming edges
        alpha = F.softmax(nodes.mailbox['e'], dim=1)
        # step 4: aggregate the neighbor embeddings, scaled by the attention scores
        h = torch.sum(alpha * nodes.mailbox['z'], dim=1)
        return {'h': h}

    def forward(self, h):
        # step 1
        z = self.fc(h)
        self.g.ndata['z'] = z
        # step 2
        self.g.apply_edges(self.edge_attention)
        # steps 3 & 4
        self.g.update_all(self.message_func, self.reduce_func)
        return self.g.ndata.pop('h')

# Build a multi-head attention layer from single-head layers
class MultiHeadGATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
            self.heads.append(GATLayer(g, in_dim, out_dim))
        self.merge = merge

    def forward(self, h):
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == 'cat':
            # concatenate the output features of all heads
            return torch.cat(head_outs, dim=1)
        else:
            # merge by averaging over the heads
            return torch.mean(torch.stack(head_outs), dim=0)

class GAT(nn.Module):
    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super(GAT, self).__init__()
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # The second layer's input dimension is hidden_dim * num_heads
        # because the first layer concatenates its heads.
        # The output layer uses a single attention head.
        self.layer2 = MultiHeadGATLayer(g, hidden_dim * num_heads, out_dim, 1)

    def forward(self, h):
        h = self.layer1(h)
        h = F.elu(h)
        h = self.layer2(h)
        return h

# Load the data
g, features, labels, train_mask, test_mask, val_mask, n_classes = load_data()

# Instantiate the network: two attention heads, hidden size 8 per head,
# output dimension equal to the number of classes (3 for PubMed)
net = GAT(g, in_dim=features.size()[1], hidden_dim=8, out_dim=n_classes, num_heads=2).to('cuda')

# create optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

best_val_acc = 0
best_test_acc = 0

# main loop
dur = []
for epoch in range(100):
    if epoch >= 3:
        t0 = time.time()

    logits = net(features)

    # Compute prediction
    pred = logits.argmax(1)

    # Compute loss
    loss = F.cross_entropy(logits[train_mask], labels[train_mask])

    # Compute accuracy on training/validation/test
    train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
    val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
    test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

    # Save the best validation accuracy and the corresponding test accuracy.
    if best_val_acc < val_acc:
        best_val_acc = val_acc
        best_test_acc = test_acc

    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 5 == 0:
        print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
            epoch, loss, val_acc, best_val_acc, test_acc, best_test_acc))
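The GATLayer above follows the structure of DGL's GAT tutorial. DGL also ships a built-in GATConv layer that performs the same multi-head attention; its output has shape (N, num_heads, out_dim), so the heads have to be flattened or averaged explicitly. A minimal sketch of a two-layer GAT built on it, assuming the same g, features and n_classes from load_data are reused (a sketch, not a drop-in replacement for the code above):

# Sketch: the same two-layer GAT built from DGL's GATConv
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv.gatconv import GATConv

class BuiltinGAT(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, num_heads):
        super().__init__()
        self.gat1 = GATConv(in_dim, hidden_dim, num_heads)       # output: (N, num_heads, hidden_dim)
        self.gat2 = GATConv(hidden_dim * num_heads, out_dim, 1)  # single output head

    def forward(self, g, h):
        h = self.gat1(g, h).flatten(1)   # concatenate the heads
        h = F.elu(h)
        h = self.gat2(g, h).mean(1)      # squeeze the single head
        return h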
The third method is GraphSAGE with mini-batch neighbor sampling, trained on PubMed:

import dgl
import numpy as np
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from dgl.nn.pytorch.conv.sageconv import SAGEConv
import time
from dgl.data import citation_graph
import tqdm

# Neighbor sampler
class NeighborSampler(object):
    def __init__(self, g, fanouts):
        """
        g is a DGLGraph; fanouts gives the number of neighbors to sample per hop,
        e.g. one value for the first-hop sampling and one for the second hop.
        """
        self.g = g
        self.fanouts = fanouts

    def sample_blocks(self, seeds):
        seeds = th.LongTensor(np.asarray(seeds))
        blocks = []
        for fanout in self.fanouts:
            # Sample `fanout` neighbors for each seed node and return the induced subgraph;
            # replace=True samples neighbors with replacement.
            frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True)
            # Convert the subgraph `frontier` into a bipartite block suitable for message passing
            block = dgl.to_block(frontier, seeds)
            # Use the source nodes of the new block as the seeds of the next hop;
            # srcdata holds the source-node information, since sampling runs in the
            # opposite direction of aggregation
            seeds = block.srcdata[dgl.NID]
            blocks.insert(0, block)
        return blocks

# GraphSAGE network suitable for large graphs
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        # Input layer. SAGEConv supports four aggregator types; "gcn" is used here.
        # Arguments: input feature size, output feature size, aggregator type
        # (defaults: feat_drop=0, bias=True, norm=None, activation=None)
        self.layers.append(SAGEConv(in_feats, n_hidden, 'gcn'))
        # Hidden layers
        for i in range(1, n_layers - 1):
            self.layers.append(SAGEConv(n_hidden, n_hidden, 'gcn'))
        # Output layer
        self.layers.append(SAGEConv(n_hidden, n_classes, 'gcn'))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    # blocks are the sampled bipartite subgraphs
    # x holds the node features
    def forward(self, blocks, x):
        h = x
        for l, (layer, block) in enumerate(zip(self.layers, blocks)):
            h_dst = h[:block.number_of_dst_nodes()]
            h = layer(block, (h, h_dst))
            if l != len(self.layers) - 1:
                h = self.activation(h)
                h = self.dropout(h)
        return h

    # Full-graph, layer-wise inference used for evaluation
    def inference(self, g, x, batch_size):
        nodes = th.arange(g.number_of_nodes())
        for l, layer in enumerate(self.layers):
            y = th.zeros(g.number_of_nodes(),
                         self.n_hidden if l != len(self.layers) - 1 else self.n_classes)
            for start in tqdm.trange(0, len(nodes), batch_size):
                end = start + batch_size
                batch_nodes = nodes[start:end]
                block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes)
                input_nodes = block.srcdata[dgl.NID]
                h = x[input_nodes]
                h_dst = h[:block.number_of_dst_nodes()]
                h = layer(block, (h, h_dst))
                if l != len(self.layers) - 1:
                    h = self.activation(h)
                    h = self.dropout(h)
                y[start:end] = h.cpu()
            x = y
        return y

# Compute accuracy
def evaluate(model, g, inputs, labels, val_mask, batch_size):
    """
    Evaluate the model by calling its inference function.
    """
    model.eval()
    with torch.no_grad():
        label_pred = model.inference(g, inputs, batch_size)
    model.train()
    return (torch.argmax(label_pred[val_mask], dim=1) == labels[val_mask]).float().sum() / len(label_pred[val_mask])

def load_subtensor(g, labels, seeds, input_nodes, device):
    """
    Copy the features and labels of a set of nodes onto the target device.
    """
    batch_inputs = g.ndata['features'][input_nodes].to(device)
    batch_labels = labels[seeds].to(device)
    return batch_inputs, batch_labels

# Hyperparameters
gpu = 0
num_epochs = 50
num_hidden = 16
num_layers = 2
fan_out = '5,10'     # sample 5 neighbors at the first hop, 10 at the second
batch_size = 50
log_every = 1        # logging frequency
eval_every = 5
lr = 0.001
dropout = 0.5
num_workers = 0

if gpu >= 0:
    device = th.device('cuda:%d' % gpu)
else:
    device = th.device('cpu')

# Load the data; for the other dataset use citation_graph.CiteseerGraphDataset()
data = citation_graph.PubmedGraphDataset()
graph = data[0]   # single graph
train_mask = graph.ndata['train_mask']
val_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
features = graph.ndata['feat']
in_feats = features.shape[1]
labels = graph.ndata['label']
n_classes = data.num_classes

# Construct graph
g = graph   # the DGLGraph loaded above
g.ndata['features'] = features

train_nid = th.nonzero(train_mask, as_tuple=True)[0]
val_nid = th.nonzero(val_mask, as_tuple=True)[0]
# Make sure the masks are bool tensors
train_mask = train_mask.bool()
val_mask = val_mask.bool()
test_mask = test_mask.bool()

# Create sampler
sampler = NeighborSampler(g, [int(fanout) for fanout in fan_out.split(',')])

# Create PyTorch DataLoader for constructing blocks
dataloader = DataLoader(
    dataset=train_nid.numpy(),
    batch_size=batch_size,
    collate_fn=sampler.sample_blocks,
    shuffle=True,
    drop_last=False,
    num_workers=num_workers)

# Define model and optimizer (this minimal example keeps the model and the mini-batches on the CPU)
model = GraphSAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)
loss_fcn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# Training loop
best_val_acc = 0
for epoch in range(num_epochs):
    for step, blocks in enumerate(dataloader):
        tic_step = time.time()

        input_nodes = blocks[0].srcdata[dgl.NID]   # input nodes of the outermost block (their features are needed)
        seeds = blocks[-1].dstdata[dgl.NID]        # output nodes whose labels are predicted

        # Load the input features as well as the output labels
        batch_inputs, batch_labels = load_subtensor(g, labels, seeds, input_nodes, th.device('cpu'))

        # Compute loss and prediction
        batch_pred = model(blocks, batch_inputs)
        loss = loss_fcn(batch_pred, batch_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print('| Loss {:.4f}'.format(loss.item()))

    if epoch % 5 == 0:
        train_acc = evaluate(model, g, g.ndata['features'], labels, train_mask, batch_size)
        val_acc = evaluate(model, g, g.ndata['features'], labels, val_mask, batch_size)
        print('Epoch {:05d} | Val ACC {:.4f} | Train ACC {:.4f}'.format(epoch, val_acc.item(), train_acc.item()))

# After training, check accuracy on the test set
acc_test = evaluate(model, g, g.ndata['features'], labels, test_mask, batch_size)
print('Test ACC: %.4f' % (acc_test.item()))
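The hand-written NeighborSampler plus a plain PyTorch DataLoader is one way to build the blocks; dgl-0.6 also packages the same pipeline as dgl.dataloading.MultiLayerNeighborSampler and dgl.dataloading.NodeDataLoader. A minimal sketch of the equivalent mini-batch loop, reusing the variables defined above:

# Sketch: the same neighbor-sampled mini-batches via DGL's built-in dataloading API
# (reuses g, train_nid, batch_size, num_workers, labels, model, loss_fcn and optimizer from above).
import dgl

sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])   # fanout per layer
dataloader = dgl.dataloading.NodeDataLoader(
    g, train_nid, sampler,
    batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers)

for input_nodes, output_nodes, blocks in dataloader:
    batch_inputs = g.ndata['features'][input_nodes]
    batch_labels = labels[output_nodes]
    batch_pred = model(blocks, batch_inputs)
    loss = loss_fcn(batch_pred, batch_labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()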
The fourth method is APPNP (approximate personalized propagation of neural predictions), trained on CiteSeer:

import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import citation_graph as citegrh
from dgl.nn.pytorch.conv.graphconv import GraphConv
from dgl.nn.pytorch.conv.appnpconv import APPNPConv

# Load the dataset; for the other dataset use citegrh.load_pubmed()
dataset = citegrh.load_citeseer()
print('Number of categories:', dataset.num_classes)

# single graph
g = dataset[0]
# Move the graph to the GPU
g = g.to('cuda')

# Build the APPNP network, which updates node embeddings with a
# personalized-PageRank-style propagation scheme
class APPNP(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(APPNP, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        # APPNP layer; it keeps the feature dimension unchanged.
        # k : int, the number of propagation iterations
        # alpha : float, the teleport probability
        self.conv2 = APPNPConv(k=3, alpha=0.5)
        # map the hidden features to the node classes
        self.conv3 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = self.conv2(g, h)
        h = self.conv3(g, h)
        return h

# Instantiate the model on the GPU; the hidden size is 16,
# the input and output dimensions are determined by the dataset.
model = APPNP(g.ndata['feat'].shape[1], 16, dataset.num_classes).to('cuda')

def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']

    for e in range(3000):
        # Forward
        logits = model(g, features)

        # Compute prediction: the class with the largest logit for each node
        pred = logits.argmax(1)

        # Compute loss on the training nodes only
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 5 == 0:
            print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
                e, loss, val_acc, best_val_acc, test_acc, best_test_acc))

train(g, model)
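For reference, APPNPConv implements the personalized-PageRank-style propagation from the "Predict then Propagate" paper: starting from the layer input H(0), it iterates H(k+1) = (1 - alpha) * A_norm @ H(k) + alpha * H(0) for k steps, so with alpha=0.5 half of each node's original representation is mixed back in at every step. A toy dense-matrix sketch on a hypothetical 3-node graph, not on CiteSeer:

# Toy sketch of the APPNP propagation scheme on a hypothetical 3-node graph.
import torch

A = torch.tensor([[0., 1., 1.],
                  [1., 0., 0.],
                  [1., 0., 0.]])
A_hat = A + torch.eye(3)                         # self-loops
d_inv_sqrt = A_hat.sum(1).pow(-0.5)
A_norm = d_inv_sqrt[:, None] * A_hat * d_inv_sqrt[None, :]

H0 = torch.randn(3, 16)                          # layer input (e.g. the hidden features)
alpha, k = 0.5, 3
H = H0
for _ in range(k):
    H = (1 - alpha) * (A_norm @ H) + alpha * H0  # teleport back to the input features
print(H.shape)                                   # torch.Size([3, 16])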