Commits (81) — all by claire62 on Apr 21, 2024

808b54e  Create train.py
0d308e3  Add files via upload
fe77d09  Create train.py
29d319c  Create train.py
f4aaa53  Add files via upload
ece429b  Create train.py
ad6578f  Add files via upload
2a922e6  Update train.py
5312568  Update train.py
32d2c80  Update train.py
459d063  Create train.py
702fd9d  Add files via upload
f023ff4  Create a
0887c22  Add files via upload
6fe7674  Delete examples/itemknn/rel-movielens1/data/ml-100k/ratings/a
4039d24  Create a
49b6984  Add files via upload
7dfea2f  Delete examples/itemknn/rel-movielens1/data/ml-100k/ratings directory
a2da870  Create AbstractRecommender.py
2305039  Add files via upload
042186c  Create __init__.py
9e01bdb  Add files via upload
c91305e  Create __init__.py
ca2c47a  Add files via upload
fd4f351  Create train.py
75871d6  Add files via upload
eecc16c  Create a
2d5461a  Add files via upload
f7e4e64  Create __init__.py
d83166f  Add files via upload
2f315a2  Create __init__.py
275b441  Add files via upload
7d3ffc3  Create __init__.py
d3a4d25  Add files via upload
2a7b22d  Add files via upload
9beef03  Add files via upload
187affa  Delete examples/itemknn/rel-movielens1m directory
89f21ad  Add files via upload
e4153b4  Create train.py
c5bb459  Add files via upload
1eb2fa9  Create train.py
3372a62  Add files via upload
80ac770  Create a
49c10b2  Add files via upload
c3e6c10  Create __init__.py
ac6f559  Add files via upload
a0dc289  Create __init__.py
b0ee166  Add files via upload
7a50a77  Create __init__.py
c16c781  Add files via upload
1deb3d8  Add files via upload
530618d  Create a
5cd63b6  Add files via upload
ae41174  Delete examples/itemknn/daisy directory
cea46f2  Delete examples/itemknn/README.md
557fa5a  Delete examples/itemknn/train.py
780ebb1  Delete examples/itemknn/data/ml-100k/ratings directory
03be45f  Delete examples/itemknn/rel-movielens1m/a
4753c26  Delete examples/itemknn/rel-movielens1m/data/ml-100k/ratings/a
f92a009  Delete examples/itemknn/ml-100k-regression/train.py
ffe1c8b  Delete examples/mostpop/rel-movielens1m/data/ml-100k/ratings/a
d5680cf  Add files via upload
3d48bd8  Update train.py
4c8819b  Create __init__.py
e52cacb  Add files via upload
2c69c98  Add files via upload
c2c0ee9  Create __init__.py
75345c3  Add files via upload
49ef025  Create __init__.py
1ff13cf  Add files via upload
7f6be1b  Add files via upload
94097be  Create __init__.py
f8d84f2  Add files via upload
a85d98e  Create __init__.py
081d566  Add files via upload
e5cfcf5  Create __init__.py
2ef5c52  Add files via upload
cc5e932  Create __init__.py
b086b79  Add files via upload
c642dfe  Delete examples/itemknn/model directory
92a47ad  Add files via upload
107 changes: 107 additions & 0 deletions examples/GMT/TUDataset/train.py
@@ -0,0 +1,107 @@
# GMT for classification task in TUDataset dataset
# Paper: Baek, J., Kang, M., & Hwang, S. J. (2021). Accurate learning of graph representations with graph multiset pooling. arXiv preprint arXiv:2102.11533.
# Test f1_score micro: 0.43037974683544306; macro: 0.15178571428571427
# Runtime: 6.8501s on a single GPU
# Cost: N/A

import os.path as osp
import time
import numpy as np
from sklearn.metrics import f1_score
import torch
import torch.nn.functional as F
from torch.nn import Linear

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, GraphMultisetTransformer
import warnings
warnings.filterwarnings("ignore")
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'PROTEINS')
dataset = TUDataset(path, name='PROTEINS').shuffle()

n = (len(dataset) + 9) // 10
train_dataset = dataset[2 * n:]
val_dataset = dataset[n:2 * n]
test_dataset = dataset[:n]
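
# Note: n = (len(dataset) + 9) // 10 is ceil(len(dataset) / 10), so the slices
# above give roughly a 10%/10%/80% test/val/train split. For the 1113 PROTEINS
# graphs: n = 112, i.e. 112 test, 112 val, 889 train.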

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128)
test_loader = DataLoader(test_dataset, batch_size=128)


class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = GCNConv(dataset.num_features, 32)
        self.conv2 = GCNConv(32, 32)
        self.conv3 = GCNConv(32, 32)

        # Pool the concatenated 3 x 32 = 96-channel node features into one
        # graph embedding, using k=10 seed vectors and 4 attention heads.
        self.pool = GraphMultisetTransformer(96, k=10, heads=4)

        self.lin1 = Linear(96, 16)
        self.lin2 = Linear(16, dataset.num_classes)

    def forward(self, x0, edge_index, batch):
        # Jumping-knowledge style concatenation of all three GCN layers.
        x1 = self.conv1(x0, edge_index).relu()
        x2 = self.conv2(x1, edge_index).relu()
        x3 = self.conv3(x2, edge_index).relu()
        x = torch.cat([x1, x2, x3], dim=-1)

        x = self.pool(x, batch)

        x = self.lin1(x).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)

        return x


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)


def train():
    model.train()

    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = F.cross_entropy(out, data.y)
        loss.backward()
        total_loss += data.num_graphs * float(loss)
        optimizer.step()
    return total_loss / len(train_dataset)
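
# Weighting each batch's mean loss by data.num_graphs and dividing by
# len(train_dataset) yields the exact per-graph average loss for the epoch,
# independent of the (possibly smaller) final batch.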


@torch.no_grad()
def test(loader):
    model.eval()
    ys, preds = [], []
    for data in loader:
        data = data.to(device)
        out = model(data.x, data.edge_index, data.batch)
        ys.append(data.y.cpu().numpy())
        preds.append(out.argmax(dim=-1).cpu().numpy())
    # Concatenate the per-batch arrays before scoring; sklearn expects
    # flat label/prediction arrays.
    y, pred = np.concatenate(ys), np.concatenate(preds)
    f1_micro = f1_score(y, pred, average='micro')
    f1_macro = f1_score(y, pred, average='macro')
    return f1_micro, f1_macro


t_total = time.time()
for epoch in range(1, 31):
    train_loss = train()
    val_micro, val_macro = test(val_loader)
    f1_micro, f1_macro = test(test_loader)
    print(f"Epoch {epoch:02d} | loss: {train_loss:.4f} | "
          f"val micro/macro: {val_micro:.4f}/{val_macro:.4f} | "
          f"test micro: {f1_micro}; macro: {f1_macro}")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
112 changes: 112 additions & 0 deletions examples/GMT/rel-movielens1m/classification/train.py
@@ -0,0 +1,112 @@
# GMT for classification task in rel-movielens1m dataset
# Paper: Baek, J., Kang, M., & Hwang, S. J. (2021). Accurate learning of graph representations with graph multiset pooling. arXiv preprint arXiv:2102.11533.
# Test f1_score micro: 0.33439635535307516; macro: 0.0799316725838052
# Runtime: 7.1744s on a single GPU
# Cost: N/A

import time
from sklearn.metrics import f1_score
import torch
import torch.nn.functional as F
from torch.nn import Linear
import numpy as np
from utils import separate_data, get_batches
from torch_geometric.nn import GCNConv, GraphMultisetTransformer
import warnings
warnings.filterwarnings("ignore")

(data, adj, features, labels, idx_train, idx_test,
 y_train, y_test, train_adj, test_adj, train_feats, test_feats,
 test_labels, val_adj, val_feats, val_labels) = separate_data()
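
# separate_data() (defined in utils.py) loads the rel-movielens1m graph and
# splits it per set: y_* are the per-split label matrices, *_adj the sliced
# adjacency matrices, and *_feats the matching feature rows.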

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.conv1 = GCNConv(features.shape[1], 32)
        self.conv2 = GCNConv(32, 32)
        self.conv3 = GCNConv(32, 32)

        self.pool = GraphMultisetTransformer(96, k=10, heads=4)

        self.lin1 = Linear(32, 16)
        self.lin2 = Linear(16, y_train.shape[1])

    def forward(self, x0, edge_index, batch):
        x1 = self.conv1(x0, edge_index).relu()
        x2 = self.conv2(x1, edge_index).relu()
        x3 = self.conv3(x2, edge_index).relu()

        # Graph-level pooling is disabled here: genre classification on
        # rel-movielens1m is a node-level task, so each node is classified
        # from its final GCN representation instead.
        # x = torch.cat([x1, x2, x3], dim=-1)
        # x = self.pool(x, batch)

        x = self.lin1(x3).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin2(x)

        return x

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)


def train(train_ind, batch_size):
    model.train()

    total_loss = 0
    for cur_ind, batch_labels, sampled_feats, sampled_adjs in get_batches(
            train_ind, y_train, train_feats, train_adj, batch_size, False):
        optimizer.zero_grad()
        sampled_feats = sampled_feats.to(device)
        sampled_adjs = sampled_adjs.to(device)
        batch_labels = batch_labels.to(device)
        out = model(sampled_feats, sampled_adjs, cur_ind)

        loss = F.cross_entropy(out, batch_labels)
        loss.backward()
        total_loss += float(loss)
        optimizer.step()
    # Average over the training indices rather than a hard-coded count.
    return total_loss / len(train_ind)

@torch.no_grad()
def test():
    model.eval()
    ys, preds = [], []
    test_nums = test_adj.shape[0] - 1
    for cur_ind, batch_labels, sampled_feats, sampled_adjs in get_batches(
            np.arange(test_nums), y_test, test_feats, test_adj, 2, False):
        sampled_feats = sampled_feats.to(device)
        sampled_adjs = sampled_adjs.to(device)
        out = model(sampled_feats, sampled_adjs, cur_ind)
        ys.append(batch_labels)
        # Multi-label prediction: a positive logit marks a genre as present.
        preds.append((out > 0).cpu())
    y, pred = torch.cat(ys, dim=0).numpy(), torch.cat(preds, dim=0).numpy()

    f1_micro = f1_score(y, pred, average='micro')
    f1_macro = f1_score(y, pred, average='macro')
    return f1_micro, f1_macro

t_total = time.time()
for epoch in range(1, 11):
    train_loss = train(np.arange(100), 2)
    f1_micro, f1_macro = test()
    print(f"Epoch {epoch:02d} | loss: {train_loss:.4f} | "
          f"micro: {f1_micro}; macro: {f1_macro}")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))
83 changes: 83 additions & 0 deletions examples/GMT/rel-movielens1m/classification/utils.py
@@ -0,0 +1,83 @@
import numpy as np
import torch

from load_data import load_data


def sample_mask(idx, length):
    """Create a boolean mask of the given length that is True at idx."""
    mask = np.zeros(length)
    mask[idx] = 1
    return np.array(mask, dtype=bool)
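
# For example: sample_mask([0, 2], 4) -> array([ True, False,  True, False])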

def separate_data():
    data, adj, features, labels, idx_train, idx_val, idx_test = load_data(
        'movielens-classification')
    label_origin = labels.detach().cpu().numpy()

    train_mask = sample_mask(idx_train, label_origin.shape[0])
    val_mask = sample_mask(idx_val, label_origin.shape[0])
    test_mask = sample_mask(idx_test, label_origin.shape[0])

    y_train = np.zeros(label_origin.shape)
    y_val = np.zeros(label_origin.shape)
    y_test = np.zeros(label_origin.shape)
    y_train[train_mask, :] = label_origin[train_mask, :]
    y_val[val_mask, :] = label_origin[val_mask, :]
    y_test[test_mask, :] = label_origin[test_mask, :]
    train_index = np.where(train_mask)[0]
    y_train = y_train[train_index]
    val_index = np.where(val_mask)[0]
    y_val = y_val[val_index]
    test_index = np.where(test_mask)[0]
    y_test = y_test[test_index]

    # Slice the adjacency and feature matrices down to each split.
    train_adj = adj[train_index, :][:, train_index]
    train_feats = features[train_index]

    test_adj = adj[idx_test, :][:, idx_test]
    test_feats = features[test_index]
    test_labels = y_test

    val_adj = adj[idx_val, :]
    val_feats = features
    val_labels = y_val

    return (data, adj, features, labels, idx_train, idx_test,
            y_train, y_test, train_adj, test_adj, train_feats, test_feats,
            test_labels, val_adj, val_feats, val_labels)

def get_batches(train_ind, train_labels, train_feats, train_adjs,
                batch_size=64, shuffle=True):
    """Yield mini-batches of (indices, labels, features, adjacency rows).

    Inputs:
        train_ind: np.array of node indices to batch over.
    """
    nums = train_ind.shape[0]
    if shuffle:
        # Note: shuffles the caller's index array in place.
        np.random.shuffle(train_ind)
    i = 0
    while i < nums:
        cur_ind = train_ind[i:i + batch_size]

        cur_labels = torch.tensor(train_labels[cur_ind])
        sampled_feats = train_feats[cur_ind]
        sampled_adjs = train_adjs[cur_ind].long()

        yield cur_ind, cur_labels, sampled_feats, sampled_adjs
        i += batch_size
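
# Example usage (a minimal sketch; the exact tensor shapes depend on what
# load_data() returns):
#
#   for cur_ind, labels, feats, adjs in get_batches(
#           np.arange(100), y_train, train_feats, train_adj,
#           batch_size=2, shuffle=False):
#       out = model(feats.to(device), adjs.to(device), cur_ind)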