Skip to content

Commit 462fd86

Browse files
committed
add the dataset class and data structure for proteins
1 parent 9fac912 commit 462fd86

File tree

6 files changed

+1945
-13
lines changed

6 files changed

+1945
-13
lines changed

torchdrug/data/__init__.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
from .dictionary import PerfectHash, Dictionary
22
from .graph import Graph, PackedGraph, cat
33
from .molecule import Molecule, PackedMolecule
4-
from .dataset import MoleculeDataset, ReactionDataset, NodeClassificationDataset, KnowledgeGraphDataset, \
5-
SemiSupervised, semisupervised, key_split, scaffold_split, ordered_scaffold_split
4+
from .protein import Protein, PackedProtein
5+
from .dataset import MoleculeDataset, ReactionDataset, ProteinDataset, \
6+
ProteinPairDataset, ProteinLigandDataset, \
7+
NodeClassificationDataset, KnowledgeGraphDataset, SemiSupervised, \
8+
semisupervised, key_split, scaffold_split, ordered_scaffold_split
69
from .dataloader import DataLoader, graph_collate
710
from . import constant
811
from . import feature
912

1013
__all__ = [
11-
"Graph", "PackedGraph", "Molecule", "PackedMolecule", "PerfectHash", "Dictionary",
14+
"Graph", "PackedGraph", "Molecule", "PackedMolecule", "Protein", "PackedProtein", "PerfectHash", "Dictionary",
1215
"MoleculeDataset", "ReactionDataset", "NodeClassificationDataset", "KnowledgeGraphDataset", "SemiSupervised",
16+
"ProteinDataset", "ProteinPairDataset", "ProteinLigandDataset",
1317
"semisupervised", "key_split", "scaffold_split", "ordered_scaffold_split",
1418
"DataLoader", "graph_collate", "feature", "constant",
1519
]

torchdrug/data/constant.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,24 @@
2929
"Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs",
3030
"Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"]
3131

32+
# the 20 standard amino acids, ordered by ascending molecular mass
33+
RESIDUE_NAME = ["Glycine", "Alanine", "Serine", "Proline", "Valine", "Threonine", "Cysteine", "Isoleucine",
34+
"Leucine", "Asparagine", "Aspartic acid", "Glutamine", "Lysine", "Glutamic acid", "Methionine",
35+
"Histidine", "Phenylalanine", "Arginine", "Tyrosine", "Tryptophan"]
36+
37+
RESIDUE_INITIAL = ["G", "A", "S", "P", "V", "T", "C", "I", "L", "N", "D", "Q", "K", "E", "M", "H", "F", "R", "Y", "W"]
38+
39+
RESIDUE_ATOM_NAME = ["C", "CA", "CB", "CD", "CD1", "CD2", "CE", "CE1", "CE2", "CE3", "CG", "CG1", "CG2", "CH2",
40+
"CZ", "CZ2", "CZ3", "N", "ND1", "ND2", "NE", "NE1", "NE2", "NH1", "NH2", "NZ", "O", "OD1",
41+
"OD2", "OE1", "OE2", "OG", "OG1", "OH", "OXT", "SD", "SG"]
42+
3243
NUM_ATOM = len(ATOM_NAME)
44+
NUM_AMINO_ACID = len(RESIDUE_NAME)
3345

3446
for i, name in enumerate(ATOM_NAME):
3547
if i == 0:
3648
continue
49+
setattr(module, name.upper(), i)
50+
51+
for i, name in enumerate(RESIDUE_NAME):
3752
setattr(module, name.upper(), i)

0 commit comments

Comments
 (0)