Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[*.py]
indent_style = tab
indent_size = 4
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SCM syntax highlighting & preventing 3-way merges
pixi.lock merge=binary linguist-language=YAML linguist-generated=true
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,9 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# pixi environments

.pixi/*
!.pixi/config.toml
pixi.lock
.envrc
80 changes: 80 additions & 0 deletions GraphTsetlinMachine/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,83 @@ def encode(self):
self.signature = m.digest()

self.encoded = True


def __len__(self):
return self.number_of_graphs

def _get_indices(self, key) -> list[int]:
if isinstance(key, int):
if key < 0:
key += self.number_of_graphs
if key < 0 or key >= self.number_of_graphs:
raise IndexError("Graph index out of range")
indices = [key]
elif isinstance(key, slice):
indices = list(range(*key.indices(self.number_of_graphs)))
elif isinstance(key, list) or isinstance(key, np.ndarray):
indices = []
for k in key:
if k < 0:
k += self.number_of_graphs
if k < 0 or k >= self.number_of_graphs:
raise IndexError("Graph index out of range")
indices.append(k)
else:
raise TypeError("Invalid graph index type")

return indices

def _create_subset(self, indices: list[int]):
    """Build a new, encoded ``Graphs`` object containing only the selected graphs.

    The graph at ``indices[k]`` in ``self`` becomes graph ``k`` of the result.
    Data is copied in three phases (per-graph, per-node, per-edge) because each
    ``prepare_*`` call on the subset must run before the arrays of the next
    phase are written to.

    Args:
        indices: Positions of the graphs to copy, already validated and
            non-negative (as produced by ``_get_indices``).

    Returns:
        The new ``Graphs`` instance, after ``encode()`` has been called on it.
    """
    # (position in subset, position in self) pairs, reused by every phase.
    pairs = list(enumerate(indices))

    subset = Graphs(
        init_with=self,
        number_of_graphs=len(indices),
        double_hashing=self.double_hashing,
        one_hot_encoding=self.one_hot_encoding,
    )

    # Phase 1: per-graph metadata (node counts and node-id entries).
    for dst_graph, src_graph in pairs:
        subset.number_of_graph_nodes[dst_graph] = self.number_of_graph_nodes[src_graph]
        subset.graph_node_id[dst_graph] = self.graph_node_id[src_graph]

    # NOTE(review): presumably sets up subset.node_index and the node-level
    # arrays written below — confirm against prepare_node_configuration.
    subset.prepare_node_configuration()

    # Phase 2: per-node arrays, copied as one contiguous span per graph.
    node_fields = ("node_type", "number_of_graph_node_edges", "graph_node_edge_counter", "X")
    for dst_graph, src_graph in pairs:
        src_first = self.node_index[src_graph]
        src_span = slice(src_first, src_first + self.number_of_graph_nodes[src_graph])
        dst_first = subset.node_index[dst_graph]
        dst_span = slice(dst_first, dst_first + subset.number_of_graph_nodes[dst_graph])

        for field in node_fields:
            getattr(subset, field)[dst_span] = getattr(self, field)[src_span]

    # NOTE(review): presumably sets up subset.edge_index and subset.edge from
    # the edge counts copied above — confirm against prepare_edge_configuration.
    subset.prepare_edge_configuration()

    # Phase 3: edges, copied node by node. Only the edges actually recorded for
    # a node (graph_node_edge_counter) are transferred.
    for dst_graph, src_graph in pairs:
        src_base = self.node_index[src_graph]
        dst_base = subset.node_index[dst_graph]

        for offset in range(self.number_of_graph_nodes[src_graph]):
            src_node = src_base + offset
            dst_node = dst_base + offset

            edge_count = self.graph_node_edge_counter[src_node]
            src_from = self.edge_index[src_node]
            dst_from = subset.edge_index[dst_node]

            subset.edge[dst_from : dst_from + edge_count] = self.edge[src_from : src_from + edge_count]

    subset.encode()

    return subset

def __getitem__(self, index):
indices = self._get_indices(index)
if len(indices) == 0:
raise ValueError("No graphs selected")
return self._create_subset(indices)
28 changes: 11 additions & 17 deletions GraphTsetlinMachine/tm.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,8 @@

import pycuda.curandom as curandom
import pycuda.driver as cuda
import pycuda.autoinit
import pycuda.autoinit # noqa: F401
from pycuda.compiler import SourceModule
from scipy.sparse import csr_matrix
import sys
from time import time

g = curandom.XORWOWRandomNumberGenerator()

Expand Down Expand Up @@ -314,22 +311,19 @@ def set_state(self, state):

def save(self, fname=""):
# Copy data from GPU to CPU
if np.array_equal(self.ta_state, np.array([])):
self.ta_state = np.empty(
self.number_of_clauses * self.number_of_ta_chunks * self.number_of_state_bits, dtype=np.uint32
)
cuda.memcpy_dtoh(self.ta_state, self.ta_state_gpu)
self.ta_state = np.empty(
self.number_of_clauses * self.number_of_ta_chunks * self.number_of_state_bits, dtype=np.uint32
)
cuda.memcpy_dtoh(self.ta_state, self.ta_state_gpu)

for depth in range(self.depth - 1):
if np.array_equal(self.message_ta_state[depth], np.array([])):
self.message_ta_state[depth] = np.empty(
self.number_of_clauses * self.number_of_message_chunks * self.number_of_state_bits, dtype=np.uint32
)
cuda.memcpy_dtoh(self.message_ta_state[depth], self.message_ta_state_gpu[depth])
self.message_ta_state[depth] = np.empty(
self.number_of_clauses * self.number_of_message_chunks * self.number_of_state_bits, dtype=np.uint32
)
cuda.memcpy_dtoh(self.message_ta_state[depth], self.message_ta_state_gpu[depth])

if np.array_equal(self.clause_weights, np.array([])):
self.clause_weights = np.empty(self.number_of_outputs * self.number_of_clauses, dtype=np.int32)
cuda.memcpy_dtoh(self.clause_weights, self.clause_weights_gpu)
self.clause_weights = np.empty(self.number_of_outputs * self.number_of_clauses, dtype=np.int32)
cuda.memcpy_dtoh(self.clause_weights, self.clause_weights_gpu)

state_dict = {
# State arrays
Expand Down
121 changes: 121 additions & 0 deletions examples/test_graph_subsetting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import random
import numpy as np
from GraphTsetlinMachine.tm import MultiClassGraphTsetlinMachine
from GraphTsetlinMachine.graphs import Graphs

random.seed(42)

def generate_graphs(symbols, noise, graph_args: dict):
    """Generate a synthetic two-node parity dataset as an encoded ``Graphs`` object.

    Every graph has two nodes joined by a "Plain" edge in each direction. Each
    node carries one property drawn uniformly from ``symbols``. The label is 0
    when both drawn values have the same parity and 1 otherwise; it is then
    flipped with probability ``noise``.

    All randomness comes from the standard ``random`` module, so the
    ``random.seed`` call at the top of this script makes the whole dataset,
    label noise included, reproducible.

    Args:
        symbols: Sequence of integer symbols to sample node properties from.
        noise: Probability in ``[0, 1]`` of flipping a label. ``0.0`` never
            flips.
        graph_args: Keyword arguments forwarded to ``Graphs``; must contain
            ``"number_of_graphs"``.

    Returns:
        A tuple ``(graphs, X, Y)``: the encoded graphs, an
        ``(number_of_graphs, 2)`` float array with the two sampled values per
        graph, and a ``uint32`` array of (possibly noisy) labels.
    """
    graphs = Graphs(**graph_args)
    number_of_examples = graph_args["number_of_graphs"]

    for graph_id in range(number_of_examples):
        graphs.set_number_of_graph_nodes(graph_id, 2)

    graphs.prepare_node_configuration()

    number_of_edges = 1  # each node has exactly one outgoing edge
    for graph_id in range(number_of_examples):
        for node_id in range(graphs.number_of_graph_nodes[graph_id]):
            graphs.add_graph_node(graph_id, node_id, number_of_edges)

    graphs.prepare_edge_configuration()

    X = np.empty((number_of_examples, 2))
    Y = np.empty(number_of_examples, dtype=np.uint32)
    edge_type = "Plain"

    for graph_id in range(number_of_examples):
        # Connect the two nodes in both directions.
        graphs.add_graph_node_edge(graph_id, 0, 1, edge_type)
        graphs.add_graph_node_edge(graph_id, 1, 0, edge_type)

        x1 = random.choice(symbols)
        x2 = random.choice(symbols)
        X[graph_id] = np.array([x1, x2])
        label = 0 if (x1 % 2) == (x2 % 2) else 1

        graphs.add_graph_node_property(graph_id, 0, x1)
        graphs.add_graph_node_property(graph_id, 1, x2)

        # random.random() is in [0.0, 1.0), so a strict "<" flips with
        # probability exactly `noise` and never flips when noise == 0.0.
        if random.random() < noise:
            label = 1 - label
        Y[graph_id] = label

    graphs.encode()

    return graphs, X, Y


if __name__ == "__main__":
    # Trains two identically configured machines on the same data: one fed
    # slices obtained through Graphs.__getitem__, one fed the full training set,
    # so their accuracies can be compared.
    tm_params = {
        "number_of_clauses": 1000,
        "T": 2000,
        "s": 1,
        "message_size": 2048,
        "message_bits": 2,
        "double_hashing": True,
        "depth": 2,
        "grid": (16 * 13, 1, 1),
        "block": (128, 1, 1),
    }

    epochs = 10
    noise = 0.1
    num_value = 100
    batch_size = 10000  # number of graphs per slice in the split-training run
    symbols = list(range(num_value))
    graph_params = {
        "number_of_graphs": 50000,
        "hypervector_size": 2048,
        "hypervector_bits": 2,
        "double_hashing": True,
        "symbols": symbols,
    }
    graphs_train, X_train, y_train = generate_graphs(symbols, noise, graph_params)

    # The test graphs are created with init_with=graphs_train and no noise.
    # NOTE(review): presumably init_with makes them share the training set's
    # symbol/hypervector configuration — confirm in Graphs.__init__.
    graphs_test, X_test, y_test = generate_graphs(
        symbols,
        0.0,
        {
            "number_of_graphs": 2000,
            "init_with": graphs_train,
        },
    )

    print("====================Training with graph splits====================")
    tm = MultiClassGraphTsetlinMachine(**tm_params)
    number_of_train_graphs = y_train.shape[0]
    for i in range(epochs):
        print(f"Epoch {i} ---------------------")
        for b in range(0, number_of_train_graphs, batch_size):
            # Clamp so the printed range is right even when the last batch is short.
            batch_end = min(b + batch_size, number_of_train_graphs)
            gsub = graphs_train[b:batch_end]
            ysub = y_train[b:batch_end]
            tm.fit(gsub, ysub, epochs=1, incremental=True)
            result_sub = 100 * (tm.predict(gsub) == ysub).mean()
            print(f" [Batch {b}-{batch_end}] Train Acc: {result_sub:.4f}")

        pred_test = tm.predict(graphs_test)
        result_test = 100 * (pred_test == y_test).mean()
        result_train = 100 * (tm.predict(graphs_train) == y_train).mean()
        print(f"[Graph Splits] Epoch {i} | Train Acc: {result_train:.4f}, Test Acc: {result_test:.4f}")

    print("====================Training with original graphs====================")
    tm2 = MultiClassGraphTsetlinMachine(**tm_params)
    for i in range(epochs):
        tm2.fit(graphs_train, y_train, epochs=1, incremental=True)

        pred_test = tm2.predict(graphs_test)

        result_test = 100 * (pred_test == y_test).mean()
        result_train = 100 * (tm2.predict(graphs_train) == y_train).mean()

        print(
            f"[Original Graphs] Epoch {i} | Train Acc: {result_train:.4f}, Test Acc: {result_test:.4f}"
        )

36 changes: 36 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[tool.pixi.workspace]
channels = ["conda-forge"]
platforms = ["linux-64"]

[tool.pixi.tasks]

[tool.pixi.system-requirements]
cuda = "12"

[tool.pixi.dependencies]
python = ">=3.11,<3.12"
numpy = ">=2.3.5,<3"
mamba = ">=2.4.0,<3"
cuda = ">=12.9.1,<13"
sympy = ">=1.14.0,<2"
numba = ">=0.62.1,<0.63"
scipy = ">=1.16.3,<2"

[tool.pixi.pypi-dependencies]
graphtsetlinmachine = { path = ".", editable = true }
pycuda = ">=2025.1.2, <2026"

[tool.basedpyright]
typeCheckingMode = "standard"
reportUnusedImport = false
exclude = [
"**/__pycache__*",
"**/.*",
"**/*.ipynb",
]

[tool.ruff]
line-length = 160

[tool.ruff.format]
indent-style = "tab"