Skip to content

Commit b19e580

Browse files
author
Tomasz Latkowski
committed
added dataset class
1 parent c6f48e8 commit b19e580

File tree

5 files changed

+56
-37
lines changed

5 files changed

+56
-37
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[SELECTION]
2+
num_features = 100
3+
method = 'fisher'
4+
5+
[CLASSIFIER]
6+
hidden_sizes = 20, 10

experiments/dataset.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,26 @@
1+
import numpy as np
2+
from sklearn.model_selection import StratifiedKFold
3+
4+
from utils.data_reader import read
5+
6+
17
class Dataset:
    """Data matrix plus binary labels, with stratified cross-validation splits.

    The data is loaded through ``utils.data_reader.read``; the labels are
    currently synthesised in code rather than read from the file.
    """

    def __init__(self, data_fn):
        """Load the data from *data_fn* and build the label column.

        Args:
            data_fn: Path handed to ``read``.
                NOTE(review): ``get_data`` indexes the result as
                ``data[indices, :]``, so ``read`` presumably returns a
                2-D array -- confirm against ``utils.data_reader``.
        """
        self.data = read(data_fn)
        # FIXME: labels are hard-coded as 82 ones followed by 64 zeros
        # instead of being derived from the data file.
        self.labels = np.concatenate([np.ones(82, dtype=np.float64), np.zeros(64, dtype=np.float64)])
        # Stored as a column vector of shape (n, 1).
        self.labels = np.reshape(self.labels, (-1, 1))

        self.skf = StratifiedKFold(n_splits=10)

    def cross_validation(self):
        """Return an iterator of ``(fold_id, split)`` pairs.

        ``split`` is whatever ``self.skf.split`` yields for the stored data
        and labels (for ``StratifiedKFold``: a ``(train_idxs, test_idxs)``
        pair).
        """
        # The splitter is given 1-D targets.  Flatten with ravel() rather
        # than a hard-coded length so any number of labels works.
        return enumerate(self.skf.split(self.data, self.labels.ravel()))

    def get_data(self, indices):
        """Return the rows of ``self.data`` selected by *indices*."""
        return self.data[indices, :]

    def get_labels(self, indices):
        """Return per-class counts and the labels selected by *indices*.

        Returns:
            A tuple ``(num_instances, selected_labels)`` where
            ``num_instances`` is ``[count of label 0, count of label 1]``
            (plain ints) and ``selected_labels`` is ``self.labels[indices]``.
        """
        selected_labels = self.labels[indices]
        # count_nonzero yields a scalar directly; the builtin sum() over a
        # column vector produced a 1-element array that int() had to coerce.
        num_instances = [
            int(np.count_nonzero(selected_labels == 0)),
            int(np.count_nonzero(selected_labels == 1)),
        ]
        return num_instances, selected_labels

experiments/experiment.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,14 @@
44
from methods.selection_wrapper import SelectionWrapper
55

66

7-
class ExperimentModel:
7+
class Experiment:
88

99
def __init__(self, selection_method, num_features, num_instances, classifier, dataset):
10-
1110
with tf.name_scope('selection'):
1211
self.selection_wrapper = SelectionWrapper(dataset,
13-
num_instances=num_instances,
14-
selection_method=selection_method,
15-
num_features=num_features)
12+
num_instances=num_instances,
13+
selection_method=selection_method,
14+
num_features=num_features)
1615

1716
with tf.name_scope('classifier'):
1817
self.clf = NeuralNetworkClassifier(num_features, 20)

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
pandas==0.19.0
1+
pandas==0.22.0
22
tensorflow==1.4.0
33
numpy==1.13.3
4+
tqdm==4.19.5

run.py

Lines changed: 20 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,28 @@
1-
import numpy as np
21
import tensorflow as tf
3-
from utils.log_saver import LogSaver
4-
from experiments.experiment import ExperimentModel
5-
from methods.selection import fisher
62
from tqdm import tqdm
7-
from sklearn.model_selection import StratifiedKFold
8-
from utils.data_reader import read
93

4+
from experiments.dataset import Dataset
5+
from experiments.experiment import Experiment
6+
from methods.selection import fisher
7+
from utils.log_saver import LogSaver
108

11-
data_fn = 'data/autism.tsv'
12-
data = read(data_fn)
9+
dataset = Dataset('data/autism.tsv')
1310

1411
num_features = 100
1512
num_epochs = 1000
1613
eval_every = 10
1714

18-
labels = np.concatenate([np.ones(82, dtype=np.float64), np.zeros(64, dtype=np.float64)])
19-
labels = np.reshape(labels, (-1, 1))
20-
21-
22-
skf = StratifiedKFold(n_splits=10)
23-
24-
for fold_id, (train_idxs, test_idxs) in enumerate(skf.split(data, labels.reshape(146))):
15+
for fold_id, (train_idxs, test_idxs) in dataset.cross_validation():
2516

26-
data_train_fold = data[train_idxs, :]
27-
labels_train_fold = labels[train_idxs]
28-
num_instances = [int(sum(labels_train_fold == 0)), int(sum(labels_train_fold == 1))]
17+
data_train_fold = dataset.get_data(train_idxs)
18+
num_instances, labels_train_fold = dataset.get_labels(train_idxs)
2919

30-
data_test_fold = data[test_idxs, :]
31-
labels_test_fold = labels[test_idxs]
20+
data_test_fold = dataset.get_data(test_idxs)
21+
_, labels_test_fold = dataset.get_labels(test_idxs)
3222

3323
with tf.Graph().as_default() as graph:
3424

35-
model = ExperimentModel(fisher, num_features, num_instances, None, data_train_fold)
25+
experiment = Experiment(fisher, num_features, num_instances, None, data_train_fold)
3626

3727
with tf.Session() as session:
3828

@@ -41,21 +31,22 @@
4131

4232
log_saver = LogSaver('logs', 'fisher_fold{}'.format(fold_id), session.graph)
4333

44-
train_selected_data = session.run(model.selection_wrapper.selected_data)
45-
test_selected_data = session.run(model.selection_wrapper.select(data_test_fold))
34+
train_selected_data = session.run(experiment.selection_wrapper.selected_data)
35+
test_selected_data = session.run(experiment.selection_wrapper.select(data_test_fold))
4636

4737
tqdm_iter = tqdm(range(num_epochs), desc='Epochs')
4838

4939
for epoch in tqdm_iter:
50-
feed_dict = {model.clf.x: train_selected_data, model.clf.y: labels_train_fold}
51-
loss, _, summary = session.run([model.clf.loss, model.clf.opt, model.clf.summary_op], feed_dict=feed_dict)
40+
feed_dict = {experiment.clf.x: train_selected_data, experiment.clf.y: labels_train_fold}
41+
loss, _ = session.run([experiment.clf.loss, experiment.clf.opt],
42+
feed_dict=feed_dict)
5243

5344
if epoch % eval_every == 0:
54-
summary = session.run(model.clf.summary_op, feed_dict=feed_dict)
45+
summary = session.run(experiment.clf.summary_op, feed_dict=feed_dict)
5546
log_saver.log_train(summary, epoch)
5647

57-
feed_dict = {model.clf.x: test_selected_data, model.clf.y: labels_test_fold}
58-
summary = session.run(model.clf.summary_op, feed_dict=feed_dict)
48+
feed_dict = {experiment.clf.x: test_selected_data, experiment.clf.y: labels_test_fold}
49+
summary = session.run(experiment.clf.summary_op, feed_dict=feed_dict)
5950
log_saver.log_test(summary, epoch)
6051

61-
tqdm_iter.set_postfix(loss='{:.2f}'.format(float(loss)), epoch=epoch)
52+
tqdm_iter.set_postfix(loss='{:.2f}'.format(float(loss)), epoch=epoch)

0 commit comments

Comments
 (0)