Skip to content

Commit a782db7

Browse files
author
Tomasz Latkowski
committed
added experiments
1 parent db9f1f6 commit a782db7

File tree

14 files changed

+195
-20
lines changed

14 files changed

+195
-20
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,5 @@ ENV/
101101
.mypy_cache/
102102

103103
logs
104-
.idea/
104+
.idea/
105+
.pytest_cache/

experiments/__init__.py

Whitespace-only changes.

experiments/classifier.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import tensorflow as tf
2+
3+
4+
def ff_neural_network(inputs, units):
    """
    Build a feed-forward network: one tanh hidden layer followed by a single linear output unit.

    :param inputs: tensor fed into the hidden dense layer.
    :param units: number of units in the hidden layer.
    :return: output tensor of the final dense layer (no activation is applied to it).
    """
    hidden = tf.layers.dense(inputs=inputs, units=units, activation=tf.nn.tanh)
    return tf.layers.dense(inputs=hidden, units=1)
8+
9+
10+
class NeuralNetworkClassifier:
    """
    Binary classifier graph built on top of ``ff_neural_network``.

    Exposes the input/label placeholders (``x``, ``y``), the training ops (``loss``, ``opt``),
    the evaluation tensors (``prediction``, ``predicted_labels``, ``correct_predictions``,
    ``accuracy``) and a merged ``summary_op``.
    """

    def __init__(self, num_features, units):
        """
        :param num_features: width of the input placeholder (second dimension of ``x``).
        :param units: number of hidden units passed to ``ff_neural_network``.
        """
        self.x = tf.placeholder(dtype=tf.float64, shape=[None, num_features], name='inputs')
        self.y = tf.placeholder(dtype=tf.float64, shape=[None, 1], name='labels')

        output = ff_neural_network(self.x, units=units)

        with tf.name_scope('loss'):
            self.loss = tf.losses.sigmoid_cross_entropy(self.y, output)
            self.opt = tf.train.AdamOptimizer(learning_rate=0.01).minimize(self.loss)

        with tf.name_scope('metrics'):
            # Class-1 probabilities; kept as-is for callers that want the raw scores.
            self.prediction = tf.nn.sigmoid(output)

            # Probabilities must be thresholded to hard 0/1 labels before comparing with
            # `y`; comparing the raw sigmoid output with tf.equal is almost never true,
            # which made the reported accuracy meaningless.
            self.predicted_labels = tf.round(self.prediction)
            self.correct_predictions = tf.equal(self.predicted_labels, self.y)
            self.accuracy = tf.reduce_mean(tf.to_float(self.correct_predictions))
            tf.summary.scalar("accuracy", self.accuracy)
            tf.summary.scalar("loss", self.loss)
            # merge_all collects every summary registered in the current graph so far.
            self.summary_op = tf.summary.merge_all()

experiments/dataset.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
class Dataset:
    """Placeholder dataset type; the constructor currently stores nothing."""

    def __init__(self, data, num_classes):
        """
        :param data: accepted but not used yet.
        :param num_classes: accepted but not used yet.
        """
        return None

experiments/experiment.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import tensorflow as tf
2+
3+
from experiments.classifier import NeuralNetworkClassifier
4+
from methods.selection_wrapper import SelectionWrapper
5+
6+
7+
class ExperimentModel:
    """
    Graph for one experiment: a feature-selection stage followed by a neural-network classifier.

    ``selection_wrapper`` holds the SelectionWrapper built under the 'selection' name scope and
    ``clf`` holds the NeuralNetworkClassifier built under the 'classifier' name scope.
    """

    def __init__(self, selection_method, num_features, num_instances, classifier, dataset):
        """
        :param selection_method: callable forwarded to SelectionWrapper.
        :param num_features: forwarded to SelectionWrapper and used as the classifier input width.
        :param num_instances: forwarded to SelectionWrapper.
        :param classifier: accepted but not used; a NeuralNetworkClassifier with 20 hidden
            units is always created.
        :param dataset: data handed to SelectionWrapper as its first argument.
        """
        with tf.name_scope('selection'):
            wrapper = SelectionWrapper(
                dataset,
                num_instances=num_instances,
                selection_method=selection_method,
                num_features=num_features,
            )
        self.selection_wrapper = wrapper

        with tf.name_scope('classifier'):
            network = NeuralNetworkClassifier(num_features, 20)
        self.clf = network

methods/selection.py

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def selection_wrapper(data, num_instances, selection_method=None, num_features=N
1313
num_features = data.get_shape().as_list()[-1]
1414

1515
values, indices = selection_method(data, num_instances, num_features)
16-
return values, tf.gather(data, indices, axis=1)
16+
selected_features = tf.gather(data, indices, axis=1)
17+
return values, selected_features
1718

1819

1920
def fisher(data, num_instances: list, top_k_features=2):
@@ -27,15 +28,20 @@ def fisher(data, num_instances: list, top_k_features=2):
2728
:return: the list of most significant features.
2829
"""
2930
assert len(num_instances) == 2, "Fisher selection method can be performed for two-class problems."
31+
3032
data = tf.convert_to_tensor(data)
3133
_, num_features = data.get_shape().as_list()
3234
if top_k_features > num_features:
3335
top_k_features = num_features
3436
class1, class2 = tf.split(data, num_instances)
35-
mean1, std1 = tf.nn.moments(class1, axes=0)
36-
mean2, std2 = tf.nn.moments(class2, axes=0)
37-
fisher_coeffs = tf.abs(mean1 - mean2) / (std1 + std2)
38-
return tf.nn.top_k(fisher_coeffs, k=top_k_features)
37+
38+
with tf.name_scope('fisher_selection'):
39+
mean1, std1 = tf.nn.moments(class1, axes=0)
40+
mean2, std2 = tf.nn.moments(class2, axes=0)
41+
fisher_coeffs = tf.abs(mean1 - mean2) / (std1 + std2)
42+
selected_features = tf.nn.top_k(fisher_coeffs, k=top_k_features)
43+
44+
return selected_features
3945

4046

4147
def feature_correlation_with_class(data, num_instances: list, top_k_features=10):
@@ -49,11 +55,15 @@ def feature_correlation_with_class(data, num_instances: list, top_k_features=10)
4955
if top_k_features > num_features:
5056
top_k_features = num_features
5157
class1, class2 = tf.split(data, num_instances)
52-
mean1, std1 = tf.nn.moments(class1, axes=0)
53-
mean2, std2 = tf.nn.moments(class2, axes=0)
54-
mean, std = tf.nn.moments(data, axes=0)
55-
corr_coeffs = (tf.square(mean1 - mean) + tf.square(mean2 - mean)) / 2*tf.square(std)
56-
return tf.nn.top_k(corr_coeffs, k=top_k_features)
58+
59+
with tf.name_scope('corr_selection'):
60+
mean1, std1 = tf.nn.moments(class1, axes=0)
61+
mean2, std2 = tf.nn.moments(class2, axes=0)
62+
mean, std = tf.nn.moments(data, axes=0)
63+
corr_coeffs = (tf.square(mean1 - mean) + tf.square(mean2 - mean)) / 2 * tf.square(std)
64+
selected_features = tf.nn.top_k(corr_coeffs, k=top_k_features)
65+
66+
return selected_features
5767

5868

5969
def t_test(data, num_instances: list, top_k_features=10):
@@ -67,7 +77,29 @@ def t_test(data, num_instances: list, top_k_features=10):
6777
if top_k_features > num_features:
6878
top_k_features = num_features
6979
class1, class2 = tf.split(data, num_instances)
70-
mean1, std1 = tf.nn.moments(class1, axes=0)
71-
mean2, std2 = tf.nn.moments(class2, axes=0)
72-
t_test_coeffs = tf.abs(mean1 - mean2) / tf.sqrt(tf.square(std1)/num_instances[0] + tf.square(std2) / num_instances[1])
73-
return tf.nn.top_k(t_test_coeffs, k=top_k_features)
80+
81+
with tf.name_scope('t_test_selection'):
82+
mean1, std1 = tf.nn.moments(class1, axes=0)
83+
mean2, std2 = tf.nn.moments(class2, axes=0)
84+
t_test_coeffs = tf.abs(mean1 - mean2) / tf.sqrt(
85+
tf.square(std1) / num_instances[0] + tf.square(std2) / num_instances[1])
86+
selected_features = tf.nn.top_k(t_test_coeffs, k=top_k_features)
87+
88+
return selected_features
89+
90+
91+
def random(data, num_instances: list, top_k_features=10):
    """
    Random feature selection baseline.

    The previous body was a copy of ``t_test`` and therefore ranked features by their
    t-statistic. Each feature now receives an independent uniform random score and the
    ``top_k_features`` highest-scoring features are returned, which yields distinct,
    uniformly chosen feature indices in the same (values, indices) form as the other
    selection methods.

    NOTE(review): the scores come from a random op, so a different subset is drawn on every
    ``session.run`` unless a graph/op seed is set - confirm this is acceptable for callers.

    :param data: 2-D dataset (instances x features); only its feature count is used.
    :param num_instances: unused here; kept so the signature matches the other selection methods.
    :param top_k_features: number of features to select, clamped to the number of features.
    :return: the result of ``tf.nn.top_k`` over the random scores (values and feature indices).
    """
    data = tf.convert_to_tensor(data)
    _, num_features = data.get_shape().as_list()
    if top_k_features > num_features:
        top_k_features = num_features

    with tf.name_scope('random_selection'):
        random_scores = tf.random_uniform([num_features])
        selected_features = tf.nn.top_k(random_scores, k=top_k_features)

    return selected_features

methods/selection_wrapper.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import tensorflow as tf
2+
3+
4+
class SelectionWrapper:
    """
    Runs a feature-selection method on a dataset and keeps the selected columns.

    ``values`` is the first element returned by the selection method and
    ``selected_features`` is ``data`` gathered along axis 1 at the returned indices.
    """

    def __init__(self, data, num_instances, selection_method=None, num_features=None):
        """
        :param data: dataset to select from; must not be None.
        :param num_instances: forwarded unchanged to ``selection_method``.
        :param selection_method: callable invoked as
            ``selection_method(data, num_instances, num_features)`` and expected to return
            a (values, indices) pair; must not be None.
        :param num_features: third argument for ``selection_method``; when None, the size of
            the last dimension of ``data`` is used.
        :raises ValueError: if ``data`` or ``selection_method`` is None.
        """
        required = (
            (data, 'Provide data to make selection.'),
            (selection_method, 'Provide selection method.'),
        )
        for argument, message in required:
            if argument is None:
                raise ValueError(message)

        if num_features is None:
            data = tf.convert_to_tensor(data)
            num_features = data.get_shape().as_list()[-1]

        scores, column_indices = selection_method(data, num_instances, num_features)
        self.values = scores
        self.selected_features = tf.gather(data, column_indices, axis=1)

run.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import numpy as np
2+
import tensorflow as tf
3+
from utils.log_saver import LogSaver
4+
from experiments.experiment import ExperimentModel
5+
from methods.selection import fisher
6+
from tqdm import tqdm
7+
from sklearn.model_selection import StratifiedKFold
8+
from utils.data_reader import read
9+
10+
11+
# Cross-validated experiment: for every fold, select features with the Fisher score on the
# training part and train the classifier on the selected features, logging summaries per epoch.
data_fn = 'data/autism.tsv'
data = read(data_fn)

num_features = 100
num_epochs = 1000

# Labels are built positionally: the first 82 rows are class 1, the remaining 64 are class 0.
labels = np.concatenate([np.ones(82, dtype=np.float64), np.zeros(64, dtype=np.float64)])
labels = np.reshape(labels, (-1, 1))

skf = StratifiedKFold(n_splits=10)

# ravel() instead of a hard-coded reshape(146) so the script follows the label count.
for fold_id, (train_idxs, test_idxs) in enumerate(skf.split(data, labels.ravel())):

    data_fold = data[train_idxs, :]
    labels_fold = labels[train_idxs]
    # The selection methods split the rows positionally (tf.split), so the class sizes must
    # be listed in row order: class-1 rows come first, then class-0 rows. The previous
    # [count(0), count(1)] order put the split boundary in the wrong place.
    # NOTE(review): relies on the unshuffled split keeping train indices ascending - confirm.
    num_instances = [int(np.count_nonzero(labels_fold == 1)),
                     int(np.count_nonzero(labels_fold == 0))]

    with tf.Graph().as_default() as graph:

        model = ExperimentModel(fisher, num_features, num_instances, None, data_fold)

        with tf.Session() as session:

            session.run(tf.global_variables_initializer())

            log_saver = LogSaver('logs', 'fisher_fold{}'.format(fold_id), session.graph)

            # Feature selection is evaluated once per fold; the classifier trains on its output.
            selected_data = session.run(model.selection_wrapper.selected_features)

            tqdm_iter = tqdm(range(num_epochs), desc='Epochs')

            for epoch in tqdm_iter:
                feed_dict = {model.clf.x: selected_data, model.clf.y: labels_fold}
                loss, _, summary = session.run(
                    [model.clf.loss, model.clf.opt, model.clf.summary_op], feed_dict=feed_dict)
                log_saver.log_train(summary, epoch)
                tqdm_iter.set_postfix(loss='{:.2f}'.format(float(loss)), epoch=epoch)

tests/corr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ def testCorrelationWithClassCorrectScore(self):
88

99

1010
if __name__ == '__main__':
11-
tf.test.main()
11+
tf.test.main()

tests/test_pearson.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
import numpy as np
12
import tensorflow as tf
3+
24
from utils.statistics import pearson_correlation
3-
import numpy as np
45

56

67
class TestPearson(tf.test.TestCase):
@@ -37,4 +38,4 @@ def testPositivePearsonCoefficientValueForTwoVectors(self):
3738

3839

3940
if __name__ == '__main__':
40-
tf.test.main()
41+
tf.test.main()

0 commit comments

Comments
 (0)