sancarlim · sancarlim · Mar 7, 2022 · Mar 7, 2022 · Mar 7, 2022 · Mar 7, 2022
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
diff --git a/CNN_embeddings_projector/read_tsv.py b/CNN_embeddings_projector/read_tsv.py
@@ -1,39 +1,45 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-# File       : read_tsv.py
-# Modified   : 01.02.2022
-# By         : Sandra Carrasco <sandra.carrasco@ai.se>
+# Last Modified   : 01.02.2022
+# By              : Sandra Carrasco <sandra.carrasco@ai.se>
 
 """
 Compute cosine distance between embeddings from tsv file.
 """
 
-import pandas as pd
 import csv
 from scipy.spatial import distance
 import numpy as np
 import matplotlib.pyplot as plt
+from argparse import ArgumentParser
 
-metadata = list(csv.reader(open("/workspace/stylegan2-ada-pytorch/CNN_embeddings_projector/projections_vs_reals_nosprite/00000/default/metadata.tsv"), delimiter="\t"))
-embeddings = list(csv.reader(open("/workspace/stylegan2-ada-pytorch/CNN_embeddings_projector/projections_vs_reals_nosprite/00000/default/tensors.tsv"), delimiter="\t"))
-
-#embeddings already ordered from x1, to x1, from x2, to x2 ....
-distances = []
-for i in range(0,len(embeddings),2):
-    emb_from = list(map(float, embeddings[i]))
-    emb_to = list(map(float, embeddings[i+1]))
-    distances.append( distance.cosine(emb_from,emb_to) )
-
-textfile = open("/workspace/stylegan2-ada-pytorch/CNN_embeddings_projector/projections_vs_reals_nosprite/distances.txt", "w")
-for element in distances:
-    textfile.write(str(element) + "\n")
-textfile.close()
-
-distances = np.array(distances)
-Q1 = np.quantile(distances, 0.25)
-Q2 = np.quantile(distances, 0.5)
-Q3 = np.quantile(distances, 0.75)
-his = plt.hist(distances)
-distances_indeces_ordered = np.argsort(distances) 
-indeces_min_distance = distances_indeces_ordered[:2] # index = img name img0000idx.class.x.from.png
 
+if __name__ == '__main__':
+    # Script entry point: read paired embeddings, save their cosine distances.
+    parser = ArgumentParser()
+    parser.add_argument("--metadata", type=str,
+                        help='path to metadata file')
+    parser.add_argument("--embeddings_path", type=str, default=None,
+                        help='path to embeddings saved as tensors.tsv')
+    parser.add_argument("--save_path", type=str,
+                        help='path to save distances in text file')
+    args = parser.parse_args()
+
+    # Rows are materialized inside each `with` so the files close right away.
+    with open(args.metadata, newline="") as metadata_file:
+        metadata = list(csv.reader(metadata_file, delimiter="\t"))
+    with open(args.embeddings_path, newline="") as embeddings_file:
+        embeddings = list(csv.reader(embeddings_file, delimiter="\t"))
+
+    # embeddings already ordered from x1, to x1, from x2, to x2 ....
+    distances = [distance.cosine(list(map(float, embeddings[i])),
+                                 list(map(float, embeddings[i + 1])))
+                 for i in range(0, len(embeddings), 2)]
+
+    # One distance per line; the file is closed even if a write fails.
+    with open(args.save_path, "w") as textfile:
+        textfile.writelines(str(element) + "\n" for element in distances)
+
+    # Quartiles, histogram and ascending ordering of the distances.
+    distances = np.array(distances)
+    Q1, Q2, Q3 = np.quantile(distances, [0.25, 0.5, 0.75])
+    his = plt.hist(distances)
+    distances_indeces_ordered = np.argsort(distances)
diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ Datasets are stored as uncompressed ZIP archives containing uncompressed PNG fil
 
 Custom datasets can be created from a folder containing images; see [`python dataset_tool.py --help`](./docs/dataset-tool-help.txt) for more information. Alternatively, the folder can also be used directly as a dataset, without running it through `dataset_tool.py` first, but doing so may lead to suboptimal performance.
 
-**ISIC 2020**: Download the [ISIC 2020 dataset](https://www.kaggle.com/c/siim-isic-melanoma-classification) and create ZIP archive:
+**ISIC 2020**: Download the [ISIC 2020 dataset](https://www.kaggle.com/nroman/melanoma-external-malignant-256) and create ZIP archive:
 
 ```.bash
 python dataset_tool.py --source=/tmp/isic-dataset --dest=~/datasets/isic256x256.zip --width=256 --height=256
@@ -146,10 +146,35 @@ python generate.py --outdir=out --projected_w=out/projected_w.npz \
     --class=1 --network=~/pretrained/conditionalGAN.pkl
 ```
 
+## Classification with EfficientNet-B2
+
+In our studies, the generated synthetic images were used in a binary classification task between melanoma and non-melanoma cases. To run training with EfficientNet-B2, use the following command:
+
+```.bash
+python melanoma_classifier.py --syn_data_path=~/generated/  \
+    --real_data_path=~/melanoma-external-malignant-256/ \
+    --synt_n_imgs="0,15"
+```
+
+In the above example, the `--syn_data_path` argument indicates the path to the synthetic images,
+`--real_data_path` the path to the real images, and `--synt_n_imgs` stands for n non-melanoma, k melanoma synthetic images (measured in kimg) to add to the real data. We reported our studies using wandb (use the `--wandb_flag` argument to report accuracy and loss for your own experiments). The `--only_reals` flag enables training on real images only, while `--only_syn` uses all the artificial images from the directory with synthetic images.
+
+To make a diagnosis using a trained model, use the [`predict.py`](predict.py) script.
+
+
+## Visualizing the latent space
+[`embeddings_projector.py`](https://github.com/aidotse/stylegan2-ada-pytorch/blob/main/embeddings_projector.py) performs the two following tasks:
+
+* Project embeddings of a CNN used as feature extractor. (`--use_cnn`)
+
+* Project w-vectors.
+
+This generates a `metadata.tsv`, a `tensors.tsv` and (optionally, using the `--sprite` flag) a sprite of the images. These files can be uploaded to the [Tensorboard Projector](https://www.tensorflow.org/tensorboard/tensorboard_projector_plugin), which graphically represents these embeddings.
+
 ## Measuring authenticity
 
-We additionaly calculated cosine distance between embeddings from tsv file.
-For details see [read_tsv.py`](./CNN_embeddings_projector/read_tsv.py).
+We additionally calculated cosine distances between the CNN embeddings from the tsv file.
+For details see [`read_tsv.py`](./CNN_embeddings_projector/read_tsv.py).
 
 ```.bash
 python ./CNN_embeddings_projector/read_tsv.py --metadata=metadata.tsv \
@@ -165,4 +190,4 @@ This work is made available under the [Nvidia Source Code License](https://nvlab
 
 ## Acknowledgements
 
-The project was developed during the first rotation of the [Eye for AI Program](https://www.ai.se/en/eyeforai) at the AI Competence Center of [Sahlgrenska University Hospital](https://www.sahlgrenska.se/en/). Eye for AI initiative is a global program focused on bringing more international talents into the Swedish AI landscape.
+The project was developed during the first rotation of the [Eye for AI Program](https://www.ai.se/en/eyeforai) at the AI Competence Center of [Sahlgrenska University Hospital](https://www.sahlgrenska.se/en/). Eye for AI initiative is a global program focused on bringing more international talents into the Swedish AI landscape.
diff --git a/create_dataset_json.py b/create_dataset_json.py
diff --git a/docs/stylegan2-ada-teaser-1024x252.png b/docs/stylegan2-ada-teaser-1024x252.png