
Commit 8ae588c

Updates with Poplar SDK 3.0 release
1 parent ffcfb13

728 files changed: +35814 additions, -7585 deletions


.gitignore

Lines changed: 3 additions & 1 deletion
@@ -14,6 +14,8 @@
 **/data
 **/logs

+**/cifar-10-batches-bin
+
 *.pyc
 __pycache__
 .cache
@@ -37,6 +39,7 @@ vars.capnp

 # Remove VIM temp files
 *.swp
+**/.*.sw[a-p]

 # C++ examples build into the "build" directory
 **/build/
@@ -53,4 +56,3 @@ nohup.*

 utils/triton_server/backends
 !vision/cnns/pytorch/tests/tritonserver/models/*/*/*.json
-
.pre-commit-config.yaml

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+# NOTE: The versions can be updated by calling
+#   pre-commit autoupdate
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.3.0
+    hooks:
+      - id: no-commit-to-branch
+        args: [--branch, master, --branch, main]
+  - repo: https://github.com/pre-commit/mirrors-autopep8
+    rev: v1.6.0  # v1.7.0 is not compatible with python3.6
+    hooks:
+      - id: autopep8
+        args: [--in-place, --list-fixes, --ignore, 'E251,E303,E402,E501,E701,E226,E24,W50,W690']
+  - repo: local
+    hooks:
+      - id: copyright-header-check
+        name: Copyright header check
+        description: Ensures that files have the proper copyright line at the top
+        entry: python3 -m examples_utils test_copyright --amend --exclude_json utils/examples_tests/copyright_header_test_exclude.json
+        pass_filenames: false
+        language: python
+        additional_dependencies:
+          - 'git+https://github.com/graphcore/examples-utils.git@1aded5f35073d93fedcb516ad3782082daba3f87'
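
For context, this configuration is consumed by the pre-commit tool. A minimal usage sketch (not part of this commit; assumes pre-commit is installed in the development environment):

```
pip install pre-commit        # install the tool
pre-commit install            # register the git hooks defined in .pre-commit-config.yaml
pre-commit run --all-files    # run every hook against the whole working tree
pre-commit autoupdate         # bump the pinned hook revisions, as noted in the file header
```

With the hooks installed, the no-commit-to-branch, autopep8 and copyright-header checks run automatically on each `git commit`.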

CODEOWNERS

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+* @graphcore/applications

README.md

Lines changed: 29 additions & 3 deletions
@@ -40,14 +40,15 @@ If you require POD128/256 setup and configuration for our applications, please c
 | Model | Domain | Type |Links |
 | ------- | ------- |------- | ------- |
 | ResNet | Image Classification | Training & Inference | [TensorFlow 1](vision/cnns/tensorflow1/) , [TensorFlow 2](vision/cnns/tensorflow2/), [PyTorch](vision/cnns/pytorch/), [PyTorch Lightning](https://github.com/graphcore/pytorch-lightning-examples/tree/release/applications)|
-| ResNeXt | Image Classification | Training & Inference | [TensorFlow 1](vision/cnns/tensorflow1/) , [PopART (Inference)](vision/resnext_inference/popart)
+| ResNeXt | Image Classification | Training & Inference | [TensorFlow 1](vision/cnns/tensorflow1/) , [PopART (Inference)](vision/resnext_inference/popart), [PyTorch (Inference)](vision/cnns/pytorch/inference)
 | EfficientNet | Image Classification | Training & Inference | [TensorFlow 1](vision/cnns/tensorflow1/) , [PyTorch](vision/cnns/pytorch/), [PyTorch Lightning](https://github.com/graphcore/pytorch-lightning-examples/tree/release/applications)|
 | MobileNet | Image Classification | Inference | [TensorFlow 1](vision/cnns/tensorflow1/inference) |
 | MobileNetv2 | Image Classification | Inference | [TensorFlow 1](vision/cnns/tensorflow1/inference) |
 | MobileNetv3 | Image Classification | Training & Inference | [PyTorch](vision/cnns/pytorch/) |
 | ViT(Vision Transformer) | Image Classification | Training| [PyTorch](vision/vit/pytorch/), [Hugging Face Optimum](https://huggingface.co/Graphcore/vit-base-ipu) |
 | DINO | Image Classification | Training| [PyTorch](vision/dino/pytorch) |
 | Swin | Image Classification | Training | [PyTorch](vision/swin/pytorch) |
+| MAE (Masked AutoEncoder) | Image Classification | Training | [PyTorch](vision/mae/pytorch) |
 | Yolov3 | Object Detection | Training & Inference | [TensorFlow 1](vision/yolo_v3/tensorflow1) |
 | Yolov4-P5 | Object Detection | Inference | [PyTorch](vision/yolo_v4/pytorch) |
 | Faster RCNN | Object Detection | Training & Inference | [PopART](vision/faster_rcnn/popart) |
@@ -66,6 +67,8 @@ If you require POD128/256 setup and configuration for our applications, please c
 | Group BERT | NLP | Training |[TensorFlow 1](nlp/bert/tensorflow1/README.md#GroupBERT_model) |
 | Packed BERT | NLP | Training |[PyTorch](nlp/bert/pytorch), [PopART](nlp/bert/popart) |
 | GPT2 | NLP | Training |[PyTorch](nlp/gpt2/pytorch) , [Hugging Face Optimum](https://huggingface.co/Graphcore/gpt2-medium-ipu) |
+| GPTJ | NLP | Training |[PopXL](nlp/gpt_j/popxl)|
+| GPT3-2.7B | NLP | Training |[PopXL](nlp/gpt3_2.7B/popxl) |
 | RoBERTa | NLP | Training | [Hugging Face Optimum](https://huggingface.co/Graphcore/roberta-large-ipu)|
 | DeBERTa | NLP | Training | [Hugging Face Optimum](https://huggingface.co/Graphcore/deberta-base-ipu)|
 | HuBERT | NLP | Training | [Hugging Face Optimum](https://huggingface.co/Graphcore/hubert-base-ipu)|
@@ -96,7 +99,8 @@ If you require POD128/256 setup and configuration for our applications, please c
 | miniDALL-E | multimodal | Training | [PyTorch](multimodal/mini_dalle/pytorch) |
 | CLIP | multimodal | Training |[PyTorch](multimodal/CLIP/pytorch)|
 | LXMERT | multimodal | Training | [Hugging Face Optimum](https://huggingface.co/Graphcore/lxmert-base-ipu)|
-
+| Frozen in time | multimodal | Training & Inference |[PyTorch](multimodal/frozen_in_time/pytorch)|
+| ruDalle (Preview) | multimodal | Inference |[PopXL](preview/multimodal/rudalle)|

 <br>

@@ -184,6 +188,16 @@ The following applications have been archived. More information can be provided

 <br>

+## Benchmarking tools
+To easily run the examples with tested and optimised configurations and to reproduce the performance shown on our [performance results page](https://www.graphcore.ai/performance-results), you can use the examples-utils benchmarking module, which comes with every example when you install its requirements. To use this simple, shared interface for almost any of the examples provided here, locate and look through the example's `benchmarks.yml` file and run:
+
+```python
+python3 -m examples_utils benchmark --spec <path to benchmarks.yml file> --benchmark <name of benchmark>
+```
+
+For more information on using the examples-utils benchmarking module, please refer to [the README](https://github.com/graphcore/examples-utils/blob/master/examples_utils/benchmarks/README.md).
+
+<br>

 ## PopVision™ Tools
 Visualise your code's inner workings with a user-friendly, graphical interface to optimise your machine learning models.
@@ -193,7 +207,8 @@ Visualise your code's inner workings with a user-friendly, graphical interface t
 <br>

 ## Support
-Please note we are not currently accepting pull requests or issues on this repository. If you are actively using this repository and want to report any issues, please raise a ticket through the [Graphcore support portal](https://support.graphcore.ai/).
+If you encounter a problem or want to suggest an improvement to our examples please raise a Github issue or contact us at
+[support@graphcore.ai](mailto:support@graphcore.ai?subject=General%20Feedback).

 <br>

@@ -211,6 +226,17 @@ Unless otherwise specified by a LICENSE file in a subdirectory, the LICENSE refe
 <br>

 ## Changelog
+
+<details>
+<summary>Sep 2022</summary>
+<br>
+
+* Added those models below to reference models
+* Vision : MAE (PyTorch), G16 EfficientNet (PyTorch)
+* NLP : GPTJ (PopXL), GPT3-2.7B (PopXL)
+* Multimodal : Frozen in time (PyTorchs), ruDalle(Preview) (PopXL)
+</details>
+
 <details>
 <summary>Aug 2022</summary>
 <br>
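
As an illustration of the command added above, a filled-in invocation might look like the following. Both the spec path and the benchmark name are hypothetical placeholders, not taken from this commit; check the `benchmarks.yml` of the example you actually want to run:

```
# Hypothetical example only: spec path and benchmark name are placeholders.
python3 -m examples_utils benchmark \
    --spec vision/cnns/pytorch/benchmarks.yml \
    --benchmark <name of a benchmark listed in that file>
```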

ai_for_simulation/cosmoflow/tensorflow1/README.md

Lines changed: 2 additions & 2 deletions
@@ -33,13 +33,13 @@ This README describes how to run a conv3D based model called CosmoFlow on IPU ha
 - run without tensorflow estimator, with 2 IPUs:
   The workload is heavily IO bound, so merely increasing IPUs without increasing CPU numa-aware threads to pre-process
   the dataset will show marginal scalability. We use poprun to increase threads involved in processing
-  `poprun --num-replicas 2 --num-instances 2 --ipus-per-replica 1 --numa-aware 1 python train.py configs/graphcore.yaml`
+  `poprun --num-replicas 2 --num-instances 2 --ipus-per-replica 1 python train.py configs/graphcore.yaml`

 - run with tensorflow estimator, with 1 IPU:
   `python train.py configs/graphcore.yaml --use-estimator`

 - run with tensorflow estimator, with 2 IPUs:
-  `poprun --num-replicas 2 --num-instances 2 --ipus-per-replica 1 --numa-aware 1 python train.py configs/graphcore.yaml --use-estimator`
+  `poprun --num-replicas 2 --num-instances 2 --ipus-per-replica 1 python train.py configs/graphcore.yaml --use-estimator`

 ## Licensing


ai_for_simulation/deep_drive_md/tensorflow2/cvae/CVAE.py

Lines changed: 8 additions & 6 deletions
@@ -52,11 +52,13 @@ def CVAE(input_shape, steps_per_exec, latent_dim=3):
     return autoencoder


-def create_datasets(x_train, y_train, x_val, y_val, batch_size):
-    train_ds = tf.data.Dataset.from_tensor_slices(x_train).batch(
-        batch_size, drop_remainder=True).repeat().prefetch(16)
-    val_ds = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(
-        batch_size, drop_remainder=True).repeat().prefetch(16)
+def create_datasets(x_train, x_val, batch_size):
+    train_ds = tf.data.Dataset.from_tensor_slices(x_train)
+    train_ds = train_ds.map(lambda x : (x, 0.))  # 0. is a dummy value that will be ignored
+    train_ds = train_ds.batch(batch_size, drop_remainder=True).repeat().prefetch(16)
+    val_ds = tf.data.Dataset.from_tensor_slices(x_val)
+    val_ds = val_ds.map(lambda x : (x, 0.))  # 0. is a dummy value that will be ignored
+    val_ds = val_ds.batch(batch_size, drop_remainder=True).repeat().prefetch(16)

     return train_ds, val_ds

@@ -73,7 +75,7 @@ def run_cvae(hyper_dim=3, epochs=10, batch_size=200, cm_data_input=None, validat
     steps_epoch = len(cm_data_train) // batch_size
     steps_val = len(cm_data_val) // batch_size if validation else None

-    train_ds, val_ds = create_datasets(cm_data_train, cm_data_train, cm_data_val, cm_data_val, batch_size=batch_size)
+    train_ds, val_ds = create_datasets(cm_data_train, cm_data_val, batch_size=batch_size)
     cm_data_train = train_ds
     cm_data_val = val_ds if validation else None

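The change to `create_datasets` pairs every sample with a constant dummy label so that the dataset matches the `(inputs, targets)` structure Keras' `fit()` expects, while the real loss is computed inside the model. A minimal, self-contained sketch of that pattern (toy data shape chosen for illustration, not the repository's code):

```python
# Minimal sketch of the dummy-label mapping used in create_datasets() above.
import numpy as np
import tensorflow as tf

x_train = np.random.rand(32, 8).astype(np.float32)

train_ds = tf.data.Dataset.from_tensor_slices(x_train)
train_ds = train_ds.map(lambda x: (x, 0.))  # pair each sample with a throw-away target
train_ds = train_ds.batch(4, drop_remainder=True).repeat().prefetch(16)

# Each element is now an (inputs, targets) tuple, which is what Keras' fit() expects,
# even though the model computes its real loss internally and ignores the 0. target.
print(train_ds.element_spec)
```
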
ai_for_simulation/deep_drive_md/tensorflow2/cvae/vae_conv_new.py

Lines changed: 35 additions & 60 deletions
@@ -81,6 +81,27 @@ def call(self, inputs):
         return z_mean + K.exp(0.5 * z_log_var) * epsilon


+class ReconstructionLossLayer(tf.keras.layers.Layer):
+
+    def call(self, inputs):
+        data, reconstruction = inputs
+        reconstruction_loss = tf.reduce_mean(
+            tf.reduce_sum(
+                binary_crossentropy(data, reconstruction), axis=(1, 2)
+            )
+        )
+        return reconstruction_loss
+
+
+class KLLossLayer(tf.keras.layers.Layer):
+
+    def call(self, inputs):
+        z_mean, z_log_var = inputs
+        kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
+        kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
+        return kl_loss
+
+
 def encoder_decoder(latent_dim, channels, image_size, feature_maps, filter_shapes, activation,
                     strides, conv_layers, dense_layers, dense_neurons, dense_dropouts, eps_mean,
                     eps_std):
@@ -240,71 +261,23 @@ def __init__(self, image_size, channels, conv_layers, feature_maps, filter_shape
             latent_dim, channels, image_size, feature_maps, filter_shapes, activation,
             strides, conv_layers, dense_layers, dense_neurons, dense_dropouts, eps_mean,
             eps_std)
-        self.total_loss_tracker = metrics.Mean(name="loss")
-        self.reconstruction_loss_tracker = metrics.Mean(
-            name="reconstruction_loss"
-        )
-        self.kl_loss_tracker = metrics.Mean(name="kl_loss")
+        # Overriding train_step() is not supported at the moment, but the VAEs loss calculation requires customization.
+        # Therefore, we define losses as layers so that they could be caclulated in call().
+        self.reconstruction_loss = ReconstructionLossLayer(name='reconstruction')
+        self.kl_loss = KLLossLayer(name='kl')

         self.optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
         self.encoder.set_infeed_queue_options(prefetch_depth=16)
         self.decoder.set_infeed_queue_options(prefetch_depth=16)
-        self.compile(optimizer=self.optimizer, steps_per_execution=steps_per_exec)
-        self.inputs = self.encoder.inputs
         self.build(tf.TensorShape((1, image_size[0], image_size[1], channels)))
+        self.compile(optimizer=self.optimizer, loss=self.dummy_loss, steps_per_execution=steps_per_exec)
+        self.inputs = self.encoder.inputs
         self.summary()

-    @property
-    def metrics(self):
-        return [
-            self.total_loss_tracker,
-            self.reconstruction_loss_tracker,
-            self.kl_loss_tracker,
-        ]
-
-    def train_step(self, data):
-        with tf.GradientTape() as tape:
-            z_mean, z_log_var, z = self.encoder(data)
-            reconstruction = self.decoder(z)
-            reconstruction_loss = tf.reduce_mean(
-                tf.reduce_sum(
-                    binary_crossentropy(data, reconstruction), axis=(1, 2)
-                )
-            )
-            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
-            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
-            total_loss = reconstruction_loss + kl_loss
-        grads = tape.gradient(total_loss, self.trainable_weights)
-        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
-        self.total_loss_tracker.update_state(total_loss)
-        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
-        self.kl_loss_tracker.update_state(kl_loss)
-        return {
-            "loss": self.total_loss_tracker.result(),
-            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
-            "kl_loss": self.kl_loss_tracker.result(),
-        }
-
-    def test_step(self, data):
-        if isinstance(data, tuple):
-            data = data[0]
-
-        z_mean, z_log_var, z = self.encoder(data)
-        reconstruction = self.decoder(z)
-        reconstruction_loss = tf.reduce_mean(
-            tf.reduce_sum(
-                binary_crossentropy(data, reconstruction), axis=(1, 2)
-            )
-        )
-        kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
-        kl_loss = tf.reduce_mean(kl_loss)
-        kl_loss *= -0.5
-        total_loss = reconstruction_loss + kl_loss
-        return {
-            "loss": total_loss,
-            "reconstruction_loss": reconstruction_loss,
-            "kl_loss": kl_loss,
-        }
+    def dummy_loss(self, y_true, y_pred):
+        # y_pred is already the loss since loss is calculated in call(), so y_true (which we defined as 0.) will be ignored
+        loss = y_pred
+        return loss

     def save(self, filepath):
         '''
@@ -374,9 +347,11 @@ def generate(self, embedding):
         return self.decoder(embedding)

     def call(self, inputs):
-        _, _, z = self.encoder(inputs)
+        z_mean, z_log_var, z = self.encoder(inputs)
         reconstruction = self.decoder(z)
-        return reconstruction
+        reconstruction_loss = self.reconstruction_loss([inputs, reconstruction])
+        kl_loss = self.kl_loss([z_mean, z_log_var])
+        return reconstruction_loss, kl_loss

     def train(self, train_data, validation_data, batch_size, epochs, steps_per_epoch=1, validation_steps=1):
         self.fit(
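
The diff above replaces the custom `train_step()`/`test_step()` with loss layers evaluated in `call()` plus a pass-through loss given to `compile()`. A small, self-contained sketch of that general pattern under simplified assumptions (the toy model, shapes, and data below are illustrative only, not the repository's VAE):

```python
# Sketch of "loss computed in call(), identity loss in compile()".
import numpy as np
import tensorflow as tf


class ToyLossModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(4)

    def call(self, inputs):
        # Compute the training objective inside call() instead of train_step();
        # whatever call() returns becomes y_pred for the compiled loss below.
        reconstruction = self.dense(inputs)
        return tf.reduce_mean(tf.square(inputs[:, :4] - reconstruction), axis=-1)


def dummy_loss(y_true, y_pred):
    # y_pred is already the per-sample objective from call(); the dummy y_true is ignored.
    return y_pred


x = np.random.rand(32, 8).astype(np.float32)
ds = tf.data.Dataset.from_tensor_slices(x).map(lambda v: (v, 0.)).batch(4, drop_remainder=True)

model = ToyLossModel()
model.compile(optimizer="adam", loss=dummy_loss)
model.fit(ds, epochs=1)
```

This keeps the standard Keras training loop intact, which is the point of the change when overriding `train_step()` is not available.
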
Lines changed: 2 additions & 1 deletion
@@ -1,2 +1,3 @@
 zip
-unzip
+unzip
+libtiff5

gnn/cluster_gcn/tensorflow2/README.md

Lines changed: 16 additions & 31 deletions
@@ -131,6 +131,22 @@ For example, the following configuration will load the data from or download to
 }
 ```

+## Running and benchmarking
+
+To run a tested and optimised configuration and to reproduce the performance shown on our [performance results page](https://www.graphcore.ai/performance-results), please follow the setup instructions in this README to setup the environment, and then use the `examples_utils` module (installed automatically as part of the environment setup) to run one or more benchmarks. For example:
+
+```python
+python3 -m examples_utils benchmark --spec <path to benchmarks.yml file>
+```
+
+Or to run a specific benchmark in the `benchmarks.yml` file provided:
+
+```python
+python3 -m examples_utils benchmark --spec <path to benchmarks.yml file> --benchmark <name of benchmark>
+```
+
+For more information on using the examples-utils benchmarking module, please refer to [the README](https://github.com/graphcore/examples-utils/blob/master/examples_utils/benchmarks/README.md).
+
 ## Run training and validation <a name='training_validation' ></a>

 ```shell
@@ -171,34 +187,3 @@ Note that the `NUM_INSTANCES` should be divisible by `NUM_REPLICAS`
 and it is recommended to use `EPOCHS_PER_EXECUTION` equal to the `NUM_INSTANCES`
 for best balance between accuracy and performance.

-## Benchmarking
-
-To reproduce the benchmarks, please follow the setup instructions in this README to setup the environment, and then from this dir, use the `examples_utils` module to run one or more benchmarks. For example:
-```
-python3 -m examples_utils benchmark --spec benchmarks.yml
-```
-
-or to run a specific benchmark in the `benchmarks.yml` file provided:
-```
-python3 -m examples_utils benchmark --spec benchmarks.yml --benchmark <benchmark_name>
-```
-
-For more information on how to use the examples_utils benchmark functionality, please see the <a>benchmarking readme<a href=<https://github.com/graphcore/examples-utils/tree/master/examples_utils/benchmarks>
-
-## Profiling
-
-Profiling can be done easily via the `examples_utils` module, simply by adding the `--profile` argument when using the `benchmark` submodule (see the <strong>Benchmarking</strong> section above for further details on use). For example:
-```
-python3 -m examples_utils benchmark --spec benchmarks.yml --profile
-```
-Will create folders containing popvision profiles in this applications root directory (where the benchmark has to be run from), each folder ending with "_profile".
-
-The `--profile` argument works by allowing the `examples_utils` module to update the `POPLAR_ENGINE_OPTIONS` environment variable in the environment the benchmark is being run in, by setting:
-```
-POPLAR_ENGINE_OPTIONS = {
-    "autoReport.all": "true",
-    "autoReport.directory": <current_working_directory>,
-    "autoReport.outputSerializedGraph": "false",
-}
-```
-Which can also be done manually by exporting this variable in the benchmarking environment, if custom options are needed for this variable.
