# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

import popdist
import popdist.tensorflow

import tensorflow as tf
from tensorflow.python import ipu
from tensorflow.python.framework import constant_op, test_util
from tensorflow.python.ipu.horovod import ipu_multi_replica_strategy
from tensorflow.python.platform import test
from tensorflow.python.ipu import horovod as hvd


class DistributedTF2Test(test_util.TensorFlowTestCase):
  def assert_all_instances_equal(self, local_value, name=None):
    """Assert that the current instance has the same value as the root instance."""
    local_tensor = constant_op.constant(local_value)
    root_tensor = hvd.broadcast(local_tensor, root_rank=0)
    np.testing.assert_equal(local_value, root_tensor.numpy(), name)

  def test_tf2_distributed(self):
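    # Configure the IPU system according to the popdist settings for this
    # instance, with one IPU per replica.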
    config = ipu.config.IPUConfig()
    popdist.tensorflow.set_ipu_config(config, ipus_per_replica=1)
    config.configure_ipu_system()

    # Initialise Horovod, which provides the cross-instance broadcast used in
    # assert_all_instances_equal.
    hvd.init()

    # Strategy that runs the model on every replica across all instances and
    # reduces over all of them.
    strategy = ipu_multi_replica_strategy.IPUMultiReplicaStrategy()

    def generator():
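      # Yield 100 random 32x32x1 samples with integer labels in [1, 10).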
      for _ in range(100):
        yield np.random.rand(32, 32, 1), np.random.randint(1, 10, size=1)

    dataset = tf.data.Dataset.from_generator(
        generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=((32, 32, 1), (1,)),
    )

    options = tf.data.Options()
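    # Turn off automatic sharding; the dataset is sharded explicitly below so
    # that each instance reads a distinct subset of the data.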
    options.experimental_distribute.auto_shard_policy = \
        tf.data.experimental.AutoShardPolicy.OFF
    dataset = dataset.with_options(options)

    dataset = dataset.shard(num_shards=popdist.getNumInstances(),
                            index=popdist.getInstanceIndex())
    dataset = dataset.batch(10, drop_remainder=True)

    with strategy.scope():
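      # Build, compile and train a small CNN classifier inside the strategy
      # scope.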
      model = tf.keras.models.Sequential([
          tf.keras.layers.Conv2D(32, 3, activation='relu'),
          tf.keras.layers.MaxPooling2D(),
          tf.keras.layers.Conv2D(32, 3, activation='relu'),
          tf.keras.layers.MaxPooling2D(),
          tf.keras.layers.Flatten(),
          tf.keras.layers.Dense(32, activation='relu'),
          tf.keras.layers.Dense(10),
      ])

      optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
      loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

      # Run one step per replica in each execution and train for a single
      # short epoch.
      model.compile(optimizer=optimizer,
                    loss=loss_fn,
                    steps_per_execution=popdist.getNumTotalReplicas())
      history = model.fit(dataset,
                          steps_per_epoch=popdist.getNumTotalReplicas(),
                          epochs=1)

      # Make sure the losses and weights are identical, as we reduce over all
      # IPUs.
      self.assert_all_instances_equal(history.history['loss'])

      for v in model.trainable_variables:
        self.assert_all_instances_equal(v)


if __name__ == "__main__":
  test.main()