File tree (Expand file tree / Collapse file tree): 1 file changed, +10 -0 lines changed
tensorflow/compiler/plugin/poplar/driver (Expand file tree / Collapse file tree): 1 file changed, +10 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -1624,6 +1624,8 @@ StatusOr<std::unique_ptr<PoplarExecutableCore>> CompileEngine(
16241624
16251625 const auto num_local_ipus = poplar_executor->GetNumIpusInLocalProcess (target);
16261626 const auto local_replication_factor = num_local_ipus / num_shards;
1627+ const auto replica_group_size =
1628+ poplar_executor->ExperimentalDistributedBatchNormReplicaGroupSize ();
16271629
16281630 if (num_local_ipus % num_shards) {
16291631 return xla::InternalErrorStrCat (
@@ -1632,6 +1634,14 @@ StatusOr<std::unique_ptr<PoplarExecutableCore>> CompileEngine(
16321634 " The number of shards needs to divide the number of local IPUs." );
16331635 }
16341636
1637+ if (replica_group_size && replication_factor % replica_group_size) {
1638+ return xla::InternalErrorStrCat (
1639+ " The number of replicas (" , replication_factor,
1640+ " ) must be divisible by" ,
1641+ " distributed_batch_norm_replica_group_size (" , replica_group_size,
1642+ " )." );
1643+ }
1644+
16351645 // Currently we only support performing replica partitioning across the local
16361646 // replicas in each process, as this allows access to all the parts of a
16371647 // partitioned remote buffer locally. This means that copying to/from all the
You can’t perform that action at this time.
0 commit comments