Allow external functional captures when capturing by value

hakos · georgepaw · commit 09ed82631fda · 2021-08-05T09:24:56.000+01:00
Summary: When the outer FuncGraph captures by value, external non-resource captures are now allowed by default when compiling/constructing an inner FuncGraph. This makes it possible to capture resource variables by their constant tensor values (i.e. to freeze them) when the compile op is used to compile a pipeline op that has resource variables. Fixes T43559. Reviewers: #tensorflow, simonl, #framework_ip_review_-_any_oss_or_third-party_code_use_has_been_approved, georgep Reviewed By: #tensorflow, #framework_ip_review_-_any_oss_or_third-party_code_use_has_been_approved, georgep Subscribers: georgep Maniphest Tasks: T43559 Differential Revision: https://phabricator.sourcevertex.net/D49317
diff --git a/tensorflow/python/ipu/ops/functional_ops.py b/tensorflow/python/ipu/ops/functional_ops.py
@@ -112,17 +112,25 @@ class _InvalidCaptureException(Exception):
   pass
 
 
+def _is_capturing_by_value(graph):
+  return (isinstance(graph, func_graph_module.FuncGraph)
+          and graph.capture_by_value)
+
+
 def _compile_function(func,
                       args,
                       scope,
                       control_outputs,
-                      allow_external_captures=False,
+                      allow_external_captures=None,
                       capture_by_value=None):
   parent_graph = ops.get_default_graph()
   # Automatic control dependencies are added in defuns, but not in v1
   # graphs. Propagate that behavior here.
   add_control_dependencies = parent_graph._add_control_dependencies  # pylint: disable=protected-access
 
+  if allow_external_captures is None:
+    allow_external_captures = _is_capturing_by_value(parent_graph)
+
   # Functions inherit frontend attributes and the gradient override map from the
   # parent graph.
   proto = xla_data_pb2.FrontendAttributes()
diff --git a/tensorflow/python/ipu/tests/application_compile_test.py b/tensorflow/python/ipu/tests/application_compile_test.py
@@ -17,6 +17,7 @@
 import tempfile
 import numpy as np
 
+from absl.testing import parameterized
 from tensorflow.python.client import session
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import dtypes
@@ -26,6 +27,7 @@
 from tensorflow.python.ipu import loops
 from tensorflow.python.ipu.config import DeviceConnectionType
 from tensorflow.python.ipu.config import IPUConfig
+from tensorflow.python.ipu.ops import pipelining_ops
 from tensorflow.python.ipu.ops.application_compile_op import experimental_application_compile_op as application_compile_op
 from tensorflow.python.keras import layers
 from tensorflow.python.ops import array_ops
@@ -36,7 +38,8 @@
 from tensorflow.python.training import gradient_descent
 
 
-class TestApplicationCompile(test_util.TensorFlowTestCase):
+class TestApplicationCompile(test_util.TensorFlowTestCase,
+                             parameterized.TestCase):
   def setUp(self):
     super().setUp()
 
@@ -209,6 +212,39 @@ def my_net(lr):
 
       self.assertGreater(os.path.getsize(compiled_path.decode()), 0)
 
+  @parameterized.named_parameters(("resources", False), ("constants", True))
+  @test_util.deprecated_graph_mode_only
+  def test_compile_pipeline(self, freeze_variables):
+    with session.Session() as sess:
+
+      dataset = dataset_ops.Dataset.from_tensor_slices((np.ones(
+          (10, 5), dtype=np.float32),))
+      dataset = dataset.batch(1, drop_remainder=True)
+      infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset)
+      outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()
+
+      def stage1(offset, x):
+        return layers.Dense(5, activation="relu")(x) + offset
+
+      def stage2(x):
+        return layers.Dense(10, activation="softmax")(x)
+
+      def my_net():
+        return pipelining_ops.pipeline(computational_stages=[stage1, stage2],
+                                       gradient_accumulation_count=4,
+                                       infeed_queue=infeed_queue,
+                                       inputs=[42.0],
+                                       outfeed_queue=outfeed_queue,
+                                       device_mapping=[0, 0])
+
+      result = application_compile_op(my_net,
+                                      freeze_variables=freeze_variables)
+
+      sess.run(variables.global_variables_initializer())
+      compiled_path = sess.run(result)
+
+      self.assertGreater(os.path.getsize(compiled_path.decode()), 0)
+
 
 if __name__ == "__main__":
   test.main()