
Commit dc90fd9

Propagate constants from functional graphs
Summary:
The TF2XLA bridge is less aggressive. Gather ops generate sparse tensors
which get converted to dense tensors; to do that, the shapes (obtained via
VariableShape) need to be constant, so those constants need to be propagated
out of the functional graphs.

Fixes T43247 (TF2.4 only).

Test Plan: CI

Reviewers: jackh, jakeh, alfiee, samuelh, #tensorflow, #framework_ip_review_-_any_oss_or_third-party_code_use_has_been_approved, davidn

Reviewed By: #tensorflow, #framework_ip_review_-_any_oss_or_third-party_code_use_has_been_approved, davidn

Subscribers: davidn

Maniphest Tasks: T43247

Differential Revision: https://phabricator.sourcevertex.net/D49099
1 parent: 35135d9
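To make the failure mode concrete, here is a minimal sketch of the kind of computation this fixes. It is not part of this commit and uses the public TF2 API rather than the internal IPU modules touched below; `table` and `lookup_grad` are illustrative names (`table` mirrors the [300, 300] float16 variable in the updated pipelining test).

import tensorflow as tf

# Illustrative only, not from this commit. `experimental_compile` is the
# TF 2.4 spelling of the XLA JIT flag.
table = tf.Variable(tf.ones([300, 300], dtype=tf.float16))

@tf.function(experimental_compile=True)
def lookup_grad(indices):
  with tf.GradientTape() as tape:
    loss = tf.reduce_sum(tf.gather(table, indices))
  # The gradient of the gather w.r.t. `table` is a sparse IndexedSlices
  # value; XLA converts it to a dense tensor, which needs the shape of
  # `table` (a VariableShape op) to fold to a compile-time constant.
  return tape.gradient(loss, table)

print(lookup_grad(tf.constant([0, 1, 2])))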

9 files changed (+78, -14 lines)
tensorflow/python/ipu/ops/application_compile_op.py

Lines changed: 1 addition & 1 deletion
@@ -84,7 +84,7 @@ def wrapped_func(*args):
     xla_context = control_flow_ops.XLAControlFlowContext()
     try:
       xla_context.Enter()
-      func_graph, captured_args = _compile_function(
+      func_graph, captured_args, _ = _compile_function(
           wrapped_func,
           inputs,
           scope, [],

tensorflow/python/ipu/ops/functional_ops.py

Lines changed: 52 additions & 2 deletions
@@ -21,11 +21,14 @@
 from tensorflow.compiler.xla import xla_data_pb2
 from tensorflow.core.framework import attr_value_pb2
 from tensorflow.compiler.plugin.poplar.ops import gen_functional_ops
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import func_graph as func_graph_module
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.ipu import scopes
 from tensorflow.python.ops import control_flow_util_v2 as util
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.util import nest


@@ -75,7 +78,7 @@ def decorated(inner_func):
   def func_wrapper(*args):
     args = _convert_to_list(args)
     with ops.name_scope(name) as scope:
-      func_graph, captured_args = _compile_function(
+      func_graph, captured_args, constant_outputs = _compile_function(
           inner_func, args, scope, [], allow_external_captures=True)

       with ops.control_dependencies(list(func_graph.control_captures)):
@@ -87,6 +90,7 @@ def func_wrapper(*args):
             unique_sharding=unique_sharding,
             keep_input_layouts=keep_input_layouts,
             name=name)
+        outputs = _replace_outputs(outputs, constant_outputs)

         # pack_sequence_as requires a list of Tensors, but the gen_ operation
         # returns an Operation under some circumstances (probably when that
@@ -169,7 +173,8 @@ def func_wrapper(*args, **kwargs):
   op._set_shape_list_attr("_xla_inferred_shapes", output_shapes)
   # pylint: enable=protected-access

-  return func_graph, captured_args
+  constant_outputs = _get_constant_outputs(func_graph, captured_args)
+  return func_graph, captured_args, constant_outputs


 def _pack_sequence_as(structured_outputs, op_outputs):
@@ -203,3 +208,48 @@ def _convert_to_list(xs):
   if not isinstance(xs, (list, tuple)):
     return [xs]
   return list(xs)
+
+
+def _get_constant_outputs(func_graph, func_inputs):
+  """Get constant outputs for a functional graph.
+
+  Get constant outputs in order to propagate them in the XLA graph. This
+  includes the `VariableShape` operation, which needs to return a constant."""
+  if not func_graph.outputs:
+    return None
+
+  def get_output_info(output):
+    while output.op.type == "Identity":
+      output = output.op.inputs[0]
+    if constant_op.is_constant(output):
+      # Propagate constants.
+      return constant_op.constant(tensor_util.constant_value(output),
+                                  dtype=output.dtype)
+
+    if output.op.type == "VariableShape":
+      # Propagate variable shapes.
+      # Find the variable inside the function and its input index.
+      var = output.op.inputs[0]
+      assert var.dtype == dtypes.resource
+      index = [
+          i for i, v in enumerate(func_graph.inputs)
+          if v.dtype == dtypes.resource and v is var
+      ]
+      assert len(index) == 1
+      # Get the corresponding outer input variable.
+      outer_var = func_inputs[index[0]]
+      return resource_variable_ops.variable_shape(outer_var,
+                                                  out_type=output.dtype)
+    return None
+
+  return [get_output_info(x) for x in nest.flatten(func_graph.outputs)]
+
+
+def _replace_outputs(outputs, to_replace_with):
+  flat_outputs = nest.flatten(outputs)
+  flat_to_replace_with = nest.flatten(to_replace_with)
+  assert len(flat_outputs) == len(flat_to_replace_with)
+  flat_outputs = [
+      x if y is None else y for x, y in zip(flat_outputs, flat_to_replace_with)
+  ]
+  return nest.pack_sequence_as(outputs, flat_outputs)
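Every caller updated in this commit follows the same three-step pattern around these two helpers; a simplified sketch (`my_func` and the argument plumbing are placeholders, and the exact `gen_functional_ops.function` keyword set varies per call site):

# 1. Compile the Python function into a FuncGraph, now also collecting the
#    outputs that resolve to constants (or to the shape of an outer variable).
func_graph, captured_args, constant_outputs = _compile_function(
    my_func, args, scope, [], allow_external_captures=True)

# 2. Lower the function into the XLA graph via the generated functional op.
outputs = gen_functional_ops.function(
    captured_args,
    to_apply=util.create_new_tf_function(func_graph),
    Tout=func_graph.output_types,
    output_shapes=func_graph.output_shapes)

# 3. Swap each constant-resolvable output for its propagated value; entries
#    that are None in `constant_outputs` pass through unchanged.
outputs = _replace_outputs(outputs, constant_outputs)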

tensorflow/python/ipu/ops/functional_ops_grad.py

Lines changed: 7 additions & 3 deletions
@@ -170,16 +170,19 @@ def _get_gradients_for_function(op, *grads):
   fwd_op._set_shape_list_attr("output_shapes", func_graph.output_shapes)
   fwd_op._add_outputs([t.dtype for t in extra_func_outputs],
                       [t.shape for t in extra_func_outputs])
+  # pylint: enable=protected-access

   func_grad_inputs = _resolve_grad_inputs(func_graph, func_grad_graph, op)
-  # pylint: enable=protected-access
-  return func_grad_graph, func_grad_inputs
+  constant_outputs = functional_ops._get_constant_outputs(  # pylint: disable=protected-access
+      func_grad_graph, func_grad_inputs)
+  return func_grad_graph, func_grad_inputs, constant_outputs


 @ops.RegisterGradient("Function")
 def _function_grad(op, *grads):
   """The gradient of a Function op."""
-  func_grad_graph, func_grad_inputs = _get_gradients_for_function(op, *grads)
+  func_grad_graph, func_grad_inputs, constant_outputs = \
+      _get_gradients_for_function(op, *grads)
   outputs = gen_functional_ops.function(
       func_grad_inputs,
       to_apply=util.create_new_tf_function(func_grad_graph),
@@ -188,6 +191,7 @@ def _function_grad(op, *grads):
       unique_sharding=op.get_attr("unique_sharding"),
       keep_input_layouts=True)

+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access
   return functional_ops._pack_sequence_as(  # pylint: disable=protected-access
       func_grad_graph.structured_outputs, outputs)

tensorflow/python/ipu/ops/nn_ops.py

Lines changed: 3 additions & 1 deletion
@@ -168,7 +168,8 @@ def func_wrapper(*args):

     args = functional_ops._convert_to_list(args)  # pylint: disable=protected-access
     with ops.name_scope("multi_conv") as scope:
-      func_graph, captured_args = functional_ops._compile_function(  # pylint: disable=protected-access
+      func_graph, captured_args, constant_outputs = \
+          functional_ops._compile_function(  # pylint: disable=protected-access
              func_wrapper,
              args,
              scope, [],
@@ -181,6 +182,7 @@ def func_wrapper(*args):
           Tout=func_graph.output_types,
           output_shapes=func_graph.output_shapes,
           option_flags=json_format.MessageToJson(option_proto))
+      outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access

       return functional_ops._pack_sequence_as(  # pylint: disable=protected-access
           func_graph.structured_outputs, outputs)

tensorflow/python/ipu/ops/nn_ops_grad.py

Lines changed: 2 additions & 1 deletion
@@ -49,7 +49,7 @@ def _ipu_swish_grad(op, grad):
 @ops.RegisterGradient("MultiConv")
 def _multi_conv_grad(op, *grads):
   """The gradient of a MultiConv op."""
-  func_grad_graph, func_grad_inputs = \
+  func_grad_graph, func_grad_inputs, constant_outputs = \
       functional_ops_grad._get_gradients_for_function(op, *grads)  # pylint: disable=protected-access
   outputs = gen_functional_ops.multi_conv(
       func_grad_inputs,
@@ -58,6 +58,7 @@ def _multi_conv_grad(op, *grads):
       output_shapes=func_grad_graph.output_shapes,
       option_flags=op.get_attr("option_flags"))

+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access
   return functional_ops._pack_sequence_as(  # pylint: disable=protected-access
       func_grad_graph.structured_outputs, outputs)

tensorflow/python/ipu/ops/pipelining_ops.py

Lines changed: 7 additions & 3 deletions
@@ -1021,7 +1021,8 @@ def resource_update_():
       resource_update_ops.append(enqueue)

   with ops.name_scope(name + "/WU") as scope:
-    func_graph, captured_args = functional_ops._compile_function(  # pylint: disable=protected-access
+    func_graph, captured_args, constant_outputs = \
+        functional_ops._compile_function(  # pylint: disable=protected-access
         resource_update_, [], scope, resource_update_ops, True)

   # Create the pipeline resource update stage and lower the function into XLA.
@@ -1035,6 +1036,7 @@ def resource_update_():
       replicated_optimizer_state_sharding=
       replicated_optimizer_state_sharding,
       num_batches_to_accumulate=gradient_accumulation_count)
+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access

   if not isinstance(outputs, ops.Operation):
     if not outfeed_queue:
@@ -1053,7 +1055,7 @@ def resource_update_():
   with ops.name_scope(name) as scope:
     # pylint: disable=protected-access
     try:
-      func_graph, captured_args = functional_ops._compile_function(
+      func_graph, captured_args, _ = functional_ops._compile_function(
          _pipeline, inputs, scope, control_outputs)
     except functional_ops._InvalidCaptureException as e:
       raise ValueError(
@@ -1212,7 +1214,8 @@ def gradient_override_wrapper(*args, **kwargs):
   with ops.name_scope(name) as scope:
     # pylint: disable=protected-access
     try:
-      func_graph, captured_args = functional_ops._compile_function(
+      func_graph, captured_args, constant_outputs = \
+          functional_ops._compile_function(
          gradient_override_wrapper, args, scope, control_outputs)
     except functional_ops._InvalidCaptureException as e:
       raise ValueError(
@@ -1233,6 +1236,7 @@ def gradient_override_wrapper(*args, **kwargs):
   if isinstance(outputs, ops.Operation):
     return outputs

+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access
   return functional_ops._pack_sequence_as(  # pylint: disable=protected-access
       func_graph.structured_outputs, outputs)
tensorflow/python/ipu/ops/pipelining_ops_grad.py

Lines changed: 2 additions & 1 deletion
@@ -25,7 +25,7 @@
 @ops.RegisterGradient("PipelineStage")
 def _pipeline_stage_grad(op, *grads):
   """The gradient of a PipelineStage op."""
-  func_grad_graph, func_grad_inputs = \
+  func_grad_graph, func_grad_inputs, constant_outputs = \
       functional_ops_grad._get_gradients_for_function(op, *grads)  # pylint: disable=protected-access
   stage_op = op.outputs[0].op
   stage_id = stage_op.get_attr('stage_id')
@@ -37,6 +37,7 @@ def _pipeline_stage_grad(op, *grads):
       output_shapes=func_grad_graph.output_shapes,
       stage_id=stage_id)

+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access
   return functional_ops._pack_sequence_as(  # pylint: disable=protected-access
       func_grad_graph.structured_outputs, outputs)

tensorflow/python/ipu/optimizers/gradient_accumulation_optimizer.py

Lines changed: 3 additions & 1 deletion
@@ -137,7 +137,8 @@ def apply_gradient_accumulation(resource_update_, name, apply_grad_ops,
                                 replicated_optimizer_state_sharding,
                                 num_mini_batches):
   with ops.name_scope(name + "/WU") as scope:
-    func_graph, captured_args = functional_ops._compile_function(  # pylint: disable=protected-access
+    func_graph, captured_args, constant_outputs = \
+        functional_ops._compile_function(  # pylint: disable=protected-access
         resource_update_, [], scope, apply_grad_ops, True)

   # Create the resource update and lower the function into XLA.
@@ -151,6 +152,7 @@ def apply_gradient_accumulation(resource_update_, name, apply_grad_ops,
       replicated_optimizer_state_sharding=
       replicated_optimizer_state_sharding,
       num_batches_to_accumulate=num_mini_batches)
+  outputs = functional_ops._replace_outputs(outputs, constant_outputs)  # pylint: disable=protected-access

   return outputs

tensorflow/python/ipu/tests/pipelining_test.py

Lines changed: 1 addition & 1 deletion
@@ -2276,7 +2276,7 @@ def stage1(indices):
           shape=[300, 300],
           dtype=dtypes.float16,
           initializer=init_ops.ones_initializer())
-      return embedding_ops.embedding_lookup(table, indices)
+      return array_ops.gather(table, indices)

     def identity(*args):
       return args
