This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 72b8f7b

T2T Team authored and copybara-github committed
Simplify implementation of sample_temperature_per_example and make it work with dynamic shapes.
PiperOrigin-RevId: 308944400
1 parent 022387c commit 72b8f7b
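
Background sketch (not part of the commit): "per-example" here means each row of a [batch, ...] logits tensor is sampled with its own temperature and its own top-k cutoff, with -1 meaning "no top-k filtering" for that row. A minimal NumPy illustration of per-example temperature scaling, under those assumptions:

import numpy as np

logits = np.random.randn(3, 7)                # [batch, vocab]
temperature = np.array([0.5, 1.0, 2.0])       # one temperature per example
scaled = logits / temperature.reshape(-1, 1)  # broadcast over the vocab axis
scaled -= scaled.max(axis=-1, keepdims=True)  # stabilize the softmax
probs = np.exp(scaled) / np.exp(scaled).sum(axis=-1, keepdims=True)
samples = [np.random.choice(7, p=p) for p in probs]  # one token id per example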

2 files changed: +51, -39 lines changed


tensor2tensor/layers/common_layers.py

Lines changed: 6 additions & 38 deletions

@@ -2886,25 +2886,6 @@ def sample_with_temperature(logits, temperature, sampling_keep_top_k=-1):
   return choices
 
 
-def _to_nd_indices(indices):
-  """Returns indices used for tf.gather_nd or tf.scatter_nd.
-
-  Args:
-    indices: A `Tensor` of shape [batch_size, size] with integer values. The
-      values are the indices of another `Tensor`. For example, `indices` is the
-      output of tf.argsort or tf.math.top_k.
-
-  Returns:
-    A `Tensor` with shape [batch_size, size, 2] that can be used by tf.gather_nd
-      or tf.scatter_nd.
-
-  """
-  indices.get_shape().assert_has_rank(2)
-  batch_ids = tf.ones_like(indices) * tf.expand_dims(
-      tf.range(tf.shape(input=indices)[0]), 1)
-  return tf.stack([batch_ids, indices], axis=-1)
-
-
 def _select_top_k(logits, top_k):
   """Replaces logits, expect the top k highest values, with small number (-1e6).
 
@@ -2918,26 +2899,15 @@ def _select_top_k(logits, top_k):
     A `Tensor` with same shape as logits.
   """
   vocab_size = logits.shape[-1]
-  flat_logits = tf.reshape(logits, [-1, vocab_size])
+
   top_k = tf.where(
       tf.not_equal(top_k, -1), top_k,
       tf.ones_like(top_k) * vocab_size)
-  values, idx = tf.math.top_k(flat_logits, k=vocab_size, sorted=False)
-  nd_idx = _to_nd_indices(idx)
 
-  mask_idx = tf.reshape(
-      tf.range(vocab_size), [1] * (len(logits.shape) - 1) + [-1])
-  for i, size in enumerate(logits.shape[:-1]):
-    mask_idx = tf.repeat(mask_idx, size, axis=i)
-  mask = tf.reshape(
-      mask_idx < tf.reshape(top_k, [-1] + [1] * (len(logits.shape) - 1)), [-1])
-
-  topk_logits = tf.tensor_scatter_nd_update(
-      tf.ones_like(flat_logits) * -1e6,
-      tf.reshape(nd_idx, [-1, 2])[mask],
-      tf.reshape(values, [-1])[mask])
-
-  return tf.reshape(topk_logits, logits.shape)
+  return tf.where(
+      tf.argsort(logits) < tf.reshape(top_k, [-1] + [1] *
+                                      (len(logits.shape) - 1)), logits,
+      tf.ones_like(logits) * -1e6)
 
 
 def sample_temperature_per_example(logits, temperature, sampling_keep_top_k=-1):
@@ -2950,9 +2920,7 @@ def sample_temperature_per_example(logits, temperature, sampling_keep_top_k=-1):
   Returns:
     a Tensor with one fewer dimension than logits.
   """
-  if sampling_keep_top_k != -1:
-    logits = _select_top_k(logits, sampling_keep_top_k)
-
+  logits = _select_top_k(logits, sampling_keep_top_k)
   logits /= tf.reshape(temperature, [-1] + [1] * (len(logits.shape) - 1))
   reshaped_logits = tf.reshape(logits, [-1, shape_list(logits)[-1]])
   choices = tf.multinomial(reshaped_logits, 1)
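
For readers skimming the diff: the rewritten _select_top_k is meant to keep, for each example, only that example's top_k highest logits and push everything else down to -1e6, with top_k == -1 meaning "keep the whole vocabulary", without relying on static batch dimensions. A minimal NumPy reference for that intended behavior (illustrative only; select_top_k_reference is a hypothetical helper, not code from this commit):

import numpy as np

def select_top_k_reference(logits, top_k, mask_value=-1e6):
  """logits: [batch, vocab]; top_k: [batch] ints, where -1 keeps every logit."""
  batch, vocab = logits.shape
  out = np.full_like(logits, mask_value)
  for i in range(batch):
    k = vocab if top_k[i] == -1 else int(top_k[i])
    keep = np.argsort(logits[i])[-k:]   # indices of the k largest logits
    out[i, keep] = logits[i, keep]
  return out

logits = np.array([[0.1, 2.0, -1.0, 0.5],
                   [3.0, 0.0, 1.0, 2.0]])
print(select_top_k_reference(logits, np.array([2, -1])))
# row 0 keeps only its 2 largest logits; row 1 (top_k = -1) is left untouched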

tensor2tensor/layers/common_layers_test.py

Lines changed: 45 additions & 1 deletion

@@ -704,11 +704,55 @@ def testSampleTemperaturePerExample(self):
     logits = np.random.randn(batch_size, seq_len, 1, 1, vocab_size)
     temperature = np.random.rand(batch_size)
 
-    out = common_layers.sample_temperature_per_example(logits, temperature)
+    out = common_layers.sample_temperature_per_example(logits, temperature, -1)
 
     self.assertAllEqual(
         self.evaluate(tf.shape(out)), [batch_size, seq_len, 1, 1])
 
+  @test_utils.run_in_graph_and_eager_modes()
+  def testSampleTemperaturePerExampleWithTopK(self):
+    batch_size = 3
+    seq_len = 5
+    vocab_size = 7
+
+    logits = np.random.randn(batch_size, seq_len, 1, 1, vocab_size)
+    temperature = np.random.rand(batch_size)
+    top_k = np.array([3, -1, 4], dtype=np.int32)
+
+    out = common_layers.sample_temperature_per_example(logits, temperature,
+                                                       top_k)
+
+    self.assertAllEqual(
+        self.evaluate(tf.shape(out)), [batch_size, seq_len, 1, 1])
+
+  @test_utils.run_in_graph_and_eager_modes()
+  def testSampleTemperaturePerExampleWithTopK2(self):
+    batch_size = 3
+    vocab_size = 7
+
+    logits = np.random.randn(batch_size, vocab_size)
+    temperature = np.random.rand(batch_size)
+    top_k = np.array([3, -1, 4], dtype=np.int32)
+
+    out = common_layers.sample_temperature_per_example(logits, temperature,
+                                                       top_k)
+
+    self.assertAllEqual(self.evaluate(tf.shape(out)), [batch_size])
+
+  @test_utils.run_in_graph_mode_only()
+  def testSampleTemperaturePerExampleDynamicBatchSize(self):
+    batch_size = None
+    vocab_size = 7
+
+    logits = tf.placeholder(tf.float32, shape=(batch_size, vocab_size))
+    temperature = tf.placeholder(tf.float32, shape=(batch_size, 1))
+    sampling_keep_top_k = tf.placeholder(tf.int32, shape=(batch_size, 1))
+
+    out = common_layers.sample_temperature_per_example(logits, temperature,
+                                                       sampling_keep_top_k)
+
+    self.assertAllEqual(out.shape.as_list(), [batch_size])
+
   @test_utils.run_in_graph_and_eager_modes()
   def testCycleGANUpsampleNnUpsampleConv(self):
     batch = 8
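
A possible way to exercise the new dynamic-batch-size path end to end, assuming a TF1-style session as in the tests above; the feed values are illustrative and mirror the shapes in testSampleTemperaturePerExampleDynamicBatchSize, not part of the commit:

import numpy as np
import tensorflow as tf
from tensor2tensor.layers import common_layers

logits = tf.placeholder(tf.float32, shape=(None, 7))
temperature = tf.placeholder(tf.float32, shape=(None, 1))
sampling_keep_top_k = tf.placeholder(tf.int32, shape=(None, 1))

samples = common_layers.sample_temperature_per_example(
    logits, temperature, sampling_keep_top_k)

with tf.Session() as sess:
  out = sess.run(samples, feed_dict={
      logits: np.random.randn(3, 7),
      temperature: np.ones((3, 1), dtype=np.float32),
      sampling_keep_top_k: np.array([[3], [-1], [4]], dtype=np.int32),
  })
  print(out.shape)  # expected (3,): one sampled token id per example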
