internal.

Ashish Vaswani · lukaszkaiser · commit 5a06e7a4a4d8 · 2017-07-07T17:27:13.000-07:00
PiperOrigin-RevId: 161130093
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='tensor2tensor',
-    version='1.0.11',
+    version='1.0.10',
     description='Tensor2Tensor',
     author='Google Inc.',
     author_email='no-reply@google.com',
diff --git a/tensor2tensor/data_generators/generator_utils.py b/tensor2tensor/data_generators/generator_utils.py
@@ -46,13 +46,10 @@ def to_example(dictionary):
     elif isinstance(v[0], float):
       features[k] = tf.train.Feature(float_list=tf.train.FloatList(value=v))
     elif isinstance(v[0], six.string_types):
-      v = [bytes(x, 'utf-8') for x in v]
-      features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
-    elif isinstance(v[0], bytes):
       features[k] = tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
     else:
-      raise ValueError("Value for %s is neither an int nor a float; v: %s type: %s" %
-                       (k, str(v[0]), str(type(v[0]))))
+      raise ValueError("Value is neither an int nor a float; v: %s type: %s" %
+                       (str(v[0]), str(type(v[0]))))
   return tf.train.Example(features=tf.train.Features(feature=features))
 
 
diff --git a/tensor2tensor/data_generators/image.py b/tensor2tensor/data_generators/image.py
@@ -68,7 +68,7 @@ def image_generator(images, labels):
         yield {
             "image/encoded": [enc_string],
             "image/format": ["png"],
-            "image/class/label": [int(label)],
+            "image/class/label": [label],
             "image/height": [height],
             "image/width": [width]
         }
diff --git a/tensor2tensor/models/modalities.py b/tensor2tensor/models/modalities.py
@@ -124,6 +124,10 @@ def top(self, body_output, targets):
 class SmallImageModality(modality.Modality):
   """Performs strided conv compressions for small image data."""
 
+  def __init__(self, model_hparams, vocab_size):
+    super(SmallImageModality, self).__init__(model_hparams, vocab_size)
+    self._channels = 3  # channel count; top() splits body_output into this many parts
+
   @property
   def top_dimensionality(self):
     return 256
@@ -161,15 +165,30 @@ def targets_bottom(self, inputs):
 
   def top(self, body_output, _):
     with tf.variable_scope("rgb_softmax"):
-      var = tf.get_variable(
+      # separate embedding for each channel
+      # assuming the body output returns a tensor of shape
+      # [batch_size, rows, cols, channels, self._body_input_depth]
+      body_output_split = tf.split(body_output, self._channels, axis=3)
+      output_rgb_embedding_var = tf.get_variable(
           "output_rgb_embedding",
-          [self.top_dimensionality, self._body_input_depth],
+          [self._channels, self.top_dimensionality, self._body_input_depth],
           initializer=tf.random_normal_initializer(0.0, self._body_input_depth
                                                    **-0.5))
-      body_output = tf.reshape(body_output, [-1, self._body_input_depth])
-      logits = tf.matmul(body_output, var, transpose_b=True)
+      # compute logits separately for each channel
+      rgb_channel_logits = []
+      for i in range(self._channels):
+        shape = tf.shape(body_output_split[i])[:-1]
+        body_output = tf.reshape(body_output_split[i],
+                                 [-1, self._body_input_depth])
+        channel_logits = tf.matmul(body_output,
+                                   output_rgb_embedding_var[i],
+                                   transpose_b=True)
+        rgb_channel_logits.append(tf.reshape(
+            channel_logits, tf.concat([shape, [self.top_dimensionality]],
+                                      0)))
+
+      logits = tf.concat(rgb_channel_logits, axis=3)
       # Reshape logits to conform to CIFAR image shapes (32 by 32 by 3)
-      logits = tf.reshape(logits, [-1, 32, 32, 3, 256])
 
       return logits
 

Original file line number	Diff line number	Diff line change
`@@ -68,7 +68,7 @@ def image_generator(images, labels):`
`68`	`68`	`yield {`
`69`	`69`	`"image/encoded": [enc_string],`
`70`	`70`	`"image/format": ["png"],`
`71`		`- "image/class/label": [int(label)],`
	`71`	`+ "image/class/label": [label],`
`72`	`72`	`"image/height": [height],`
`73`	`73`	`"image/width": [width]`
`74`	`74`	`}`