Rename config to hparams.

Błażej O · Błażej O · commit 1bb6fb4d1984 · 2018-02-10T00:17:07.000+01:00
diff --git a/tensor2tensor/rl/collect.py b/tensor2tensor/rl/collect.py
@@ -18,9 +18,9 @@
 import tensorflow as tf
 
 
-def define_collect(policy_factory, batch_env, config):
+def define_collect(policy_factory, batch_env, hparams):
   """Collect trajectories."""
-  memory_shape = [config.epoch_length] + [batch_env.observ.shape.as_list()[0]]
+  memory_shape = [hparams.epoch_length] + [batch_env.observ.shape.as_list()[0]]
   memories_shapes_and_types = [
       # observation
       (memory_shape + [batch_env.observ.shape.as_list()[1]], tf.float32),
@@ -34,11 +34,11 @@ def define_collect(policy_factory, batch_env, config):
   memory = [tf.Variable(tf.zeros(shape, dtype), trainable=False)
             for (shape, dtype) in memories_shapes_and_types]
   cumulative_rewards = tf.Variable(
-      tf.zeros(config.num_agents, tf.float32), trainable=False)
+      tf.zeros(hparams.num_agents, tf.float32), trainable=False)
 
   should_reset_var = tf.Variable(True, trainable=False)
   reset_op = tf.cond(should_reset_var,
-                     lambda: batch_env.reset(tf.range(config.num_agents)),
+                     lambda: batch_env.reset(tf.range(hparams.num_agents)),
                      lambda: 0.0)
   with tf.control_dependencies([reset_op]):
     reset_once_op = tf.assign(should_reset_var, False)
@@ -59,7 +59,7 @@ def step(index, scores_sum, scores_num):
       pdf = policy.prob(action)[0]
       with tf.control_dependencies(simulate_output):
         reward, done = simulate_output
-        done = tf.reshape(done, (config.num_agents,))
+        done = tf.reshape(done, (hparams.num_agents,))
         to_save = [obs_copy, reward, done, action[0, ...], pdf,
                    actor_critic.value[0]]
         save_ops = [tf.scatter_update(memory_slot, index, value)
@@ -83,7 +83,7 @@ def step(index, scores_sum, scores_num):
 
     init = [tf.constant(0), tf.constant(0.0), tf.constant(0)]
     index, scores_sum, scores_num = tf.while_loop(
-        lambda c, _1, _2: c < config.epoch_length,
+        lambda c, _1, _2: c < hparams.epoch_length,
         step,
         init,
         parallel_iterations=1,