Add EarlyStoppingHook, PlateauOpHook, and MetricsBasedHook base class

Ryan Sepassi · Ryan Sepassi · commit 87bfac5c9773 · 2017-12-21T15:46:07.000-08:00
PiperOrigin-RevId: 179860572
diff --git a/tensor2tensor/bin/t2t-trainer b/tensor2tensor/bin/t2t-trainer
@@ -77,9 +77,6 @@ def create_hparams():
 
 
 def create_experiment_fn():
-  use_validation_monitor = (FLAGS.schedule in
-                            ["train_and_evaluate", "continuous_train_and_eval"]
-                            and FLAGS.local_eval_frequency)
   return tpu_trainer_lib.create_experiment_fn(
       model_name=FLAGS.model,
       problem_name=get_problem_name(),
@@ -92,9 +89,9 @@ def create_experiment_fn():
       decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams),
       use_tfdbg=FLAGS.tfdbg,
       use_dbgprofile=FLAGS.dbgprofile,
-      use_validation_monitor=use_validation_monitor,
       eval_early_stopping_steps=FLAGS.eval_early_stopping_steps,
       eval_early_stopping_metric=FLAGS.eval_early_stopping_metric,
+      eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta,
       eval_early_stopping_metric_minimize=FLAGS.
       eval_early_stopping_metric_minimize,
       use_tpu=FLAGS.use_tpu)
diff --git a/tensor2tensor/bin/t2t_trainer.py b/tensor2tensor/bin/t2t_trainer.py
@@ -76,9 +76,6 @@ def create_hparams():
 
 
 def create_experiment_fn():
-  use_validation_monitor = (FLAGS.schedule in
-                            ["train_and_evaluate", "continuous_train_and_eval"]
-                            and FLAGS.local_eval_frequency)
   return tpu_trainer_lib.create_experiment_fn(
       model_name=FLAGS.model,
       problem_name=get_problem_name(),
@@ -91,9 +88,9 @@ def create_experiment_fn():
       decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams),
       use_tfdbg=FLAGS.tfdbg,
       use_dbgprofile=FLAGS.dbgprofile,
-      use_validation_monitor=use_validation_monitor,
       eval_early_stopping_steps=FLAGS.eval_early_stopping_steps,
       eval_early_stopping_metric=FLAGS.eval_early_stopping_metric,
+      eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta,
       eval_early_stopping_metric_minimize=FLAGS.
       eval_early_stopping_metric_minimize,
       use_tpu=FLAGS.use_tpu)
diff --git a/tensor2tensor/tpu/tpu_trainer.py b/tensor2tensor/tpu/tpu_trainer.py
@@ -76,9 +76,6 @@ def create_hparams():
 
 
 def create_experiment_fn():
-  use_validation_monitor = (FLAGS.schedule in
-                            ["train_and_evaluate", "continuous_train_and_eval"]
-                            and FLAGS.local_eval_frequency)
   return tpu_trainer_lib.create_experiment_fn(
       model_name=FLAGS.model,
       problem_name=get_problem_name(),
@@ -91,9 +88,9 @@ def create_experiment_fn():
       decode_hparams=decoding.decode_hparams(FLAGS.decode_hparams),
       use_tfdbg=FLAGS.tfdbg,
       use_dbgprofile=FLAGS.dbgprofile,
-      use_validation_monitor=use_validation_monitor,
       eval_early_stopping_steps=FLAGS.eval_early_stopping_steps,
       eval_early_stopping_metric=FLAGS.eval_early_stopping_metric,
+      eval_early_stopping_metric_delta=FLAGS.eval_early_stopping_metric_delta,
       eval_early_stopping_metric_minimize=FLAGS.
       eval_early_stopping_metric_minimize,
       use_tpu=FLAGS.use_tpu)
diff --git a/tensor2tensor/tpu/tpu_trainer_lib.py b/tensor2tensor/tpu/tpu_trainer_lib.py
@@ -19,10 +19,13 @@
 from __future__ import division
 from __future__ import print_function
 
+import os
+
 # Dependency imports
 
 from tensor2tensor.utils import devices
 from tensor2tensor.utils import expert_utils
+from tensor2tensor.utils import metrics_hook
 from tensor2tensor.utils import registry
 from tensor2tensor.utils import t2t_model
 
@@ -186,7 +189,8 @@ def create_estimator(model_name,
 
 
 def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None,
-                 use_validation_monitor=False, validation_monitor_kwargs=None):
+                 use_validation_monitor=False, validation_monitor_kwargs=None,
+                 use_early_stopping=False, early_stopping_kwargs=None):
   """Create train and eval hooks for Experiment."""
   train_monitors = []
   eval_hooks = []
@@ -208,6 +212,12 @@ def create_hooks(use_tfdbg=False, use_dbgprofile=False, dbgprofile_kwargs=None,
         tf.contrib.learn.monitors.ValidationMonitor(
             hooks=eval_hooks, **validation_monitor_kwargs))
 
+  if use_early_stopping:
+    hook = metrics_hook.EarlyStoppingHook(**early_stopping_kwargs)
+    # Adding to both training and eval so that eval aborts as well
+    train_monitors.append(hook)
+    eval_hooks.append(hook)
+
   return train_monitors, eval_hooks
 
 
@@ -224,9 +234,9 @@ def create_experiment(run_config,
                       decode_hparams=None,
                       use_tfdbg=False,
                       use_dbgprofile=False,
-                      use_validation_monitor=False,
                       eval_early_stopping_steps=None,
                       eval_early_stopping_metric=None,
+                      eval_early_stopping_metric_delta=None,
                       eval_early_stopping_metric_minimize=True,
                       use_tpu=False):
   """Create Experiment."""
@@ -264,12 +274,29 @@ def create_experiment(run_config,
         early_stopping_rounds=eval_early_stopping_steps,
         early_stopping_metric=eval_early_stopping_metric,
         early_stopping_metric_minimize=eval_early_stopping_metric_minimize)
+    early_stopping_kwargs = dict(
+        events_dir=os.path.join(run_config.model_dir, "eval_continuous"),
+        tag=eval_early_stopping_metric,
+        num_plateau_steps=eval_early_stopping_steps,
+        plateau_decrease=eval_early_stopping_metric_minimize,
+        plateau_delta=eval_early_stopping_metric_delta,
+        every_n_steps=min_eval_frequency)
+
+    # In-process eval (and possible early stopping)
+    local_schedules = ["train_and_evaluate", "continuous_train_and_eval"]
+    use_validation_monitor = (
+        schedule in local_schedules and min_eval_frequency)
+    # Distributed early stopping
+    use_early_stopping = (
+        schedule not in local_schedules and eval_early_stopping_steps)
     train_monitors, eval_hooks = create_hooks(
         use_tfdbg=use_tfdbg,
         use_dbgprofile=use_dbgprofile,
         dbgprofile_kwargs=dbgprofile_kwargs,
         use_validation_monitor=use_validation_monitor,
-        validation_monitor_kwargs=validation_monitor_kwargs)
+        use_early_stopping=use_early_stopping,
+        validation_monitor_kwargs=validation_monitor_kwargs,
+        early_stopping_kwargs=early_stopping_kwargs)
     hooks_kwargs = {"train_monitors": train_monitors, "eval_hooks": eval_hooks}
 
   # Experiment
diff --git a/tensor2tensor/tpu/tpu_trainer_lib_test.py b/tensor2tensor/tpu/tpu_trainer_lib_test.py
@@ -68,7 +68,8 @@ def testExperiment(self):
         eval_steps=1,
         min_eval_frequency=1,
         use_tpu=False)
-    run_config = tpu_trainer_lib.create_run_config(num_gpus=0, use_tpu=False)
+    run_config = tpu_trainer_lib.create_run_config(
+        model_dir=self.data_dir, num_gpus=0, use_tpu=False)
     hparams = registry.hparams("transformer_tiny_tpu")()
     exp = exp_fn(run_config, hparams)
     exp.test()
diff --git a/tensor2tensor/utils/flags.py b/tensor2tensor/utils/flags.py
@@ -55,14 +55,14 @@
 flags.DEFINE_integer("train_steps", 250000,
                      "The number of steps to run training for.")
 flags.DEFINE_string("eval_early_stopping_metric", "loss",
-                    "If --schedule=train_and_evaluate and "
-                    "--eval_early_stopping_steps is not None, then stop when "
-                    "--eval_early_stopping_metric has not decreased for "
+                    "If --eval_early_stopping_steps is not None, then stop "
+                    "when --eval_early_stopping_metric has not decreased for "
                     "--eval_early_stopping_steps")
+flags.DEFINE_float("eval_early_stopping_metric_delta", 0.1,
+                   "Delta determining whether metric has plateaued.")
 flags.DEFINE_integer("eval_early_stopping_steps", None,
-                     "If --schedule=train_and_evaluate and "
-                     "--eval_early_stopping_steps is not None, then stop when "
-                     "--eval_early_stopping_metric has not decreased for "
+                     "If --eval_early_stopping_steps is not None, then stop "
+                     "when --eval_early_stopping_metric has not decreased for "
                      "--eval_early_stopping_steps")
 flags.DEFINE_bool("eval_early_stopping_metric_minimize", True,
                   "Whether to check for the early stopping metric going down "
diff --git a/tensor2tensor/utils/metrics_hook.py b/tensor2tensor/utils/metrics_hook.py
diff --git a/tensor2tensor/utils/metrics_hook_test.py b/tensor2tensor/utils/metrics_hook_test.py