@@ -70,6 +70,13 @@
                      "How many recent checkpoints to keep.")
 flags.DEFINE_bool("experimental_optimize_placement", False,
                   "Optimize ops placement with experimental session options.")
+flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000,
+                     "Number of hours between each checkpoint to be saved. "
+                     "The default value of 10,000 hours effectively disables the feature.")
+flags.DEFINE_integer("save_checkpoints_secs", 0,
+                     "Save checkpoints every this many seconds. "
+                     "Default=0 means let tensorflow.contrib.learn.python.learn decide, "
+                     "which is currently equivalent to 600, i.e. 10 minutes.")
 
 # Distributed training flags
 flags.DEFINE_string("master", "", "Address of TensorFlow master.")
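
For context (a sketch, not part of this diff): the two new flags mirror the
retention arguments of tf.train.Saver, which tf.contrib.learn.RunConfig passes
through to the Saver it builds. A minimal TensorFlow 1.x illustration of the
same knobs at the Saver level, with illustrative values rather than the flag
defaults:

    import tensorflow as tf

    # A dummy variable so the Saver has something to save.
    w = tf.Variable(0.0, name="w")

    saver = tf.train.Saver(
        max_to_keep=20,                   # counterpart of keep_checkpoint_max
        keep_checkpoint_every_n_hours=2)  # also keep one checkpoint per 2 hours

Beyond the max_to_keep most recent checkpoints, the Saver retains one
checkpoint from every 2-hour window; the flag default of 10,000 hours makes
that extra retention effectively never trigger, as the help text notes.
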
@@ -203,7 +210,9 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name):
           model_dir=output_dir,
           gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction,
           session_config=session_config(),
-          keep_checkpoint_max=FLAGS.keep_checkpoint_max))
+          keep_checkpoint_max=FLAGS.keep_checkpoint_max,
+          keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
+          save_checkpoints_secs=FLAGS.save_checkpoints_secs))
   # Store the hparams in the estimator as well
   estimator.hparams = hparams
   return estimator, {
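
The wiring above means both retention knobs, plus the checkpoint interval,
reach the Estimator through its RunConfig. A hypothetical stand-alone
construction with the same parameters (TensorFlow 1.x, tf.contrib.learn;
the model_dir path and values below are illustrative, not this PR's defaults):

    import tensorflow as tf

    run_config = tf.contrib.learn.RunConfig(
        model_dir="/tmp/t2t_train",       # hypothetical output directory
        keep_checkpoint_max=20,           # retain the 20 most recent checkpoints
        keep_checkpoint_every_n_hours=2,  # plus one long-lived checkpoint per 2 hours
        save_checkpoints_secs=300)        # write a checkpoint every 5 minutes

Passing --save_checkpoints_secs=300 --keep_checkpoint_every_n_hours=2 to the
trainer would have the same effect: a checkpoint every five minutes, with one
checkpoint from each two-hour window kept past the keep_checkpoint_max limit.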