This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit f616cd0

Merge pull request #249 from martinpopel/checkpoint-interval
add options save_checkpoints_secs and keep_checkpoint_every_n_hours
2 parents: 2958ac1 + 1df0fe9

File tree

1 file changed (+10 -1 lines)


tensor2tensor/utils/trainer_utils.py

Lines changed: 10 additions & 1 deletion
@@ -70,6 +70,13 @@
                      "How many recent checkpoints to keep.")
 flags.DEFINE_bool("experimental_optimize_placement", False,
                   "Optimize ops placement with experimental session options.")
+flags.DEFINE_integer("keep_checkpoint_every_n_hours", 10000,
+                     "Number of hours between each checkpoint to be saved. "
+                     "The default value of 10,000 hours effectively disables the feature.")
+flags.DEFINE_integer("save_checkpoints_secs", 0,
+                     "Save checkpoints every this many seconds. "
+                     "Default=0 means let tensorflow.contrib.learn.python.learn decide, "
+                     "which is currently equivalent to 600, i.e. 10 minutes.")
 
 # Distributed training flags
 flags.DEFINE_string("master", "", "Address of TensorFlow master.")
@@ -203,7 +210,9 @@ def create_experiment_components(hparams, output_dir, data_dir, model_name):
           model_dir=output_dir,
           gpu_memory_fraction=FLAGS.worker_gpu_memory_fraction,
           session_config=session_config(),
-          keep_checkpoint_max=FLAGS.keep_checkpoint_max))
+          keep_checkpoint_max=FLAGS.keep_checkpoint_max,
+          keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,
+          save_checkpoints_secs=FLAGS.save_checkpoints_secs,))
   # Store the hparams in the estimator as well
   estimator.hparams = hparams
   return estimator, {
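
As an illustration of how a caller could override the new flags before training starts (the values and the direct attribute assignment are assumptions for this sketch, not part of the commit), assuming TF 1.x flag handling:

import tensorflow as tf
from tensor2tensor.utils import trainer_utils  # defines the new flags

FLAGS = tf.flags.FLAGS
FLAGS.save_checkpoints_secs = 300        # write a checkpoint every 5 minutes
FLAGS.keep_checkpoint_every_n_hours = 1  # also keep one checkpoint per hour permanently

# create_experiment_components(...) then passes these values into the RunConfig
# shown above, alongside the existing keep_checkpoint_max flag.

Equivalently, the same values can be supplied on the trainer's command line, e.g. --save_checkpoints_secs=300 --keep_checkpoint_every_n_hours=1.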

0 commit comments
