Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 28eb48f

Browse files
author
Ryan Sepassi
committed
Limit number of concurrent processes in GeneExpressionProblem
PiperOrigin-RevId: 163241281
1 parent 92101af commit 28eb48f

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

tensor2tensor/data_generators/genetics.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
 from __future__ import print_function

 import itertools
+import math
 import multiprocessing as mp
 import os

@@ -54,6 +55,7 @@

 import tensorflow as tf

+MAX_CONCURRENT_PROCESSES = 10
 _bases = list("ACTG")


@@ -122,12 +124,19 @@ def generate_data(self, data_dir, tmp_dir, num_shards=None, task_id=-1):
                   start_idx, end_idx))
         processes.append(p)

-    # Start and wait for processes
+    # Start and wait for processes in batches
     assert len(processes) == num_shards + 2  # 1 per training shard + dev + test
-    for p in processes:
-      p.start()
-    for p in processes:
-      p.join()
+
+    num_batches = int(
+        math.ceil(float(len(processes)) / MAX_CONCURRENT_PROCESSES))
+    for i in xrange(num_batches):
+      start = i * MAX_CONCURRENT_PROCESSES
+      end = start + MAX_CONCURRENT_PROCESSES
+      current = processes[start:end]
+      for p in current:
+        p.start()
+      for p in current:
+        p.join()

     # Shuffle
     generator_utils.shuffle_dataset(all_filepaths)

0 commit comments

Comments
 (0)