|
 
 
 use_cuda = torch.cuda.is_available() and False
+torch.backends.cudnn.deterministic = True
 num_mels = 80
 num_freq = 513
 outputs_per_step = 4
@@ -145,13 +146,45 @@ def test_multi_speaker_deepvoice3():
     print("Done:", done.size())
 
 
-@attr("local_only")
+@attr("issue38")
+def test_incremental_path_multiple_times():
+    texts = ["they discarded this for a more completely Roman and far less beautiful letter."]
+    seqs = np.array([text_to_sequence(t) for t in texts])
+    text_positions = np.arange(1, len(seqs[0]) + 1).reshape(1, len(seqs[0]))
+
+    r = 4
+    mel_dim = 80
+    sequence = Variable(torch.LongTensor(seqs))
+    text_positions = Variable(torch.LongTensor(text_positions))
+
+    for model, speaker_ids in [
+        (_get_model(force_monotonic_attention=False), None),
+        (_get_model(force_monotonic_attention=False, n_speakers=32, speaker_embed_dim=16), Variable(torch.LongTensor([1])))]:
+        model.eval()
+
+        # first call
+        mel_outputs, linear_outputs, alignments, done = model(
+            sequence, text_positions=text_positions, speaker_ids=speaker_ids)
+
+        # second call
+        mel_outputs2, linear_outputs2, alignments2, done2 = model(
+            sequence, text_positions=text_positions, speaker_ids=speaker_ids)
+
+        # Should get same result
+        c = (mel_outputs - mel_outputs2).abs()
+        print(c.mean(), c.max())
+
+        assert np.allclose(mel_outputs.cpu().data.numpy(),
+                           mel_outputs2.cpu().data.numpy(), atol=1e-5)
+
+
 def test_incremental_correctness():
     texts = ["they discarded this for a more completely Roman and far less beautiful letter."]
     seqs = np.array([text_to_sequence(t) for t in texts])
     text_positions = np.arange(1, len(seqs[0]) + 1).reshape(1, len(seqs[0]))
 
-    mel = np.load("/home/ryuichi/Dropbox/sp/deepvoice3_pytorch/data/ljspeech/ljspeech-mel-00035.npy")
+    mel_path = join(dirname(__file__), "data", "ljspeech-mel-00001.npy")
+    mel = np.load(mel_path)
     max_target_len = mel.shape[0]
     r = 4
     mel_dim = 80
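The assertion at the end of the new test encodes a simple determinism check: with the model in eval mode, and with cuDNN forced into deterministic mode by the flag added in the first hunk, running the same forward pass twice over identical inputs should give numerically matching outputs. The snippet below is a minimal sketch of that pattern on a toy module; ToyModel, its sizes, and the tolerance are illustrative assumptions rather than part of this commit, and it uses the current tensor API instead of the older Variable wrapper seen in the diff.

import numpy as np
import torch
import torch.nn as nn

# Make cuDNN kernels repeatable when CUDA is in use (mirrors the flag added in this commit).
torch.backends.cudnn.deterministic = True


class ToyModel(nn.Module):
    # Stand-in for the model returned by _get_model(); illustrative only.
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(16, 80)
        self.dropout = nn.Dropout(0.5)  # disabled by model.eval(), so repeated calls agree

    def forward(self, x):
        return self.proj(self.dropout(x))


model = ToyModel()
model.eval()  # inference mode: dropout off, same input should give the same output

x = torch.randn(1, 10, 16)
with torch.no_grad():
    out1 = model(x)  # first call
    out2 = model(x)  # second call

# Both calls should produce the same result within floating-point tolerance.
assert np.allclose(out1.numpy(), out2.numpy(), atol=1e-5)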