@@ -305,13 +305,13 @@ def forward(self, encoder_out, inputs=None,
             w = self.key_position_rate
             # TODO: may be useful to have projection per attention layer
             if self.speaker_proj1 is not None:
-                w = w * F.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
             text_pos_embed = self.embed_keys_positions(text_positions, w)
             keys = keys + text_pos_embed
         if frame_positions is not None:
             w = self.query_position_rate
             if self.speaker_proj2 is not None:
-                w = w * F.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
             frame_pos_embed = self.embed_query_positions(frame_positions, w)

         # transpose only once to speed up attention layers
@@ -357,10 +357,10 @@ def forward(self, encoder_out, inputs=None,
         x = x.transpose(1, 2)

         # project to mel-spectorgram
-        outputs = F.sigmoid(x)
+        outputs = torch.sigmoid(x)

         # Done flag
-        done = F.sigmoid(self.fc(x))
+        done = torch.sigmoid(self.fc(x))

         return outputs, torch.stack(alignments), done, decoder_states

@@ -373,7 +373,7 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             w = self.key_position_rate
             # TODO: may be useful to have projection per attention layer
             if self.speaker_proj1 is not None:
-                w = w * F.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj1(speaker_embed)).view(-1)
             text_pos_embed = self.embed_keys_positions(text_positions, w)
             keys = keys + text_pos_embed

@@ -399,7 +399,7 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             frame_pos = keys.data.new(B, 1).fill_(t + 1).long()
             w = self.query_position_rate
             if self.speaker_proj2 is not None:
-                w = w * F.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
+                w = w * torch.sigmoid(self.speaker_proj2(speaker_embed)).view(-1)
             frame_pos_embed = self.embed_query_positions(frame_pos, w)

             if test_inputs is not None:
@@ -457,8 +457,8 @@ def incremental_forward(self, encoder_out, text_positions, speaker_embed=None,
             ave_alignment = ave_alignment.div_(num_attention_layers)

             # Ooutput & done flag predictions
-            output = F.sigmoid(x)
-            done = F.sigmoid(self.fc(x))
+            output = torch.sigmoid(x)
+            done = torch.sigmoid(self.fc(x))

             decoder_states += [decoder_state]
             outputs += [output]
@@ -601,4 +601,4 @@ def forward(self, x, speaker_embed=None):
         # Back to B x T x C
         x = x.transpose(1, 2)

-        return F.sigmoid(x)
+        return torch.sigmoid(x)
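For context: every hunk in this commit replaces the deprecated `F.sigmoid` (`torch.nn.functional.sigmoid`) with `torch.sigmoid`, which is a drop-in replacement that computes the same elementwise logistic function on tensors of any shape, so model outputs and gradients are unchanged. A minimal sketch (not part of the diff) illustrating the equivalence:

```python
import torch

# torch.sigmoid applies 1 / (1 + exp(-x)) elementwise, exactly what the
# deprecated torch.nn.functional.sigmoid used to compute.
x = torch.randn(4, 3)
y = torch.sigmoid(x)
assert torch.allclose(y, 1 / (1 + torch.exp(-x)))
```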