Skip to content

Commit a758ba4

Browse files
committed
update comments and the functions about PNE()
1 parent d011342 commit a758ba4

File tree

2 files changed

+57
-50
lines changed

2 files changed

+57
-50
lines changed

src/decoder/lattice-faster-decoder-combine.cc

Lines changed: 39 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ LatticeFasterDecoderCombineTpl<FST, Token>::LatticeFasterDecoderCombineTpl(
4141
const LatticeFasterDecoderCombineConfig &config, FST *fst):
4242
fst_(fst), delete_fst_(true), config_(config), num_toks_(0) {
4343
config.Check();
44+
prev_toks_.reserve(1000);
45+
cur_toks_.reserve(1000);
4446
}
4547

4648

@@ -53,8 +55,8 @@ LatticeFasterDecoderCombineTpl<FST, Token>::~LatticeFasterDecoderCombineTpl() {
5355
template <typename FST, typename Token>
5456
void LatticeFasterDecoderCombineTpl<FST, Token>::InitDecoding() {
5557
// clean up from last time:
58+
prev_toks_.clear();
5659
cur_toks_.clear();
57-
next_toks_.clear();
5860
cost_offsets_.clear();
5961
ClearActiveTokens();
6062

@@ -67,7 +69,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::InitDecoding() {
6769
active_toks_.resize(1);
6870
Token *start_tok = new Token(0.0, 0.0, NULL, NULL, NULL);
6971
active_toks_[0].toks = start_tok;
70-
next_toks_[start_state] = start_tok; // initialize current tokens map
72+
cur_toks_[start_state] = start_tok; // initialize current tokens map
7173
num_toks_++;
7274
}
7375

@@ -87,9 +89,7 @@ bool LatticeFasterDecoderCombineTpl<FST, Token>::Decode(DecodableInterface *deco
8789
PruneActiveTokens(config_.lattice_beam * config_.prune_scale);
8890
ProcessForFrame(decodable);
8991
}
90-
// Procss non-emitting arcs for the last frame.
91-
ProcessNonemitting(NULL);
92-
92+
// A complete token list of the last frame will be generated in FinalizeDecoding()
9393
FinalizeDecoding();
9494

9595
// Returns true if we have any kind of traceback available (not necessarily
@@ -126,11 +126,10 @@ bool LatticeFasterDecoderCombineTpl<FST, Token>::GetRawLattice(
126126
KALDI_ERR << "You cannot call FinalizeDecoding() and then call "
127127
<< "GetRawLattice() with use_final_probs == false";
128128

129-
std::unordered_map<Token*, BaseFloat> *recover_map = NULL;
129+
std::unordered_map<Token*, BaseFloat> recover_map;
130130
if (!decoding_finalized_) {
131-
recover_map = new std::unordered_map<Token*, BaseFloat>();
132131
// Process the non-emitting arcs for the unfinished last frame.
133-
ProcessNonemitting(recover_map);
132+
ProcessNonemitting(&recover_map);
134133
}
135134

136135

@@ -202,9 +201,8 @@ bool LatticeFasterDecoderCombineTpl<FST, Token>::GetRawLattice(
202201
}
203202
}
204203

205-
if (recover_map) { // recover last token list
204+
if (!decoding_finalized_) { // recover last token list
206205
RecoverLastTokenList(recover_map);
207-
delete recover_map;
208206
}
209207
return (ofst->NumStates() > 0);
210208
}
@@ -217,13 +215,13 @@ bool LatticeFasterDecoderCombineTpl<FST, Token>::GetRawLattice(
217215
// will not be affected.
218216
template<typename FST, typename Token>
219217
void LatticeFasterDecoderCombineTpl<FST, Token>::RecoverLastTokenList(
220-
std::unordered_map<Token*, BaseFloat> *recover_map) {
221-
if (recover_map) {
218+
const std::unordered_map<Token*, BaseFloat> &recover_map) {
219+
if (!recover_map.empty()) {
222220
for (Token* tok = active_toks_[active_toks_.size() - 1].toks;
223221
tok != NULL;) {
224-
if (recover_map->find(tok) != recover_map->end()) {
222+
if (recover_map.find(tok) != recover_map.end()) {
225223
DeleteForwardLinks(tok);
226-
tok->tot_cost = (*recover_map)[tok];
224+
tok->tot_cost = recover_map.find(tok)->second;
227225
tok->in_current_queue = false;
228226
tok = tok->next;
229227
} else {
@@ -588,8 +586,8 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ComputeFinalCosts(
588586
BaseFloat best_cost = infinity,
589587
best_cost_with_final = infinity;
590588

591-
// The final tokens are recorded in unordered_map "next_toks_".
592-
for (IterType iter = next_toks_.begin(); iter != next_toks_.end(); iter++) {
589+
// The final tokens are recorded in unordered_map "cur_toks_".
590+
for (IterType iter = cur_toks_.begin(); iter != cur_toks_.end(); iter++) {
593591
StateId state = iter->first;
594592
Token *tok = iter->second;
595593
BaseFloat final_cost = fst_->Final(state).Value();
@@ -658,7 +656,6 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::AdvanceDecoding(
658656
}
659657
ProcessForFrame(decodable);
660658
}
661-
ProcessNonemitting(NULL);
662659
}
663660

664661
// FinalizeDecoding() is a version of PruneActiveTokens that we call
@@ -686,7 +683,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::FinalizeDecoding() {
686683
template <typename FST, typename Token>
687684
BaseFloat LatticeFasterDecoderCombineTpl<FST, Token>::GetCutoff(
688685
const StateIdToTokenMap &toks, BaseFloat *adaptive_beam,
689-
StateId *best_elem_id, Token **best_elem) {
686+
StateId *best_state_id, Token **best_token) {
690687
// positive == high cost == bad.
691688
// best_weight is the minimum value.
692689
BaseFloat best_weight = std::numeric_limits<BaseFloat>::infinity();
@@ -696,9 +693,9 @@ BaseFloat LatticeFasterDecoderCombineTpl<FST, Token>::GetCutoff(
696693
BaseFloat w = static_cast<BaseFloat>(iter->second->tot_cost);
697694
if (w < best_weight) {
698695
best_weight = w;
699-
if (best_elem) {
700-
*best_elem_id = iter->first;
701-
*best_elem = iter->second;
696+
if (best_token) {
697+
*best_state_id = iter->first;
698+
*best_token = iter->second;
702699
}
703700
}
704701
}
@@ -711,9 +708,9 @@ BaseFloat LatticeFasterDecoderCombineTpl<FST, Token>::GetCutoff(
711708
tmp_array_.push_back(w);
712709
if (w < best_weight) {
713710
best_weight = w;
714-
if (best_elem) {
715-
*best_elem_id = iter->first;
716-
*best_elem = iter->second;
711+
if (best_token) {
712+
*best_state_id = iter->first;
713+
*best_token = iter->second;
717714
}
718715
}
719716
}
@@ -722,8 +719,8 @@ BaseFloat LatticeFasterDecoderCombineTpl<FST, Token>::GetCutoff(
722719
min_active_cutoff = std::numeric_limits<BaseFloat>::infinity(),
723720
max_active_cutoff = std::numeric_limits<BaseFloat>::infinity();
724721

725-
KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded()
726-
<< " is " << tmp_array_.size();
722+
KALDI_VLOG(6) << "Number of emitting tokens on frame "
723+
<< NumFramesDecoded() - 1 << " is " << tmp_array_.size();
727724

728725
if (tmp_array_.size() > static_cast<size_t>(config_.max_active)) {
729726
std::nth_element(tmp_array_.begin(),
@@ -766,9 +763,9 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
766763
// from the decodable object.
767764
active_toks_.resize(active_toks_.size() + 1);
768765

766+
prev_toks_.swap(cur_toks_);
769767
cur_toks_.clear();
770-
cur_toks_.swap(next_toks_);
771-
if (cur_toks_.empty()) {
768+
if (prev_toks_.empty()) {
772769
if (!warned_) {
773770
KALDI_WARN << "Error, no surviving tokens on frame " << frame;
774771
warned_ = true;
@@ -780,7 +777,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
780777
StateId best_tok_state_id;
781778
// "cur_cutoff" is used to constrain the epsilon emission in current frame.
782779
// It will not be updated.
783-
BaseFloat cur_cutoff = GetCutoff(cur_toks_, &adaptive_beam,
780+
BaseFloat cur_cutoff = GetCutoff(prev_toks_, &adaptive_beam,
784781
&best_tok_state_id, &best_tok);
785782
KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is "
786783
<< adaptive_beam;
@@ -801,7 +798,8 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
801798
// Notice: As the difference between the combine version and the traditional
802799
// version, this "best_tok" is chosen from emission tokens. Normally, the
803800
// best token of one frame comes from an epsilon non-emittion. So the best
804-
// token is a looser boundary. Use it to estimate a bound on the next cutoff.
801+
// token is a looser boundary. We use it to estimate a bound on the next
802+
// cutoff and we will update the "next_cutoff" once we have better tokens.
805803
// The "next_cutoff" will be updated in further processing.
806804
if (best_tok) {
807805
cost_offset = - best_tok->tot_cost;
@@ -827,7 +825,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
827825

828826
// Build a queue which contains the emission tokens from previous frame.
829827
std::vector<StateId> cur_queue;
830-
for (IterType iter = cur_toks_.begin(); iter != cur_toks_.end(); iter++) {
828+
for (IterType iter = prev_toks_.begin(); iter != prev_toks_.end(); iter++) {
831829
cur_queue.push_back(iter->first);
832830
iter->second->in_current_queue = true;
833831
}
@@ -837,9 +835,11 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
837835
StateId state = cur_queue.back();
838836
cur_queue.pop_back();
839837

840-
KALDI_ASSERT(cur_toks_.find(state) != cur_toks_.end());
841-
Token *tok = cur_toks_[state];
838+
KALDI_ASSERT(prev_toks_.find(state) != prev_toks_.end());
839+
Token *tok = prev_toks_[state];
840+
842841
BaseFloat cur_cost = tok->tot_cost;
842+
tok->in_current_queue = false; // out of queue
843843
if (cur_cost > cur_cutoff) // Don't bother processing successors.
844844
continue;
845845
// If "tok" has any existing forward links, delete them,
@@ -857,7 +857,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
857857
BaseFloat tot_cost = cur_cost + graph_cost;
858858
if (tot_cost < cur_cutoff) {
859859
Token *new_tok = FindOrAddToken(arc.nextstate, frame, tot_cost,
860-
tok, &cur_toks_, &changed);
860+
tok, &prev_toks_, &changed);
861861

862862
// Add ForwardLink from tok to new_tok. Put it on the head of
863863
// tok->link list
@@ -882,29 +882,29 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
882882

883883
// no change flag is needed
884884
Token *next_tok = FindOrAddToken(arc.nextstate, frame + 1, tot_cost,
885-
tok, &next_toks_, NULL);
885+
tok, &cur_toks_, NULL);
886886
// Add ForwardLink from tok to next_tok. Put it on the head of tok->link
887887
// list
888888
tok->links = new ForwardLinkT(next_tok, arc.ilabel, arc.olabel,
889889
graph_cost, ac_cost, tok->links);
890890
}
891891
} // for all arcs
892-
tok->in_current_queue = false; // out of queue
893892
} // end of while loop
894-
KALDI_VLOG(6) << "toks after: " << cur_toks_.size();
893+
KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded() - 1
894+
<< " is " << prev_toks_.size();
895895
}
896896

897897

898898
template <typename FST, typename Token>
899899
void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessNonemitting(
900900
std::unordered_map<Token*, BaseFloat> *recover_map) {
901901
if (recover_map) { // Build the elements which are used to recover
902-
for (IterType iter = next_toks_.begin(); iter != next_toks_.end(); iter++) {
902+
for (IterType iter = cur_toks_.begin(); iter != cur_toks_.end(); iter++) {
903903
(*recover_map)[iter->second] = iter->second->tot_cost;
904904
}
905905
}
906906

907-
StateIdToTokenMap tmp_toks(next_toks_);
907+
StateIdToTokenMap tmp_toks(cur_toks_);
908908
int32 frame = active_toks_.size() - 1;
909909
// Build the queue to process non-emitting arcs
910910
std::vector<StateId> cur_queue;

src/decoder/lattice-faster-decoder-combine.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,9 @@ class LatticeFasterDecoderCombineTpl {
243243
using Weight = typename Arc::Weight;
244244
using ForwardLinkT = decodercombine::ForwardLink<Token>;
245245

246-
using StateIdToTokenMap = typename std::unordered_map<StateId, Token*>;
246+
using StateIdToTokenMap = typename std::unordered_map<StateId, Token*,
247+
std::hash<StateId>, std::equal_to<StateId>,
248+
fst::PoolAllocator<std::pair<const StateId, Token*> > >;
247249
using IterType = typename StateIdToTokenMap::const_iterator;
248250

249251
// Instantiate this class once for each thing you have to decode.
@@ -295,9 +297,10 @@ class LatticeFasterDecoderCombineTpl {
295297
/// of the graph then it will include those as final-probs, else
296298
/// it will treat all final-probs as one.
297299
/// The raw lattice will be topologically sorted.
298-
/// The function can be called during decoding, it will take "next_toks_" map
299-
/// and generate the complete token list for the last frame. Then recover it
300-
/// to ensure the consistency of ProcessForFrame().
300+
/// The function can be called during decoding, it will process non-emitting
301+
/// arcs from "cur_toks_" map to get tokens from both non-emitting and
302+
/// emitting arcs for getting raw lattice. Then recover it to ensure the
303+
/// consistency of ProcessForFrame().
301304
///
302305
/// See also GetRawLatticePruned in lattice-faster-online-decoder.h,
303306
/// which also supports a pruning beam, in case for some reason
@@ -447,15 +450,18 @@ class LatticeFasterDecoderCombineTpl {
447450
void PruneActiveTokens(BaseFloat delta);
448451

449452
/// Processes non-emitting (epsilon) arcs and emitting arcs for one frame
450-
/// together. It takes the emittion tokens in "cur_toks_" from last frame.
451-
/// Generates non-emitting tokens for current frame and emitting tokens for
453+
/// together. It takes the emission tokens in "prev_toks_" from last frame.
454+
/// Generates non-emitting tokens for previous frame and emitting tokens for
452455
/// next frame.
456+
/// Notice: The emitting tokens for the current frame means the tokens that take
457+
/// acoustic scores of the current frame. (i.e. the destinations of emitting
458+
/// arcs.)
453459
void ProcessForFrame(DecodableInterface *decodable);
454460

455461
/// Processes nonemitting (epsilon) arcs for one frame.
456462
/// Calls this function once when all frames were processed.
457463
/// Or calls it in GetRawLattice() to generate the complete token list for
458-
/// the last frame. [Deal With the tokens in map "next_toks_" which would
464+
/// the last frame. [Deal With the tokens in map "cur_toks_" which would
459465
/// only contains emission tokens from previous frame.]
460466
/// If "recover_map" isn't NULL, we build the recover_map which will be used
461467
/// to recover "active_toks_[last_frame]" token list for the last frame.
@@ -466,17 +472,18 @@ class LatticeFasterDecoderCombineTpl {
466472
/// ProcessForFrame(), recover it.
467473
/// Notice: as new token will be added to the head of TokenList, tok->next
468474
/// will not be affacted.
469-
void RecoverLastTokenList(std::unordered_map<Token*, BaseFloat> *recover_map);
475+
void RecoverLastTokenList(
476+
const std::unordered_map<Token*, BaseFloat> &recover_map);
470477

471478

472-
/// The "cur_toks_" and "next_toks_" actually allow us to maintain current
479+
/// The "prev_toks_" and "cur_toks_" actually allow us to maintain current
473480
/// and next frames. They are indexed by StateId. It is indexed by frame-index
474481
/// plus one, where the frame-index is zero-based, as used in decodable object.
475482
/// That is, the emitting probs of frame t are accounted for in tokens at
476483
/// toks_[t+1]. The zeroth frame is for nonemitting transition at the start of
477484
/// the graph.
485+
StateIdToTokenMap prev_toks_;
478486
StateIdToTokenMap cur_toks_;
479-
StateIdToTokenMap next_toks_;
480487

481488
/// Gets the weight cutoff.
482489
/// Notice: In traditional version, the histogram pruning method is applied
@@ -485,7 +492,7 @@ class LatticeFasterDecoderCombineTpl {
485492
/// and min_active values might be narrowed.
486493
BaseFloat GetCutoff(const StateIdToTokenMap& toks,
487494
BaseFloat *adaptive_beam,
488-
StateId *best_elem_id, Token **best_elem);
495+
StateId *best_state_id, Token **best_token);
489496

490497
std::vector<TokenList> active_toks_; // Lists of tokens, indexed by
491498
// frame (members of TokenList are toks, must_prune_forward_links,

0 commit comments

Comments
 (0)