// fstext/pre-determinize.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_PRE_DETERMINIZE_H_
#define KALDI_FSTEXT_PRE_DETERMINIZE_H_

#include <fst/fst-decl.h>
#include <fst/fstlib.h>

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "base/kaldi-common.h"

namespace fst {

/* PreDeterminize inserts extra symbols on the input side of an FST as necessary
   to ensure that, after epsilon removal, it will be compactly determinizable by
   the determinize* algorithm.  (By compactly determinizable we mean that no
   original FST state is represented in more than one determinized state.)

   Caution: this code is now only used in testing.

   Parameters:
     fst               The FST to modify; it is passed as a mutable pointer and
                       the extra input symbols are inserted into it.
     first_new_symbol  The value from which the new symbols start.  It should be
                       higher than the largest-numbered symbol currently in the
                       FST.
     syms_out          Receives the new symbols that were added.  It should be
                       empty at the start.

   NOTE(review): the template parameter Int is the element type of syms_out;
   presumably it must be an integer type able to hold Arc::Label values --
   confirm against the definition in pre-determinize-inl.h.
*/

template <class Arc, class Int>
void PreDeterminize(MutableFst<Arc> *fst, typename Arc::Label first_new_symbol,
                    std::vector<Int> *syms_out);

/* CreateNewSymbols is a helper function used inside PreDeterminize, and is also
   useful when you need to add a number of extra symbols to a different
   vocabulary from the one modified by PreDeterminize.

   NOTE(review): the parameter semantics are not visible in this header.
   Presumably nSym new symbols, named using 'prefix', are added to
   'inputSymTable' and their labels are returned in 'syms_out' -- confirm
   against the definition in pre-determinize-inl.h.  Note that 'prefix' is
   taken by value. */

template <class Label>
void CreateNewSymbols(SymbolTable *inputSymTable, int nSym, std::string prefix,
                      std::vector<Label> *syms_out);

/** AddSelfLoops is a function you will probably want to use alongside
    PreDeterminize, to add self-loops to any FSTs that you compose on the left
    hand side of the one modified by PreDeterminize.

    This function inserts loops with "special symbols" [e.g. \#0, \#1] into an
    FST.  This is done at each final state and each state with non-epsilon
    output symbols on at least one arc out of it.  This is to ensure that these
    symbols, when inserted into the input side of an FST we will compose with
    on the right, can "pass through" this FST.

    At input, isyms and osyms must be vectors of the same size n, corresponding
    to symbols that currently do not exist in 'fst'.  For each state in 'fst'
    that has non-epsilon symbols on the output side of arcs leaving it, or
    which is a final state, this function inserts n self-loops with unit
    weight; the i'th self-loop has isyms[i] on its input and osyms[i] on its
    output.
*/
template <class Arc>
void AddSelfLoops(MutableFst<Arc> *fst,
                  const std::vector<typename Arc::Label> &isyms,
                  const std::vector<typename Arc::Label> &osyms);

/* DeleteISymbols replaces any instances of symbols in the vector symsIn,
   appearing on the input side of 'fst', with epsilon.
   It returns the number of instances of symbols deleted.
   Note that symsIn is taken by value, so the caller's vector is copied. */
template <class Arc>
int64 DeleteISymbols(MutableFst<Arc> *fst,
                     std::vector<typename Arc::Label> symsIn);

/* CreateSuperFinal takes an FST, and creates an equivalent FST with a single
   final state with no transitions out and unit final weight, by inserting
   epsilon transitions as necessary.  The FST is modified in place through the
   'fst' pointer.

   NOTE(review): the returned StateId is presumably the id of the single
   (super-)final state -- confirm against the definition in
   pre-determinize-inl.h. */
template <class Arc>
typename Arc::StateId CreateSuperFinal(MutableFst<Arc> *fst);

}  // end namespace fst

#include "fstext/pre-determinize-inl.h"

#endif  // KALDI_FSTEXT_PRE_DETERMINIZE_H_
