// util/parse-options.cc

// Copyright 2009-2011  Karel Vesely;  Microsoft Corporation;
//                      Saarland University (Author: Arnab Ghoshal);
// Copyright 2012-2013  Johns Hopkins University (Author: Daniel Povey);
//                      Frantisek Skala;  Arnab Ghoshal
// Copyright 2013       Tanel Alumae
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>

#include "base/kaldi-common.h"
#include "util/parse-options.h"
#include "util/text-utils.h"

namespace kaldi {

ParseOptions::ParseOptions(const std::string &prefix, OptionsItf *other)
    : print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) {
  ParseOptions *po = dynamic_cast<ParseOptions *>(other);
  if (po != NULL && po->other_parser_ != NULL) {
    // we get here if this constructor is used twice, recursively.
    other_parser_ = po->other_parser_;
  } else {
    other_parser_ = other;
  }
  if (po != NULL && po->prefix_ != "") {
    prefix_ = po->prefix_ + std::string(".") + prefix;
  } else {
    prefix_ = prefix;
  }
}

void ParseOptions::Register(const std::string &name, bool *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, int32 *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, uint32 *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, float *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, double *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name, std::string *ptr,
                            const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

// old-style, used for registering application-specific parameters
template <typename T>
void ParseOptions::RegisterTmpl(const std::string &name, T *ptr,
                                const std::string &doc) {
  if (other_parser_ == NULL) {
    this->RegisterCommon(name, ptr, doc, false);
  } else {
    KALDI_ASSERT(prefix_ != "" &&
                 "Cannot use empty prefix when registering with prefix.");
    std::string new_name = prefix_ + '.' + name;  // name becomes prefix.name
    other_parser_->Register(new_name, ptr, doc);
  }
}

// does the common part of the job of registering a parameter
template <typename T>
void ParseOptions::RegisterCommon(const std::string &name, T *ptr,
                                  const std::string &doc, bool is_standard) {
  KALDI_ASSERT(ptr != NULL);
  std::string idx = name;
  NormalizeArgName(&idx);
  if (doc_map_.find(idx) != doc_map_.end())
    KALDI_WARN << "Registering option twice, ignoring second time: " << name;
  this->RegisterSpecific(name, idx, ptr, doc, is_standard);
}

// used to register standard parameters (those that are present in all of the
// applications)
template <typename T>
void ParseOptions::RegisterStandard(const std::string &name, T *ptr,
                                    const std::string &doc) {
  this->RegisterCommon(name, ptr, doc, true);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, bool *b,
                                    const std::string &doc, bool is_standard) {
  bool_map_[idx] = b;
  doc_map_[idx] =
      DocInfo(name, doc + " (bool, default = " + ((*b) ? "true)" : "false)"),
              is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, int32 *i,
                                    const std::string &doc, bool is_standard) {
  int_map_[idx] = i;
  std::ostringstream ss;
  ss << doc << " (int, default = " << *i << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, uint32 *u,
                                    const std::string &doc, bool is_standard) {
  uint_map_[idx] = u;
  std::ostringstream ss;
  ss << doc << " (uint, default = " << *u << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, float *f,
                                    const std::string &doc, bool is_standard) {
  float_map_[idx] = f;
  std::ostringstream ss;
  ss << doc << " (float, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, double *f,
                                    const std::string &doc, bool is_standard) {
  double_map_[idx] = f;
  std::ostringstream ss;
  ss << doc << " (double, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, ss.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx, std::string *s,
                                    const std::string &doc, bool is_standard) {
  string_map_[idx] = s;
  doc_map_[idx] =
      DocInfo(name, doc + " (string, default = \"" + *s + "\")", is_standard);
}
void ParseOptions::DisableOption(const std::string &name) {
  if (argv_ != NULL)
    KALDI_ERR << "DisableOption must not be called after calling Read().";
  if (doc_map_.erase(name) == 0)
    KALDI_ERR << "Option " << name
              << " was not registered so cannot be disabled: ";
  bool_map_.erase(name);
  int_map_.erase(name);
  uint_map_.erase(name);
  float_map_.erase(name);
  double_map_.erase(name);
  string_map_.erase(name);
}

int ParseOptions::NumArgs() const { return positional_args_.size(); }

std::string ParseOptions::GetArg(int i) const {
  // use KALDI_ERR if code error
  if (i < 1 || i > static_cast<int>(positional_args_.size()))
    KALDI_ERR << "ParseOptions::GetArg, invalid index " << i;
  return positional_args_[i - 1];
}

// We currently do not support any other options.
enum ShellType { kBash = 0 };

// This can be changed in the code if it ever does need to be changed (as it's
// unlikely that one compilation of this tool-set would use both shells).
static ShellType kShellType = kBash;

// Returns true if we need to escape a string before putting it into
// a shell (mainly thinking of bash shell, but should work for others)
// This is for the convenience of the user so command-lines that are
// printed out by ParseOptions::Read (with --print-args=true) are
// paste-able into the shell and will run. If you use a different type of
// shell, it might be necessary to change this function.
// But it's mostly a cosmetic issue as it basically affects how
// the program echoes its command-line arguments to the screen.
static bool MustBeQuoted(const std::string &str, ShellType st) {
  // Only Bash is supported (for the moment).
  KALDI_ASSERT(st == kBash && "Invalid shell type.");

  const char *c = str.c_str();
  if (*c == '\0') {
    return true;  // Must quote empty string
  } else {
    const char *ok_chars[2];

    // These seem not to be interpreted as long as there are no other "bad"
    // characters involved (e.g. "," would be interpreted as part of something
    // like a{b,c}, but not on its own.
    ok_chars[kBash] = "[]~#^_-+=:.,/";

    // Just want to make sure that a space character doesn't get automatically
    // inserted here via an automated style-checking script, like it did before.
    KALDI_ASSERT(!strchr(ok_chars[kBash], ' '));

    for (; *c != '\0'; c++) {
      // For non-alphanumeric characters we have a list of characters which
      // are OK. All others are forbidden (this is easier since the shell
      // interprets most non-alphanumeric characters).
      if (!isalnum(*c)) {
        const char *d;
        for (d = ok_chars[st]; *d != '\0'; d++)
          if (*c == *d) break;
        // If not alphanumeric or one of the "ok_chars", it must be escaped.
        if (*d == '\0') return true;
      }
    }
    return false;  // The string was OK. No quoting or escaping.
  }
}

// Returns a quoted and escaped version of "str"
// which has previously been determined to need escaping.
// Our aim is to print out the command line in such a way that if it's
// pasted into a shell of ShellType "st" (only bash for now), it
// will get passed to the program in the same way.
static std::string QuoteAndEscape(const std::string &str, ShellType st) {
  // Only Bash is supported (for the moment).
  KALDI_ASSERT(st == kBash && "Invalid shell type.");

  // For now we use the following rules:
  // In the normal case, we quote with single-quote "'", and to escape
  // a single-quote we use the string: '\'' (interpreted as closing the
  // single-quote, putting an escaped single-quote from the shell, and
  // then reopening the single quote).
  char quote_char = '\'';
  const char *escape_str = "'\\''";  // e.g. echo 'a'\''b' returns a'b

  // If the string contains single-quotes that would need escaping this
  // way, and we determine that the string could be safely double-quoted
  // without requiring any escaping, then we double-quote the string.
  // This is the case if the characters "`$\ do not appear in the string.
  // e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html
  const char *c_str = str.c_str();
  if (strchr(c_str, '\'') && !strpbrk(c_str, "\"`$\\")) {
    quote_char = '"';
    escape_str = "\\\"";  // should never be accessed.
  }

  char buf[2];
  buf[1] = '\0';

  buf[0] = quote_char;
  std::string ans = buf;
  const char *c = str.c_str();
  for (; *c != '\0'; c++) {
    if (*c == quote_char) {
      ans += escape_str;
    } else {
      buf[0] = *c;
      ans += buf;
    }
  }
  buf[0] = quote_char;
  ans += buf;
  return ans;
}

// static function
std::string ParseOptions::Escape(const std::string &str) {
  return MustBeQuoted(str, kShellType) ? QuoteAndEscape(str, kShellType) : str;
}

int ParseOptions::Read(int argc, const char *const argv[]) {
  argc_ = argc;
  argv_ = argv;
  std::string key, value;
  int i;
  if (argc > 0) {
    // set global "const char*" g_program_name (name of the program)
    // so it can be printed out in error messages;
    // it's useful because often the stderr of different programs will
    // be mixed together in the same log file.
#ifdef _MSC_VER
    const char *c = strrchr(argv[0], '\\');
#else
    const char *c = strrchr(argv[0], '/');
#endif
    SetProgramName(c == NULL ? argv[0] : c + 1);
  }
  // first pass: look for config parameter, look for priority
  for (i = 1; i < argc; i++) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // a lone "--" marks the end of named options
        break;
      }
      bool has_equal_sign;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (key.compare("config") == 0) {
        ReadConfigFile(value);
      }
      if (key.compare("help") == 0) {
        PrintUsage();
        exit(0);
      }
    }
  }
  bool double_dash_seen = false;
  // second pass: add the command line options
  for (i = 1; i < argc; i++) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // A lone "--" marks the end of named options.
        // Skip that option and break the processing of named options
        i += 1;
        double_dash_seen = true;
        break;
      }
      bool has_equal_sign;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (!SetOption(key, value, has_equal_sign)) {
        PrintUsage(true);
        KALDI_ERR << "Invalid option " << argv[i];
      }
    } else {
      break;
    }
  }

  // process remaining arguments as positional
  for (; i < argc; i++) {
    if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) {
      double_dash_seen = true;
    } else {
      positional_args_.push_back(std::string(argv[i]));
    }
  }

  // if the user did not suppress this with --print-args = false....
  if (print_args_) {
    std::ostringstream strm;
    for (int j = 0; j < argc; j++) strm << Escape(argv[j]) << " ";
    strm << '\n';
    std::cerr << strm.str() << std::flush;
  }
  return i;
}

void ParseOptions::PrintUsage(bool print_command_line) {
  std::cerr << '\n' << usage_ << '\n';
  DocMapType::iterator it;
  // first we print application-specific options
  bool app_specific_header_printed = false;
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    if (it->second.is_standard_ == false) {  // application-specific option
      if (app_specific_header_printed == false) {  // header was not yet printed
        std::cerr << "Options:" << '\n';
        app_specific_header_printed = true;
      }
      std::cerr << "  --" << std::setw(25) << std::left << it->second.name_
                << " : " << it->second.use_msg_ << '\n';
    }
  }
  if (app_specific_header_printed == true) {
    std::cerr << '\n';
  }

  // then the standard options
  std::cerr << "Standard options:" << '\n';
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    if (it->second.is_standard_ == true) {  // we have standard option
      std::cerr << "  --" << std::setw(25) << std::left << it->second.name_
                << " : " << it->second.use_msg_ << '\n';
    }
  }
  std::cerr << '\n';
  if (print_command_line) {
    std::ostringstream strm;
    strm << "Command line was: ";
    for (int j = 0; j < argc_; j++) strm << Escape(argv_[j]) << " ";
    strm << '\n';
    std::cerr << strm.str() << std::flush;
  }
}

void ParseOptions::PrintConfig(std::ostream &os) {
  os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n';
  std::string key;
  DocMapType::iterator it;
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    key = it->first;
    os << it->second.name_ << " = ";
    if (bool_map_.end() != bool_map_.find(key)) {
      os << (*bool_map_[key] ? "true" : "false");
    } else if (int_map_.end() != int_map_.find(key)) {
      os << (*int_map_[key]);
    } else if (uint_map_.end() != uint_map_.find(key)) {
      os << (*uint_map_[key]);
    } else if (float_map_.end() != float_map_.find(key)) {
      os << (*float_map_[key]);
    } else if (double_map_.end() != double_map_.find(key)) {
      os << (*double_map_[key]);
    } else if (string_map_.end() != string_map_.find(key)) {
      os << "'" << *string_map_[key] << "'";
    } else {
      KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]";
    }
    os << '\n';
  }
  os << '\n';
}

void ParseOptions::ReadConfigFile(const std::string &filename) {
  std::ifstream is(filename.c_str(), std::ifstream::in);
  if (!is.good()) {
    KALDI_ERR << "Cannot open config file: " << filename;
  }

  std::string line, key, value;
  int32 line_number = 0;
  while (std::getline(is, line)) {
    line_number++;
    // trim out the comments
    size_t pos;
    if ((pos = line.find_first_of('#')) != std::string::npos) {
      line.erase(pos);
    }
    // skip empty lines
    Trim(&line);
    if (line.length() == 0) continue;

    if (line.substr(0, 2) != "--") {
      KALDI_ERR << "Reading config file " << filename << ": line "
                << line_number << " does not look like a line "
                << "from a Kaldi command-line program's config file: should "
                << "be of the form --x=y.  Note: config files intended to "
                << "be sourced by shell scripts lack the '--'.";
    }

    // parse option
    bool has_equal_sign;
    SplitLongArg(line, &key, &value, &has_equal_sign);
    NormalizeArgName(&key);
    Trim(&value);
    if (!SetOption(key, value, has_equal_sign)) {
      PrintUsage(true);
      KALDI_ERR << "Invalid option " << line << " in config file " << filename;
    }
  }
}

void ParseOptions::SplitLongArg(const std::string &in, std::string *key,
                                std::string *value, bool *has_equal_sign) {
  KALDI_ASSERT(in.substr(0, 2) == "--");  // precondition.
  size_t pos = in.find_first_of('=', 0);
  if (pos == std::string::npos) {  // we allow --option for bools
    // defaults to empty.  We handle this differently in different cases.
    *key = in.substr(2, in.size() - 2);  // 2 because starts with --.
    *value = "";
    *has_equal_sign = false;
  } else if (pos == 2) {  // we also don't allow empty keys: --=value
    PrintUsage(true);
    KALDI_ERR << "Invalid option (no key): " << in;
  } else {                         // normal case: --option=value
    *key = in.substr(2, pos - 2);  // 2 because starts with --.
    *value = in.substr(pos + 1);
    *has_equal_sign = true;
  }
}

void ParseOptions::NormalizeArgName(std::string *str) {
  std::string out;
  std::string::iterator it;

  for (it = str->begin(); it != str->end(); ++it) {
    if (*it == '_')
      out += '-';  // convert _ to -
    else
      out += std::tolower(*it);
  }
  *str = out;

  KALDI_ASSERT(str->length() > 0);
}

bool ParseOptions::SetOption(const std::string &key, const std::string &value,
                             bool has_equal_sign) {
  if (bool_map_.end() != bool_map_.find(key)) {
    if (has_equal_sign && value == "")
      KALDI_ERR << "Invalid option --" << key << "=";
    *(bool_map_[key]) = ToBool(value);
  } else if (int_map_.end() != int_map_.find(key)) {
    *(int_map_[key]) = ToInt(value);
  } else if (uint_map_.end() != uint_map_.find(key)) {
    *(uint_map_[key]) = ToUint(value);
  } else if (float_map_.end() != float_map_.find(key)) {
    *(float_map_[key]) = ToFloat(value);
  } else if (double_map_.end() != double_map_.find(key)) {
    *(double_map_[key]) = ToDouble(value);
  } else if (string_map_.end() != string_map_.find(key)) {
    if (!has_equal_sign)
      KALDI_ERR << "Invalid option --" << key << " (option format is --x=y).";
    *(string_map_[key]) = value;
  } else {
    return false;
  }
  return true;
}

bool ParseOptions::ToBool(std::string str) {
  std::transform(str.begin(), str.end(), str.begin(), ::tolower);

  // allow "" as a valid option for "true", so that --x is the same as --x=true
  if ((str.compare("true") == 0) || (str.compare("t") == 0) ||
      (str.compare("1") == 0) || (str.compare("") == 0)) {
    return true;
  }
  if ((str.compare("false") == 0) || (str.compare("f") == 0) ||
      (str.compare("0") == 0)) {
    return false;
  }
  // if it is neither true nor false:
  PrintUsage(true);
  KALDI_ERR << "Invalid format for boolean argument [expected true or false]: "
            << str;
  return false;  // never reached
}

int32 ParseOptions::ToInt(const std::string &str) {
  int32 ret;
  if (!ConvertStringToInteger(str, &ret))
    KALDI_ERR << "Invalid integer option \"" << str << "\"";
  return ret;
}

uint32 ParseOptions::ToUint(const std::string &str) {
  uint32 ret;
  if (!ConvertStringToInteger(str, &ret))
    KALDI_ERR << "Invalid integer option \"" << str << "\"";
  return ret;
}

float ParseOptions::ToFloat(const std::string &str) {
  float ret;
  if (!ConvertStringToReal(str, &ret))
    KALDI_ERR << "Invalid floating-point option \"" << str << "\"";
  return ret;
}

double ParseOptions::ToDouble(const std::string &str) {
  double ret;
  if (!ConvertStringToReal(str, &ret))
    KALDI_ERR << "Invalid floating-point option \"" << str << "\"";
  return ret;
}

// instantiate templates
template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr,
                                         const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name,
                                         std::string *ptr,
                                         const std::string &doc);

template void ParseOptions::RegisterStandard(const std::string &name, bool *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             int32 *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             uint32 *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             float *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             double *ptr,
                                             const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                                             std::string *ptr,
                                             const std::string &doc);

template void ParseOptions::RegisterCommon(const std::string &name, bool *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, int32 *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, uint32 *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, float *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name, double *ptr,
                                           const std::string &doc,
                                           bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                                           std::string *ptr,
                                           const std::string &doc,
                                           bool is_standard);

}  // namespace kaldi
