import argparse
import os
import sys
from fairseq.scoring import wer


def get_parser():
    """Build the argument parser for the WER scoring command line.

    Options:
        -s / --sys: system output path; defaults to ``-``.
        -r / --ref: reference path; required.
        --wer-lowercase: flag, case-insensitive scoring.
        --wer-remove-punct: flag, remove punctuation.
        --wer-tokenizer: one of ``none``, ``13a``, ``intl``, ``zh``,
            ``ja-mecab``; defaults to ``none``.
        --wer-char-level: flag.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    tokenizer_names = ["none", "13a", "intl", "zh", "ja-mecab"]

    cli = argparse.ArgumentParser(description="Command-line script for WER scoring.")

    # Input locations.
    cli.add_argument("-s", "--sys", help="system output", default="-")
    cli.add_argument("-r", "--ref", help="references", required=True)

    # Scoring options; registration order is kept so --help lists them
    # in the same sequence.
    cli.add_argument(
        "--wer-lowercase",
        help="case-insensitive scoring",
        action="store_true",
    )
    cli.add_argument(
        "--wer-remove-punct",
        help="remove punctuation",
        action="store_true",
    )
    cli.add_argument(
        "--wer-tokenizer",
        help="tokenizer",
        choices=tokenizer_names,
        default="none",
    )
    cli.add_argument("--wer-char-level", action="store_true")

    return cli


def cli_main():
    """Score a system-output file against a reference file with WER.

    Parses the command line with ``get_parser``, pairs the system output and
    the reference line by line, feeds every pair to ``wer.WerScorer`` and
    prints the scorer's result string to stdout. The system output is read
    from stdin when ``--sys`` is ``-``.

    If an input file does not exist, the function exits through
    ``parser.error`` (usage message on stderr, exit status 2). If the two
    inputs have different numbers of lines, only the common prefix is scored
    and a warning is written to stderr.
    """
    from itertools import zip_longest

    parser = get_parser()
    args = parser.parse_args()
    print(args)

    # Validate explicitly instead of with ``assert``: assertions are removed
    # when Python runs with -O, which would silently skip these checks.
    if args.sys != "-" and not os.path.exists(args.sys):
        parser.error("System output file {} does not exist".format(args.sys))
    if not os.path.exists(args.ref):
        parser.error("Reference file {} does not exist".format(args.ref))

    def readlines(fd):
        """Yield lines of ``fd``, lowercased when --wer-lowercase is set."""
        # Iterate the file object directly so large inputs are streamed
        # rather than loaded into memory in one go.
        for line in fd:
            yield line.lower() if args.wer_lowercase else line

    def score(fdsys):
        """Score the lines of ``fdsys`` against ``args.ref`` and print the result."""
        with open(args.ref) as fdref:
            # The lowercase flag is also forwarded to the scorer config, in
            # addition to the lowercasing done in ``readlines``.
            scorer = wer.WerScorer(
                wer.WerScorerConfig(
                    wer_tokenizer=args.wer_tokenizer,
                    wer_remove_punct=args.wer_remove_punct,
                    wer_char_level=args.wer_char_level,
                    wer_lowercase=args.wer_lowercase,
                )
            )

            # A unique sentinel marks the shorter input running out, so a
            # line-count mismatch is reported instead of silently truncated.
            exhausted = object()
            pairs = zip_longest(
                readlines(fdsys), readlines(fdref), fillvalue=exhausted
            )
            for sys_tok, ref_tok in pairs:
                if sys_tok is exhausted or ref_tok is exhausted:
                    print(
                        "warning: system output and reference have different "
                        "numbers of lines; scoring only the common prefix",
                        file=sys.stderr,
                    )
                    break
                scorer.add_string(ref_tok, sys_tok)
            print(scorer.result_string())

    if args.sys == "-":
        score(sys.stdin)
    else:
        with open(args.sys, "r") as f:
            score(f)
 

# Run the scorer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    cli_main()

