import argparse
from nltk.tokenize import word_tokenize


def modify_output(line):
    """Wrap a raw output line in ``<START>`` / ``<END>`` markers.

    Every literal backslash-``n`` sequence (the two characters ``\\`` and
    ``n``, not an actual newline) is turned into a single space, surrounding
    whitespace is trimmed, and the result is terminated with a newline.
    """
    flattened = line.replace('\\n', ' ')
    return "<START> {} <END>\n".format(flattened.strip())


def modify_input(line):
    """Tokenize a raw input line and wrap it in ``<START>`` / ``<END>``.

    The line is stripped, split into tokens with NLTK's ``word_tokenize``,
    and re-joined with single spaces between the start marker, the tokens
    and the end marker. The returned string ends with a newline.
    """
    tokens = word_tokenize(line.strip())
    return " ".join(["<START>", *tokens, "<END>\n"])


def main(args):
    """Rewrite a raw CFQ text file with ``<START>`` / ``<END>`` markers.

    When ``args.output`` is truthy, ``cfq_data/raw_output.txt`` is processed
    line by line with ``modify_output`` and written to
    ``cfq_data/output.txt``. Otherwise ``cfq_data/raw_input.txt`` is
    processed with ``modify_input`` and written to ``cfq_data/input.txt``.

    Args:
        args: Parsed command-line namespace; only ``args.output`` is read.
    """
    # Pick the file pair and the per-line transform once, up front, instead
    # of re-testing the flag for every line.
    if args.output:
        raw_file = "cfq_data/raw_output.txt"
        mod_file = "cfq_data/output.txt"
        transform = modify_output
    else:
        raw_file = "cfq_data/raw_input.txt"
        mod_file = "cfq_data/input.txt"
        transform = modify_input

    # Explicit UTF-8 so the result does not depend on the platform's locale
    # default encoding.
    with open(raw_file, 'r', encoding='utf-8') as src:
        outputs = [transform(line) for line in src]

    # The destination is opened only after every line has been processed, so
    # a failure while reading or transforming never truncates an existing
    # output file.
    with open(mod_file, 'w', encoding='utf-8') as dst:
        dst.writelines(outputs)


if __name__ == '__main__':
    # Command-line entry point: a single optional --output flag selects
    # which file pair main() processes.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--output',
        action='store_true',
        default=False,
        help='Process output.',
    )
    cli_args = cli.parse_args()
    main(cli_args)
