import argparse
import os, io

from alpha_integrate.synthetic_data.expr_utils import TokensToSympy
from alpha_integrate.synthetic_data.expression_tokenizer import ExpressionTokenizer

# Command-line interface: one input file to read, one output file to write.
# NOTE(review): the description/help strings mention shuffling, but nothing in
# this script shuffles; lines are written in the order they are read.
parser = argparse.ArgumentParser(description='Shuffle and dump data.')
parser.add_argument('--save_path', type=str, required=True, help='Path where the shuffled data will be saved')
parser.add_argument('--data_path', type=str, required=True, help='Path to the data to be processed')
args = parser.parse_args()

# e0 turns a token sequence into a sympy expression (seq_to_sp_direct);
# e1 turns a sympy expression back into a token sequence (sp_to_seq).
e0 = TokensToSympy()
e1 = ExpressionTokenizer()

s_path = args.save_path
d_path = args.data_path

# Re-tokenized "<expr>\t<res>" records, kept in input order.
s_lines = []
# Bookkeeping so that dropped lines are reported instead of vanishing silently.
n_malformed = 0
n_failed = 0

with io.open(d_path, mode='r', encoding='utf-8') as f:
    for i, line in enumerate(f):
        # Progress indicator, rewritten in place via the carriage return.
        print(f'Count: {i}', end='\r')
        line = line.rstrip()
        line = line.split('\t')
        # Each record must be exactly two tab-separated fields.
        if len(line) != 2:
            n_malformed += 1
            continue
        # The first two tokens of the expression field are dropped
        # (presumably a fixed prefix in the source format -- TODO confirm).
        expr = line[0].split()[2:]
        res = line[1].split()
        try:
            expr = e0.seq_to_sp_direct(expr)
            expr_str = ' '.join(e1.sp_to_seq(expr))
            res = e0.seq_to_sp_direct(res)
            res_str = ' '.join(e1.sp_to_seq(res))
            s_lines.append(expr_str + '\t' + res_str)
        except Exception:
            # Best-effort conversion: skip records that cannot be round-tripped.
            # Only Exception is caught so that KeyboardInterrupt / SystemExit
            # still stop the script instead of being swallowed per line.
            n_failed += 1
            continue
    print()

print(f'Converted: {len(s_lines)}, malformed: {n_malformed}, failed: {n_failed}')

# Save s_lines line by line to s_path, using the same encoding as the input
# so the result does not depend on the platform's locale default.
with io.open(s_path, mode='w', encoding='utf-8') as f:
    for line in s_lines:
        f.write(line+'\n')
