import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import re
import random
import csv
import numpy as np
import torch
import sympy

# Module-level accumulators. Nothing in this section fills them directly;
# presumably process() (defined outside this section) appends to them --
# TODO confirm against its definition.
expression, rulexpression = [], []
notexpression, notrulexpression = [], []
nequalexpression, nequalrulexpression = [], []

# Input file and the column names applied to it (the file is read with
# header=None, so no row is consumed as a header).
filename = "dataset_check.csv"
colnames = [
    "Index",
    "function",
    "labels",
    "our_rule",
    "sympy_rule",
]

# Number of rows handed to process() per chunk: 2,000,000.
chunksize = 2 * 10 ** 6

# Stream the CSV chunk by chunk. Only the columns at positions 1-3
# ('function', 'labels', 'our_rule') are loaded.
with pd.read_csv(
    filename,
    names=colnames,
    header=None,
    usecols=[1, 2, 3],
    chunksize=chunksize,
) as reader:
    for chunk in tqdm(reader):
        process(chunk)
        
# Assemble the output table from the two accumulator lists: one row per
# entry, with the expression in 'function' and its rule in 'rules'.
# Both lists must have the same length for the DataFrame to be built.
new_data = {}
new_data['function'] = expression
new_data['rules'] = rulexpression
out_data = pd.DataFrame(data=new_data)

def _normalize_rule(rule):
    """Return *rule* with its closing parentheses rebuilt.

    Every ')' is removed first. If what remains contains exactly one comma
    (i.e. it splits into two comma-separated parts), one ')' is appended for
    each remaining '(' and a trailing ',' is added. Any other rule is
    returned with only its ')' characters removed.
    """
    stripped = rule.replace(')', '')
    if stripped.count(',') != 1:
        return stripped
    # With no '(' present the repeat count is zero, so a rule without
    # parentheses simply gains the trailing ','.
    return stripped + ')' * stripped.count('(') + ','


# Rebuild the whole column in a single assignment rather than writing
# element by element through ``Series.values``: that array is read-only when
# pandas Copy-on-Write is active, and going through the attribute chain for
# every access is slow on large frames.
out_data['rules'] = [
    _normalize_rule(rule) for rule in tqdm(out_data['rules'].tolist())
]
    
# Collapse identical rows, keeping the first occurrence of each
# (drop_duplicates keeps the first occurrence by default).
out_data = out_data.drop_duplicates()

# The number of rows left after de-duplication is embedded in the file name.
output_name = f'expr_{len(out_data)}_yet.csv'
out_data.to_csv(output_name, index=False, encoding='utf-8')
