import pandas as pd
import numpy as np
import os
import pickle


def data_filter(x):
    """Binarize a value: return 0 when ``x`` equals -1, otherwise return 1."""
    if x == -1:
        return 0
    return 1
# Load the pickled covariate matrix. The context manager guarantees the file
# handle is closed even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a trusted covariate.pickle.
path = '/Users/wangzhenlei/Desktop/Causal Ideas/Robust Causal inference/NeurIPS2024-Rubuttle/dataset/covariate.pickle'
with open(path, "rb") as file:
    covariate = pickle.load(file)
covariate = np.array(covariate)
print(covariate.shape)

# Column names x1 ... x3072, one per covariate column that is kept.
name_list = ['x' + str(i) for i in range(1, 3073)]
# Directory that the generated MultiText CSV files are written to.
text_path = '/Users/wangzhenlei/Desktop/Github/Causally_Rebuttle/dataset/MultiText/'
# Build the covariate frame column by column so that column i of the matrix
# is stored under name_list[i].
x = pd.DataFrame(
    {key: covariate[:, index] for index, key in enumerate(name_list)}
)
# Convert every outcome CSV in the dataset directory into a MultiText CSV that
# holds the binarized outcomes, the treatment flag and the covariates, with
# the control group down-sampled to at most three controls per treated unit.
num = 1
path = '/Users/wangzhenlei/Desktop/Causal Ideas/Robust Causal inference/NeurIPS2024-Rubuttle/dataset/'
source_columns = ['y', 't', 'yF1', 'yF0']
# os.listdir returns entries in arbitrary order; sorting keeps the mapping
# from input file to MultiText<num>.csv reproducible across runs and machines.
for file_name in sorted(os.listdir(path)):
    if not file_name.endswith('.csv'):
        continue
    path_file = os.path.join(path, file_name)
    data = pd.read_csv(path_file, usecols=source_columns)
    # usecols ignores the order of the requested names (columns come back in
    # file order), so select them explicitly before renaming by position.
    data = data[source_columns].copy()
    data.columns = ['yf', 'treatment', 'mu1', 'mu0']

    # Binarize the factual outcome and both potential outcomes (-1 -> 0, else 1).
    for column in ('yf', 'mu1', 'mu0'):
        data[column] = data[column].apply(data_filter)

    # NOTE(review): concat aligns on the row index, so this assumes every CSV
    # has the same rows, in the same order, as the covariate matrix -- confirm.
    data = pd.concat([data, x], axis=1)
    treat_data = data[data['treatment'] == 1].reset_index(drop=True)
    control_data = data[data['treatment'] == 0].reset_index(drop=True)
    treat_lengths = treat_data.shape[0]
    # Keep three controls per treated unit, capped at the number of controls
    # available: sampling with frac > 1 raises ValueError, and an empty
    # control group would otherwise cause a division by zero.
    control_lengths = min(treat_lengths * 3, len(control_data))

    control_data = control_data.sample(n=control_lengths).reset_index(drop=True)
    saved_data = pd.concat([treat_data, control_data]).reset_index(drop=True)

    saved_file = os.path.join(text_path, 'MultiText{}.csv'.format(num))
    num += 1
    saved_data.to_csv(saved_file, index=False, sep=',')

