import argparse
import os
import shutil
from datetime import datetime

import sys
sys.path.insert(1, '..')

import pandas as pd
import numpy as np
import torch 
import torch.backends.cudnn as cudnn
import torch.optim
import torch.nn as nn
import numpy as np
from sklearn.model_selection import train_test_split


from sklearn.linear_model import RidgeCV
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn import metrics


from tqdm.auto import tqdm

import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from huggingface_hub import Repository
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    PretrainedConfig,
    SchedulerType,
    default_data_collator,
    get_scheduler,
)
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
from transformers.utils.versions import require_version

from bert2 import Bert

logger = get_logger(__name__)

import numpy as np


# Select the torch device string: use CUDA when torch reports it as
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

from sys import argv 


import torchvision
from torch.utils import data
from torchvision import datasets
from torchvision import transforms
import torch
import os
import numpy as np
import torch.nn.functional as F

import time

# ---------------------------------------------------------------------------
# Command-line configuration.
#
# Positional arguments:
#   argv[1] first        selector for the regression inputs (X)
#   argv[2] second       selector for the regression targets (Y)
#   argv[3] under_model  directory name under ../features; must be a key of
#                        ``cnumber``
#   argv[4] over_model   directory name under ../features
# ---------------------------------------------------------------------------
if len(argv) < 5:
    # Fail with a usage hint instead of a bare IndexError.
    sys.exit(
        "usage: {prog} FIRST SECOND UNDER_MODEL OVER_MODEL "
        "(got {n} argument(s))".format(prog=argv[0], n=len(argv) - 1)
    )

first, second, under_model, over_model = argv[1:5]

# Number of under-model feature directories that are concatenated per loop
# iteration, keyed by the under-model name given on the command line.
cnumber = {"64": 4, "32": 15, "16": 59}

if under_model not in cnumber:
    # Fail with the list of supported names instead of a bare KeyError.
    sys.exit(
        "unsupported UNDER_MODEL {val!r}; expected one of {keys}".format(
            val=under_model, keys=sorted(cnumber)
        )
    )

concat_number = cnumber[under_model]

# Reference point for the elapsed-time reports printed later in the script.
start = time.time()
print(first + second)

# Set to "random" for the "ro"/"ru" selectors, empty otherwise.
rando = "random" if first in ("ro", "ru") else ""

def _feature_dirs(kind, seed, features_root, over_name, under_name, per_seed):
    """Return the feature directories selected by *kind* for iteration *seed*.

    "to" selects the single directory ``<features_root>/<over_name>/<seed>/``.
    "tu" selects ``per_seed`` consecutive numbered directories under
    ``<features_root>/<under_name>/``, starting at
    ``31 + (seed - 1) * per_seed``.

    Raises:
        ValueError: if *kind* is neither "to" nor "tu".
    """
    if kind == "to":
        return [features_root + "/" + str(over_name) + "/" + str(seed) + "/"]
    if kind == "tu":
        first_index = 30 + 1 + (seed - 1) * per_seed
        return [
            features_root + "/" + str(under_name) + "/" + str(index) + "/"
            for index in range(first_index, first_index + per_seed)
        ]
    raise ValueError(
        "unsupported selector {val!r}; expected 'to' or 'tu'".format(val=kind)
    )


def _load_split(directories):
    """Load and column-concatenate ``training.csv`` / ``test.csv`` files.

    Each CSV is read without a header and cast to float32. Column 0 is split
    off as the label column; the remaining columns are the feature block.

    Returns:
        ``(train, test, train_labels, test_labels)`` where ``train`` and
        ``test`` are NumPy arrays of the concatenated feature blocks and the
        label Series come from the first directory in *directories*.
    """
    train_blocks, test_blocks = [], []
    train_labels = test_labels = None

    # Walk the list back-to-front so the column order of the concatenated
    # arrays is the same as the one the residual files have always used.
    for directory in reversed(directories):
        train = pd.read_csv(directory + 'training.csv', header=None, index_col=False).astype(np.float32)
        test = pd.read_csv(directory + 'test.csv', header=None, index_col=False).astype(np.float32)

        train_labels, test_labels = train.iloc[:, 0], test.iloc[:, 0]
        train_blocks.append(train.iloc[:, 1:])
        test_blocks.append(test.iloc[:, 1:])

    train_matrix = pd.concat(train_blocks, axis=1).to_numpy()
    test_matrix = pd.concat(test_blocks, axis=1).to_numpy()
    return train_matrix, test_matrix, train_labels, test_labels


# Candidate ridge penalties; RidgeCV selects one value per target column.
alphas = np.array([0.00001,0.0001,0.005,0.001,0.05,0.01,0.1,0.5,1,2,3,5,7,8,10,11,13,15,20,25,30,35,40,45,50,55,65,75,80,90,100,110,125,140,160,180,200,250,300,400,500,600,700,850,1000,1200,1400,1500,1600,1800,2000,2100,2200,2500,3000,3500,4000,4500,5000,6000,7000,8000,9000,10000,11000,12000,13000,14000,15000,16000,20000,30000])

# For each of four iterations: load X (selected by ``first``) and Y (selected
# by ``second``), standardise X, fit a per-target ridge regression of Y on X,
# and write the training/test residuals Y - Yhat as CSV files.
for se1 in range(1, 5):
    features_root = os.getcwd() + "/../features"

    # ---- regression inputs (X) -------------------------------------------
    paths_feat = _feature_dirs(first, se1, features_root, over_model, under_model, concat_number)
    for path in paths_feat:
        print(path)

    X_training, X_test, target_training, target_test = _load_split(paths_feat)

    # Label columns as (n_samples, 1) arrays; not used by the regression.
    target_training = target_training.to_numpy().reshape(-1, 1)
    target_test = target_test.to_numpy().reshape(-1, 1)

    # ---- regression targets (Y) ------------------------------------------
    print(se1)
    print(first)
    print(second)

    paths_target = _feature_dirs(second, se1, features_root, over_model, under_model, concat_number)
    for path in paths_target:
        print(path)

    Y_training, Y_test, _, _ = _load_split(paths_target)

    print("data loading finished\n")
    print("REGRESSION DATA SIZE")
    print("number of features: {val}".format(val=X_training[0].size))
    print("number of targets: {val}".format(val=Y_training[0].size))
    print("training data size: {val}".format(val=len(X_training)))
    print("test data size: {val}".format(val=len(X_test)))

    # Standardise inputs using statistics of the training split only.
    scaler = StandardScaler()
    X_training = scaler.fit_transform(X_training)
    X_test = scaler.transform(X_test)

    print("TIME:::::::::::::")
    print(time.time() - start)
    print("::::::::::::::::::::::")

    # Not used in the visible part of the script; retained as module-level
    # names in case later code refers to them.
    errors_validation = []
    errors_training = []
    errors_test = []
    siz = len(Y_training[0])

    clf = RidgeCV(alphas=alphas, cv=None, alpha_per_target=True, scoring="r2").fit(X_training, Y_training)

    # ``alpha_`` holds the selected penalty values (not indices into alphas).
    alphastars = clf.alpha_
    print("best_alpha_indeces")
    print(alphastars)

    # Residuals of the fitted regression on both splits.
    Yhat_training = clf.predict(X_training)
    epsilon_training = Y_training - Yhat_training

    Yhat_test = clf.predict(X_test)
    epsilon_test = Y_test - Yhat_test

    # ---- write residuals --------------------------------------------------
    curr_dir = os.getcwd() + "/predicted_features/" + over_model + "/" + under_model
    os.makedirs(curr_dir, exist_ok=True)

    # File prefix follows the input selector: "under_" when X comes from the
    # "to" selector, "over_" otherwise.
    prefix = "under_" if first == "to" else "over_"
    training_out = curr_dir + "/" + prefix + 'epsilon_' + str(se1) + '_training.csv'
    test_out = curr_dir + "/" + prefix + 'epsilon_' + str(se1) + '_test.csv'

    print(training_out)
    pd.DataFrame(epsilon_training).to_csv(training_out, header=False, index=False)
    pd.DataFrame(epsilon_test).to_csv(test_out, header=False, index=False)

    print("TIME:::::::::::::")
    print(time.time() - start)
    print("::::::::::::::::::::::")

   
    
    

       
