from sys import argv 
import pandas as pd
import argparse
import json
import logging
import math
import os
import random
from pathlib import Path

import datasets
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

import evaluate
import transformers
from accelerate import Accelerator
from accelerate.logging import get_logger
from accelerate.utils import set_seed
from huggingface_hub import Repository
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    PretrainedConfig,
    SchedulerType,
    default_data_collator,
    get_scheduler,
)
from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
from transformers.utils.versions import require_version

from bert2 import Bert

# Module-level logger created through accelerate's get_logger.
# NOTE(review): some accelerate releases appear to require Accelerator() or
# PartialState() to be initialised before this logger is first used; this
# file never creates one before main() logs -- confirm against the installed
# accelerate version.
logger = get_logger(__name__)


from sklearn.linear_model import RidgeCV
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn import metrics


import time



# ---------------------------------------------------------------------------
# Command-line configuration (all positional):
#   argv[1] first         -- code of the feature set used as regression input
#   argv[2] second        -- code of the feature set used as regression target
#   argv[3] under_model   -- integer naming a features/<n>/ directory
#   argv[4] over_model    -- integer naming a features/<n>/ directory
#   argv[5] concat_number -- number of feature directories concatenated per run
#
# The codes recognised by main() are "ro", "ru", "to" and "tu": a second
# letter "o"/"u" selects the over_model/under_model directory, and a first
# letter "r" selects the CSV files carrying the "random_" prefix.
# ---------------------------------------------------------------------------
if len(argv) < 6:
    # Fail with a readable message instead of a bare IndexError.
    raise SystemExit(
        "usage: {} FIRST SECOND UNDER_MODEL OVER_MODEL CONCAT_NUMBER".format(argv[0])
    )

first = argv[1]
second = argv[2]
under_model = int(argv[3])
over_model = int(argv[4])
concat_number = int(argv[5])

# Largest concat_number that is run for a given under_model; anything above
# the limit exits silently with status 0. Other under_model values have no
# limit here.
_MAX_CONCAT_BY_UNDER_MODEL = {64: 10, 32: 15, 16: 59}

# SystemExit is raised directly rather than calling quit(): quit() is a helper
# injected by the site module for interactive sessions and is not guaranteed
# to exist (e.g. under `python -S`). The exit status (0) is unchanged.
_concat_limit = _MAX_CONCAT_BY_UNDER_MODEL.get(under_model)
if _concat_limit is not None and concat_number > _concat_limit:
    raise SystemExit(0)

# Inputs read from the over_model directory ("to"/"ro") are capped at 10
# concatenated directories regardless of under_model.
if concat_number > 10 and first in ("to", "ro"):
    raise SystemExit(0)

# Reference point for the elapsed-time reports printed by main().
start = time.time()

# Regressing an over_model feature set onto itself is only run when
# under_model is 16; every other under_model value skips it.
if (first, second) in (("to", "to"), ("ro", "ro")) and under_model != 16:
    print("finised(to-to)")
    raise SystemExit(0)

print(first+second)

# First path component under reg_results/: "random" for the "ro"/"ru" inputs,
# empty otherwise.
rando = "random" if first in ("ro", "ru") else ""


# Number of independent regression runs; each run reads its own, disjoint
# slice of feature directories.
_NUM_RUNS = 8
# Input feature directories are numbered after this offset (first one is 31).
_FEATURE_DIR_OFFSET = 30
# Targets always come from directories 1 .. 10 of the selected model.
_NUM_TARGET_DIRS = 10
# Feature-set codes accepted for both the input and the target side.
_FEATURE_SET_CODES = ("ro", "ru", "to", "tu")
# Candidate ridge penalties handed to RidgeCV. The listing order is kept
# exactly as it was so that any tie between alphas resolves the same way.
_RIDGE_ALPHAS = np.array([0.00001,0.0001,0.005,0.001,0.05,0.01,0.1,0.5,1,2,3,5,7,8,10,11,13,15,20,25,30,35,40,45,50,55,65,75,80,90,100,110,125,140,160,180,200,250,300,400,500,600,700,850,1000,1200,1400,1500,1600,1800,2000,2100,2200,2500,3000,3500,4000,4500,5000,6000,7000,8000,9000,10000,11000,12000,13000,14000,15000,16000,20000,30000])


def _validate_code(code):
    """Raise ValueError unless *code* is one of the known feature-set codes."""
    if code not in _FEATURE_SET_CODES:
        raise ValueError(
            "unknown feature-set code {!r}; expected one of {}".format(
                code, ", ".join(_FEATURE_SET_CODES)
            )
        )


def _path_prefixes(code, under, over, dir_numbers):
    """Build (and print) the CSV path prefixes of feature set *code*.

    A second letter "o" selects the ``over`` directory, anything else the
    ``under`` directory; a first letter "r" adds the "random_" file prefix.
    'training.csv' / 'test.csv' are appended to each prefix by the loader.
    """
    _validate_code(code)
    model = over if code[1] == "o" else under
    file_prefix = "random_" if code[0] == "r" else ""
    root = os.getcwd() + "/features"

    prefixes = []
    for number in dir_numbers:
        prefixes.append(f"{root}/{model}/{number}/{file_prefix}")
        print(prefixes[-1])
    return prefixes


def _read_matrix_csv(path):
    """Read a headerless CSV as float32 and drop its first column.

    NOTE(review): the dropped first column is presumably a row id -- confirm
    against whatever writes these files.
    """
    frame = pd.read_csv(path, header=None, index_col=False).astype(np.float32)
    return frame.iloc[:, 1:]


def _load_train_test(prefixes):
    """Load and column-concatenate the training/test CSVs of every prefix.

    Returns a ``(training, test)`` pair of NumPy arrays. Prefixes are consumed
    last-to-first: this is the column order the script has always produced
    (it used to pop from the end of the list), and for targets it fixes the
    row order of the result files, so it must not change.
    """
    train_parts = []
    test_parts = []
    for prefix in reversed(prefixes):
        train_parts.append(_read_matrix_csv(prefix + 'training.csv'))
        test_parts.append(_read_matrix_csv(prefix + 'test.csv'))

    training = pd.concat(train_parts, axis=1).to_numpy()
    test = pd.concat(test_parts, axis=1).to_numpy()
    return training, test


def _write_errors(out_dir, run, errors):
    """Write one error value per line to ``<out_dir>/result_<run>.txt``."""
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(out_dir, exist_ok=True)
    path = out_dir + "/" + "result" + "_" + str(run) + ".txt"
    print(path)
    with open(path, 'w') as f:
        for e in errors:
            f.write("%s\n" % e)


def _report_elapsed(started_at):
    """Print the seconds elapsed since *started_at* between banner lines."""
    print("TIME:version2::::::::::::")
    print(time.time() - started_at)
    print("::::::::::::::::::::::")


def main():
    """Fit per-target ridge regressions from one feature set onto another.

    Reads the module-level settings parsed from the command line (``first``,
    ``second``, ``under_model``, ``over_model``, ``concat_number``, ``rando``,
    ``start``). For each of the 8 runs it:

    1. loads ``concat_number`` input directories selected by ``first`` and
       standardises them (scaler fitted on the training split only);
    2. loads the 10 target directories selected by ``second``;
    3. fits ``RidgeCV`` with one alpha per target, scored by R^2;
    4. writes ``1 - R^2`` per target to
       ``reg_results/<rando>/concat_<n>/{validation,test}/<over>/<under>/<first>_<second>/result_<run>.txt``
       where "validation" uses RidgeCV's ``best_score_`` and "test" uses the
       held-out test CSVs.

    Raises:
        ValueError: if ``first`` or ``second`` is not a known code, or if no
            directories are selected (raised by ``pandas.concat``).
    """
    logger.info("REGRESSION STARTS")
    logger.info(first+second)

    # Reject bad codes before any (potentially slow) CSV loading happens.
    _validate_code(first)
    _validate_code(second)

    # An empty `rando` leaves a harmless double slash; the string is kept
    # exactly as before so the printed paths do not change.
    results_root = f"{os.getcwd()}/reg_results/{rando}/concat_{concat_number}"
    pair_dir = f"{over_model}/{under_model}/{first}_{second}"

    for run in range(1, _NUM_RUNS + 1):
        # Run k uses directories offset + n*(k-1) + 1 .. offset + n*k.
        base = _FEATURE_DIR_OFFSET + concat_number * (run - 1)
        feature_prefixes = _path_prefixes(
            first, under_model, over_model,
            range(base + 1, base + concat_number + 1),
        )
        X_training, X_test = _load_train_test(feature_prefixes)

        # Standardise with statistics from the training split only.
        scaler = StandardScaler()
        X_training = scaler.fit_transform(X_training)
        X_test = scaler.transform(X_test)

        print(run)
        print(first)
        print(second)

        target_prefixes = _path_prefixes(
            second, under_model, over_model,
            range(1, _NUM_TARGET_DIRS + 1),
        )
        Y_training, Y_test = _load_train_test(target_prefixes)

        print("data loading finished\n")
        print("REGRESSION DATA SIZE")
        print("number of features: {val}".format(val=X_training.shape[1]))
        print("number of targets: {val}".format(val=Y_training.shape[1]))
        print("training data size: {val}".format(val=len(X_training)))
        print("test data size: {val}".format(val=len(X_test)))

        _report_elapsed(start)

        clf = RidgeCV(
            alphas=_RIDGE_ALPHAS, cv=None, alpha_per_target=True, scoring="r2"
        ).fit(X_training, Y_training)

        # With alpha_per_target=True this holds one selected alpha per target.
        print("best_alpha_indeces")
        print(clf.alpha_)

        # Errors are reported as 1 - R^2, one value per target column.
        validation_errors = list(1 - clf.best_score_)
        test_errors = list(
            1 - metrics.r2_score(Y_test, clf.predict(X_test), multioutput='raw_values')
        )

        _write_errors(f"{results_root}/validation/{pair_dir}", run, validation_errors)
        _write_errors(f"{results_root}/test/{pair_dir}", run, test_errors)

        _report_elapsed(start)
    
       


     
# Run the regression only when this file is executed as a script. Note that
# the command-line parsing at module level still runs on import.
if __name__ == "__main__":
    main()
