import glob
import os
import pandas as pd

# folder holding the raw REVS csv files (joined onto the repository root)
RAW_PATH = "datasets_raw/REVS_Program_Vehicle_Dynamics_Database/"
# folder receiving the processed csv files (joined onto the repository root)
PROCESSED_PATH = "datasets/REVS_Program_Vehicle_Dynamics_Database/"

# columns retained from every raw csv; "time" must stay in this list because
# the processing step uses it as the index for resampling
FEATURES_TO_KEEP = [
    "sideSlip",
    "vxCG",
    "vyCG",
    "time",
    "engineSpeed",
    "handwheelAngle",
    "throttle",
    "brake",
    "axCG",
    "ayCG",
    "yawRate",
    "chassisAccelFL",
    "chassisAccelFR",
    "chassisAccelRL",
    "chassisAccelRR",
    "longitude",
    "latitude",
]

# recordings that get special treatment; the first two are written to the
# "Excluded" folder (car spinning / engine error data) instead of their run folder
SPINNING_CSV = "20130817_01_01_02_grandsport.csv"
ENGINE_FAILURE_CSV = "20130815_01_01_02_grandsport.csv"
# NOTE(review): not referenced in the visible part of this file; presumably a
# held-out test recording used elsewhere -- confirm
MONTERY_TEST_CSV = "20130817_02_01_02_grandsport.csv"

# file names (including the ".csv" suffix) that are routed to "Excluded"
CSVS_TO_EXCLUDE = [
    SPINNING_CSV,
    ENGINE_FAILURE_CSV,
]

def process_revs_data(raw_path, processed_path, features_to_keep, exclude, check_nan = False):
    # get path to repo and path to raw data folders
    path_to_repo = os.path.split(os.path.dirname(os.path.realpath(__file__)))[0]
    path = os.path.join(path_to_repo,raw_path)

    # create folder for excluded csvs
    os.makedirs(os.path.join(path_to_repo, processed_path, "Excluded"), exist_ok=True) 

    # get all folders in raw data folder
    target_folder = [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]

    # loop through all folders in the raw data folder
    for folder in target_folder:
        
        # create folders for the processed data
        os.makedirs(os.path.join(path_to_repo, processed_path, folder), exist_ok=True) 
        
        # loop through all .csv files in the respective folder
        subfolder = f"{os.path.join(path,folder)}/*.csv"
        for fname in glob.glob(subfolder):
            print(fname)
            
            # get the respective file name
            file_name = os.path.split(fname)[1].split(".")[0]
            
            # read csv, drop firsts rows, remove unnecessary columns
            df = pd.read_csv(fname, skiprows=10, header=[0, 1], encoding="unicode_escape", sep=",")
            df.columns = df.columns.droplevel(-1)
            df = df[features_to_keep]

            # downsample to 20 hz -> 1/20 = 0.05s
            df.index = pd.to_datetime(df["time"], unit = 's')
            df = df.resample("0.05s").first()
            df.reset_index(inplace=True, drop=True)

            # check for nans, only positional information (longitude, latitude) should contain nans
            if check_nan:
                if df.isnull().sum().sum() > 0:
                    print("None Values found in:")
                    print(fname)
                    print(df.isnull().sum())

                
            if file_name + ".csv" in exclude:
                # move engine error data & car spinning data to different folder
                df.to_csv(
                    f"{os.path.join(path_to_repo, processed_path, 'Excluded', file_name)}.csv",
                    index=False,
                )
            else:
                # store processed data in processed folder
                df.to_csv(
                    f"{os.path.join(path_to_repo, processed_path, folder, file_name)}.csv",
                    index=False,
                )

# run the preprocessing with the module-level configuration; there is no
# __main__ guard, so this executes whenever the file is run or imported
process_revs_data(
    RAW_PATH,
    PROCESSED_PATH,
    FEATURES_TO_KEEP,
    CSVS_TO_EXCLUDE,
    check_nan=False,
)