import numpy as np
import pandas as pd
import torch

# Default years for get_years(): 2014 through 2018 inclusive. Each year names
# one data/{year}_clean.csv file.
YEARS = list(range(2014, 2019))

class ToNum:
    """Assign consecutive integer ids to symbols in first-seen order.

    The id of a symbol is its position in ``symbols``: the first distinct
    symbol converted gets 0, the next one 1, and so on.
    """

    def __init__(self):
        # Distinct symbols seen so far; a symbol's list position is its id.
        self.symbols = []

    def convert(self, symbol):
        """Return the id of ``symbol``, registering it first if it is new."""
        known = self.symbols
        if symbol in known:
            return known.index(symbol)
        # Unseen symbol: it takes the next free position at the end.
        known.append(symbol)
        return len(known) - 1

    def index(self, key):
        """Return the id of an already registered ``key``.

        Raises:
            ValueError: if ``key`` has not been registered (raised by
                ``list.index``).
        """
        position = self.symbols.index(key)
        return position

def get_years(years=YEARS):
    """Load the cleaned per-year CSV files and convert each one to tensors.

    For every entry of ``years`` the file ``data/{year}_clean.csv`` is read
    with its first column as the index. Rows containing any missing value are
    dropped, and the remaining rows are split into the ``ugpa`` target column
    and all other columns (the features).

    Args:
        years: Iterable of year identifiers used to build the file names.
            Defaults to the module-level ``YEARS``. Any iterable is accepted,
            including one-shot iterators and generators.

    Returns:
        A list with one dict per year, in iteration order, with the keys:
            ``'images'``: float32 tensor of the feature columns, one row per
                kept CSV row.
            ``'labels'``: float32 tensor of the ``ugpa`` column, reshaped to
                a single column (``reshape(-1, 1)``).
            ``'info'``: the year the entry was loaded for.

    Raises:
        FileNotFoundError: if a ``data/{year}_clean.csv`` file does not exist.
        KeyError: if a file has no ``ugpa`` column.
    """
    target_col = 'ugpa'
    data = []

    # Read each file inside the loop so `years` is traversed exactly once.
    # A separate pre-loading pass would exhaust a one-shot iterator and leave
    # nothing for the loop. It also keeps only one DataFrame alive at a time.
    for year in years:
        df = pd.read_csv(f'data/{year}_clean.csv', index_col=0).dropna()
        feature_cols = [col for col in df.columns if col != target_col]

        # dropna() removed every row with a missing value in any column, so
        # the tensors built here contain no NaN and need no extra filtering.
        x_tensor = torch.tensor(df[feature_cols].values, dtype=torch.float32)
        y_tensor = torch.tensor(df[target_col].values, dtype=torch.float32)

        data.append({'images': x_tensor, 'labels': y_tensor.reshape(-1, 1), 'info': year})

    return data



#if __name__ == '__main__':
#     envs = get_years()




#if __name__ == '__main__':
#    x, y = get_envs()
