import pandas as pd
import sklearn as sk
import sys
from sklearn.ensemble import RandomForestClassifier
from evaluation import classifier_evaluation, regressor_evaluation, classifier_eval, regressor_eval

# Raise the interpreter's maximum recursion depth to one million frames.
# NOTE(review): presumably needed by the imported evaluation routines - confirm.
sys.setrecursionlimit(1_000_000)

# Example usage

# Classification

# data = pd.read_csv('diabetes.csv')
# data.dropna(inplace=True)
# X = data.drop('Outcome', axis=1)  # Input features
# Y = data['Outcome']  # Target variable

# data = pd.read_csv('wine.data', delimiter=',', header=None)
# data.columns = ['class', 'alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols', 'proanthocyanins', 'color_intensity', 'hue', 'od280/od315_of_diluted_wines', 'proline']
# X = data.drop('class', axis=1)  # Input features
# Y = data['class']  # Target variable

# Load the 'adult.data' file. It has no header row, so the column names are
# supplied explicitly when reading.
ADULT_COLUMNS = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship', 'race', 'sex',
    'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
    'income',
]
data = pd.read_csv('adult.data', header=None, names=ADULT_COLUMNS)

# Missing values are marked with '?'. String fields in this file keep the
# space that follows each comma (the positive label is ' >50K'), so the marker
# can appear as ' ?'; matching only the bare '?' would leave those rows in.
data = data.replace(['?', ' ?'], pd.NA)
data = data.dropna()

X = data.drop('income', axis=1)  # Input features
Y = data['income']  # Target variable

# Encode the target as a binary integer: 1 for '>50K', 0 otherwise.
# Surrounding whitespace is stripped so the padded label still matches.
Y = (Y.str.strip() == '>50K').astype(int)

classifier_eval(X, Y)

###################################################

# Regression

# data = pd.read_csv('auto-mpg.data', delim_whitespace=True, header=None)
# data = data.replace('?', pd.NA)
# data.dropna(inplace=True)
# data.columns = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model_year', 'origin', 'car_name']
# data = data.drop('car_name', axis=1)

# X = data.drop('mpg', axis=1)  # Input features
# Y = data['mpg']  # Target variable

# X['horsepower'] = X['horsepower'].astype(float)

# data = pd.read_excel('Concrete_Data.xls')
# data = data.replace('?', pd.NA)
# data.dropna(inplace=True)
# X = data.drop('Concrete compressive strength(MPa, megapascals) ', axis=1)  # Input features
# Y = data['Concrete compressive strength(MPa, megapascals) ']  # Target variable

# regressor_eval(X, Y)