#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.inspection import DecisionBoundaryDisplay
from python.width_lib import C_tree
from python.class_lib_beta import C_tree_class
from python.pred_bakeoff_settings import estimator, max_depth, fname, cp

import pandas as pd
import numpy as np
from sklearn import tree
from time import time
from tqdm import tqdm

# Run the shared simulation settings in this module's namespace. The code
# below relies on names that are not imported anywhere in this file
# (`datasets_to_use`, `data_dir`), so they are expected to be defined by
# this settings script.
# NOTE(review): `exec` runs arbitrary code; this is only acceptable because
# the path is a fixed, project-local file. A regular import would be safer.
# The `with` block makes sure the file handle is closed once the source has
# been read (the original left it for the garbage collector).
with open("python/sim_settings.py") as settings_file:
    exec(settings_file.read())

np.random.seed(123)

# Summary table with one row per dataset and columns N, P and URL, all
# initialised to integer zero. The row count follows `datasets_to_use`
# instead of a hard-coded 8, so changing the dataset list in the settings
# no longer triggers a length-mismatch error when the index is assigned.
datdf = pd.DataFrame(np.zeros((len(datasets_to_use), 3), dtype=int))
datdf.index = datasets_to_use
datdf.columns = ['N','P','URL']
# NOTE(review): 'URL' is never filled in below and is written out as 0.

for ds in tqdm(datasets_to_use, leave = False):
    # Each dataset is read from <data_dir><name>.csv (plain concatenation,
    # so `data_dir` must already end with a path separator if one is needed).
    outname = data_dir+ds+'.csv'
    df = pd.read_csv(outname)

    # Drop every row that contains at least one missing value.
    # NOTE(review): the original comment said "For infra." -- presumably the
    # 'infra' dataset is the one with missing entries; confirm.
    df = df.dropna()
    print(ds)
    print(df.shape)

    # N = rows remaining after dropping incomplete rows; P = number of
    # columns in the CSV (whatever columns the file holds -- TODO confirm
    # whether a response column should be excluded from P).
    datdf.loc[ds,'N'] = df.shape[0]
    datdf.loc[ds,'P'] = df.shape[1]


# Write the summary table as LaTeX.
with open("tables/data.tex",'w') as f:
    datdf.to_latex(f)
