import numpy as np
import os, csv, pickle, string
import bow_data
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

def logisticRegression(X_train, Y_train, X_test, Y_test, n_jobs=-2, with_mean=True, CV_=True):
    """Fit a logistic-regression classifier on standardized features and score it.

    A ``StandardScaler`` is fitted on ``X_train`` only and then applied to both
    splits, so the classifier is trained and evaluated in the same feature
    space regardless of which estimator is selected.

    Args:
        X_train: Training feature matrix.
        Y_train: Training labels.
        X_test: Test feature matrix.
        Y_test: Test labels.
        n_jobs: Forwarded to ``LogisticRegressionCV``; unused when ``CV_`` is
            false.
        with_mean: Forwarded to ``StandardScaler``.
            NOTE(review): presumably set to False for sparse input, which
            StandardScaler cannot center -- confirm against callers.
        CV_: If truthy, use ``LogisticRegressionCV`` with 3-fold
            cross-validation; otherwise use a plain ``LogisticRegression``.

    Returns:
        Tuple ``(test_accuracy, train_accuracy)``, each the fraction of
        labels predicted correctly on the respective split.
    """
    scaler = StandardScaler(with_mean=with_mean)
    # Statistics come from the training split only; the test split is merely
    # transformed so no information leaks from it into the model.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    if CV_:
        model = LogisticRegressionCV(max_iter=6000, cv=3, solver='liblinear', n_jobs=n_jobs, multi_class='auto')
    else:
        model = LogisticRegression(max_iter=6000, solver='liblinear', multi_class='auto')

    # Both estimators are fitted on the *scaled* training data. Previously the
    # non-CV branch was fitted on raw features but scored on scaled ones.
    clf = model.fit(X_train_scaled, Y_train)

    preds = clf.predict(X_test_scaled)
    test_accuracy = np.sum(Y_test == preds)/len(Y_test)

    preds = clf.predict(X_train_scaled)
    train_accuracy = np.sum(Y_train == preds)/len(Y_train)
    return(test_accuracy, train_accuracy)


def compute_accuracies(X_train, Y_train, X_test, Y_test, nfracs=25, with_mean=True, CV_=True, random_state=None):
    """Compute test/train accuracy for growing fractions of the training set.

    For each of ``nfracs`` evenly spaced fractions between 0.025 and 1.0, a
    random subset of the training data of that relative size is drawn, a
    classifier is fitted on it via ``logisticRegression`` and scored on the
    full test set and on the subset itself.

    Args:
        X_train: Full training feature matrix.
        Y_train: Full training labels.
        X_test: Test feature matrix.
        Y_test: Test labels.
        nfracs: Number of training-set fractions to evaluate.
        with_mean: Forwarded to ``logisticRegression``.
        CV_: Forwarded to ``logisticRegression``.
        random_state: Forwarded to ``train_test_split`` to make the
            subsampling reproducible. The default ``None`` keeps the
            original unseeded behaviour.

    Returns:
        Tuple ``(test_accuracies, train_accuracies)`` of NumPy arrays with
        one entry per evaluated fraction, in increasing-fraction order.
    """
    test_accuracies = []
    train_accuracies = []
    for frac in np.linspace(0.025, 1, nfracs):
        if frac < 1.0:
            # Keep only the "train" part of the split as the subsample.
            X_partial, _, Y_partial, _ = train_test_split(
                X_train, Y_train, train_size=frac, random_state=random_state
            )
        else:
            # train_test_split rejects a float train_size of 1.0, so the full
            # set is used directly for the last fraction.
            X_partial, Y_partial = X_train, Y_train

        test_accuracy, train_accuracy = logisticRegression(
            X_partial, Y_partial, X_test, Y_test, with_mean=with_mean, CV_=CV_
        )
        test_accuracies.append(test_accuracy)
        train_accuracies.append(train_accuracy)

    return(np.array(test_accuracies), np.array(train_accuracies))