'''
Script takes in two score files (proxy and CAML) and calculates faithfulness
by computing classification and regression metrics between them.
'''
import argparse
import logging
import time
import json
import numpy as np
from tqdm import tqdm
import datasets
import evaluation
from sklearn import metrics
from scipy import stats
from utils import read_json_preds, scores_to_matrix, probs_to_preds, calc_classification_metrics, calc_regression_metrics


def main(args):
    '''
    Read the CAML and proxy score files and run the metric calculations.

    Steps, in order: load the lookup dictionaries with
    ``datasets.load_lookups(args)``; read each JSON score file with
    ``read_json_preds`` and convert it with ``scores_to_matrix`` using the
    ``'ind2c'`` lookup; then call ``calc_classification_metrics`` followed by
    ``calc_regression_metrics`` on the two converted results.

    Args:
        args: parsed command-line namespace. This function reads
            ``caml_scores``, ``proxy_scores``, ``thresholds`` and ``outfile``
            from it; the whole namespace is also passed to
            ``datasets.load_lookups``.

    NOTE(review): ``--proxy_scores`` is optional and its help text mentions a
    fallback to ``mimic_proxy.compile_nice_json()``, but no such fallback
    exists here -- ``args.proxy_scores`` goes straight to ``read_json_preds``.
    Confirm how a missing value is handled.
    '''
    lookups = datasets.load_lookups(args)

    # CAML side. The name is rebound on conversion so the raw predictions
    # are not kept alive next to the converted matrix.
    print('reading caml')
    caml_scores = read_json_preds(args.caml_scores)
    print('converting caml to matrix')
    ind2c = lookups['ind2c']
    caml_scores = scores_to_matrix(caml_scores, ind2c)

    # Proxy side, converted with the same 'ind2c' lookup.
    print('reading proxy')
    proxy_scores = read_json_preds(args.proxy_scores)
    print('converting proxy to matrix')
    proxy_scores = scores_to_matrix(proxy_scores, ind2c)

    # Classification metrics first, then regression metrics; only the
    # regression call is given the output path.
    calc_classification_metrics(
        caml_scores,
        proxy_scores,
        ind2c,
        thresholds_path=args.thresholds,
    )
    calc_regression_metrics(caml_scores, proxy_scores, args.outfile)

if __name__=="__main__":
    # Command-line entry point: parse arguments, configure logging, run
    # main() and log how long it took.
    parser = argparse.ArgumentParser()
    # Positional arguments. main() reads caml_scores and outfile directly;
    # data_path and vocab are not read in this file and are presumably
    # consumed by datasets.load_lookups(args) -- TODO confirm.
    parser.add_argument('caml_scores')
    parser.add_argument('data_path')
    parser.add_argument('vocab')
    parser.add_argument('outfile')
    # NOTE(review): the fallback described in this help text is not
    # implemented in this file; main() uses args.proxy_scores as given.
    parser.add_argument('--proxy_scores', help="Path to json scores. Defaults to running mimic_proxy.compile_nice_json()")
    parser.add_argument('--thresholds', help="Path to thresholds per class. Defaults to 0.5")
    # Y, version and public_model are likewise not read directly here; they
    # travel with the namespace into datasets.load_lookups(args).
    parser.add_argument('--Y', default='full')
    parser.add_argument('--version', default='mimic3')
    # store_const leaves args.public_model as None when the flag is absent
    # and sets it to True when present.
    parser.add_argument("--public-model", dest="public_model", action="store_const", required=False, const=True,
                        help="optional flag for testing pre-trained models from the public github")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format='%(levelname)s - %(message)s')

    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # intervals; time.time() is wall-clock time and can jump if the system
    # clock is adjusted mid-run.
    start = time.perf_counter()
    main(args)
    end = time.perf_counter()
    logging.info(f'Time to run script: {end-start} secs')