""" """
import csv
import json
import os
from typing import List

from absl import app
from absl import flags

import numpy as np


###############################################################################

FLAGS = flags.FLAGS

# Label files: each one is read with json.load and the results are concatenated.
flags.DEFINE_list(
    'label_filepaths', None,
    'Comma-separated paths to JSON files, each holding an array of integer '
    'labels. The arrays are concatenated in the order given.')

# TODO: Support multiple annotators.
# Evaluation files: CSV with a header row; only the second column is read.
flags.DEFINE_list(
    'evaluation_filepaths', None,
    'Comma-separated paths to CSV files with a header row. The second column '
    'of every following row must be "yes", "no" or "maybe" (case and '
    'surrounding whitespace are ignored).')


###############################################################################
# Evaluation labels:
# Integer codes that _label_int_from_str assigns to an annotator's answer.
# NO and YES use the same values (0 and 1) that main() selects on in the loaded
# labels, so evaluations can be compared to labels element-wise. MAYBE is a
# third code; presumably the labels themselves never contain it -- verify
# against the label files.
NO = 0
YES = 1
MAYBE = 2
###############################################################################


def _load_labels(filepaths: List[str]) -> np.ndarray:
    """Read JSON label files and concatenate their contents into one array.

    Args:
        filepaths: Paths to JSON files; a leading ``~`` is expanded. Each
            file's parsed content is appended, in order, to a single list.

    Returns:
        An int64 NumPy array built from the concatenated contents.
    """
    collected = []
    for path in filepaths:
        resolved = os.path.expanduser(path)
        with open(resolved, 'r') as handle:
            collected += json.load(handle)
    return np.array(collected, dtype=np.int64)


def _label_int_from_str(s: str) -> int:
    """Translate a textual evaluation into its integer code.

    Surrounding whitespace and letter case are ignored.

    Args:
        s: One of 'no', 'yes' or 'maybe' (in any case, optionally padded).

    Returns:
        NO, YES or MAYBE respectively.

    Raises:
        ValueError: If the normalized text is none of the three known
            answers; the exception carries the normalized text.
    """
    normalized = s.strip().lower()
    codes = {'no': NO, 'yes': YES, 'maybe': MAYBE}
    if normalized not in codes:
        raise ValueError(normalized)
    return codes[normalized]


def _load_evaluations(filepaths: List[str]) -> np.ndarray:
    """Load annotator evaluations from CSV files into one integer array.

    The first row of every file is treated as a header and skipped. For each
    remaining row, the second column is converted with
    ``_label_int_from_str``. Completely blank rows (for example a trailing
    empty line) are ignored. Results are concatenated in file order.

    Args:
        filepaths: Paths to the CSV files; a leading ``~`` is expanded.

    Returns:
        A 1-D int64 NumPy array with one code per data row.

    Raises:
        ValueError: If a second-column value is not a known evaluation.
        IndexError: If a non-blank data row has fewer than two columns.
    """
    ret = []
    for filepath in filepaths:
        # The csv module asks for newline='' so that it handles line endings
        # itself, including newlines embedded in quoted fields.
        with open(os.path.expanduser(filepath), 'r', newline='') as f:
            reader = csv.reader(f)
            # Skip the header.
            next(reader, None)
            # csv.reader yields [] for a blank line; there is no evaluation
            # to read from such a row, so skip it rather than fail on row[1].
            ret.extend(_label_int_from_str(row[1]) for row in reader if row)
    return np.array(ret, dtype=np.int64)


def _set_maybe_to(evaluations: np.ndarray, replacement: int) -> np.ndarray:
    """Return a copy of the evaluations with every MAYBE entry replaced.

    Args:
        evaluations: Array of evaluation codes; it is left unmodified.
        replacement: Value written wherever the copy equals MAYBE.

    Returns:
        A new array with the substitutions applied.
    """
    updated = np.array(evaluations, copy=True)
    maybe_mask = updated == MAYBE
    updated[maybe_mask] = replacement
    return updated


###############################################################################

def _print_evaluation_label_counts(evaluations: np.ndarray):
    """Print how many evaluations carry each of the NO, YES and MAYBE codes.

    Args:
        evaluations: Array of evaluation codes to tally.
    """
    headings_and_codes = (
        ('no count: ', NO),
        ('yes count: ', YES),
        ('maybe count: ', MAYBE),
    )
    for heading, code in headings_and_codes:
        occurrences = np.sum(evaluations == code)
        print(f'{heading}{occurrences}')


def _print_evaluation_accuracy(prefix: str, labels: np.ndarray, evaluations: np.ndarray):
    """Print the fraction of positions at which two arrays hold equal values.

    Args:
        prefix: Text printed before the colon to identify the figure.
        labels: First array of the element-wise comparison.
        evaluations: Second array of the element-wise comparison.
    """
    matches = labels == evaluations
    print(f'{prefix}: {matches.mean()}')


###############################################################################


def main(_):
    """Load labels and evaluations, then print label counts and accuracies.

    Accuracy is printed under four treatments of MAYBE evaluations, and then
    (with MAYBE counted as YES) separately for the items labelled 1 and 0.

    Args:
        _: Unused positional argument supplied by ``app.run``.

    Raises:
        app.UsageError: If --label_filepaths or --evaluation_filepaths is
            missing or empty.
        ValueError: If the loaded labels and evaluations differ in shape.
    """
    # Both flags default to None; fail with a usage message instead of a
    # TypeError from iterating None inside the loaders.
    if not FLAGS.label_filepaths or not FLAGS.evaluation_filepaths:
        raise app.UsageError(
            'Both --label_filepaths and --evaluation_filepaths must be given.')

    labels = _load_labels(FLAGS.label_filepaths)
    evaluations = _load_evaluations(FLAGS.evaluation_filepaths)

    # Every comparison below is element-wise. Without this check a mismatch
    # either fails with an opaque NumPy error or, when one side has a single
    # element, silently broadcasts and prints meaningless numbers.
    if labels.shape != evaluations.shape:
        raise ValueError(
            f'Got labels of shape {labels.shape} but evaluations of shape '
            f'{evaluations.shape}; there must be one evaluation per label.')

    _print_evaluation_label_counts(evaluations)

    # Evaluate with maybes treated as always wrong, as yes, as no, and removed.
    # "Always wrong" compares the raw codes: MAYBE stays distinct from YES/NO,
    # so it cannot match a label (assuming labels only contain 0 and 1).
    _print_evaluation_accuracy('maybes always wrong', labels, evaluations)
    _print_evaluation_accuracy('maybes as yes', labels, _set_maybe_to(evaluations, YES))
    _print_evaluation_accuracy('maybes as no', labels, _set_maybe_to(evaluations, NO))
    not_maybe = evaluations != MAYBE
    _print_evaluation_accuracy('maybes removed', labels[not_maybe], evaluations[not_maybe])

    # Treat maybes as yes and report, for the items with true label 1 and the
    # items with true label 0 separately, the fraction whose evaluation
    # matches the label.
    # NOTE(review): the original comment also mentioned comparing annotator
    # agreement per true-label group; only a single set of evaluations is
    # loaded here, so that comparison is not performed.
    evaluations2 = _set_maybe_to(evaluations, YES)
    is_one = labels == 1
    is_zero = labels == 0
    _print_evaluation_accuracy('label=1', labels[is_one], evaluations2[is_one])
    _print_evaluation_accuracy('label=0', labels[is_zero], evaluations2[is_zero])


# Script entry point: only when executed directly (not when imported), hand
# main to absl's app.run, which invokes it.
if __name__ == "__main__":
    app.run(main)
