"""
This script:
- splits the test set into a proxy set and a test set
- partitions the train set across multiple clients
- computes the optimal budget allocation for the proxy set
- trains the budget prediction model
- uses the budget prediction model at test time to predict each client's budget allocation per query
"""

import torch

from datasets import concatenate_datasets, load_dataset, DatasetDict, Dataset
from accelerate import Accelerator
from rank_bm25 import BM25Okapi
import numpy as np
from tqdm import trange
from nltk.tokenize import word_tokenize

from openicl.icl_retriever import BaseRetriever, BM25Retriever
from openicl.utils.check_type import _check_str
from openicl import DatasetReader, PromptTemplate, PPLInferencer, AccEvaluator

import argparse
import json
import os
import wandb
import socket
import copy

from huggingface_hub import login, HfApi, HfFolder

# Read the Hugging Face token from the HF_TOKEN environment variable so a real
# credential never has to be written into this file. The "HFTOKEN" placeholder
# is kept as the fallback to preserve the previous behavior when it is unset.
token = os.environ.get("HF_TOKEN", "HFTOKEN")
login(token=token)

from util.template import gen_template_classification
from util.retriever import get_retriever, get_fedretriever, get_server_retriever
from util.partition import (
    cls_iid_partition,
    cls_noniid_partition,
    datasplit_subset,
    data2usename,
    data2numcls,
)

from util.icl_ppl_fed_budgetmodel_inferencer import PPLFedBudgetModelInferencer
from util.misc import setup_seed


def print_score(
    args,
    output_json_path,
):
    """Load saved predictions and labels, score them, print and return the score.

    Args:
        args: Parsed command-line namespace; only ``args.debug`` is read, to
            choose between the ``*_debug.json`` and ``*_run.json`` file names.
        output_json_path: Directory containing ``prediction_<mode>.json`` and
            ``label_<mode>.json``.

    Returns:
        Whatever ``AccEvaluator().score`` returns for the loaded predictions
        and references.
    """
    # Both files share the same mode suffix, so compute it once.
    mode = "debug" if args.debug else "run"
    pred_file = os.path.join(output_json_path, f"prediction_{mode}.json")
    label_file = os.path.join(output_json_path, f"label_{mode}.json")

    with open(pred_file, "r") as f:
        prediction = json.load(f)
    with open(label_file, "r") as f:
        reference = json.load(f)

    evaluator = AccEvaluator()
    score = evaluator.score(predictions=prediction, references=reference)
    print(score)
    return score


def run(args):
    """Run the proxy-split / client-partition / budget-model / inference pipeline.

    Steps, in order:
      1. Load the dataset, carve a proxy subset out of ``args.proxy_split``,
         and partition the train split across ``args.num_clients`` clients.
      2. Build one retriever per client and the budget-model inferencer.
      3. If ``args.train_budget_model``: compute the optimal budget allocation
         on the proxy set and train the local budget models.
      4. If ``args.inference``: run inference on the test queries and save the
         predictions and labels as JSON under ``args.prediction_results_dir``.

    Side effects:
        Writes ``query_subset_orig_idxs.json`` into ``args.log_dir`` and, when
        inference is enabled, ``prediction_<mode>.json`` / ``label_<mode>.json``
        into ``args.prediction_results_dir``.

    Args:
        args: Parsed command-line namespace (see the ``__main__`` block).

    Returns:
        The value returned by ``inferencer.train_local_budget_model`` when
        ``args.train_budget_model`` is set, otherwise ``None``.

    Raises:
        ValueError: If ``args.partition`` is neither ``'iid'`` nor ``'noniid'``.
    """
    # ===== 1. Data Setting Preparation =====
    # load dataset
    dataset = load_dataset(data2usename[args.dataset])

    input_col = "paraphrase" if args.dataset.startswith("gen-") else "sentence"

    # Fall back to the validation split when the dataset has no test split.
    test_split = "test" if "test" in dataset else "validation"

    # slice a subset for proxy dataset on server
    proxy_data, remain_data = datasplit_subset(
        dataset[args.proxy_split],
        subset_num=args.proxy_size,
        split=args.proxy_split,
        verbose=True,
        return_remain=True,
    )
    dataset[args.proxy_split] = remain_data

    # partition data into clients
    if args.partition == "iid":
        fed_dataset = cls_iid_partition(
            dataset=dataset,
            split="train",
            data_name=args.dataset,
            num_clients=args.num_clients,
            test_split=test_split,
            subset_num=args.subset_num,
        )
    elif args.partition == "noniid":
        fed_dataset = cls_noniid_partition(
            dataset=dataset,
            split="train",
            data_name=args.dataset,
            major_classes_num=args.major_classes_num,
            num_clients=args.num_clients,
            test_split=test_split,
            subset_num=args.subset_num,
        )
    else:
        raise ValueError(
            f"args.partition can only be 'iid' or 'noniid', rather than '{args.partition}'."
        )

    # save selected test query subset
    subset_orig_idxs = fed_dataset[test_split]["idx"]
    subset_orig_idxs_file = os.path.join(args.log_dir, "query_subset_orig_idxs.json")
    with open(subset_orig_idxs_file, "w") as f:
        json.dump(subset_orig_idxs, f)

    # add client idx for each local training sample (used in proxy set's optimal budget allocation process)
    for cid in range(args.num_clients):
        client_key = f"train-client{cid}"
        local_sample_num = len(fed_dataset[client_key])
        fed_dataset[client_key] = fed_dataset[client_key].add_column(
            "cid", [cid] * local_sample_num
        )
    print("Add client ID column for each local training dataset.")

    # make datareader for all clients' local training data
    fed_reader = [
        DatasetReader(
            fed_dataset[f"train-client{cid}"],
            input_columns=[input_col],
            output_column="label",
        )
        for cid in range(args.num_clients)
    ]

    # get template for classification
    tp_dict = gen_template_classification(args)
    template = PromptTemplate(tp_dict, {input_col: "</text>"}, ice_token="</E>")

    # generate retriever for each client: each local train data will be used to build local train corpus
    RETRIEVER = get_fedretriever(args)
    fed_retrievers = [
        RETRIEVER(fed_reader[cid], ice_num=args.local_ice_num)
        for cid in range(args.num_clients)
    ]

    # ===== 2. Build the budget-model inferencer =====
    setup_seed(args.seed)
    inferencer = PPLFedBudgetModelInferencer(
        model_name=args.model, output_json_filepath=args.log_dir, args=args
    )

    # Stays None unless the budget models are trained in this call, so the
    # return below is always bound and a training result is never discarded
    # by a subsequent inference step.
    clients_best_val_accs = None

    # ===== 3. Obtain the optimal budget allocation for proxy dataset =====
    if args.train_budget_model:
        proxy_ice_client_source, proxy_opt_per_client_budget, proxy_res_list = (
            inferencer.opt_budget_allocation(
                fed_retrievers,
                query_dataset=proxy_data,
                ice_template=template,
                output_json_filepath=args.log_dir,
                local_ice_num=args.local_ice_num,
                server_ice_num=args.server_ice_num,
                prefix="proxy",
                args=args,
            )
        )

        # ===== 4. Train local budget models on the proxy-set results =====
        clients_best_val_accs = inferencer.train_local_budget_model(
            query_res_list=proxy_res_list,
            opt_per_client_budget=proxy_opt_per_client_budget,
            model_name=args.budget_model_name,
            model_width=args.budget_model_width,
            num_classes_per_client=args.num_classes_per_client,
            epochs=args.budget_model_epochs,
            lr=args.budget_model_lr,
            batch_size=args.budget_model_batch_size,
            train_ratio=args.budget_model_train_ratio,
            output_json_filepath=args.log_dir,
            output_model_filepath=args.budget_model_dir,
            seed=args.seed,
        )

    # ===== 5. Inference on the test queries =====
    if args.inference:
        predictions = inferencer.inference(
            retrievers=fed_retrievers,
            query_dataset=fed_dataset[test_split],
            ice_template=template,
            output_json_filepath=args.prediction_results_dir,
            output_model_filepath=args.budget_model_dir,
            concat="reorder",
            strategy=args.strategy,
            buffer=args.buffer,
            args=args,
        )

        # ----- save inference prediction
        mode = "debug" if args.debug else "run"
        prediction_file = os.path.join(
            args.prediction_results_dir, f"prediction_{mode}.json"
        )
        with open(prediction_file, "w") as f:
            json.dump(predictions, f)

        label_file = os.path.join(args.prediction_results_dir, f"label_{mode}.json")
        with open(label_file, "w") as f:
            # Labels must come from the same split that was queried above;
            # a hard-coded "test" key is wrong for validation-only datasets.
            json.dump(fed_dataset[test_split]["label"], f)

    return clients_best_val_accs


if __name__ == "__main__":
    # Command-line entry point: parse arguments, derive the output directory
    # layout from them, and (only with --run) execute the pipeline under a
    # wandb run.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="sst2", help="Dataset name")
    parser.add_argument(
        "--subset_num",
        type=int,
        default=None,
        help="Number of subset test set for query",
    )
    parser.add_argument("--proxy_split", default="test", type=str)
    parser.add_argument("--proxy_size", default=500, type=int)
    parser.add_argument(
        "--partition", default="iid", choices=["iid", "noniid"], type=str
    )
    parser.add_argument("--num_clients", type=int, default=3)
    parser.add_argument("--major_classes_num", default=-1, type=int)
    parser.add_argument(
        "--model",
        type=str,
        default="EleutherAI/gpt-neo-2.7B",
        help="Pretrained LLM model name",
    )
    parser.add_argument("--local_ice_num", default=None, type=int)
    parser.add_argument(
        "--server_ice_num",
        default=-1,
        type=int,
        help="Server side ICE Number, server_ice_num <= num_clients * local_ice_num",
    )
    # NOTE: there is no --concat option; the concat mode is fixed to "reorder"
    # (see inference_setting below).
    parser.add_argument(
        "--retriever", type=str, default="topk", help="Server side Retriever Type"
    )  # use 'bm25'

    # ---- budget model args
    parser.add_argument("--budget_model_name", default="SMLP", type=str)
    parser.add_argument("--budget_model_width", default=300, type=int)
    parser.add_argument("--num_classes_per_client", default=4, type=int)
    parser.add_argument("--budget_model_train_ratio", default=0.8, type=float)
    parser.add_argument("--budget_model_batch_size", default=8, type=int)
    parser.add_argument("--budget_model_lr", default=0.01, type=float)
    parser.add_argument("--budget_model_epochs", default=500, type=int)

    # ---- inference arguments
    parser.add_argument("--inference", action="store_true")
    parser.add_argument("--train_budget_model", action="store_true")
    parser.add_argument("--buffer", type=int, default=0)
    parser.add_argument(
        "--strategy",
        default="medium",
        type=str,
        help="The strategy to used for budget value mapping after budget model prediction. Only accept 'max', 'min', 'medium', or string of float number.",
    )

    parser.add_argument(
        "--log_dir",
        type=str,
        default="fed_icl_log",
        help="Logging directory",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--run", action="store_true")
    parser.add_argument("--seed", default=0, type=int)
    parser.add_argument("--proj_name", default="FL-ICL-debug")
    parser.add_argument("--group_name", default=None, type=str)

    args = parser.parse_args()

    args.num_classes = data2numcls[args.dataset]

    # prepare output files
    model = args.model.replace("/", "_")
    part_name = f"{args.partition}_clients={args.num_clients}"
    if args.partition == "noniid":
        part_name += f"_majorclass={args.major_classes_num}"

    data_folder = f"{args.dataset}"
    if args.subset_num is not None:
        data_folder += f"_query-num={args.subset_num}"

    if args.proxy_split is not None:
        data_folder += f"_proxy={args.proxy_split}-{args.proxy_size}"

    # The cache root is host-specific; refuse to run on an unrecognized host
    # rather than write logs to an unintended location.
    host_name = socket.gethostname()
    if "server_name" in host_name.lower():
        cache_root = "cache/root/directory"
    else:
        raise ValueError("Check the server hostname for log_dir initialization.")

    # add budget model info in log_dir
    args.log_dir = os.path.join(
        cache_root,
        args.log_dir,
        f"{data_folder}/{part_name}/model={model}_retriever=fed{args.retriever}_local-ice={args.local_ice_num}_server-ice={args.server_ice_num}/seed={args.seed}",
    )
    os.makedirs(args.log_dir, exist_ok=True)

    budget_model_setting = f"{args.budget_model_name}_W={args.budget_model_width}_classes={args.num_classes_per_client}_train-ratio={args.budget_model_train_ratio:.1f}_bs={args.budget_model_batch_size}-lr={args.budget_model_lr:.3f}-E={args.budget_model_epochs}-seed={args.seed}"
    args.budget_model_dir = os.path.join(args.log_dir, budget_model_setting)
    os.makedirs(args.budget_model_dir, exist_ok=True)

    if args.inference:
        inference_setting = (
            f"concat=reorder-strategy={args.strategy}-buffer={args.buffer}"
        )
        args.prediction_results_dir = os.path.join(
            args.budget_model_dir, inference_setting
        )
        os.makedirs(args.prediction_results_dir, exist_ok=True)

    # perform FL-ICL pipeline
    if args.run:
        run_name = (
            f"budgetmodel_fed_{data_folder}_{part_name}_model={model}_retriever=fed{args.retriever}_local-ice={args.local_ice_num}_server-ice={args.server_ice_num}_seed={args.seed}"
            + f"_{budget_model_setting}"
        )
        if args.inference:
            run_name += f"_{inference_setting}"

        wb_run = wandb.init(
            config=args, project=args.proj_name, name=run_name, group=args.group_name
        )

        clients_best_val_accs = run(args)

        # run() may return None even when training was requested, so guard
        # before indexing instead of failing after the expensive work is done.
        if args.train_budget_model and clients_best_val_accs is not None:
            best_val_acc_list = [
                clients_best_val_accs[f"client-{cid}-best-val-acc"]
                for cid in range(args.num_clients)
            ]
            mean_best_val = np.mean(best_val_acc_list).item()

            # One log call per client, all under the same key.
            for client_acc in best_val_acc_list:
                wb_run.log({"per-client-best-val-acc": client_acc})
            wb_run.log({"mean-best-val-acc": mean_best_val})

        if args.inference:
            scores = print_score(args, args.prediction_results_dir)
            wb_run.log(scores)

        # wb_run only exists when --run was given, so finish it inside this
        # branch; finishing unconditionally raised NameError without --run.
        wb_run.finish()
