import torch

from datasets import concatenate_datasets, load_dataset, DatasetDict, Dataset
from accelerate import Accelerator
from rank_bm25 import BM25Okapi
import numpy as np
from tqdm import trange
from nltk.tokenize import word_tokenize

from openicl.icl_retriever import BaseRetriever, BM25Retriever
from openicl.utils.check_type import _check_str
from openicl import DatasetReader, PromptTemplate, PPLInferencer, AccEvaluator

import argparse
import json
import os
import wandb
import socket
import copy

from util.template import gen_template_classification
from util.retriever import get_retriever, get_fedretriever
from util.partition import (
    cls_iid_partition,
    cls_noniid_partition,
    datasplit_subset,
    data2usename,
    data2numcls,
)
from util.icl_bm25_fedretriever import BM25FedRetriever
from util.icl_topk_fedretriever import TopkFedRetriever
from util.icl_ppl_fed_inferencer import PPLFedInferencer
from util.icl_ppl_fed_opt_budget_inferencer import PPLFedOptBudgetInferencer
from util.misc import setup_seed


def print_score(args, pred_file, label_file):
    """Score saved predictions against saved labels, print and return the result.

    Args:
        args: Parsed command-line namespace. Not read by this function; the
            parameter is kept so existing call sites keep working.
        pred_file: Path of the JSON file holding the predictions.
        label_file: Path of the JSON file holding the reference labels.

    Returns:
        The value returned by ``AccEvaluator().score`` for the loaded
        predictions and references.
    """
    # Load predictions first, then references, each from its own JSON file.
    loaded = []
    for path in (pred_file, label_file):
        with open(path, "r") as handle:
            loaded.append(json.load(handle))
    predictions, references = loaded

    accuracy = AccEvaluator().score(predictions=predictions, references=references)
    print(accuracy)
    return accuracy


def get_res_list(dataset, retriever, retriever_model):
    """Run the retriever's forward pass over every sample of ``dataset``.

    Args:
        dataset: Object indexable by column name; its ``"idx"`` column is
            forwarded as the original sample indices.
        retriever: Retriever exposing ``dataset_reader``,
            ``create_dataloader`` and ``forward``.
        retriever_model: Model handed to ``retriever.forward`` unchanged.

    Returns:
        Whatever ``retriever.forward`` returns for the built dataloader.
    """
    sample_ids = dataset["idx"]

    # Turn the dataset into the reader's input-field corpus, then batch it.
    corpus = retriever.dataset_reader.generate_input_field_corpus(dataset)
    loader = retriever.create_dataloader(corpus)

    forward_kwargs = {
        "orig_idxs": sample_ids,
        "process_bar": True,
        "information": "Embedding data query...",
    }
    return retriever.forward(retriever_model, loader, **forward_kwargs)


def run(args):
    """Embed the training split and save the embeddings and labels to disk.

    Loads the dataset selected by ``args.dataset``, records the ``idx`` column
    of the evaluation split, runs the retriever forward pass over the training
    split and writes three files under ``args.log_dir``:

    * ``query_subset_orig_idxs.json`` -- ``idx`` values of the ``test`` split
      (the ``validation`` split when the dataset has no ``test`` split).
    * ``train_forward_result.pt`` -- ``{"res_list": <forward-pass output>}``.
    * ``train_labels.pt`` -- ``{"labels": {idx: label}}`` for the train split.

    Args:
        args: Parsed command-line namespace. This function reads ``dataset``,
            ``log_dir``, ``local_ice_num``, ``model`` and ``seed`` directly and
            hands the whole namespace to the project helpers it calls.
    """
    # ===== 1. Data Setting Preparation =====
    # load dataset
    dataset = load_dataset(data2usename[args.dataset])

    # Fall back to the validation split when the dataset has no test split.
    test_split = "test" if "test" in dataset else "validation"

    # save the original indices of the evaluation queries
    subset_orig_idxs = dataset[test_split]["idx"]
    subset_orig_idxs_file = os.path.join(args.log_dir, "query_subset_orig_idxs.json")
    with open(subset_orig_idxs_file, "w") as f:
        json.dump(subset_orig_idxs, f)

    train_data = dataset["train"]

    # make datareader over the training data
    reader = DatasetReader(
        train_data,
        input_columns=["sentence"],
        output_column="label",
    )

    # get template for classification
    # NOTE(review): `template` is not used further in this function; the calls
    # are kept in case the helpers validate `args` -- confirm before removing.
    tp_dict = gen_template_classification(args)
    template = PromptTemplate(tp_dict, {"sentence": "</text>"}, ice_token="</E>")

    # build the retriever on top of the training-data reader
    RETRIEVER = get_fedretriever(args)
    retriever = RETRIEVER(reader, ice_num=args.local_ice_num)

    # ===== 2. Get budget ground for dataset =====
    setup_seed(args.seed)
    inferencer = PPLFedOptBudgetInferencer(
        model_name=args.model, output_json_filepath=args.log_dir, args=args
    )

    # ----- get the embedding of remain test dataset
    # test_res_list = get_res_list(
    #     dataset[test_split], retriever, inferencer.retriever_model
    # )

    # # save test dataset embedding
    # torch.save(
    #     {"res_list": test_res_list},
    #     os.path.join(args.log_dir, "test_forward_result.pt"),
    # )
    # print("Test forward result saved.")
    # print("-" * 60)

    #  ----- get the embedding of train dataset
    train_res_list = get_res_list(train_data, retriever, inferencer.retriever_model)

    # Map each training sample's original index to its label. Reading the two
    # columns avoids materialising every row as a dict.
    train_labels = dict(zip(train_data["idx"], train_data["label"]))

    # save train dataset embedding and labels
    torch.save(
        {"res_list": train_res_list},
        os.path.join(args.log_dir, "train_forward_result.pt"),
    )
    torch.save(
        {"labels": train_labels},
        os.path.join(args.log_dir, "train_labels.pt"),
    )
    print("Train forward result saved.")
    print("-" * 60)


if __name__ == "__main__":
    # Script entry point: parse the command line, derive the output directory
    # from the configuration, and (with --run) execute the embedding pipeline
    # inside a Weights & Biases run.
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="sst2", help="Dataset name")
    parser.add_argument(
        "--subset_num",
        type=int,
        default=None,
        help="Number of subset test set for query",
    )
    # parser.add_argument("--proxy_split", default="test", type=str)
    # parser.add_argument("--proxy_size", default=None, type=int)
    # parser.add_argument("--num_clients", type=int, default=3)
    # parser.add_argument("--major_classes_num", default=-1, type=int)
    parser.add_argument(
        "--model",
        type=str,
        default="EleutherAI/gpt-neo-2.7B",
        help="Pretrained LLM model name",
    )
    parser.add_argument("--local_ice_num", default=None, type=int)
    parser.add_argument(
        "--overall_local_ice_num",
        default=None,
        type=int,
        help="Optional choice, automatically assign local_ice_num based on overall_local_ice_num and num_clients",
    )
    parser.add_argument(
        "--server_ice_num",
        default=-1,
        type=int,
        help="Server side ICE Number, server_ice_num <= num_clients * local_ice_num",
    )
    # parser.add_argument(
    #     "--concat", default="simple", type=str, choices=["simple", "merge", "reorder"]
    # )
    parser.add_argument(
        "--retriever", type=str, default="topk", help="Server side Retriever Type"
    )  # use 'bm25'
    parser.add_argument(
        "--log_dir",
        type=str,
        default="fed_icl_log",
        help="Logging directory",
    )
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--run", action="store_true")
    parser.add_argument("--seed", default=0, type=int)
    parser.add_argument("--proj_name", default="FL-ICL-debug")
    parser.add_argument("--group_name", default=None, type=str)

    args = parser.parse_args()

    # This script fixes the federated settings: one client, IID partition,
    # simple concatenation.
    args.partition = "iid"
    args.concat = "simple"
    args.num_clients = 1

    args.num_classes = data2numcls[args.dataset]

    # if args.overall_local_ice_num is not None and args.local_ice_num is None:
    #     args.local_ice_num = int(args.overall_local_ice_num / args.num_clients)

    if args.server_ice_num == -1:
        # The default server budget is derived from the local budget, so the
        # local budget must be known; fail with a usage message rather than a
        # TypeError from multiplying None.
        if args.local_ice_num is None:
            parser.error(
                "--local_ice_num is required when --server_ice_num is not given "
                "(server_ice_num defaults to local_ice_num * num_clients)"
            )
        args.server_ice_num = int(args.local_ice_num * args.num_clients)

    # if args.local_ice_num * args.num_clients > args.server_ice_num:
    #     args.concat = "reorder"

    # prepare output files
    model = args.model.replace("/", "_")
    part_name = f"{args.partition}_clients={args.num_clients}"
    if args.partition == "noniid":
        # NOTE(review): unreachable while the partition is hard-coded to "iid"
        # above; --major_classes_num is commented out, so this attribute would
        # be missing if the branch were ever taken.
        part_name += f"_majorclass={args.major_classes_num}"

    data_folder = f"{args.dataset}"
    if args.subset_num is not None:
        data_folder += f"_query-num={args.subset_num}"

    # if args.proxy_size is not None and args.proxy_size > 0:
    #     data_folder += f"_proxy={args.proxy_split}-{args.proxy_size}"

    # The cache root is chosen from the hostname; unknown hosts are rejected.
    host_name = socket.gethostname()
    if "server_name" in host_name.lower():
        cache_root = "cache/root/directory"
    else:
        raise ValueError("Check the server hostname for log_dir initialization.")

    # Configuration suffix shared by the log directory and the wandb run name.
    setting_name = (
        f"model={model}_retriever=fed{args.retriever}"
        f"_local-ice={args.local_ice_num}_server-ice={args.server_ice_num}"
        f"_concat={args.concat}"
    )

    args.log_dir = os.path.join(
        cache_root,
        args.log_dir,
        f"{data_folder}/{part_name}/{setting_name}/seed={args.seed}",
    )
    os.makedirs(args.log_dir, exist_ok=True)

    # perform FL-ICL pipeline
    if args.run:
        run_name = f"centr_{data_folder}_{part_name}_{setting_name}_seed={args.seed}"
        wb_run = wandb.init(
            config=args, project=args.proj_name, name=run_name, group=args.group_name
        )
        run(args)

    # # read result from saved files and calculate performance score
    # prediction_file = os.path.join(
    #     args.log_dir, f"prediction_{'debug' if args.debug else 'run'}.json"
    # )
    # label_file = os.path.join(
    #     args.log_dir, f"label_{'debug' if args.debug else 'run'}.json"
    # )
    # scores = print_score(args, prediction_file, label_file)

    # proxy_prediction_file = os.path.join(
    #     args.log_dir, f"proxy_prediction_{'debug' if args.debug else 'run'}.json"
    # )
    # proxy_label_file = os.path.join(
    #     args.log_dir, f"proxy_label_{'debug' if args.debug else 'run'}.json"
    # )
    # proxy_scores = print_score(args, proxy_prediction_file, proxy_label_file)

    # results = {
    #     "test_accuracy": scores["accuracy"],
    #     "proxy_accuracy": proxy_scores["accuracy"],
    # }

    if args.run:
        # wb_run.log(results)
        wb_run.finish()
