import json
import pdb
import os
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
from data.eval_event import EvalEvent
from .meta import Evaluation
from .registry import auto_register
from actions.fennec import (
    FennecBranchAction,
    FennecScoringAction,
    FennecPairwiseSolvingAction,
    FennecPairwiseMergeAction,
    FennecCorrectionAction,
    FennecBranchSelectionAction,
    FennecBranchQuickSortAction,
    FennecCoupledScoringAction,
    FennecCoupledPairwiseSolvingAction,
)
from config.task_config import TaskConfig
from scipy.stats import kendalltau
from scipy.stats import spearmanr
from scipy.stats import pearsonr
import threading


# Module-level lock. No live statement in this module acquires it.
# NOTE(review): presumably meant to serialize concurrent updates of the
# evaluation accumulators -- confirm before relying on it or removing it.
write_lock = threading.Lock()


@auto_register("fennec")
class FennecEvaluation(Evaluation):
    task_name = "fennec"

    def __init__(self, config: TaskConfig, task_func) -> None:
        """Store the evaluation mode and start with empty accumulators.

        Args:
            config: Task configuration; handed to the base-class initializer.
                The parquet output paths are then looked up on ``self.config``
                (presumably set by the base class -- confirm).
            task_func: Mode selector; ``eval`` and ``serialize`` compare it
                against string literals such as ``"pairwise_eval_func"``.
        """
        super().__init__(config)
        self.task_func = task_func

        # Resolve both output locations for this task up front.
        settings = self.config
        task = self.task_name
        self.train_parquet_file = settings.get_train_parquet_file(task)
        self.train_filter_parquet_file = settings.get_train_filter_parquet_file(task)

        # Accumulators; they are populated lazily on the first ``eval`` call.
        self.eval_score, self.eval_gen, self.eval_filter_gen = {}, {}, {}
        self.correction = {}

    def eval(self, eval_event: EvalEvent):
        """Fold one evaluation event into the running statistics.

        The work done depends on ``self.task_func``:

        * ``"pairwise_eval_func"``: compare the merged scores of the two
          responses with the reference judge label and update
          ``self.eval_score``; samples where the prediction disagrees with the
          judge are appended to the Arrow table in ``self.eval_gen["table"]``.
        * ``"pairwise_single_eval_func"``: record agreement with the judge and
          the per-category score of ``model_a`` in ``self.eval_score``.
        * ``"pairwise_gen_func"``: count empty / failed generations and the
          win / lose / tie outcome of the summed ratings in ``self.eval_gen``.

        Any other ``task_func`` value leaves the accumulators untouched.

        Args:
            eval_event: Event providing the dialogue (``get_dialogue``) and the
                per-action memories (``get_memories``).

        Returns:
            None. Results are accumulated on ``self``.
        """
        dialogue = eval_event.get_dialogue()

        if self.task_func == "pairwise_eval_func":
            self._eval_pairwise(eval_event, dialogue)
        elif self.task_func == "pairwise_single_eval_func":
            self._eval_pairwise_single(eval_event, dialogue)
        elif self.task_func == "pairwise_gen_func":
            self._eval_pairwise_gen(eval_event, dialogue)

    def _turn_memory(self, eval_event, action, turn):
        """Return the memory stored by ``action`` for turn index ``turn``."""
        return eval_event.get_memories(
            self.task_name, action.action_name, "turn{}".format(turn)
        )

    @staticmethod
    def _normalize_judge(judge, judges):
        """Map a raw judge label to 0 (model_a), 1 (model_b) or 2 (tie).

        String labels ``"model_a"`` / ``"model_b"`` / anything containing
        ``"tie"`` are translated directly. Non-string labels are remapped only
        when ``judges`` has exactly three entries (marked "panda lm" in the
        original code): 0 -> tie, 1 -> model_a, anything else -> model_b.
        Every other label is returned unchanged.
        """
        if isinstance(judge, str):
            if judge == "model_a":
                return 0
            if judge == "model_b":
                return 1
            if "tie" in judge:
                return 2
            return judge
        if len(judges) == 3:
            # panda lm
            if judge == 0:
                return 2
            if judge == 1:
                return 0
            return 1
        return judge

    @staticmethod
    def _verdict(score_a, score_b):
        """Return 0 if a > b, 1 if a < b, 2 if a == b, else ``None``."""
        if score_a > score_b:
            return 0
        if score_a < score_b:
            return 1
        if score_a == score_b:
            return 2
        # Incomparable values (e.g. NaN) match no label.
        return None

    def _agrees(self, score_a, score_b, judge):
        """Tell whether the score comparison yields the label ``judge``."""
        verdict = self._verdict(score_a, score_b)
        return verdict is not None and verdict == judge

    def _ensure_eval_gen(self, with_score_consis=False):
        """Create the ``self.eval_gen`` counters on first use."""
        if "error" in self.eval_gen:
            return
        self.eval_gen.update(
            {
                "error": 0,
                "server_error": 0,
                "count": 0,
                "collect": [],
                "table": None,
                "aggrement": [],
                "win": 0,
                "lose": 0,
                "tie": 0,
            }
        )
        if with_score_consis:
            self.eval_gen["score_consis"] = []

    def _eval_pairwise(self, eval_event, dialogue):
        """Update agreement / consistency statistics for one pairwise sample."""
        meta_info = dialogue.get_meta_info()
        turn = meta_info["turn"] - 1

        fb_action_feedback = self._turn_memory(eval_event, FennecBranchAction, turn)
        fs_action_feedback = self._turn_memory(eval_event, FennecScoringAction, turn)
        fps_action_feedback = self._turn_memory(
            eval_event, FennecPairwiseSolvingAction, turn
        )
        fpm_action_feedback = self._turn_memory(
            eval_event, FennecPairwiseMergeAction, turn
        )

        judges = meta_info["judge"]
        raw_judge = judges[0]

        query = dialogue.get_query_by_idx(0)["content"]
        response_1 = dialogue.get_pairwise_response_by_idx(0, "model_a")["content"]
        response_2 = dialogue.get_pairwise_response_by_idx(0, "model_b")["content"]

        self._ensure_eval_gen()
        judge = self._normalize_judge(raw_judge, judges)

        if "agreement" not in self.eval_score:
            self.eval_score.update(
                {
                    "agreement": [],
                    "consistency": [],
                    "single_agreement": [],
                    "selected_single_agreement": [],
                    "selected_win": 0,
                    "selected_lose": 0,
                    "selected_tie": 0,
                    "error": 0,
                    "hit": 0,
                    "g_win": 0,
                    "win": 0,
                    "g_lose": 0,
                    "lose": 0,
                    "g_tie": 0,
                    "tie": 0,
                }
            )

        # A "hit" means at least one individual rating pair matches the judge.
        rating_pairs = zip(
            fps_action_feedback['rating_a'], fps_action_feedback['rating_b']
        )
        if any(self._agrees(a, b, judge) for a, b in rating_pairs):
            self.eval_score["hit"] += 1

        # Distribution of the judge labels (the "g_" counters).
        if judge == 0:
            self.eval_score["g_win"] += 1
        elif judge == 1:
            self.eval_score["g_lose"] += 1
        elif judge == 2:
            self.eval_score["g_tie"] += 1

        # Distribution of the predictions derived from the merged scores.
        model_verdict = self._verdict(
            fpm_action_feedback["model_a"], fpm_action_feedback["model_b"]
        )
        if model_verdict == 0:
            self.eval_score["win"] += 1
        elif model_verdict == 1:
            self.eval_score["lose"] += 1
        elif model_verdict == 2:
            self.eval_score["tie"] += 1

        # Samples judged as ties are counted above but excluded from the
        # agreement / consistency lists.
        if judge == 2:
            return

        agrees = model_verdict is not None and model_verdict == judge
        self.eval_score["single_agreement"].append(1 if agrees else 0)

        # Consistent: the "ex_" scores give the same verdict as the merged
        # scores. Agreement additionally requires matching the judge.
        consistent = False
        if model_verdict is not None:
            ex_verdict = self._verdict(
                fpm_action_feedback["ex_model_a"], fpm_action_feedback["ex_model_b"]
            )
            consistent = model_verdict == ex_verdict
        self.eval_score["agreement"].append(1 if consistent and agrees else 0)
        self.eval_score["consistency"].append(1 if consistent else 0)

        if agrees:
            return

        # Only disagreeing samples are kept, so the row is built lazily here.
        has_context = "context" in meta_info and meta_info["context"]
        context = meta_info["context"] if has_context else [""]
        new_data = pd.DataFrame(
            {
                "idx": [meta_info["question_id"]],
                "query": [query],
                "judge": [raw_judge],
                "rating_a": [fps_action_feedback['rating_a']],
                "rating_b": [fps_action_feedback['rating_b']],
                "response_1": [response_1],
                "response_2": [response_2],
                "branch": [fb_action_feedback["branch_list"]],
                "branch_text": [fb_action_feedback["result"]],
                "scoring": [fs_action_feedback["result"]],
                "solving": [fps_action_feedback["result"]],
                "correction_1": [""],
                "correction_2": [""],
                "context": [context],
            }
        )
        table = pa.Table.from_pandas(new_data)
        if self.eval_gen["table"] is None:
            self.eval_gen["table"] = table
        else:
            self.eval_gen["table"] = pa.concat_tables([self.eval_gen["table"], table])

    def _eval_pairwise_single(self, eval_event, dialogue):
        """Update judge agreement and per-category scores for one sample."""
        meta_info = dialogue.get_meta_info()
        turn = meta_info["turn"] - 1

        fpm_action_feedback = self._turn_memory(
            eval_event, FennecPairwiseMergeAction, turn
        )
        judges = meta_info["judge"]
        judge = self._normalize_judge(judges[0], judges)

        if "single_agreement" not in self.eval_score:
            self.eval_score["single_agreement"] = []
            self.eval_score["error"] = 0
            self.eval_score["single_score"] = {}

        # Compute the score before touching the dict so a failure cannot
        # leave an empty per-category list behind.
        category = meta_info['category']
        scaled_score = fpm_action_feedback["model_a"] / 5
        self.eval_score["single_score"].setdefault(category, []).append(scaled_score)

        agrees = self._agrees(
            fpm_action_feedback["model_a"], fpm_action_feedback["model_b"], judge
        )
        self.eval_score["single_agreement"].append(1 if agrees else 0)

        # Non-positive merged scores are counted as errors.
        if (
            fpm_action_feedback["model_a"] <= 0
            or fpm_action_feedback["model_b"] <= 0
        ):
            self.eval_score["error"] += 1

    def _eval_pairwise_gen(self, eval_event, dialogue):
        """Update generation error counters and win / lose / tie tallies."""
        meta_info = dialogue.get_meta_info()
        turn = meta_info["turn"] - 1

        if not eval_event.not_empty():
            return

        fb_action_feedback = self._turn_memory(eval_event, FennecBranchAction, turn)
        fs_action_feedback = self._turn_memory(eval_event, FennecScoringAction, turn)
        fps_action_feedback = self._turn_memory(
            eval_event, FennecPairwiseSolvingAction, turn
        )

        self._ensure_eval_gen(with_score_consis=True)

        # A completely missing stage counts as a server error; skip the sample.
        if (
            len(fb_action_feedback["branch_list"]) == 0
            or len(fs_action_feedback["result"]) == 0
            or len(fps_action_feedback["result"]) == 0
        ):
            self.eval_gen["server_error"] += 1
            return

        # Count individual empty / failed outputs across all three stages.
        outputs = (
            fb_action_feedback["branch_list"]
            + fs_action_feedback["result"]
            + fps_action_feedback["result"]
        )
        for output in outputs:
            if output == "" or output == "server error":
                self.eval_gen["error"] += 1

        total_a = sum(fps_action_feedback["rating_a"])
        total_b = sum(fps_action_feedback["rating_b"])
        if total_a > total_b:
            self.eval_gen["win"] += 1
        elif total_a < total_b:
            self.eval_gen["lose"] += 1
        else:
            self.eval_gen["tie"] += 1

    def serialize(self):
        """Log the accumulated metrics and persist the collected table.

        For ``"pairwise_eval_func"`` the agreement / consistency averages and
        the win / lose / tie counters from ``self.eval_score`` are logged, and
        the Arrow table in ``self.eval_gen["table"]`` is written to
        ``self.train_filter_parquet_file``. For ``"pairwise_single_eval_func"``
        the error count, the agreement average and the per-category mean scores
        are logged. For ``"pairwise_gen_func"`` the counters in
        ``self.eval_gen`` are logged.

        Averages whose denominator is zero are skipped instead of raising
        ``ZeroDivisionError`` (the same convention already used for the
        ``aggrement`` list), and the parquet write is skipped when no row was
        collected, because there is no table to write.
        """
        scores = self.eval_score
        if len(scores):
            if self.task_func == "pairwise_eval_func":
                errors = scores["error"]

                if "single_agreement" in scores:
                    self._log_average(
                        "Single Agreement Average Score",
                        scores["single_agreement"],
                        errors,
                    )
                    self.logger.info("G win {} lose {} tie {}".format(
                        scores['g_win'], scores['g_lose'], scores['g_tie']
                    ))
                    self.logger.info("win {} lose {} tie {}".format(
                        scores['win'], scores['lose'], scores['tie']
                    ))

                if "agreement" in scores:
                    self.logger.info("Hit = {}".format(scores["hit"]))
                    self.logger.info("Error = {}".format(errors))
                    self._log_average(
                        "Agreement Average Score", scores["agreement"], errors
                    )
                    self._log_average(
                        "Consistency Average Score", scores["consistency"], errors
                    )

                if "selected_single_agreement" in scores:
                    # This list can be empty, in which case no average exists.
                    self._log_average(
                        "Selected Single Agreement Average Score",
                        scores["selected_single_agreement"],
                        errors,
                    )
                    self.logger.info("selected win: {}, lose: {}, tie: {}".format(
                        str(scores["selected_win"]),
                        str(scores["selected_lose"]),
                        str(scores["selected_tie"]),
                    ))

                # The table stays None until at least one row was collected.
                table = self.eval_gen.get("table")
                if table is not None:
                    pq.write_table(table, self.train_filter_parquet_file)
                else:
                    self.logger.info(
                        "No rows collected; skip writing {}".format(
                            self.train_filter_parquet_file
                        )
                    )
            elif self.task_func == "pairwise_single_eval_func":
                self.logger.info("Error = {}".format(scores["error"]))
                if "single_agreement" in scores:
                    self._log_average(
                        "Single Agreement Average Score",
                        scores["single_agreement"],
                        scores["error"],
                    )

                    for key, item in scores["single_score"].items():
                        if len(item):
                            self.logger.info(
                                "{} score = {}".format(key, sum(item) / len(item))
                            )

        gen = self.eval_gen
        if len(gen):
            if self.task_func == "pairwise_gen_func":
                self.logger.info("Server Error = {}".format(gen["server_error"]))
                self.logger.info("Error = {}".format(gen["error"]))
                self.logger.info("Count = {}".format(gen["count"]))

                self.logger.info("win: {}, lose: {}, tie: {}".format(
                    str(gen["win"]), str(gen["lose"]), str(gen["tie"])
                ))

                self._log_average("Aggreement Average Score", gen["aggrement"])
                self._log_average("Scoring Consistency", gen.get("score_consis", []))

    def _log_average(self, label, values, error_count=0):
        """Log ``sum(values) / (len(values) - error_count)`` under ``label``.

        Nothing is logged when the denominator is zero, since the average is
        undefined in that case.
        """
        denominator = len(values) - error_count
        if denominator == 0:
            return
        total = sum(values)
        self.logger.info(
            "{} = {} = {} / {}".format(
                label, str(total / denominator), str(total), str(denominator)
            )
        )
