import os
import pandas as pd
import json


def get_args(path):
    """Load the run arguments stored as JSON in ``<path>/args.txt``.

    Args:
        path: Directory expected to contain an ``args.txt`` file.

    Returns:
        The decoded JSON content of ``args.txt``, or ``None`` when the file
        cannot be read or does not contain valid JSON. In the failure case an
        ``error: <path>`` line is printed, followed by the first line of the
        file when its text could be read.
    """
    args_file_path = os.path.join(path, "args.txt")

    # Read the whole text first so the error report below never has to touch
    # a file handle that is already closed (or was never opened).
    try:
        with open(args_file_path, "r") as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError is a ValueError subclass).
        print(f"error: {path}")
        return None

    try:
        return json.loads(content)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"error: {path}")
        print(content.partition("\n")[0])
        return None

# Series that are summed element-wise into a synthetic "op" total when the
# measurement file has no "op" entry of its own.
_OP_COMPONENTS = ("mergesort", "exponential search", "point bucket")


def _mean_std(samples):
    """Return ``(mean, population std)`` of *samples*; NaN for both if empty."""
    count = len(samples)
    if count == 0:
        return float("nan"), float("nan")
    mean = sum(samples) / count
    variance = sum((x - mean) ** 2 for x in samples) / count
    return mean, variance ** 0.5


def get_target_mean_std(path, target):
    """Summarise the first parseable JSON file in *path* whose name contains *target*.

    The file is expected to decode to a mapping of series name to a list of
    numbers. For every series ``name`` the result holds two entries:
    ``name`` (mean) and ``name + " std"`` (population standard deviation).

    When *target* is ``"op"`` and the file has no ``"op"`` series, an ``"op"``
    total is synthesised by summing the ``mergesort``, ``exponential search``
    and ``point bucket`` series element-wise (only those that are present),
    and its mean/std are added as ``"op"`` / ``"op std"``.

    Args:
        path: Directory to scan (in ``os.listdir`` order).
        target: Substring that a file name must contain to be considered.

    Returns:
        ``(keys, values)`` as two parallel lists, or ``(None, None)`` when no
        matching file could be parsed. Files that fail to parse are reported
        on stdout (path plus first line) and skipped.
    """
    for json_file in os.listdir(path):
        if target not in json_file:
            continue
        with open(os.path.join(path, json_file), "r") as f:
            try:
                json_dict = json.load(f)
            except ValueError:
                # Covers json.JSONDecodeError and UnicodeDecodeError without
                # swallowing KeyboardInterrupt/SystemExit like a bare except.
                print(f"error: {path}/{json_file}")
                f.seek(0)
                first_line = f.readline()
                print(first_line)
                continue

        res = {}
        last_len = 0
        for key, values in json_dict.items():
            res[key], res[key + " std"] = _mean_std(values)
            last_len = len(values)

        if target == "op" and "op" not in json_dict:
            components = [
                json_dict[name] for name in _OP_COMPONENTS if name in json_dict
            ]
            if components:
                # Size the total from the component series themselves rather
                # than from whichever unrelated series happened to be last.
                ops = [sum(parts) for parts in zip(*components)]
            else:
                # No component present: keep the historical all-zero total,
                # sized like the last series in the file.
                ops = [0.0] * last_len
            res["op"], res["op std"] = _mean_std(ops)

        return list(res.keys()), list(res.values())
    return None, None

def load_all_result(RESULT_DIR, target):
    """Gather every run under ``./<RESULT_DIR>/<target>/`` into one DataFrame.

    Each sub-directory is treated as one run: its arguments are loaded with
    ``get_args`` and extended with the statistics returned by
    ``get_target_mean_std``. Runs whose arguments or statistics are
    unavailable (``None``) are skipped.

    Args:
        RESULT_DIR: Name of the results root, relative to the working directory.
        target: Result category. ``"sort_time"`` and ``"index_time"`` read
            the ``"time"`` measurements; ``"sort_op"`` reads ``"op"``.

    Returns:
        A ``pandas.DataFrame`` with one row per usable run, or ``None`` if
        *target* is not recognised and at least one run directory with
        readable arguments was found.
    """
    # The measurement kind depends only on the target, so resolve it once.
    if target in ("sort_time", "index_time"):
        metric = "time"
    elif target == "sort_op":
        metric = "op"
    else:
        metric = None

    result_path = f"./{RESULT_DIR}/{target}/"
    rows = []
    for entry in os.listdir(result_path):
        run_dir = os.path.join(result_path, entry)
        if not os.path.isdir(run_dir):
            continue
        args = get_args(run_dir)
        if args is None:
            continue
        if metric is None:
            # Deliberately reported only after a readable run was found, so
            # the return value for unknown targets stays as it always was.
            print("unknown target: ", target)
            return None

        keys, values = get_target_mean_std(run_dir, metric)
        if keys is None:
            continue
        # Diagnostic: flag timing runs that report a splay tree sort although
        # the directory name does not contain "rmi".
        if metric == "time" and "splay tree sort" in keys and "rmi" not in entry:
            print(f"splay tree sort is in {entry}")
        for key, value in zip(keys, values):
            args[key] = value
        rows.append(args)

    return pd.DataFrame(rows)

if __name__ == "__main__":
    # Export one CSV per result category, rows ordered by distribution,
    # sort algorithm and input size.
    RESULT_DIR = "result"
    for target in ("sort_time", "sort_op"):
        df = load_all_result(RESULT_DIR, target)
        df.sort_values(by=["dist", "sort", "n"], inplace=True)
        print(df)
        df.to_csv(f"./{RESULT_DIR}/{target}.csv", index=False)
