import pickle
import random
from collections import defaultdict


def calculate_transfers(target, actual):
    """Plan how to move counts between categories so ``actual`` meets ``target``.

    Only keys present in ``target`` are considered; a key missing from
    ``actual`` counts as 0. Categories holding more than their target act as
    donors, categories holding less act as receivers. Donors are processed in
    ``target`` key order and each one fills the remaining receivers, also in
    ``target`` key order, until it has nothing left to give.

    Args:
        target: Mapping of category -> desired count.
        actual: Mapping of category -> current count.

    Returns:
        A list of ``(from_category, to_category, amount)`` tuples. Neither
        input mapping is modified.
    """
    excess = {}
    shortfall = {}
    for key, wanted in target.items():
        gap = wanted - actual.get(key, 0)
        if gap < 0:
            excess[key] = -gap
        elif gap > 0:
            shortfall[key] = gap

    plan = []
    for donor, available in excess.items():
        # Iterate over a snapshot of the keys: satisfied receivers are
        # deleted from ``shortfall`` while we walk it.
        for receiver in list(shortfall):
            if available == 0:
                break
            amount = min(available, shortfall[receiver])
            if amount > 0:
                plan.append((donor, receiver, amount))
                available -= amount
                shortfall[receiver] -= amount
                if shortfall[receiver] == 0:
                    del shortfall[receiver]
    return plan


def extract_and_remove(stat, src, num):
    """Randomly take ``num`` entries out of ``stat[src]`` and return them.

    Exactly ``num`` entries are removed, even when the list contains equal
    values, and the entries do not need to be hashable. The relative order of
    the entries that stay behind is preserved.

    Args:
        stat: Mapping of category -> list of entries. ``stat[src]`` is
            rebound to a new list without the chosen entries.
        src: Key of the list to draw from.
        num: Number of entries to draw.

    Returns:
        The drawn entries, in the order they were sampled.

    Raises:
        KeyError: If ``src`` is not a key of ``stat``.
        ValueError: If ``num`` is negative or larger than ``len(stat[src])``
            (raised by ``random.sample``).
    """
    pool = stat[src]
    # Sample positions rather than values: filtering the list by value would
    # also discard every other entry equal to a chosen one.
    chosen = random.sample(range(len(pool)), num)
    selected = [pool[pos] for pos in chosen]
    chosen_positions = set(chosen)
    stat[src] = [
        item for pos, item in enumerate(pool) if pos not in chosen_positions
    ]
    return selected


# The pickle is read twice so that ``old_data`` and ``data`` are independent
# objects: ``data`` is edited in place below, ``old_data`` is left untouched
# by this section.
pkl_file = "pkl/data_test.pkl"
with open(pkl_file, 'rb') as file:
    old_data = pickle.load(file)
with open(pkl_file, 'rb') as file:
    data = pickle.load(file)

# Desired number of records per answer letter inside one 150-record category.
# Categories 0-4 use the first table, categories 5-9 the second.
target_cnt_1 = {'A': 38, 'B': 38, 'C': 37, 'D': 37}
target_cnt_2 = {'A': 37, 'B': 37, 'C': 38, 'D': 38}

for question in ['Recognition', 'Understanding', 'Grounding', 'Reasoning']:
    for cate in range(10):
        target_cnt = target_cnt_2 if cate >= 5 else target_cnt_1

        # Bucket the record indices of this category by their current answer.
        stat = {"A": [], "B": [], "C": [], "D": []}
        first = cate * 150
        for idx in range(first, first + 150):
            stat[data[idx]['QAs'][question]['Answer']].append(idx)
        cnt = {letter: len(members) for letter, members in stat.items()}

        transfers = calculate_transfers(target_cnt, cnt)
        for src, tgt, num in transfers:
            # Relabel ``num`` randomly chosen records from ``src`` to ``tgt``
            # and swap the two option entries so the option stored under the
            # old answer letter ends up under the new one.
            # NOTE(review): presumably 'Options' is keyed by answer letter --
            # confirm against the dataset schema.
            for sel_idx in extract_and_remove(stat, src, num):
                qa = data[sel_idx]['QAs'][question]
                qa['Answer'] = tgt
                options = qa['Options']
                options[src], options[tgt] = options[tgt], options[src]

with open("pkl/data_test_shuffle.pkl", 'wb') as file:
    pickle.dump(data, file)

