
import json
import os
import numpy as np

# Root folder holding one sub-folder per job run. The summary meta.json
# produced at the end of this script is written into this same folder.
target_folder = "jobs_arxiv/250512_collection"
# Maps a key parsed from the job folder name to the result indices collected
# from every run under that key whose results passed the completeness checks.
union_sample_id = {}


for curr_fld in sorted(os.listdir(target_folder)):
    curr_fld_path = os.path.join(target_folder, curr_fld)

    # Only job folders are inspected. Plain files must be skipped: the
    # meta.json this script writes into target_folder (or any other stray
    # file) has no "exStWd" token and would make the name parsing below raise
    # ValueError on every re-run.
    if not os.path.isdir(curr_fld_path):
        continue

    # Folder names are "_"-separated. Drop the "exStWd" marker, then the
    # first three remaining tokens; what is left is "<mid>[_<did parts>...]".
    spt = curr_fld.split("_")
    spt.remove("exStWd")
    del spt[:3]

    # First remaining token. It is parsed but not part of the grouping key;
    # presumably a model id -- TODO confirm.
    mid = spt[0]
    # A lone token means the name carries no further parts; such runs are
    # grouped under the fixed "known1000" key.
    did = "known1000" if len(spt) == 1 else "_".join(spt[1:])

    mid_did_key = did
    union_sample_id.setdefault(mid_did_key, [])

    print("------------------")
    print(curr_fld_path)

    # List both sides once; the counts and (on mismatch) the parsed indices
    # are derived from the same listings.
    inp_files = os.listdir(os.path.join(curr_fld_path, "inp_info"))
    res_folders = os.listdir(os.path.join(curr_fld_path, "results"))
    inp_num = len(inp_files)
    res_num = len(res_folders)

    if inp_num != res_num:
        # Report the indices present on only one side, inputs-only first.
        print("Mismatched Inp vs. Results")
        # Input index: digits between the last "I" and ".txt" in the name.
        inp_idx = sorted(
            int(name.split(".txt")[0].split("I")[-1]) for name in inp_files
        )
        # Result index: digits after the last "R" in the folder name.
        res_idx = sorted(int(name.split("R")[-1]) for name in res_folders)

        differ = []
        differ.extend(np.setdiff1d(inp_idx, res_idx).tolist())
        differ.extend(np.setdiff1d(res_idx, inp_idx).tolist())
        print("\t{}".format(differ))
        continue

    # Every result folder must contain a "C<index, zero-padded to 6>.json".
    incompleted_result = []
    for res_fld in res_folders:
        json_name = "C{:06d}.json".format(int(res_fld.split("R")[-1]))
        json_path = os.path.join(curr_fld_path, "results", res_fld, json_name)
        if not os.path.isfile(json_path):
            incompleted_result.append(os.path.join(res_fld, json_name))

    if incompleted_result:
        print("Incompleted Result.json!! =>")
        for missing in incompleted_result:
            print("\t {}".format(missing))
        continue

    # The run is complete: record its indices, then compare them against
    # the expected indices listed in correct_data_idx.txt.
    curr_res_ind = sorted(int(name.split("R")[-1]) for name in res_folders)
    union_sample_id[mid_did_key].extend(curr_res_ind)

    total_ind = np.loadtxt(
        os.path.join(curr_fld_path, "correct_data_idx.txt"), dtype=int
    ).tolist()

    # remaining: expected but not produced; error: produced but not expected.
    remaining_ind = np.setdiff1d(total_ind, curr_res_ind).tolist()
    error_ind = np.setdiff1d(curr_res_ind, total_ind).tolist()
    if error_ind:
        print("Error Sample Idx!! =>\n\t{}".format(error_ind))
    else:
        print("Curr File Number: {}".format(res_num))
        print("Remaining Number: {} \n{}".format(len(remaining_ind), remaining_ind))
                
# Collapse the indices accumulated per key into a sorted, de-duplicated list
# of strings, ready to be serialized.
union_sample_data = {
    key: [str(idx) for idx in sorted(set(indices))]
    for key, indices in union_sample_id.items()
}

# Write the summary next to the job folders. The context manager guarantees
# the file is flushed and closed even if serialization fails.
with open(os.path.join(target_folder, "meta.json"), "w") as f:
    json.dump(union_sample_data, f, indent=4)
