import os 
import glob 
import json 


# Directory that main() checks for existence and then searches for
# "<prefix>*.jsonl" files.
DATA_DIR ="/path/to/home/lltm/02_codeexec_etcot/debug_dump"


# Filename prefixes that make up each comparison group. All files matching
# any prefix in a list are pooled into a single set of records.
GROUP1_PREFIXES =['iter0210','iter0211']
GROUP2_PREFIXES =['iter0212','iter0213']
GROUP3_PREFIXES =['iter0946','iter0947']

def load_jsons_from_files(directory, prefix):
    """Collect the unique JSON records found in ``<directory>/<prefix>*.jsonl``.

    Every non-blank line of every matching file is parsed as JSON and then
    re-serialised with sorted keys, so records that differ only in key order
    collapse to the same canonical string.

    Args:
        directory: Directory to search. It is taken literally; glob
            metacharacters in the path are escaped.
        prefix: Literal filename prefix. Files named ``<prefix>*.jsonl``
            are read.

    Returns:
        A set of canonical JSON strings, one per distinct record. The set is
        empty when no file matches. Lines that are not valid JSON are
        reported on stdout and skipped.
    """
    json_objects = set()
    # Escape both parts so that only the trailing "*" acts as a wildcard.
    # Without this, a path or prefix containing "[", "?" or "*" would match
    # the wrong files, or none at all.
    file_pattern = os.path.join(
        glob.escape(directory),
        glob.escape(str(prefix)) + "*.jsonl",
    )

    for filepath in glob.glob(file_pattern):
        try:
            # "utf-8-sig" reads plain UTF-8 unchanged and drops a leading
            # byte-order mark, which json.loads would otherwise reject,
            # causing the first record of the file to be skipped.
            with open(filepath, 'r', encoding='utf-8-sig') as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        print(f"警告: {filepath} 内に不正なJSON行がありました。スキップします: {line.strip()}")
                        continue
                    json_objects.add(json.dumps(data, sort_keys=True))
        except FileNotFoundError:
            # The file can disappear between the glob and the open.
            print(f"警告: ファイルが見つかりません: {filepath}")

    return json_objects

def combine_jsons_for_group(directory, prefixes):
    """Pool the records of every prefix in *prefixes* into one set.

    For each prefix, in order, a progress line is printed and the records
    returned by ``load_jsons_from_files(directory, prefix)`` are gathered.

    Args:
        directory: Directory handed through to ``load_jsons_from_files``.
        prefixes: Iterable of filename prefixes belonging to the group.

    Returns:
        A new set holding the union of all loaded records; empty when
        *prefixes* is empty.
    """
    def _announce_and_load(name_prefix):
        # Print first so the progress line precedes any warnings from loading.
        print(f"  > '{name_prefix}' のファイル群を処理中...")
        return load_jsons_from_files(directory, name_prefix)

    # map() is consumed left to right by the unpacking, so prefixes are
    # announced and loaded one after another in their given order.
    return set().union(*map(_announce_and_load, prefixes))

def compare_groups(set_a, set_b, label_a, label_b):
    """Print the records that two groups have in common.

    Output goes to stdout: a header naming both groups, then either a
    "no matches" line or the number of shared records followed by each one
    pretty-printed, and finally a blank line.

    Args:
        set_a: Set of JSON strings for the first group.
        set_b: Set of JSON strings for the second group.
        label_a: Display name of the first group.
        label_b: Display name of the second group.

    Returns:
        None.
    """
    print(f"--- {label_a} と {label_b} の比較 ---")
    # Iteration order of a set of strings changes between interpreter runs
    # (hash randomisation). Sorting keeps the report reproducible so two
    # runs can be diffed.
    duplicates = sorted(set_a.intersection(set_b))
    if not duplicates:
        print("一致オブジェクトなし")
    else:
        print(f"発見した一致オブジェクト: {len(duplicates)}件")
        for i, json_str in enumerate(duplicates, 1):
            # Re-parse so the record can be shown indented and with
            # non-ASCII text left readable instead of \u-escaped.
            pretty_json = json.dumps(json.loads(json_str), indent=2, ensure_ascii=False)
            print(f"\n----- 一致オブジェクト {i} -----")
            print(pretty_json)
    print()

def main():
    """Load the three file groups and report records shared by each pair.

    Prints an error and returns early when ``DATA_DIR`` is not an existing
    directory. Otherwise prints, for every group, a header and the number of
    unique records, then compares groups 1-2, 1-3 and 2-3 in that order.
    """
    if not os.path.isdir(DATA_DIR):
        print(f"エラー: ディレクトリが見つかりません: {DATA_DIR}")
        return

    groups = (
        ("グループ1", GROUP1_PREFIXES),
        ("グループ2", GROUP2_PREFIXES),
        ("グループ3", GROUP3_PREFIXES),
    )

    loaded = []
    for label, prefixes in groups:
        # Build the header from the prefix list itself so the printed names
        # cannot drift from the constants that are actually loaded.
        print(f"--- {label} ({' + '.join(prefixes)}) ---")
        records = combine_jsons_for_group(DATA_DIR, prefixes)
        print(f"ユニーク数: {len(records)}\n")
        loaded.append((label, records))

    # Compare every unordered pair once, earlier group first.
    for position, (label_a, records_a) in enumerate(loaded):
        for label_b, records_b in loaded[position + 1:]:
            compare_groups(records_a, records_b, label_a, label_b)

# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ =="__main__":
    main ()
