import json
import argparse

def read_names_from_txt(file_path):
    """Return the set of distinct names listed one per line in a text file.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are ignored. The file is decoded as UTF-8.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A set of the non-empty, stripped lines. If the file is missing or
        cannot be read, an error message is printed and an empty set is
        returned instead of raising.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            stripped_lines = (raw.strip() for raw in handle)
            # Built inside the ``with`` so read/decode errors raised while
            # iterating are still handled below.
            return {entry for entry in stripped_lines if entry}
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
    except Exception as e:
        print(f"Error reading {file_path}: {str(e)}")
    # Every failure path falls through to the same empty result.
    return set()

def read_names_from_json(file_path):
    """Return the set of distinct names stored in a JSON file.

    Supported layouts:
      * a list whose items are objects carrying a "full_name" key and/or
        plain strings;
      * a single object carrying a "full_name" key.

    Every name is normalised the same way read_names_from_txt normalises
    its lines: surrounding whitespace is stripped and blank results are
    dropped. Values that are not strings (None, numbers, lists, ...) are
    ignored, so one malformed entry cannot discard the rest of the file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A set of non-empty, stripped name strings. If the file is missing,
        is not valid JSON, or cannot be read, an error message is printed
        and an empty set is returned instead of raising.
    """
    # Keep the try body limited to I/O and parsing; the extraction below
    # cannot raise once the document has been loaded.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return set()
    except json.JSONDecodeError:
        print(f"Error: {file_path} is not a valid JSON file.")
        return set()
    except Exception as e:
        print(f"Error reading {file_path}: {str(e)}")
        return set()

    names = set()

    def add_name(candidate):
        # Only non-empty strings count as names; anything else could never
        # match a name read from the TXT file.
        if isinstance(candidate, str):
            cleaned = candidate.strip()
            if cleaned:
                names.add(cleaned)

    if isinstance(data, list):
        for entry in data:
            if isinstance(entry, dict):
                # .get() yields None for objects without the key, which
                # add_name ignores.
                add_name(entry.get("full_name"))
            else:
                add_name(entry)
    elif isinstance(data, dict):
        add_name(data.get("full_name"))
    return names

def find_duplicates(txt_names, json_names):
    """Return the names that occur in both collections.

    Args:
        txt_names: Set of names; its ``intersection`` method is used.
        json_names: Iterable of names to intersect with ``txt_names``.

    Returns:
        The set of names common to both arguments (empty if none).
    """
    return txt_names.intersection(json_names)

def main():
    """Command-line entry point: report names shared by a TXT and a JSON file.

    Parses ``-t/--txt_file`` and ``-j/--json_file`` (both have default
    paths), loads the names from each file, prints how many names each file
    yielded, and then prints either the sorted list of names found in both
    files or a message saying there are none.
    """
    parser = argparse.ArgumentParser(
        description="Check for duplicate names between a TXT and JSON file."
    )
    parser.add_argument(
        "-t",
        "--txt_file",
        type=str,
        default="hallucinate_small/new_person10.txt",
        help="Path to the TXT file with names",
    )
    parser.add_argument(
        "-j",
        "--json_file",
        type=str,
        default="hallucinate_small/pretrain_perturbed10/downsample_people_10.json",
        help="Path to the JSON file with names",
    )
    args = parser.parse_args()

    # Load both sources before reporting so any read errors are printed
    # ahead of the counts.
    txt_names = read_names_from_txt(args.txt_file)
    json_names = read_names_from_json(args.json_file)
    print(f"Names in {args.txt_file}: {len(txt_names)}")
    print(f"Names in {args.json_file}: {len(json_names)}")

    duplicates = find_duplicates(txt_names, json_names)
    if not duplicates:
        print("\nNo duplicate names found.")
        return

    print(f"\nFound {len(duplicates)} duplicate names:")
    for name in sorted(duplicates):
        print(f" - {name}")

# Run the comparison only when this file is executed as a script, so that
# importing it does not parse command-line arguments or read any files.
if __name__ == "__main__":
    main()