#!/usr/bin/env python3
"""
Check overlaps between useful/not_useful instances and gpt_resolved_id/agentless_resolved_id
"""

# Import the lists from seeresult.py
from seeresult import gpt_resolved_id, agentless_resolved_id

def _read_instance_ids(path):
    """Return the stripped, non-blank lines of the UTF-8 text file at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


def _percent(part, total):
    """Return *part* as a percentage of *total*, or 0.0 when *total* is zero."""
    # Guard against an empty instance file, which would otherwise raise
    # ZeroDivisionError in the summary.
    return part / total * 100 if total else 0.0


def _print_overlap(title, overlap):
    """Print one overlap section: title line, count, then matches (if any)."""
    print(title)
    print(f"Count: {len(overlap)}")
    if overlap:
        print("Matching instances:")
        for instance in overlap:
            print(f"  - {instance}")
    else:
        print("No matching instances found.")
    print()


def check_overlaps(useful_path='useful_instances.txt',
                   not_useful_path='not_useful_instances.txt',
                   gpt_resolved=None,
                   agentless_resolved=None):
    """Print an overlap report between instance files and resolved-ID lists.

    The "useful" file is compared against the GPT-resolved IDs and the
    "not useful" file against the Agentless-resolved IDs. Each file is read
    as one ID per line; surrounding whitespace is stripped and blank lines
    are ignored.

    Args:
        useful_path: Path of the text file listing useful instances.
        not_useful_path: Path of the text file listing not-useful instances.
        gpt_resolved: Sized collection of hashable IDs to compare the useful
            instances against. Defaults to the module-level
            ``gpt_resolved_id``.
        agentless_resolved: Sized collection of hashable IDs to compare the
            not-useful instances against. Defaults to the module-level
            ``agentless_resolved_id``.

    Returns:
        A ``(gpt_overlap, agentless_overlap)`` tuple of lists. Each list
        keeps the order (and any duplicates) of the corresponding file.

    Raises:
        OSError: If either file cannot be opened (e.g. FileNotFoundError).
    """
    if gpt_resolved is None:
        gpt_resolved = gpt_resolved_id
    if agentless_resolved is None:
        agentless_resolved = agentless_resolved_id

    # Both files are read before anything is printed, so a missing file
    # fails fast without leaving a half-written report.
    useful_instances = _read_instance_ids(useful_path)
    not_useful_instances = _read_instance_ids(not_useful_path)

    # Build the lookup sets once: O(1) membership tests instead of scanning
    # the resolved lists for every instance.
    gpt_lookup = set(gpt_resolved)
    agentless_lookup = set(agentless_resolved)

    print("=== Overlap Analysis ===\n")

    gpt_overlap = [
        instance for instance in useful_instances if instance in gpt_lookup
    ]
    _print_overlap(
        "Overlap between useful_instances.txt and gpt_resolved_id:",
        gpt_overlap,
    )

    agentless_overlap = [
        instance
        for instance in not_useful_instances
        if instance in agentless_lookup
    ]
    _print_overlap(
        "Overlap between not_useful_instances.txt and agentless_resolved_id:",
        agentless_overlap,
    )

    # Summary statistics. Sizes are taken from the collections as given
    # (not the de-duplicated lookup sets) so reported totals are unchanged.
    gpt_pct = _percent(len(gpt_overlap), len(useful_instances))
    agentless_pct = _percent(len(agentless_overlap), len(not_useful_instances))
    print("=== Summary ===")
    print(f"Total useful instances: {len(useful_instances)}")
    print(f"Total not useful instances: {len(not_useful_instances)}")
    print(f"GPT resolved instances in dataset: {len(gpt_resolved)}")
    print(f"Agentless resolved instances in dataset: {len(agentless_resolved)}")
    print(f"Useful instances matching GPT resolved: {len(gpt_overlap)} ({gpt_pct:.1f}% of useful)")
    print(f"Not useful instances matching Agentless resolved: {len(agentless_overlap)} ({agentless_pct:.1f}% of not useful)")

    return gpt_overlap, agentless_overlap

if __name__ == "__main__":
    # Script entry point: print the overlap report and keep both result
    # lists bound at module level.
    (gpt_matches, agentless_matches) = check_overlaps()