import pandas as pd
from tqdm import tqdm

# Read the question table from disk into a DataFrame.
csv_path = 'filtered_object_instance_relative_depth_questions.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)

# Build an order-independent key for the two objects named in a row.
def make_tuple(row):
    """Return the row's 'object1' and 'object2' values as a sorted 2-tuple.

    Sorting (Python's default ordering) makes (a, b) and (b, a) map to the
    same key, so a pair is identified regardless of which object a row
    lists first.
    """
    low, high = sorted((row['object1'], row['object2']))
    return low, high

# Attach the order-independent label pair to every question row.
df['Tuple Pair'] = df.apply(make_tuple, axis=1)

# Column order of the summary table. Passing it explicitly to the DataFrame
# constructor keeps the columns present even when there are no pairs at all,
# so later column lookups on the summary do not fail on an empty input.
summary_columns = [
    '#QuestionTotal',
    '#QuestionwithAnswerL1',
    '#QuestionwithAnswerL2',
    '#ImageInvolved',
]
summary_columns.insert(0, 'Tuple Pair')

# Per-pair statistics, keyed by the normalized (label1, label2) tuple.
pair_data = {}

# Split the rows by pair in a single pass instead of re-scanning the whole
# frame once per unique pair. sort=False keeps pairs in order of first
# appearance, which is the order Series.unique() would produce.
grouped = df.groupby('Tuple Pair', sort=False)

for pair, sub_df in tqdm(
    grouped,
    total=grouped.ngroups,
    desc="Processing label pairs",
):
    l1, l2 = pair
    answers = sub_df['answer(object1)']

    pair_data[pair] = {
        'Tuple Pair': f"({l1}, {l2})",
        # Number of question rows that involve this pair.
        '#QuestionTotal': len(sub_df),
        # Rows whose 'answer(object1)' value equals the first / second label
        # of the pair. When both labels are equal the two counts coincide.
        '#QuestionwithAnswerL1': (answers == l1).sum(),
        '#QuestionwithAnswerL2': (answers == l2).sum(),
        # Number of distinct 'image_id' values among those rows.
        '#ImageInvolved': sub_df['image_id'].nunique(),
    }

# Create output DataFrame (one row per pair, fixed column order).
output_df = pd.DataFrame(list(pair_data.values()), columns=summary_columns)

# Persist the per-pair summary table without the index column.
output_df.to_csv('pairwise_label_summary.csv', index=False)
print("Saved: pairwise_label_summary.csv")

# Report how many distinct normalized pairs occur in the input.
num_unique_pairs = df['Tuple Pair'].nunique()
print(f"Total unique label pairs: {num_unique_pairs}")

# (Optional) Show the ten pairs with the most questions, largest first.
ranked_pairs = output_df.sort_values(by='#QuestionTotal', ascending=False)
top10_pairs = ranked_pairs.head(10)
print("\nTop 10 most frequent label pairs:\n")
top10_view = top10_pairs[['Tuple Pair', '#QuestionTotal']]
print(top10_view)
