from pathlib import Path

import pandas as pd
from nltk.corpus import wordnet as wn

# Ask NLTK to download the two corpora this script needs before any lookup runs
import nltk

for _corpus_id in ('wordnet', 'omw-1.4'):
    nltk.download(_corpus_id)

# Define pairs of WordNet synsets for the case study.
# Each entry is a (first, second) tuple of synset-name strings; every name
# listed here ends in ".n.01". The strings are turned into Synset objects via
# get_synset() when the join and meet tables are built.
concept_pairs = [
    ('dog.n.01', 'wolf.n.01'),
    ('car.n.01', 'bicycle.n.01'),
    ('cat.n.01', 'lion.n.01'),
    ('violin.n.01', 'piano.n.01'),
    ('teacher.n.01', 'student.n.01'),
]

# Helper to convert synset name to Synset object
def get_synset(name):
    """Look up *name* with ``wn.synset`` and return the result unchanged."""
    synset = wn.synset(name)
    return synset

# Find the lowest common hypernym (join)
def get_join(syn1, syn2):
    """Return a label for the join (lowest common hypernym) of two synsets.

    The label is the first lemma name of the first synset reported by
    ``syn1.lowest_common_hypernyms(syn2)``. When that call reports nothing,
    the literal string ``'None'`` is returned instead.
    """
    candidates = syn1.lowest_common_hypernyms(syn2)
    if not candidates:
        return 'None'
    top_candidate = candidates[0]
    return top_candidate.lemma_names()[0]

# Find the highest common hyponym (meet) — approximated via shared hyponyms
def get_meet(syn1, syn2):
    """Return a label for the meet (highest common hyponym) of two synsets.

    The lower set of a synset is the synset itself plus everything reachable
    through ``hyponyms()``. The meet is chosen among the members common to
    both lower sets that are not a hyponym of another common member. If
    several such members remain, the one with the smallest ``name()`` is
    used, so the result does not depend on set iteration order.

    Args:
        syn1: First synset; must provide ``closure``, ``hyponyms``,
            ``name`` and ``lemma_names``.
        syn2: Second synset, same requirements.

    Returns:
        The first lemma name of the chosen synset, or the string ``'None'``
        when the two synsets share no lower bound.
    """
    def lower_set(synset):
        # A synset is a lower bound of itself; without it, the meet of a
        # synset and one of its own ancestors could never be found.
        return {synset, *synset.closure(lambda s: s.hyponyms())}

    common = lower_set(syn1) & lower_set(syn2)
    if not common:
        return 'None'

    # Both lower sets are closed under hyponymy, so their intersection is
    # too: any non-maximal member is a direct hyponym of another member.
    dominated = {child for member in common for child in member.hyponyms()}
    # Fall back to the whole set if nothing survives (only possible if the
    # hyponym relation contained a cycle).
    maximal = (common - dominated) or common

    chosen = min(maximal, key=lambda s: s.name())
    return chosen.lemma_names()[0]

# Collect one (name, name, label) row per concept pair for each table
join_data, meet_data = [], []

for left_name, right_name in concept_pairs:
    # Resolve both names first, then compute the join before the meet
    left, right = get_synset(left_name), get_synset(right_name)
    join_label = get_join(left, right)
    meet_label = get_meet(left, right)
    join_data.append((left_name, right_name, join_label))
    meet_data.append((left_name, right_name, meet_label))

# Wrap the collected rows in labelled DataFrames, one per lattice operation
join_df = pd.DataFrame(
    data=join_data,
    columns=["Concept 1", "Concept 2", "Join (Least Upper Bound)"],
)
meet_df = pd.DataFrame(
    data=meet_data,
    columns=["Concept 1", "Concept 2", "Meet (Greatest Lower Bound)"],
)


# Save as CSV
join_df_path = "./datasets/wordnet_join_table.csv"
meet_df_path = "./datasets/wordnet_meet_table.csv"

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing; otherwise a fresh checkout fails here.
for _csv_path in (join_df_path, meet_df_path):
    Path(_csv_path).parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the row index out of the files
join_df.to_csv(join_df_path, index=False)
meet_df.to_csv(meet_df_path, index=False)
