# -*- coding: utf-8 -*-
import warnings

import pandas as pd


def load_dsg_file(file_path='dsg/data/dsg_benchmark.csv'):
    """Load a DSG benchmark CSV into a nested per-column lookup.

    The CSV is read with ``pandas.read_csv`` and must contain an ``item_id``
    column. Rows are grouped by ``item_id`` and, within each item, keyed by
    ``proposition_id``.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path or file-like
            object). Defaults to ``'dsg/data/dsg_benchmark.csv'``.

    Returns:
        A dict of the form ``{column_id: {item_id: {proposition_id: value}}}``
        with one top-level entry per CSV column (including ``item_id`` and
        ``proposition_id`` themselves). Items appear in order of first
        occurrence in the file; if a ``proposition_id`` repeats within an
        item, the last row wins. A header-only CSV yields an empty dict for
        every column.

        If the CSV has no ``proposition_id`` column, every item maps to an
        empty dict and a ``UserWarning`` is emitted.

    Raises:
        KeyError: If the CSV has rows but no ``item_id`` column.
    """
    dsg_df = pd.read_csv(file_path)
    column_ids = list(dsg_df.columns.values)

    # Columns expected by the original author (not enforced here): text,
    # keywords, proposition_id, dependency, category_broad,
    # category_detailed, tuple, question_natural_language.
    dsg_data = {column_id: {} for column_id in column_ids}

    # Previously a blanket ``except Exception`` hid this situation; keep the
    # same best-effort result (empty per-item dicts) but make it visible.
    has_proposition_id = 'proposition_id' in dsg_df.columns
    if not has_proposition_id:
        warnings.warn(
            "DSG file has no 'proposition_id' column; "
            "returning empty per-item mappings.",
            stacklevel=2,
        )

    # A single pass over the rows fills every column's mapping. ``iterrows``
    # is kept (rather than per-column iteration) so cell values have the
    # same types the row-wise implementation has always returned.
    for _, row in dsg_df.iterrows():
        item_id = row['item_id']
        qid = row['proposition_id'] if has_proposition_id else None

        for column_id in column_ids:
            # Each column owns its own per-item dict; created on first sight
            # of the item so insertion order follows the file.
            qid2value = dsg_data[column_id].setdefault(item_id, {})
            if has_proposition_id:
                qid2value[qid] = row[column_id]

    return dsg_data

