# script to change the format of anonymous's annotated dataset into a format compatible with the previous annotation
from datasets import load_dataset, Dataset
from collections import defaultdict
import argparse

def _flatten_rows(examples):
    """Expand each example into one flat record per response.

    Every example is indexed with the keys 'prompt', 'response' and
    'features'. 'response' and 'features' are iterated in lockstep, and each
    'features' item is a mapping of feature name to feature value. Each
    (response, features) pair yields one output record that carries the
    example's prompt, the response, and one column per feature.

    Args:
        examples: Iterable of mapping-like rows with the keys described above.

    Returns:
        A plain dict mapping column name to a list of values, all lists
        having the same length.

    Raises:
        ValueError: If a feature is named 'prompt' or 'response', or if the
            resulting columns end up with different lengths (i.e. the
            feature keys are not the same for every response).
    """
    columns = defaultdict(list)

    for example in examples:
        for response, features in zip(example['response'], example['features']):
            columns['prompt'].append(example['prompt'])
            columns['response'].append(response)
            for feature_name, feature_value in features.items():
                # A feature sharing a name with a base column would be
                # appended to that column and silently misalign the table.
                if feature_name in ('prompt', 'response'):
                    raise ValueError(
                        f"Feature name {feature_name!r} collides with a reserved column"
                    )
                columns[feature_name].append(feature_value)

    # All columns must be equally long to form a table; fail here with a
    # readable message instead of deep inside the dataset constructor.
    lengths = {name: len(values) for name, values in columns.items()}
    if len(set(lengths.values())) > 1:
        raise ValueError(
            f"Feature keys differ between responses; column lengths: {lengths}"
        )

    return dict(columns)


def main():
    """Flatten an annotated dataset and save it locally or push it to the Hub.

    Command-line options:
        --dataset:    Dataset identifier passed to ``load_dataset``.
        --split:      Split of the loaded dataset to convert (default 'train').
        --save_local: Save under ``out/<dataset_name>`` instead of pushing
                      to ``anonymous/<dataset_name>``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--dataset', default="anonymous/hh-generated_flan_t5_large_with_features2")
    ap.add_argument('--split', default='train', help='Split of the dataset to convert')
    ap.add_argument('--save_local', action='store_true', help='Option to save data in local')

    args = ap.parse_args()

    ds = load_dataset(args.dataset)
    # Use the last path component as the name so identifiers without a
    # namespace (no '/') or with a trailing slash do not break.
    dataset_name = args.dataset.rstrip('/').rsplit('/', 1)[-1]

    new_ds = Dataset.from_dict(_flatten_rows(ds[args.split]))
    if args.save_local:
        fname = f"out/{dataset_name}"
        new_ds.save_to_disk(fname)
        print(f'Saved to {fname}')
    else:
        new_ds.push_to_hub(f"anonymous/{dataset_name}")

# Run the conversion only when executed as a script, so that importing this
# module does not trigger a dataset download or upload.
if __name__ == "__main__":
    main()