import datetime
import json
import os
import random


# Collapses each fine-grained verdict string found in a sample's "label"
# field into one of three coarse classes: "true", "false" or "mixed".
# The empty string and "No label" both fall into "mixed".
fine_annotation_to_label = {
    "False": "false",
    "Half True": "mixed",
    "Mostly False": "false",
    "True": "true",
    "Mostly True": "true",
    "Pants on Fire": "false",
    "": "mixed",
    "Labeled Satire": "false",
    "Miscaptioned": "false",
    "Fake": "false",
    "Incorrect Attribution": "false",
    "Mixture": "mixed",
    "Unfounded": "false",
    "Correct Attribution": "true",
    "Scam": "false",
    "Research In Progress": "mixed",
    "Unproven": "false",
    "Originated as Satire": "false",
    "Recall": "mixed",
    "Outdated": "false",
    "Legend": "false",
    "Legit": "true",
    "Lost Legend": "false",
    "Mixed": "mixed",
    "No label": "mixed",
}


def load_fc30(split: str = "temporal", seed=0, root_folder: str = "./data/") -> list:
    """Load all claim files from *root_folder* and return them in split order.

    The first line of every regular file in *root_folder* is parsed as a JSON
    object that must provide the keys ``"year"``, ``"month"``, ``"day"``,
    ``"claim"`` and ``"label"``. Sub-directories are skipped.

    Args:
        split: ``"random"`` shuffles the records with a generator seeded by
            *seed*; ``"temporal"`` orders them by ascending date.
        seed: Seed passed to ``random.Random`` for the ``"random"`` split.
        root_folder: Directory holding the sample files. Defaults to the
            previously hard-coded ``"./data/"``.

    Returns:
        A list of dicts with the keys ``"claim"``, ``"label"`` (the coarse
        label looked up in ``fine_annotation_to_label``), ``"annotation"``
        (the raw label string from the file) and ``"date"`` (a naive
        ``datetime.datetime`` built from year/month/day).

    Raises:
        AssertionError: If *split* is neither ``"random"`` nor ``"temporal"``.
        KeyError: If a sample misses a required key or carries a label that is
            not present in ``fine_annotation_to_label``.
        json.JSONDecodeError: If the first line of a file is not valid JSON
            (this includes an empty file).
    """
    if split not in ("random", "temporal"):
        # Raised explicitly instead of via `assert` so the check is not
        # stripped under `python -O`; the exception type is kept unchanged.
        raise AssertionError(f"Split not in ['random', 'temporal'], got {split}")

    records = []
    # os.listdir returns entries in arbitrary order; sorting the names makes
    # the seeded shuffle reproducible across machines and file systems.
    for file_name in sorted(os.listdir(root_folder)):
        path = os.path.join(root_folder, file_name)
        if not os.path.isfile(path):
            continue
        # Only the first line is used, so avoid reading the whole file.
        # NOTE(review): assumes the data files are UTF-8 encoded JSON.
        with open(path, "r", encoding="utf-8") as handle:
            sample = json.loads(handle.readline())
        date = datetime.datetime(
            year=sample["year"],
            month=sample["month"],
            day=sample["day"],
        )
        records.append(
            {
                "claim": sample["claim"],
                "label": fine_annotation_to_label[sample["label"]],
                "annotation": sample["label"],
                "date": date,
            }
        )

    if split == "random":
        # Shuffling the records directly yields the same permutation as
        # shuffling an index list of equal length with the same seed.
        random.Random(seed).shuffle(records)
    else:
        # Sort whole records in one pass so claim, label and annotation stay
        # together. Sorting each field separately (zipped with the date) broke
        # ties on a shared date by different values and misaligned the fields.
        # The claim text is the tie-breaker, matching the previous claim order.
        records.sort(key=lambda record: (record["date"], record["claim"]))

    return records