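R"""Scratch script for inspecting HANS `lexical_overlap` examples.

Setup (makes the `em` package importable):

cd ~/Desktop/projects/extract_merge1
export PYTHONPATH=$PYTHONPATH:~/Desktop/projects/extract_merge1

Run interactively:

python3 -i local_scripts/ll/hans_parsing_01.py

Same, but with CUDA_VISIBLE_DEVICES set to empty so that no GPU is visible:

CUDA_VISIBLE_DEVICES= python -i local_scripts/ll/hans_parsing_01.py

"""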
from importlib import reload
import itertools
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import AutoTokenizer

from em import datasets as em_datasets
from em.projects.ll import hans_util
from em.projects.ll import hans_parsing


###############################################################################

# NOTE: The entailment and non-entailment examples might come from different
# sets of templates.

# Related TFDS config, for reference:
#   tfds.load('hans/lexical_overlap_ne', split='validation')


def _select_lexical_overlap_label_1(dataset):
    """Chain the two `em_datasets.hans` filters used for this experiment.

    The dataset is first filtered with the predicate built for the
    'lexical_overlap' heuristic and then with the predicate built for
    label 1 (presumably the non-entailment class -- TODO confirm against
    `em_datasets.hans`).
    """
    overlap_only = dataset.filter(
        em_datasets.hans._filter_by_heuristic_fn('lexical_overlap'))
    return overlap_only.filter(em_datasets.hans._filter_by_label_fn(1))


# Arguments are passed positionally: split name, 5000 (presumably the maximum
# number of examples to fetch -- see `hans_util`), and the dataset transform.
examples = hans_util.get_first_hans_examples(
    'validation',
    5000,
    _select_lexical_overlap_label_1,
)
# Uncomment to eyeball the first few examples:
# print(examples[:10])

# Length handed to the dataset loader together with the tokenizer below.
sequence_length = 64
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

# Validation split of the project's 'hans/lexical_overlap_ne_with_flipped'
# dataset, built with the tokenizer and sequence length defined above.
ds = em_datasets.load(
    'hans/lexical_overlap_ne_with_flipped',
    split='validation',
    tokenizer=tokenizer,
    sequence_length=sequence_length,
)

# Iterate over the entire dataset once and print the index of its final
# element (so the number of elements is i + 1).
#
# `i` and `x` are bound up front: a `for` loop never assigns its targets when
# the iterable is empty, so without this an empty dataset would make
# `print(i)` raise NameError -- or, in a reused interactive session, silently
# print a stale value. With the pre-binding an empty dataset prints -1.
#
# The names `i` and `x` are kept as-is because the script is meant to be run
# with `python -i`, which leaves the last index and last element available
# for inspection afterwards.
i, x = -1, None
for i, x in enumerate(ds):
    pass
print(i)

# # template = 'temp1'
# # template = 'temp2'
# # template = 'temp3'
# # template = 'temp4'
# template = 'temp25'

# for x in examples:
#     if x['template'] == template:
#         print(x['premise'])
#         print(x['hypothesis'])
#         if template in hans_parsing.ADSJHKKAS:
#             print(hans_parsing.get_flipped_label_hypothesis_for_example(x))
#         print()


# #
