R"""


cd ~/Desktop/projects/extract_merge1
export PYTHONPATH=$PYTHONPATH:~/Desktop/projects/extract_merge1


python3 -i local_scripts/pb/signal_peptide/getting_started/pb_dev001.py

CUDA_VISIBLE_DEVICES=0 python -i local_scripts/pb/signal_peptide/getting_started/pb_dev001.py

"""
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer

from em import datasets as em_datasets

###############################################################################

# Passed to the dataset loader as `sequence_length`
# (presumably the per-example token length -- confirm in em.datasets).
sequence_length = 72

# Dataset to inspect. Alternative used during development: 'signal_peptide/sp6'.
task = 'signal_peptide/sp6_binary'

# Split to inspect. Alternative used during development: 'train'.
split = 'validation'

# Tokenizer loaded from the 'Rostlab/prot_bert' checkpoint name.
tokenizer = AutoTokenizer.from_pretrained('Rostlab/prot_bert')

###############################################################################

# Load the configured task/split and group the examples into batches of 64.
ds = em_datasets.load(
    task,
    split=split,
    tokenizer=tokenizer,
    sequence_length=sequence_length,
).batch(64)

# Peek at the first batch only: print its two components and stop.
# To count batches instead, add `count += 1` inside the loop and remove the
# `break` (and the print, if the output is too noisy).
count = 0
for x, y in ds.as_numpy_iterator():
    print(x, y, sep='\n')
    break

# With the increment disabled above, this prints 0.
# NOTE(review): an earlier run recorded 20290 here -- presumably with counting
# enabled; the task/split/batching used for that run is not recorded.
print(count)
