import pandas as pd
from transformers import AutoTokenizer

def get_tokens(x, prefix):
    """Split a row's token list at its ``wrong_index``.

    Args:
        x: Mapping-like row (for example a DataFrame row) providing
            ``'tokens'`` (a sized, sliceable sequence) and ``'wrong_index'``
            (any value accepted by ``int()``).
        prefix: If truthy, return the tokens before ``wrong_index``;
            otherwise return the tokens from ``wrong_index`` onward.

    Returns:
        The requested slice of ``x['tokens']``, or ``['N/A']`` when
        ``wrong_index`` does not address an existing token.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when
            ``wrong_index`` is not convertible (e.g. NaN).
    """
    tokens = x['tokens']
    split_at = int(x['wrong_index'])
    # Slicing never raises IndexError (out-of-range bounds are clamped and
    # negative bounds count from the end), so the fallback has to be an
    # explicit range check rather than an ``except IndexError``.
    if not 0 <= split_at < len(tokens):
        return ['N/A']
    return tokens[:split_at] if prefix else tokens[split_at:]


# Script: tokenize each row's text, split the tokens at a per-row index
# derived from ``eoe_init``, and write the result with a fresh ``doc_id``.
INPUT_CSV = '/mnt/home/dongkeun/L2U/outputs/init_08.29.09.09.csv'
OUTPUT_CSV = '/mnt/home/dongkeun/L2U/outputs/doc_id_reset_count.csv'
TOKENIZER_NAME = 'EleutherAI/gpt-neo-125M'
# NOTE(review): 127 looks like the last position of a 128-token window,
# with ``eoe_init`` counted back from it -- confirm against the producer
# of the input CSV.
WRONG_INDEX_BASE = 127

df = pd.read_csv(INPUT_CSV)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)

df['tokens'] = df['text'].apply(tokenizer.tokenize)
df['wrong_index'] = WRONG_INDEX_BASE - df['eoe_init']
# Reuse the shared helper instead of repeating the slicing logic inline.
df['prefix'] = df.apply(lambda row: get_tokens(row, True), axis=1)
df['suffix'] = df.apply(lambda row: get_tokens(row, False), axis=1)

# Drop the working columns and the old ``doc_id``; the new ``doc_id`` is the
# DataFrame index, written out via ``index_label``.
df = df.drop(columns=['tokens', 'wrong_index', 'doc_id'])
# ``to_csv`` returns None when given a path, so its result is not assigned
# back to ``df``.
df.to_csv(OUTPUT_CSV, index_label='doc_id')