def process_in_ds(ds):
    """Flatten a translation dataset and tag every example with a content id.

    The ``translation.en`` column is renamed to ``input`` and
    ``translation.ro`` to ``reference``.  An ``id`` column is then added via
    a batched ``map``; each id is the SHA-1 hex digest of the UTF-8 encoded
    ``input`` text.

    Args:
        ds: Dataset-like object exposing ``flatten``, ``rename_column`` and
            ``map``.

    Returns:
        The object returned by the final ``map`` call.
    """
    import hashlib

    def _sha1_hex(text: str):
        # Identical input text always yields the same id.
        return hashlib.sha1(text.encode("utf-8")).hexdigest()

    def add_id(example):
        source = example["input"]
        if isinstance(source, list):
            # Batched call: one digest per input string.
            example["id"] = list(map(_sha1_hex, source))
        elif isinstance(source, str):
            # Single example.
            example["id"] = _sha1_hex(source)
        # Any other type is passed through without an "id" entry.
        return example

    renamed = (
        ds.flatten()
        .rename_column("translation.en", "input")
        .rename_column("translation.ro", "reference")
    )
    return renamed.map(add_id, batched=True)


import tqdm
from datasets import Dataset
import pandas as pd
# def get_in_ds_undetectable_exp(prompt_num=1000,repeat_num=1):
def get_in_ds_undetectable_exp(prompt_num=10, repeat_num=1000):
    """Build a dataset that repeats the first WMT16 ro-en test prompts.

    The ``wmt16`` / ``ro-en`` dataset is loaded and shuffled with seed 42,
    its ``test`` split is flattened, and ``translation.en`` /
    ``translation.ro`` are renamed to ``input`` / ``reference``.  The first
    ``prompt_num`` rows are then emitted ``repeat_num`` times.

    Each output row has four columns:

    * ``input`` / ``reference``: copied from the source row.
    * ``id``: sequential integer, ``repeat_idx * prompt_num + prompt_idx``.
    * ``reference_id``: ``prompt_idx``, i.e. the ``id`` that the same prompt
      has in the first repetition.

    Args:
        prompt_num: Number of distinct prompts to take from the start of the
            shuffled test split.  ``-1`` means every row of the split.
        repeat_num: How many times each prompt is repeated.

    Returns:
        A ``datasets.Dataset`` with ``prompt_num * repeat_num`` rows, sorted
        by ``id``.

    Raises:
        IndexError: Propagated from row indexing when ``prompt_num`` exceeds
            the number of rows in the test split.
    """
    from datasets import load_dataset

    wmt16 = load_dataset("wmt16", "ro-en").shuffle(seed=42)
    ds = wmt16["test"]
    ds = ds.flatten()
    ds = ds.rename_column("translation.en", "input")
    ds = ds.rename_column("translation.ro", "reference")

    if prompt_num == -1:
        prompt_num = len(ds)

    # Look each prompt row up exactly once.  The rows do not change between
    # repetitions, so indexing the dataset inside the repeat loop would redo
    # the same lookup ``repeat_num`` times per prompt.  Nothing is read when
    # there are no repetitions to emit.
    prompts = [ds[idx] for idx in range(prompt_num)] if repeat_num > 0 else []

    records = []
    for repeat_idx in tqdm.tqdm(range(0, repeat_num)):
        offset = repeat_idx * prompt_num
        for prompt_idx, row in enumerate(prompts):
            records.append(
                {
                    "input": row["input"],
                    "reference": row["reference"],
                    "id": offset + prompt_idx,
                    "reference_id": prompt_idx,
                }
            )

    ds_subset = Dataset.from_pandas(pd.DataFrame(records), preserve_index=False)
    return ds_subset.sort("id")

from . import get_output
from . import evaluate
from . import evaluate_bleu
from . import evaluate_ppl
