import os
import io
import pickle
import argparse
from tqdm import tqdm
from PIL import Image


def get_pngs_from_bytes(args):
    """Decode image bytes stored in the pickled dataset and save them as PNGs.

    Loads ``<data_folder>/WikiWeb/wiki_data.pkl``, selects a subset of entry
    indices, and writes ``<data_folder>/WikiWeb/others/images/<idx>.png`` for
    every selected entry whose PNG does not exist yet, so an interrupted run
    can simply be restarted.

    Args:
        args: Namespace-like object providing:
            data_folder: Directory that contains the ``WikiWeb`` folder.
            mode: ``"chunk"`` selects the ``chunk_idx``-th block of
                ``chunk_size`` consecutive indices; any other value selects
                indices according to ``slice``.
            chunk_size: Number of entries per chunk (chunk mode only).
            chunk_idx: Zero-based chunk number (chunk mode only).
            slice: Python expression used outside chunk mode. If it contains
                ``":"`` it is applied as a slice of the full index list
                (e.g. ``"0:100"``); otherwise it must evaluate to an iterable
                of indices (e.g. ``"[3, 7, 42]"``) or to a single integer.

    Raises:
        ValueError: If ``mode`` is not ``"chunk"`` and ``slice`` is empty.
    """
    dataset_path = os.path.join(args.data_folder, "WikiWeb")
    file_name = os.path.join(dataset_path, "wiki_data.pkl")
    # NOTE(security): unpickling can execute arbitrary code; only point this
    # at dataset files from a trusted source.
    with open(file_name, 'rb') as file:
        dataset = pickle.load(file)
    idx_list = list(range(len(dataset)))

    if args.mode == "chunk":
        start = args.chunk_idx * args.chunk_size
        idx_list = idx_list[start:start + args.chunk_size]
    else:
        spec = args.slice.strip()
        if not spec:
            # The CLI default for --slice is "", which previously surfaced as
            # an opaque SyntaxError from eval(""); fail with a clear message.
            raise ValueError(
                'Empty --slice: pass e.g. "0:100" or "[3, 7, 42]" '
                'when --mode is not "chunk".'
            )
        # NOTE(security): eval runs arbitrary Python taken from the command
        # line. It is kept so existing expressions (e.g. "range(0, 50, 5)")
        # keep working; never feed it untrusted input.
        if ":" in spec:
            idx_list = eval(f"idx_list[{spec}]")
        else:
            selected = eval(spec)
            # Accept a bare integer such as "5" as a one-element selection.
            idx_list = [selected] if isinstance(selected, int) else list(selected)

    images_dir = os.path.join(dataset_path, "others", "images")
    # exist_ok avoids a check-then-create race when several jobs start at once.
    os.makedirs(images_dir, exist_ok=True)
    for idx in tqdm(idx_list, total=len(idx_list), desc="Getting images"):
        img_path = os.path.join(images_dir, str(idx) + ".png")
        if os.path.exists(img_path):
            continue
        # Save under a temporary name and rename afterwards, so an interrupted
        # run never leaves a truncated PNG that the existence check above
        # would skip forever.
        tmp_path = img_path + ".tmp"
        with Image.open(io.BytesIO(dataset[idx]['image_bytes'])) as image:
            image.convert('RGB').save(tmp_path, "PNG", optimize=True)
        os.replace(tmp_path, img_path)


if __name__ == '__main__':
    # Command-line entry point: register the options from a table, parse
    # them, and hand the resulting namespace to get_pngs_from_bytes.
    # Note: --func is accepted but is not read anywhere in this block.
    cli = argparse.ArgumentParser(description='')
    option_table = (
        ('--func', {'type': str, 'default': "get_pngs_from_bytes"}),
        ('--data_folder', {'type': str, 'default': "../../Datasets"}),
        ('--mode', {'type': str, 'default': "debug", 'help': "[debug, chunk]"}),
        ('--slice', {'type': str, 'default': ""}),
        ('--chunk_size', {'type': int, 'default': 20000}),
        ('--chunk_idx', {'type': int, 'default': 0}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    args = cli.parse_args()

    get_pngs_from_bytes(args)
