#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from skimage.measure import block_reduce
import os
import numpy as np
from PIL import Image, UnidentifiedImageError
import pickle
from tqdm import tqdm
from glob import glob
#import random

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
maxsize = 500  # Maximum number of files kept per dataset (after shuffling).
targsize = 60 # About 22:00 mins per sim
downsample = True  # NOTE(review): not read anywhere in this script.
nocolor = True  # Keep only channel 0 of multi-channel images.
shuffle = True  # Shuffle the sorted file list (fixed seed) before truncating.
datasets = ['imagenette','nature','cells','crops','butterfly','cityscape','weather','satellite','intel','sports']


def list_image_files(fpath):
    """Return the sorted, de-duplicated image files found under *fpath*.

    The search is recursive and matches names ending in jpg/jpeg/png in
    either all-lower or all-upper case.

    Args:
        fpath: Root directory to search; a trailing separator is optional.

    Returns:
        A sorted list of path strings pointing at regular files.
    """
    extensions = ['jpg', 'jpeg', 'png']
    extensions += [x.upper() for x in extensions]
    # os.path.join guarantees a separator before "**"; without it "dir**"
    # is an ordinary wildcard and the search is no longer recursive.
    # The set removes duplicates produced when pattern matching is
    # case-insensitive and both "jpg" and "JPG" hit the same file.
    found = {
        f
        for ext in extensions
        for f in glob(os.path.join(fpath, "**", "*" + ext), recursive=True)
        if os.path.isfile(f)
    }
    return sorted(found)


def load_image(fn):
    """Open *fn*, resize it to targsize x targsize and return it as an array.

    When ``nocolor`` is set and the decoded array has more than two
    dimensions, only channel 0 is kept (e.g. red for RGB input); this is a
    channel pick, not a luminance conversion.

    Raises:
        OSError: If the file cannot be opened or decoded by Pillow
            (``UnidentifiedImageError`` is a subclass of ``OSError``).
    """
    # The context manager releases the file handle even if decoding fails.
    with Image.open(fn) as img:
        arr = np.array(img.resize((targsize, targsize)))
    if nocolor and arr.ndim > 2:
        arr = arr[:, :, 0]
    return arr


def load_images(files):
    """Load every path in *files*, skipping (and reporting) unreadable ones.

    Returns:
        A list of arrays, one per successfully decoded file, in input order.
    """
    ims = []
    for f in tqdm(files):
        try:
            ims.append(load_image(f))
        except OSError:
            # Covers UnidentifiedImageError as well as truncated/corrupt
            # files, so one bad file does not abort the whole dataset.
            print(f"Bad file: {f}")
    return ims


def normalize_unit(arr):
    """Linearly rescale *arr* so its minimum maps to 0 and its maximum to 1.

    Integer and boolean input is converted to float64 first (boolean arrays
    do not support subtraction); floating-point input keeps its dtype.
    A constant array has no range to stretch and is returned as all zeros
    instead of dividing by zero.

    Args:
        arr: Non-empty array-like of numbers or booleans.

    Returns:
        A floating-point array of the same shape with values in [0, 1].
    """
    arr = np.asarray(arr)
    if arr.dtype.kind != 'f':
        arr = arr.astype(np.float64)
    lo = arr.min()
    span = arr.max() - lo
    if span == 0:
        return np.zeros_like(arr)
    return (arr - lo) / span


def main():
    """Build one pickle of normalised image arrays per configured dataset."""
    print("Please get rid of images BEFORE subsetting.")

    # Create the output directory up front so a missing folder cannot fail
    # the run after all images have already been processed.
    os.makedirs("pickles", exist_ok=True)

    for dataset in datasets:
        print(dataset)
        # TODO: placeholder path; as written every dataset reads the same
        # directory. Presumably it should depend on `dataset` - confirm.
        fpath = f'your_filepath_here'

        files = list_image_files(fpath)
        if shuffle:
            # Fixed seed so the selected subset is reproducible.
            np.random.seed(123)
            np.random.shuffle(files)
        else:
            print("Not shuffling!")
        files = files[:maxsize]

        ims = load_images(files)

        # Report what was actually loaded, not the number of candidates.
        print(f"After non-images, I have {len(ims)} files.")
        if not ims:
            print(f"No usable images found for {dataset}; skipping.")
            continue

        heights = [x.shape[0] for x in ims]
        widths = [x.shape[1] for x in ims]
        print(min(heights))
        print(max(heights))
        print(min(widths))
        print(max(widths))
        print(len(ims))

        # To [0,1]
        ims = [normalize_unit(x) for x in ims]

        with open(f"pickles/{dataset}.pkl",'wb') as f:
            pickle.dump(ims, f)


if __name__ == "__main__":
    main()

