
from PIL import Image
from tqdm import tqdm
import sys
import pickle
import os
# Two-pass cleanup of unreadable images, driven by the list file given as argv[1]
# (one image path per line):
#   pass 1 (no cache file yet): try to open every listed image and pickle the
#           paths that fail to open;
#   pass 2 (cache file exists): delete every path recorded by pass 1.
if len(sys.argv) < 2:
    raise SystemExit('usage: pass the path of a text file listing one image path per line')

broken = []

# The cache of broken paths lives in a fixed scratch directory and is named
# after argv[1] (plain concatenation, so the location matches earlier runs).
cache_path = '/scratch/shared/beegfs/yuki/tmp/' + sys.argv[1]

if not os.path.exists(cache_path):
    with open(sys.argv[1], 'r') as f_list:
        # Skip blank lines: an empty path always fails to open, would be cached
        # as "broken", and would then crash the delete pass in os.remove('').
        test_dataset = [_d.strip() for _d in f_list if _d.strip()]
    print(f'done preparing dataset!, N={len(test_dataset)}', flush=True)

    for file in tqdm(test_dataset):
        try:
            # Context manager closes the underlying file handle right away.
            with Image.open(file) as img:
                # Reading .size only exercises the header parse done by open();
                # pixel data is not decoded here.
                _ = img.size
        except Exception:
            # Exception (not a bare except) so that Ctrl-C / SystemExit abort
            # the scan instead of flagging a healthy file for deletion.
            broken.append(file)
    print(len(broken))
    with open(cache_path, 'wb') as f_out:
        pickle.dump(broken, f_out, protocol=4)
else:
    # NOTE(security): pickle.load can execute arbitrary code from the file.
    # This assumes the cache was written by the scan pass above - confirm
    # before pointing the script at a cache from any other origin.
    with open(cache_path, 'rb') as f_in:
        files = pickle.load(f_in)
    for file in files:
        try:
            os.remove(file)
        except FileNotFoundError:
            # Already gone (e.g. an earlier, interrupted delete pass); keep
            # going so the remaining files are still removed.
            pass
    # Debugging aid (disabled): print the size of every cached broken-file list.
    # import glob
    # ddir = '/scratch/shared/beegfs/yuki/tmp/'
    # for f in os.listdir(ddir):
    #     brokens = pickle.load(open(ddir+f,'rb'))
    #     print(len(brokens))


"""
from PIL import Image
import glob,os
import tqdm

files = glob.glob('/home/ubuntu/datauserbal800/user_balanced_800/*/**')
print(len(files))
broken = []


for f in tqdm.tqdm(files):
    try:
        _= Image.open(f)
    except:
        broken.append(f)

print(len(broken))
for f in broken:
    os.remove(f)


"""