#coding=utf-8
import glob
import os
import xml.dom.minidom

from PIL import Image
from scipy import io

# Root of the source dataset; the script reads `lists/<split>_list.mat` and
# `Images/<relative path>` beneath it. Must end with '/' because the paths
# below are built by plain string concatenation.
source_dir = '/shared/sets/datasets/stanford_dogs/'
# Directory searched recursively for the per-image XML annotation files.
# NOTE(review): this is relative to the current working directory, unlike the
# two absolute paths here -- confirm the script is always run from a directory
# that contains `Annotation/`.
annotations = './Annotation/'
# Root under which the cropped images are written as `<split>/<relative path>`.
# Must also end with '/'.
target_dir = '/home/z1164034/datasets/dogs/'


# Crop every image listed in the train/test split files to its annotated
# bounding box and save the crop under the same relative path in target_dir.
for split in ['train', 'test']:
    file_list = io.loadmat(source_dir + f'lists/{split}_list.mat')['file_list']

    for file in file_list:
        # file[0][0] is used as the image path relative to `Images/`
        # (presumably '<breed dir>/<image>.jpg' -- verify against the .mat file).
        rel_path = file[0][0]

        # The annotation file has the same relative path without the extension.
        # splitext only strips the final suffix, so a dot elsewhere in the
        # path cannot truncate the name.
        search_pattern = os.path.join(
            annotations, '**', os.path.splitext(rel_path)[0])
        matches = glob.glob(search_pattern, recursive=True)
        if not matches:
            raise FileNotFoundError(
                f'no annotation file matches {search_pattern!r}')
        annotation_file = matches[0]
        print(annotation_file)
        dom = xml.dom.minidom.parse(annotation_file)
        root = dom.documentElement

        # Only the first <xmin>/<ymin>/<xmax>/<ymax> element in the document is
        # used; any further bounding boxes in the same annotation are ignored.
        x, y, X, Y = (
            root.getElementsByTagName(tag)[0].firstChild.data
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        source_path = f'{source_dir}Images/{rel_path}'
        target_path = f'{target_dir}{split}/{rel_path}'
        print(f'source file: {source_path}')
        print(f'pos: {x}, {y}, {X}, {Y}')

        # makedirs creates missing parents (e.g. target_dir/<split>) and is a
        # no-op when the directory already exists.
        os.makedirs(os.path.dirname(target_path), exist_ok=True)

        # The context manager closes the source file handle once the crop has
        # been written; saving inside the block keeps the pixel data available.
        with Image.open(source_path) as im:
            region = im.crop((int(x), int(y), int(X), int(Y)))
            region.save(target_path)
        print(f'target file: {target_path}')


