
#
import os
import json
import pandas as pd
from shutil import copy, copyfile
import glob
import xml.etree.ElementTree as ET
from PIL import Image

from IPython import embed

# Root directories: `raw_path` is where the source images are read from and
# `processed_path` is where the per-class output is written.
processed_path = 'processed'
raw_path = 'raw'

# JSON mapping whose values are used as class (sub-directory) names.
with open('idx_to_class.json') as mapping_file:
    idx_to_class = json.loads(mapping_file.read())

# Make sure every class has an output directory under `processed_path`.
# `exist_ok=True` replaces the separate exists()-then-create check, so there
# is no window between the check and the creation, and a path that exists
# but is not a directory fails here instead of later when saving.
for _class in idx_to_class.values():
    _path = os.path.join(processed_path, _class)
    os.makedirs(_path, exist_ok=True)

# Annotation sources used to cut crops from `raw_path` into `processed_path`.
# The VOC annotation directory is derived from `raw_path` so that it follows
# the raw-data root; the trailing '' component keeps the trailing separator,
# so the value is unchanged for the default raw_path.
annotated_path = os.path.join(raw_path, 'VOCdevkit', 'VOC2008', 'Annotations', '')
# Table of images to process; read below for its 'image_path' column.
image_class = pd.read_csv('apy_image_label.csv')
# Raw Yahoo annotation lines, stripped of surrounding whitespace.
with open('ayahoo_test.txt') as fp:
    yahoo_lines = [line.strip() for line in fp]

# Group the Yahoo annotations by image file name.
# Every non-blank line must hold at least six whitespace-separated fields:
#   <file_name> <class_name> <xmin> <ymin> <xmax> <ymax>
# A file name may occur on several lines; each occurrence adds one entry to
# that file's list.
yahoo_mapping = {}
for line_no, line in enumerate(yahoo_lines, start=1):
    split_words = line.split()
    if not split_words:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no annotation; skip them instead of failing on the field lookups.
        continue
    if len(split_words) < 6:
        raise ValueError(
            f'malformed annotation on line {line_no}: {line!r}'
        )

    file_name, class_name = split_words[0], split_words[1]
    xmin, ymin, xmax, ymax = (float(value) for value in split_words[2:6])

    # setdefault creates the per-file list on first sight of the file name.
    yahoo_mapping.setdefault(file_name, []).append({
        'class_name': class_name,
        'xmin': xmin,
        'ymin': ymin,
        'xmax': xmax,
        'ymax': ymax,
    })

# Yahoo images that are saved whole: their annotated boxes are not applied.
_UNCROPPED_IMAGES = ('bag_227.jpg', 'mug_308.jpg')


def _padded_box(xmin, ymin, xmax, ymax, width, height, padding=15):
    """Return the box grown by `padding` on each side, clamped to the image."""
    return (
        max(xmin - padding, 0),
        max(ymin - padding, 0),
        min(xmax + padding, width),
        min(ymax + padding, height),
    )


def _next_crop_path(class_name, image_name):
    """Return the output path ``<processed>/<class>/<stem>_<k>.jpg`` for a crop.

    ``k`` is one more than the number of existing paths that start with
    ``<processed>/<class>/<stem>``.
    """
    # splitext instead of slicing off four characters, so extensions of any
    # length are handled.
    stem = os.path.splitext(image_name)[0]
    file_path = os.path.join(processed_path, class_name, stem)
    # NOTE(review): this prefix glob also counts files of other images whose
    # stem merely starts with this one (e.g. 'bag_22*' matches
    # 'bag_227_1.jpg'), and files left over from earlier runs. The pattern is
    # kept as-is so that generated file names do not change; confirm whether
    # anything depends on them before tightening it to file_path + '_*'.
    files = glob.glob(file_path + '*')
    return file_path + f'_{len(files) + 1}.jpg'


def _save_crops(src_path, image_name, annotations, crop=True):
    """Save one JPEG per annotation of the image at `src_path`.

    Args:
        src_path: Path of the source image.
        image_name: Base file name of the image; its stem names the outputs.
        annotations: Sequence of ``(class_name, (xmin, ymin, xmax, ymax))``.
        crop: When False, the whole image is saved for every annotation
            instead of the padded box.

    Returns:
        The number of files written.
    """
    if not annotations:
        # Nothing to export, so the image is never opened.
        return 0
    # Open the image once for all of its annotations and close the file
    # handle deterministically when done.
    with Image.open(src_path) as image:
        width, height = image.size
        for class_name, box in annotations:
            if crop:
                region = image.crop(_padded_box(*box, width, height))
            else:
                region = image
            region.save(_next_crop_path(class_name, image_name), 'JPEG')
    return len(annotations)


# Export one JPEG per annotated object of every distinct image listed in
# `image_class`. The output class directory comes from the annotation itself
# (VOC XML <name> or the Yahoo class field), not from the CSV row.
# `seen_images` maps each processed 'image_path' to the number of files
# written for it and also de-duplicates repeated rows.
seen_images = {}
for _, row in image_class.iterrows():
    rel_path = row['image_path']
    if rel_path in seen_images:
        continue
    seen_images[rel_path] = 0

    src_path = os.path.join(raw_path, rel_path)
    image_path = os.path.basename(src_path)

    if 'VOCdevkit' in src_path:
        # VOC images: boxes come from the XML file sharing the image's stem.
        annotation_file = os.path.join(
            annotated_path, os.path.splitext(image_path)[0] + '.xml'
        )
        xml_doc = ET.parse(annotation_file).getroot()
        annotations = [
            (
                obj.find('name').text,
                tuple(
                    float(obj.find(f'bndbox/{tag}').text)
                    for tag in ('xmin', 'ymin', 'xmax', 'ymax')
                ),
            )
            for obj in xml_doc.findall('object')
        ]
        seen_images[rel_path] += _save_crops(src_path, image_path, annotations)
    else:
        # Other images: boxes come from the Yahoo annotation mapping, keyed
        # by base file name. A missing entry raises KeyError.
        annotations = [
            (
                annotation['class_name'],
                (
                    annotation['xmin'],
                    annotation['ymin'],
                    annotation['xmax'],
                    annotation['ymax'],
                ),
            )
            for annotation in yahoo_mapping[image_path]
        ]
        seen_images[rel_path] += _save_crops(
            src_path,
            image_path,
            annotations,
            crop=image_path not in _UNCROPPED_IMAGES,
        )
