import os
import xml.etree.ElementTree as ET
from shutil import copyfile
import os.path as osp

SPLITS = ('train', 'valid', 'test')


def image_id(filename):
    """Return the integer ID embedded in an image file name.

    The ID is the second underscore-separated field of the name, e.g.
    ``'BloodImage_00012_jpg.rf.abc.jpg'`` gives ``12``.

    Raises:
        IndexError: if the name contains no underscore.
        ValueError: if the second field is not an integer.
    """
    return int(filename.split('_')[1])


def collect_pairs(root_dir, split):
    """List the ``(id, image, xml)`` triples found in ``root_dir/split``.

    Images are taken in sorted file-name order. Each image is paired with
    the annotation that shares its stem (``foo.jpg`` <-> ``foo.xml``) rather
    than by list position, so a stray or missing ``.xml`` cannot silently
    shift every following pair.

    Raises:
        FileNotFoundError: if an image has no matching ``.xml`` file.
    """
    src_dir = osp.join(root_dir, split)
    names = sorted(os.listdir(src_dir))
    annotations = set(name for name in names if name.endswith('.xml'))

    pairs = []
    for image in (name for name in names if name.endswith('.jpg')):
        xml = osp.splitext(image)[0] + '.xml'
        if xml not in annotations:
            raise FileNotFoundError(
                'no annotation %r for image %r in %r' % (xml, image, src_dir))
        pairs.append((image_id(image), image, xml))
    return pairs


def convert_split(root_dir, split, pairs):
    """Write the renumbered copy of one split and return its object labels.

    For every ``(id, image, xml)`` triple in ``pairs`` the image is copied
    and the annotation rewritten into ``root_dir/<split>_new`` under the
    zero-padded five-digit ID. ``root_dir/<split>.txt`` receives one
    ``"<id>.jpg <id>.xml"`` line per pair (no trailing newline).

    NOTE: two images that yield the same ID overwrite each other in the
    output directory; this is not detected here.

    Returns:
        The set of non-empty ``object/name`` texts seen in the annotations.
    """
    src_dir = osp.join(root_dir, split)
    dst_dir = osp.join(root_dir, split + '_new')
    # tree.write/copyfile do not create parent directories.
    os.makedirs(dst_dir, exist_ok=True)

    file_ids = [str(idx).zfill(5) for idx, _, _ in pairs]
    with open(osp.join(root_dir, '%s.txt' % split), 'w') as f:
        f.write('\n'.join('%s.jpg %s.xml' % (fid, fid) for fid in file_ids))

    labels = set()
    for file_id, (_, image, xml) in zip(file_ids, pairs):
        tree = ET.parse(osp.join(src_dir, xml))
        root = tree.getroot()

        for obj in tree.findall('object'):
            label = obj.findtext('name')
            if label:  # skip objects with a missing or empty <name>
                labels.add(label)

        # Point the annotation at the renamed image; tolerate files that
        # lack one of the tags instead of failing on None.
        for tag in ('filename', 'path'):
            node = root.find(tag)
            if node is not None:
                node.text = file_id + '.jpg'
        root.set('updated', 'yes')

        tree.write(osp.join(dst_dir, file_id + '.xml'))
        copyfile(osp.join(src_dir, image), osp.join(dst_dir, file_id + '.jpg'))
    return labels


def main(root_dir='bccd'):
    """Renumber every split under ``root_dir`` and write the label vocabulary.

    All splits are listed (and validated) before anything is written. The
    vocabulary is written sorted to ``root_dir/objects_vocab.txt`` so the
    output is the same on every run.
    """
    pairs_by_split = dict((split, collect_pairs(root_dir, split)) for split in SPLITS)

    labels = set()
    for split in SPLITS:
        labels |= convert_split(root_dir, split, pairs_by_split[split])

    with open(osp.join(root_dir, 'objects_vocab.txt'), 'w') as f:
        f.write('\n'.join(sorted(labels)))


if __name__ == '__main__':
    main()
