import logging
import os.path as osp
import tempfile

import mmcv
import numpy as np

import zipfile

from .custom import CustomDataset
from .builder import DATASETS


@DATASETS.register_module
class KSoundDataset(CustomDataset):
    """Dataset that pairs per-video JPEG lists with ``.npy`` audio entries.

    Samples are discovered by listing two zip archives (no member is
    extracted here): the annotation archive passed as ``ann_file`` and an
    audio archive named ``AUDIO_ZIP_NAME`` located in the same directory.
    """

    # File name of the audio archive expected next to ``ann_file``.
    AUDIO_ZIP_NAME = 'train-audio-npy.zip'
    # Videos with fewer ``.jpg`` entries than this are dropped.
    MIN_FRAMES = 32
    # Size recorded for every sample. It is hard-coded rather than read from
    # the images -- presumably frames are pre-resized; TODO confirm.
    FRAME_WIDTH = 240
    FRAME_HEIGHT = 240

    def load_annotations(self, ann_file):
        """Build the sample list from the image and audio archives.

        Args:
            ann_file (str): Path to the zip archive holding the ``.jpg``
                entries, laid out as ``.../<video_name>/<jpg_name>``.

        Returns:
            list[dict]: One dict per kept video with keys ``filename``
            (sorted list of ``<video_name>/<jpg_name>`` strings), ``width``,
            ``height`` and ``audioname`` (``<video_name>.npy``). A video is
            kept only if the audio archive has a matching ``.npy`` entry and
            the video has at least ``MIN_FRAMES`` ``.jpg`` entries.

        Raises:
            FileNotFoundError: If either archive does not exist.
            zipfile.BadZipFile: If either archive is not a valid zip file.
        """
        # Resolve the sibling archive with os.path so that an ``ann_file``
        # without a directory component does not resolve to the filesystem
        # root.
        audio_zip_path = osp.join(osp.dirname(ann_file), self.AUDIO_ZIP_NAME)
        with zipfile.ZipFile(audio_zip_path) as audio_zip:
            audio_entries = audio_zip.namelist()

        # Collect the base names (without suffix) of all ``.npy`` entries.
        # Only a trailing ``.npy`` is stripped, and non-``.npy`` entries are
        # ignored since ``audioname`` below is always ``<key>.npy``.
        npy_suffix = '.npy'
        audio_keys = set()
        for entry in audio_entries:
            base_name = entry.rsplit('/', 1)[-1]
            if base_name.endswith(npy_suffix):
                audio_keys.add(base_name[:-len(npy_suffix)])

        with zipfile.ZipFile(ann_file) as image_zip:
            image_entries = image_zip.namelist()

        # Group jpg entries by the name of their immediate parent directory.
        video_map = {}
        for entry in image_entries:
            if not entry.endswith('.jpg'):
                continue
            parts = entry.split('/')
            if len(parts) < 2:
                # A jpg at the archive root has no video directory; skip it
                # instead of failing with an IndexError.
                continue
            video_name, jpg_name = parts[-2], parts[-1]
            video_map.setdefault(video_name, []).append(
                video_name + '/' + jpg_name)

        img_infos = []
        for video_name, jpg_paths in video_map.items():
            if video_name not in audio_keys:
                continue
            if len(jpg_paths) < self.MIN_FRAMES:
                continue
            img_infos.append({
                'filename': sorted(jpg_paths),
                'width': self.FRAME_WIDTH,
                'height': self.FRAME_HEIGHT,
                'audioname': video_name + npy_suffix,
            })

        return img_infos

    def pre_pipeline(self, results):
        """Store ``self.img_prefix`` under ``results['img_prefix']``.

        Args:
            results (dict): Sample dict about to be passed to the pipeline;
                modified in place.
        """
        results['img_prefix'] = self.img_prefix

    def __getitem__(self, idx):
        """Return the pipeline output for sample ``idx``.

        This always delegates to :meth:`prepare_train_img`; there is no
        separate test-time branch in this class.
        """
        data = self.prepare_train_img(idx)
        return data

    def prepare_train_img(self, idx):
        """Run the pipeline on the sample at ``idx``.

        Args:
            idx (int): Index into ``self.img_infos``.

        Returns:
            Whatever ``self.pipeline`` returns for a ``results`` dict that
            contains ``img_info`` and ``img_prefix``.
        """
        # ``img_infos`` and ``pipeline`` are not set in this class --
        # presumably the base class initialiser provides them; TODO confirm.
        img_info = self.img_infos[idx]
        results = dict(img_info=img_info)
        self.pre_pipeline(results)
        return self.pipeline(results)


