import os
import sys
import numpy as np 
import pandas as pd 
import cv2
import multiprocessing
import argparse
from tqdm import tqdm
from joblib import Parallel, delayed
from args import *

def get_movie_frame(s_t, e_t, video_file_path, color = 'RGB'):
	"""Return the video frame located at time ``s_t`` of a movie file.

	The capture is positioned one frame duration before ``s_t`` and two
	frames are read; the second one is returned.

	Args:
		s_t: Start time in seconds (converted to milliseconds for seeking).
			May be NaN, in which case no frame is fetched.
		e_t: End time. Accepted for interface compatibility; not used to
			select the frame.
		video_file_path: Path of the video file to open with OpenCV.
		color: Currently unused; the frame is returned exactly as OpenCV
			decodes it.

	Returns:
		The frame produced by the second ``VideoCapture.read()`` call, or
		``None`` when ``s_t`` is NaN. NOTE(review): OpenCV also hands back
		``None`` as the frame when a read fails, so callers should be
		prepared for ``None`` on unreadable positions as well.

	Raises:
		AssertionError: If the capture reports a frame rate of 0, which is
			treated as "no movie at this path".
	"""
	video_cap = cv2.VideoCapture(video_file_path)
	try:
		sr = video_cap.get(cv2.CAP_PROP_FPS)
		# Raised explicitly (rather than via `assert`) so the check survives
		# `python -O`; the exception type is kept for existing callers.
		if sr == 0:
			raise AssertionError('No movie was found under: {}'.format(video_file_path))

		if np.isnan(s_t):
			# No usable timestamp: signal "no frame" to the caller.
			return None

		s_i = int(s_t * 1000)
		# Seek one frame duration (1000 / sr ms) before the start time, then
		# discard the first decoded frame and keep the one that follows it.
		video_cap.set(cv2.CAP_PROP_POS_MSEC, s_i - 1000 / sr)
		video_cap.read()
		_, current_frame = video_cap.read()
		return current_frame
	finally:
		# Always free the decoder handle, including on early return / error.
		video_cap.release()

if __name__ == '__main__':
	# For every requested movie: read its word-level transcript, grab one
	# frame per word in parallel, then write each frame resized to 1280x540
	# as a PNG under movie-frames/<movie>-images/.
	args = image_args()
	movie_names = args.movies
	num_cores = multiprocessing.cpu_count()  # same for every movie, so computed once
	for movie_name in movie_names:
		transcript_df = pd.read_csv(f'/storage/datasets/neuroscience/ecog/transcripts/{movie_name}/manual/word-times-stanford.csv')
		# Drop rows with missing values and renumber so that the row index
		# matches the position of the frame in `images` below.
		transcript_df = transcript_df.dropna().reset_index(drop = True)
		video_file_path = os.path.join('/storage/datasets/neuroscience/ecog', 'stimuli', 'movies', movie_name+'.mp4')
		idx_itr = tqdm(transcript_df.index, desc='Collecting movie frames')
		# Keep the results as a plain list: wrapping them in np.array copies
		# every frame and fails on inhomogeneous input if any entry is None.
		images = Parallel(n_jobs = num_cores)(delayed(get_movie_frame)(transcript_df.loc[i, 'start'], \
																					transcript_df.loc[i, 'end'], video_file_path) for i in idx_itr)
		out_dir = os.path.join('movie-frames', movie_name+'-images')
		# Creates the parent 'movie-frames' directory too and tolerates an
		# already existing output directory.
		os.makedirs(out_dir, exist_ok = True)

		for i, image in tqdm(enumerate(images), total = len(images)):
			if image is None:
				# No frame could be fetched for this word; skip it but keep
				# the numbering of the remaining files aligned with the transcript.
				print(f'Skipping {movie_name}-{i}: no frame available', file = sys.stderr)
				continue
			image = cv2.resize(image, (1280, 540))
			cv2.imwrite(os.path.join(out_dir, f'{movie_name}-{i}.png'), image)