import argparse
import sys
from datetime import datetime
from pathlib import Path
from tqdm import trange

import pandas as pd


def expected_image_paths(dataset_path: Path, scene: str, wnid: str, model: str, image_id: int):
    """Return the image and image-meta paths that one index row implies.

    Args:
        dataset_path: Dataset root, i.e. the folder that contains the index csv.
        scene: Value of the row's ``scene`` column.
        wnid: Value of the row's ``wnid`` column.
        model: Value of the row's ``model`` column.
        image_id: Integer index label of the row; zero-padded to 10 digits
            in the file names.

    Returns:
        A ``(image_path, image_meta_path)`` tuple of ``Path`` objects.
    """
    image_folder = Path(dataset_path).joinpath('images', scene, wnid, model)
    stem = f'img_{image_id:010d}'
    return (image_folder.joinpath(f'{stem}.jpg'),
            image_folder.joinpath(f'{stem}_info.csv'))


def image_is_missing(dataset_path: Path, scene: str, wnid: str, model: str, image_id: int) -> bool:
    """Return True unless both the image and its meta csv exist as files."""
    image_path, image_meta_path = expected_image_paths(dataset_path, scene, wnid, model, image_id)
    return not (image_path.is_file() and image_meta_path.is_file())


def main(argv=None):
    """Check that every image and image meta file listed in the index exists.

    Rows whose image or meta file is absent are written to
    ``scene_<scene>_missing.csv`` next to the index. When nothing is missing,
    ``dataset_scene_<scene>_complete.txt`` is written instead and any stale
    missing-csv is deleted. Both outcomes clean up the other outcome's file so
    the two markers never contradict each other.

    NOTE(review): an earlier description of this script said the missing
    scene/model entries are also removed from processed_records; nothing in
    this script does that, it only reports.

    Args:
        argv: Optional argument list; defaults to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--index', default='', help='the path to saved index')
    parser.add_argument('-s', '--scene', default='all', help='names of the scene to check')
    args = parser.parse_args(argv)

    index_path = Path(args.index)
    # The default '' resolves to the current directory, which read_csv cannot
    # open; fail with a usage message instead of an opaque traceback.
    if not index_path.is_file():
        parser.error(f'--index must point to an existing index csv file, got: {args.index!r}')
    dataset_path = index_path.parent

    full_df = pd.read_csv(index_path, index_col=0)
    print(f'Checking {index_path} ...')
    if args.scene != 'all':
        full_df = full_df[full_df['scene'] == args.scene]
        print(f'Checking scene: {args.scene} ...')
        if full_df.empty:
            # A mistyped scene name would otherwise be reported as complete
            # without any hint that nothing was actually checked.
            print(f'Warning: no rows found for scene {args.scene!r} in {index_path}',
                  file=sys.stderr)
    else:
        print('Checking all scenes ...')

    # Iterate the needed columns directly instead of building a Series per row.
    row_fields = zip(full_df.index, full_df['scene'], full_df['wnid'], full_df['model'])
    # trange goes first in the zip so it is the iterator that gets exhausted,
    # which lets the progress bar finish and close cleanly.
    missing_idx = [
        pos
        for pos, (image_id, scene, wnid, model) in zip(trange(len(full_df)), row_fields)
        if image_is_missing(dataset_path, scene, wnid, model, image_id)
    ]

    # Convert the list of missing row positions to a DataFrame
    missing_df = full_df.iloc[missing_idx]

    complete_path = dataset_path.joinpath(f'dataset_scene_{args.scene}_complete.txt')
    missing_path = dataset_path.joinpath(f'scene_{args.scene}_missing.csv')

    if len(missing_df) == 0:
        print("No missing images. Dataset is complete!")
        complete_time = datetime.now()
        complete_path.write_text(f'Dataset scene {args.scene} is complete, checked: {complete_time.strftime("%Y-%m-%d %H:%M:%S")}',
                                 encoding="utf-8")
        missing_path.unlink(missing_ok=True)
        # NOTE(review): passing a string to sys.exit prints it to stderr and
        # exits with status 1, so the "complete" outcome reports a failing
        # status. Kept unchanged because wrapper scripts may rely on it.
        sys.exit("exit program.")

    print(f"Scene {args.scene} missing {len(missing_df)} images.")
    missing_df.to_csv(missing_path)
    # A completion marker left over from an earlier run is no longer true.
    complete_path.unlink(missing_ok=True)


if __name__ == '__main__':
    main()
