import os
import glob
import pickle
import json
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed

def _mask_to_array(mask) -> np.ndarray:
    """Return *mask* as a 2-D (h, w) NumPy array.

    Accepts either a tensor-like object exposing ``detach()/cpu()/numpy()``
    or anything ``np.asarray`` understands. A leading singleton axis
    (shape ``(1, h, w)``) is dropped; an input that is already ``(h, w)`` is
    returned as is.

    Raises:
        ValueError: if the mask is neither ``(1, h, w)`` nor ``(h, w)``.
    """
    if hasattr(mask, "detach"):
        # A bare ``.numpy()`` raises for tensors that live on an accelerator
        # or that track gradients, so detach and move to host memory first.
        mask = mask.detach().cpu().numpy()
    mask_np = np.asarray(mask)

    if mask_np.ndim == 3 and mask_np.shape[0] == 1:
        mask_np = mask_np[0]
    if mask_np.ndim != 2:
        # Anything else would make the row/column interpretation below wrong.
        raise ValueError(
            f"expected a mask of shape (1, h, w) or (h, w), got {mask_np.shape}"
        )
    return mask_np


def _bbox_from_mask(mask_np: np.ndarray):
    """Return ``[x1, y1, x2, y2]`` (inclusive) around the truthy pixels, or None if there are none."""
    # np.nonzero returns (rows, cols): rows are y, cols are x.
    y_coords, x_coords = np.nonzero(mask_np)
    if y_coords.size == 0:
        return None
    return [
        int(x_coords.min()),
        int(y_coords.min()),
        int(x_coords.max()),
        int(y_coords.max()),
    ]


def convert_pickle_to_npz_and_json(pickle_path: str, output_dir: str) -> str:
    """
    Converts a single pickle file into two files in output_dir:

    1) 'metadata.json' -- a dict where keys are the original frame numbers (from data["frames"])
       and the value is a dict mapping object IDs to {"bbox": ...}.
    2) 'masks.npz' -- containing all processed mask arrays (uncompressed), with keys
       formatted as "{old_frame}_{object_id}".

    The pickle must hold a dict with two keys:
      * "frames": mapping from new frame index to original frame number.
      * "video_segments": mapping from new frame index to {object id: mask}.

    Each mask (a tensor or a NumPy array) is converted to a NumPy array of shape (h, w);
    a leading singleton axis, i.e. shape (1, h, w), is removed. The bounding box is computed
    as [x1, y1, x2, y2] (inclusive pixel coordinates) from the truthy pixels. If the mask
    has no truthy pixel, bbox is set to None.

    Args:
        pickle_path: path of the pickle file to convert.
        output_dir: directory receiving the two output files; created if missing.

    Returns:
        "Success: {pickle_path}" on success, otherwise
        "Failed: {pickle_path} | Error: {message}". Exceptions are never propagated,
        so that one bad file does not abort a batch run.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code embedded in the file.
        # Only run this on pickle files that come from a trusted source.
        with open(pickle_path, 'rb') as f:
            data = pickle.load(f)

        frame_mapping = data["frames"]           # new frame index -> original frame number
        video_segments = data["video_segments"]  # new frame index -> {object id: mask}

        metadata_dict = {}  # original frame number (as string) -> {object id: {"bbox": ...}}
        masks_dict = {}     # "{old_frame}_{object_id}" -> (h, w) mask array

        # Iterate over new frame indices (sorted for a reproducible output order).
        for new_frame_idx in sorted(video_segments):
            old_frame = frame_mapping[new_frame_idx]

            frame_metadata = {}
            for obj_id, mask in video_segments[new_frame_idx].items():
                mask_np = _mask_to_array(mask)

                # In metadata, we include only the bbox.
                frame_metadata[str(obj_id)] = {"bbox": _bbox_from_mask(mask_np)}

                # Masks are keyed by the original frame number, not the new index.
                masks_dict[f"{old_frame}_{obj_id}"] = mask_np

            metadata_dict[str(old_frame)] = frame_metadata

        os.makedirs(output_dir, exist_ok=True)

        metadata_json_path = os.path.join(output_dir, "metadata.json")
        with open(metadata_json_path, 'w', encoding="utf-8") as f:
            json.dump(metadata_dict, f)

        # np.savez stores the arrays without compression.
        masks_npz_path = os.path.join(output_dir, "masks.npz")
        np.savez(masks_npz_path, **masks_dict)

        return f"Success: {pickle_path}"

    except Exception as e:
        # Deliberate catch-all at the worker boundary: the caller inspects the
        # returned status string instead of handling exceptions.
        return f"Failed: {pickle_path} | Error: {e}"

def main(argv=None):
    """
    Convert every '*.pkl' file of a source directory in parallel.

    For each '<name>.pkl' found directly inside the source directory,
    convert_pickle_to_npz_and_json is run in a worker process and writes its
    output into '<new_base_dir>/<name>/'. Each status line is printed as soon
    as its file finishes, followed by a summary of all failures.

    Args:
        argv: command-line arguments, two positionals: the directory holding
            the pickle files and the base output directory. Defaults to
            sys.argv[1:] when None.
    """
    import argparse  # local import so the block does not rely on a new file-level import

    parser = argparse.ArgumentParser(
        description="Convert pickle files into per-file metadata.json and masks.npz."
    )
    parser.add_argument("original_dir", help="directory containing the input *.pkl files")
    parser.add_argument(
        "new_base_dir",
        help="directory that receives one sub-directory per converted pickle",
    )
    args = parser.parse_args(argv)
    original_dir = args.original_dir
    new_base_dir = args.new_base_dir

    os.makedirs(new_base_dir, exist_ok=True)

    pickle_files = glob.glob(os.path.join(original_dir, "*.pkl"))

    failed = []

    # Never start more workers than there are CPUs or files to process.
    # ProcessPoolExecutor also rejects max_workers > 61 on Windows and
    # max_workers <= 0 everywhere, hence the upper cap and the floor of 1.
    max_workers = max(1, min(100, os.cpu_count() or 1, len(pickle_files)))

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_file = {
            executor.submit(
                convert_pickle_to_npz_and_json,
                pkl_path,
                os.path.join(new_base_dir, os.path.splitext(os.path.basename(pkl_path))[0]),
            ): pkl_path
            for pkl_path in pickle_files
        }

        for future in as_completed(future_to_file):
            pkl_path = future_to_file[future]
            try:
                result = future.result()
            except Exception as e:
                # The worker reports its own errors as a string, but the pool
                # itself can still fail (e.g. a worker process was killed).
                # Record that against the file instead of aborting the run.
                result = f"Failed: {pkl_path} | Error: {e}"
            # Match the status prefix only: a substring test would misclassify
            # a successful file whose path happens to contain "Failed".
            if result.startswith("Failed:"):
                failed.append(result)
            print(result)

    print("Conversion finished!")
    if failed:
        print("Failed files:")
        for failure in failed:
            print(failure)

# Entry-point guard: run the conversion only when this file is executed as a
# script. main() starts a ProcessPoolExecutor, whose worker processes may
# import this module, so the call must not happen at import time.
if __name__ == "__main__":
    main()
