{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mRunning cells with 'Python 3.9.2' requires the ipykernel package.\n",
      "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
      "\u001b[1;31mCommand: '\"c:/Program Files/Python39/python.exe\" -m pip install ipykernel -U --user --force-reinstall'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "import shutil\n",
    "import glob\n",
    "\n",
    "def copy_raw_images(json_folder, base_image_folder, target_folder):\n",
    "    \"\"\"\n",
    "    Copy raw images based on the metadata provided in JSON files.\n",
    "\n",
    "    Every `*.json` file directly inside `json_folder` is read. Its `filename`\n",
    "    entry (required) and `folder` entry (optional sub-directory) are joined onto\n",
    "    `base_image_folder` to locate the raw image, which is then copied into\n",
    "    `target_folder` under the same file name.\n",
    "\n",
    "    Args:\n",
    "        json_folder: Directory scanned (non-recursively) for `*.json` files.\n",
    "        base_image_folder: Base directory where the raw images are stored.\n",
    "        target_folder: Destination directory; created when missing. An empty\n",
    "            string refers to the current working directory.\n",
    "\n",
    "    Returns:\n",
    "        None. Progress and problems are reported with `print`; a failure on one\n",
    "        JSON file does not stop the processing of the remaining files.\n",
    "    \"\"\"\n",
    "    # An empty path means the current directory, which already exists;\n",
    "    # os.makedirs('') would raise FileNotFoundError.\n",
    "    if target_folder:\n",
    "        # exist_ok avoids the race between a separate exists() check and the creation\n",
    "        os.makedirs(target_folder, exist_ok=True)\n",
    "\n",
    "    # Iterate over all JSON files in the json_folder\n",
    "    for json_file in glob.glob(os.path.join(json_folder, '*.json')):\n",
    "        try:\n",
    "            # Read as UTF-8 explicitly instead of relying on the platform default encoding\n",
    "            with open(json_file, 'r', encoding='utf-8') as file:\n",
    "                image_info = json.load(file)\n",
    "\n",
    "            # `folder` in JSON may specify a sub-directory path; a missing or null\n",
    "            # value is treated as no sub-directory\n",
    "            json_image_folder = image_info.get('folder') or ''\n",
    "            image_filename = image_info['filename']\n",
    "            raw_image_path = os.path.join(base_image_folder, json_image_folder, image_filename)\n",
    "\n",
    "            # Construct the target path for the image\n",
    "            target_image_path = os.path.join(target_folder, image_filename)\n",
    "\n",
    "            # isfile rather than exists: a directory at that path is not a copyable image\n",
    "            if os.path.isfile(raw_image_path):\n",
    "                shutil.copy(raw_image_path, target_image_path)\n",
    "                print(f\"Successfully copied {raw_image_path} to {target_image_path}\")\n",
    "            else:\n",
    "                print(f\"Raw image not found: {raw_image_path}\")\n",
    "\n",
    "        except Exception as e:\n",
    "            # Deliberately broad: report the problem and carry on with the next JSON file\n",
    "            print(f\"Error processing JSON file {json_file}: {e}\")\n",
    "\n",
    "# Example invocation of copy_raw_images with the three paths it needs\n",
    "\n",
    "# Directory containing the JSON files\n",
    "json_folder = 'same_objects_0.01_selected'\n",
    "# Base directory where the raw images are stored\n",
    "base_image_folder = 'images/train2017/'\n",
    "# Target directory to store the raw images (left empty in this example)\n",
    "target_folder = ''\n",
    "\n",
    "copy_raw_images(\n",
    "    json_folder=json_folder,\n",
    "    base_image_folder=base_image_folder,\n",
    "    target_folder=target_folder,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mRunning cells with 'Python 3.9.2' requires the ipykernel package.\n",
      "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
      "\u001b[1;31mCommand: '\"c:/Program Files/Python39/python.exe\" -m pip install ipykernel -U --user --force-reinstall'"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "import shutil\n",
    "import glob\n",
    "\n",
    "def copy_raw_images(merged_json_path, validation_json_folder, target_folder, image_folder='images/val2017'):\n",
    "    \"\"\"\n",
    "    Copy raw images based on the metadata provided in a merged JSON file.\n",
    "\n",
    "    Only entries whose `data_source` equals 'COCO' are considered. An image is\n",
    "    copied when a JSON file with the same base name exists in\n",
    "    `validation_json_folder` and can be parsed; the content of that file is not\n",
    "    used beyond this check.\n",
    "\n",
    "    NOTE: this definition shares its name with the function defined in the\n",
    "    previous cell and replaces it once this cell has been executed.\n",
    "\n",
    "    Args:\n",
    "        merged_json_path: Path to the merged JSON file; it is iterated entry by\n",
    "            entry, each entry providing `data_source` and `filename`.\n",
    "        validation_json_folder: Directory with the per-image validation JSON files.\n",
    "        target_folder: Destination directory; created when missing. An empty\n",
    "            string refers to the current working directory.\n",
    "        image_folder: Directory holding the raw images. Defaults to\n",
    "            'images/val2017', the location that was previously hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        None. Progress and problems are reported with `print`; a failure on one\n",
    "        entry does not stop the processing of the remaining entries.\n",
    "    \"\"\"\n",
    "    # An empty path means the current directory, which already exists;\n",
    "    # os.makedirs('') would raise FileNotFoundError.\n",
    "    if target_folder:\n",
    "        # exist_ok avoids the race between a separate exists() check and the creation\n",
    "        os.makedirs(target_folder, exist_ok=True)\n",
    "\n",
    "    # Load the merged JSON file containing all image data\n",
    "    try:\n",
    "        # Read as UTF-8 explicitly instead of relying on the platform default encoding\n",
    "        with open(merged_json_path, 'r', encoding='utf-8') as file:\n",
    "            image_data_list = json.load(file)\n",
    "    except Exception as e:\n",
    "        print(f\"Error loading merged JSON file: {e}\")\n",
    "        return\n",
    "\n",
    "    # Iterate over all image data entries\n",
    "    for image_data in image_data_list:\n",
    "        # Bound before the try block so the error message below can never hit an\n",
    "        # unbound name or report the file name of a previous entry\n",
    "        image_filename = '<unknown>'\n",
    "        try:\n",
    "            # Entries without a `data_source` are treated like any other non-COCO entry\n",
    "            if image_data.get('data_source') != 'COCO':\n",
    "                continue\n",
    "\n",
    "            image_filename = image_data['filename']\n",
    "            validation_json_path = os.path.join(validation_json_folder, os.path.splitext(image_filename)[0] + '.json')\n",
    "\n",
    "            if not os.path.exists(validation_json_path):\n",
    "                print(f\"Validation JSON file not found for {image_filename}\")\n",
    "                continue\n",
    "\n",
    "            # Parsed only to confirm the file is well-formed JSON; a malformed file\n",
    "            # raises here and is reported by the handler below. The file is closed\n",
    "            # again before the copy starts.\n",
    "            with open(validation_json_path, 'r', encoding='utf-8') as file:\n",
    "                json.load(file)\n",
    "\n",
    "            full_image_path = os.path.join(image_folder, image_filename)\n",
    "\n",
    "            # Construct the target path for the image\n",
    "            target_image_path = os.path.join(target_folder, image_filename)\n",
    "\n",
    "            # isfile rather than exists: a directory at that path is not a copyable image\n",
    "            if os.path.isfile(full_image_path):\n",
    "                shutil.copy(full_image_path, target_image_path)\n",
    "                print(f\"Successfully copied {full_image_path} to {target_image_path}\")\n",
    "            else:\n",
    "                print(f\"Raw image not found: {full_image_path}\")\n",
    "\n",
    "        except Exception as e:\n",
    "            # Deliberately broad: report the problem and carry on with the next entry\n",
    "            print(f\"Error processing image data for {image_filename}: {e}\")\n",
    "\n",
    "# Example invocation of copy_raw_images with the three paths it needs\n",
    "\n",
    "# Path to the merged JSON file (left empty in this example)\n",
    "merged_json_path = ''\n",
    "# Directory with validation JSON files\n",
    "validation_json_folder = 'data_json_validation'\n",
    "# Target directory for images (left empty in this example)\n",
    "target_folder = ''\n",
    "\n",
    "copy_raw_images(\n",
    "    merged_json_path=merged_json_path,\n",
    "    validation_json_folder=validation_json_folder,\n",
    "    target_folder=target_folder,\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mind_wandering",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
