{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from joblib import Parallel, delayed\n",
    "import glob\n",
    "\n",
    "def get_top_categories(file_path, top_n):\n",
    "    \"\"\"Return the category names found in the first ``top_n`` entries of a counts file.\n",
    "\n",
    "    For every line, the text before the first ':' (with surrounding whitespace\n",
    "    removed) is taken as the category name. Lines are presumably of the form\n",
    "    ``<name>: <count>`` and sorted most-frequent first -- TODO confirm against\n",
    "    whatever writes the counts file.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path of the text file to read.\n",
    "        top_n: Maximum number of entries to take. ``None`` takes every entry;\n",
    "            zero or a negative value yields an empty set.\n",
    "\n",
    "    Returns:\n",
    "        set: The selected category names (duplicates collapse).\n",
    "    \"\"\"\n",
    "    top_categories = set()\n",
    "    if top_n is not None and top_n <= 0:\n",
    "        return top_categories\n",
    "\n",
    "    taken = 0\n",
    "    with open(file_path, 'r') as f:\n",
    "        # Iterate lazily so only the needed prefix of the file is read.\n",
    "        for line in f:\n",
    "            name = line.split(':')[0].strip()\n",
    "            if not name:\n",
    "                # Blank lines (or lines starting with ':') name no category;\n",
    "                # skip them without using up one of the top_n slots.\n",
    "                continue\n",
    "            top_categories.add(name)\n",
    "            taken += 1\n",
    "            if top_n is not None and taken >= top_n:\n",
    "                break\n",
    "    return top_categories\n",
    "\n",
    "def process_json_file(json_file_path, output_folder, top_categories):\n",
    "    \"\"\"Write a copy of one JSON file keeping only objects named in ``top_categories``.\n",
    "\n",
    "    Loads ``json_file_path``, keeps the entries of ``data['objects']`` whose\n",
    "    ``'name'`` is in ``top_categories`` and, if at least one remains, dumps the\n",
    "    updated document under the same file name inside ``output_folder``.\n",
    "    Nothing is written when no object matches.\n",
    "\n",
    "    Any exception is reported with ``print`` and swallowed, so one bad file\n",
    "    does not stop the remaining files of a batch from being processed.\n",
    "\n",
    "    Args:\n",
    "        json_file_path: Path of the JSON file to read.\n",
    "        output_folder: Existing directory that receives the filtered copy.\n",
    "        top_categories: Container of category names to keep.\n",
    "    \"\"\"\n",
    "    filename = os.path.basename(json_file_path)\n",
    "    new_json_file_path = os.path.join(output_folder, filename)\n",
    "\n",
    "    try:\n",
    "        # JSON text is UTF-8 by specification; do not rely on the locale default.\n",
    "        with open(json_file_path, 'r', encoding='utf-8') as file:\n",
    "            data = json.load(file)\n",
    "\n",
    "        # .get() drops an object that has no 'name' instead of raising KeyError\n",
    "        # and discarding every other object in the file with it.\n",
    "        filtered_objects = [obj for obj in data['objects'] if obj.get('name') in top_categories]\n",
    "        if filtered_objects:\n",
    "            data['objects'] = filtered_objects\n",
    "            with open(new_json_file_path, 'w', encoding='utf-8') as outfile:\n",
    "                json.dump(data, outfile, indent=4)\n",
    "    except Exception as e:\n",
    "        print(f\"Error processing file {json_file_path}: {e}\")\n",
    "\n",
    "def process_directory_parallel(input_dir, output_dir, top_categories):\n",
    "    \"\"\"Run ``process_json_file`` on every ``*.json`` file directly inside ``input_dir``.\n",
    "\n",
    "    ``output_dir`` is created if it does not exist yet. The files are then\n",
    "    handed to joblib so they are processed in parallel; subdirectories of\n",
    "    ``input_dir`` are not searched.\n",
    "\n",
    "    Args:\n",
    "        input_dir: Directory containing the source JSON files.\n",
    "        output_dir: Directory that receives the filtered copies.\n",
    "        top_categories: Container of category names to keep, forwarded unchanged.\n",
    "    \"\"\"\n",
    "    # exist_ok=True removes the check-then-create race of a separate\n",
    "    # os.path.exists() test when several runs start at the same time.\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    json_files = glob.glob(os.path.join(input_dir, '*.json'))\n",
    "    # n_jobs=-1 asks joblib to use all available CPUs.\n",
    "    Parallel(n_jobs=-1)(\n",
    "        delayed(process_json_file)(json_file, output_dir, top_categories)\n",
    "        for json_file in json_files\n",
    "    )\n",
    "\n",
    "# Where the source annotations live and where the filtered copies go.\n",
    "input_dir = 'data_without_overlap'\n",
    "output_dir = 'data_without_overlap_top50'\n",
    "\n",
    "# Category names to keep: the first 50 entries of the class-count listing.\n",
    "class_counts_file = 'object_class_counts.txt'\n",
    "top_categories = get_top_categories(class_counts_file, top_n=50)\n",
    "\n",
    "process_directory_parallel(\n",
    "    input_dir=input_dir,\n",
    "    output_dir=output_dir,\n",
    "    top_categories=top_categories,\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "detectai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
