{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "# Sort train samples into genre folders (files are copied; the originals stay in base_folder)\n",
    "import os\n",
    "import shutil\n",
    "import json\n",
    "\n",
    "# Paths\n",
    "base_folder = \"tmp\"\n",
    "metadata_file = \"metadata.json\"\n",
    "\n",
    "# Load metadata JSON; explicit UTF-8 so parsing does not depend on the platform's default encoding\n",
    "with open(metadata_file, \"r\", encoding=\"utf-8\") as f:\n",
    "    metadata = json.load(f)\n",
    "\n",
    "# Ensure \"none\" folder exists for missing metadata or genre\n",
    "none_folder = os.path.join(base_folder, \"none\")\n",
    "os.makedirs(none_folder, exist_ok=True)\n",
    "\n",
    "# Iterate over all files in the base_folder. os.listdir returns a list up front,\n",
    "# so folders created inside the loop are not picked up by this pass.\n",
    "for filename in os.listdir(base_folder):\n",
    "    filepath = os.path.join(base_folder, filename)\n",
    "    if not os.path.isfile(filepath):\n",
    "        continue  # skip sub-folders such as the genre folders themselves\n",
    "\n",
    "    # Extract key from filename: the part before the first underscore\n",
    "    key = filename.split(\"_\")[0]\n",
    "\n",
    "    # Get genre from metadata. Fall back to \"none\" when the entry, its \"metadata\"\n",
    "    # dict or the genre is missing, null, blank or not a string; otherwise\n",
    "    # os.path.join would raise (None) or point back at base_folder (\"\").\n",
    "    genre = \"none\"  # default folder\n",
    "    if key in metadata:\n",
    "        details = metadata[key].get(\"metadata\")\n",
    "        raw_genre = details.get(\"genre\") if isinstance(details, dict) else None\n",
    "        if isinstance(raw_genre, str) and raw_genre.strip():\n",
    "            genre = raw_genre\n",
    "\n",
    "    # Create genre folder if it doesn't exist\n",
    "    genre_folder = os.path.join(base_folder, genre)\n",
    "    os.makedirs(genre_folder, exist_ok=True)\n",
    "\n",
    "    # Copy file to genre folder\n",
    "    shutil.copy(filepath, genre_folder)"
   ],
   "id": "2c9ff5f2bc85acc5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "# Path to your samples folder; the loops below expect root_dir/<genre>/<origin>/<file>.mp3\n",
    "root_dir = \"samples\"\n",
    "\n",
    "# Columns of the resulting table. Passing them explicitly keeps the frame\n",
    "# sortable even when no mp3 files are found.\n",
    "sample_columns = [\"sample_id\", \"genre\", \"origin\", \"file_path\"]\n",
    "\n",
    "# Prepare list to collect sample data\n",
    "samples_data = []\n",
    "\n",
    "# Walk through each genre subfolder\n",
    "for genre_folder in sorted(os.listdir(root_dir)):\n",
    "    genre_path = os.path.join(root_dir, genre_folder)\n",
    "    if not os.path.isdir(genre_path):\n",
    "        continue  # stray file directly under root_dir; os.listdir on it would raise\n",
    "\n",
    "    # Walk through each origin subfolder\n",
    "    for origin_folder in sorted(os.listdir(genre_path)):\n",
    "        genre_origin_path = os.path.join(genre_path, origin_folder)\n",
    "        if not os.path.isdir(genre_origin_path):\n",
    "            continue\n",
    "\n",
    "        # Sorted listing of every entry in the folder (not only mp3 files)\n",
    "        files = sorted(os.listdir(genre_origin_path))\n",
    "\n",
    "        # Loop through each file, keeping only mp3s\n",
    "        for i, file in enumerate(files):\n",
    "            if not file.endswith(\".mp3\"):\n",
    "                continue\n",
    "\n",
    "            # i is the position in the full sorted listing, so non-mp3 entries\n",
    "            # leave gaps and the numbering shifts when files are added or removed.\n",
    "            sample_id = f\"{genre_folder}_{origin_folder}_{i:04d}\"\n",
    "            # Normalise Windows separators so stored paths use forward slashes\n",
    "            file_path = os.path.join(genre_origin_path, file).replace(\"\\\\\", \"/\")\n",
    "\n",
    "            samples_data.append({\n",
    "                \"sample_id\": sample_id,\n",
    "                \"genre\": genre_folder,\n",
    "                \"origin\": origin_folder,\n",
    "                \"file_path\": file_path\n",
    "            })\n",
    "\n",
    "# Convert to DataFrame\n",
    "samples_df = pd.DataFrame(samples_data, columns=sample_columns)\n",
    "\n",
    "# Sort for cleanliness\n",
    "samples_df = samples_df.sort_values([\"genre\", \"origin\", \"sample_id\"])\n",
    "\n",
    "# Preview\n",
    "samples_df.head()"
   ],
   "id": "d01f98cb9bfda67e"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "\n",
    "# Requires samples_df, which this cell does not define; it must already exist in the kernel.\n",
    "\n",
    "# Master CSV that accumulates samples across runs\n",
    "master_csv_path = \"samples.csv\"\n",
    "\n",
    "if not os.path.exists(master_csv_path):\n",
    "    # No master file yet: the current samples become the master table\n",
    "    combined_df = samples_df\n",
    "else:\n",
    "    # Read sample_id as text so it is not parsed as a number\n",
    "    existing_samples_df = pd.read_csv(master_csv_path, dtype={\"sample_id\": str})\n",
    "\n",
    "    # Append the new rows after the existing ones, then keep the first row per\n",
    "    # sample_id, so rows already in the CSV win over new rows with the same id.\n",
    "    combined_df = pd.concat(\n",
    "        [existing_samples_df, samples_df],\n",
    "        ignore_index=True,\n",
    "    ).drop_duplicates(subset=[\"sample_id\"])\n",
    "\n",
    "# Write the merged table back without the index column\n",
    "combined_df.to_csv(master_csv_path, index=False)\n",
    "\n",
    "print(\"✅ samples.csv has been updated successfully!\")"
   ],
   "id": "cf42ec8e69d13ff6"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "",
   "id": "b7b241b8e52ee207"
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}
