{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#release; stable\n",
    "\n",
    "from src.utils import myrepr\n",
    "import json\n",
    "import os\n",
    "import argparse\n",
    "import subprocess\n",
    "import shlex\n",
    "\n",
    "# Function to create and submit SLURM job scripts\n",
    "def submit_slurm_job(script_path, args, folder, job_name, submit):\n",
    "    # Define SLURM job parameters\n",
    "    \n",
    "    outs_folder = os.path.join(folder, \"slurm_outs\")\n",
    "    sh_folder = os.path.join(folder, \"slurm_sh\")\n",
    "    os.makedirs(outs_folder, exist_ok=True)\n",
    "    os.makedirs(sh_folder, exist_ok=True)\n",
    "    slurm_script = os.path.join(sh_folder, f\"{job_name}.sh\")\n",
    "    \n",
    "    \n",
    "    # Create SLURM job script\n",
    "    with open(slurm_script, \"w\") as f:\n",
    "        f.write(\"#!/bin/bash\\n\")\n",
    "        f.write(f\"#SBATCH --job-name={job_name}\\n\")\n",
    "        f.write(\"#SBATCH --cpus-per-task=8\\n\")\n",
    "        f.write(\"#SBATCH -N 1\\n\")\n",
    "        f.write(\"#SBATCH --partition=batch\\n\")\n",
    "        f.write(\"#SBATCH --mem=128GB\\n\")\n",
    "        f.write(\"#SBATCH --time=6:00:00\\n\")\n",
    "        f.write(f\"#SBATCH --output={outs_folder}/{job_name}.out\\n\")\n",
    "        f.write(f\"python {script_path} \")\n",
    "        for arg in args:\n",
    "            f.write(f\"{arg} \")\n",
    "        f.write(\"\\n\")\n",
    "\n",
    "    # Submit the job using sbatch and capture the output\n",
    "    if submit:\n",
    "        result = subprocess.run([\"sbatch\", slurm_script], capture_output=True, text=True)\n",
    "        \n",
    "        # Print the result of the job submission\n",
    "        if result.returncode == 0:\n",
    "            print(f\"Job submitted successfully: {result.stdout.strip()}\")\n",
    "        else:\n",
    "            print(f\"Job submission failed: {result.stderr.strip()}\")\n",
    "\n",
    "# Define the JSON configuration directly within the script. It is laid out\n",
    "# like a VS Code launch configuration; only \"program\" and \"args\" are read below.\n",
    "\n",
    "# TODO: apply the following changes before running the script:\n",
    "# - change <path_to_project> to the path to the main folder of the repo\n",
    "# - change <path_to_anaconda3> to the path containing anaconda3\n",
    "\n",
    "json_config = \"\"\"\n",
    "{\n",
    "    \"version\": \"0.2.0\",\n",
    "    \"configurations\": [\n",
    "        {\n",
    "            \"name\": \"data_preprocessing_quad\",\n",
    "            \"type\": \"debugpy\",\n",
    "            \"request\": \"launch\",\n",
    "            \"program\": \"<path_to_project>/data_preprocessing.py\",\n",
    "            \"console\": \"integratedTerminal\",\n",
    "            \"justMyCode\": true,\n",
    "            \"args\": [\n",
    "                \"--dataset\", \"synthetic_sparse_zero\",\n",
    "                \"--loss_func\", \"l1_norm\",\n",
    "                \"--regularizer_type\", \"str-cvx\",\n",
    "                \"--use_ray\", \"0\",\n",
    "                \"--is_sparse_dataset\", \"1\",\n",
    "                \"--generate_dataset\", \"1\",\n",
    "                \"--is_minimize\", \"1\",\n",
    "                \"--load_prepared_dataset\", \"0\",\n",
    "                \"--load_raw_dataset\", \"0\",\n",
    "                \"--print_args\", \"1\",\n",
    "                \"--print_status\", \"1\",\n",
    "                \"--hetero\", \"0\",\n",
    "                \"--num_samples\", \"1\",\n",
    "                \"--la\", \"0\",\n",
    "                \"--dim\", \"1000\",\n",
    "                \"--computable_params\", \"[]\",\n",
    "                \"--loadable_params\", \"[]\"\n",
    "            ],\n",
    "            \"python\": \"<path_to_anaconda3>/anaconda3/envs/pyten/bin/python\"\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "\"\"\"\n",
    "\n",
    "# Sweep values: one job is created per (noise_scale, num_workers) pair.\n",
    "noise_scale_ar = [0.01, 0.1, 1, 10, 100]\n",
    "num_workers_ar = [10, 100, 1000]\n",
    "\n",
    "# Set to 0 to only write the job scripts without submitting them.\n",
    "submit = 1\n",
    "\n",
    "# Parse the JSON configuration and pull out the script path and its base args\n",
    "config = json.loads(json_config)\n",
    "launch_config = config[\"configurations\"][0]\n",
    "script_path = launch_config[\"program\"]\n",
    "args = launch_config[\"args\"]\n",
    "\n",
    "# The list-valued parameters contain shell metacharacters, and the job script\n",
    "# writes every argument verbatim, so shell-quote their values here.\n",
    "# shlex.quote also copes with values that themselves contain double quotes.\n",
    "for i in range(len(args) - 1):\n",
    "    if args[i] in (\"--computable_params\", \"--loadable_params\"):\n",
    "        args[i + 1] = shlex.quote(args[i + 1])\n",
    "\n",
    "# parse_known_args() ignores arguments that are not ours (a Jupyter kernel is\n",
    "# started with its own command-line options, which parse_args() would reject),\n",
    "# and allow_abbrev=False keeps such an option from being taken as --folder.\n",
    "parser = argparse.ArgumentParser(\n",
    "    description=\"Submit SLURM job with JSON configuration\",\n",
    "    allow_abbrev=False,\n",
    ")\n",
    "parser.add_argument(\"--folder\", type=str, help=\"Custom folder path to use as the working directory\", default=\"\")\n",
    "args_namespace, _unknown_args = parser.parse_known_args()\n",
    "# Fall back to the current directory when --folder is not given\n",
    "working_directory = args_namespace.folder or os.getcwd()\n",
    "\n",
    "# Write (and, if enabled, submit) one job per parameter combination\n",
    "for noise_scale in noise_scale_ar:\n",
    "    for num_workers in num_workers_ar:\n",
    "        job_name = f\"data_preprocessing_nw{myrepr(num_workers)}_ns{myrepr(noise_scale)}\"\n",
    "        all_conf_args = args + [\"--num_workers\", str(num_workers)] + [\"--noise_scale\", str(noise_scale)]\n",
    "        submit_slurm_job(script_path, all_conf_args, working_directory, job_name, submit)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete the job logs (slurm_outs/*.out) and job scripts (slurm_sh/*.sh);\n",
    "# both paths are relative to the notebook's current working directory.\n",
    "!rm -r slurm_outs/*.out\n",
    "!rm -r slurm_sh/*.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "# List the SLURM jobs of the current user ($USER).\n",
    "squeue -u \"$USER\" --format=\"%10i %9P %85j %8u %1t %4M %.6D  %10M %10l %8Q %7m\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the last 100 lines of every data_preprocessing job log\n",
    "!tail -n 100 slurm_outs/data_preprocessing*.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print the full contents of every data_preprocessing job log\n",
    "!cat slurm_outs/data_preprocessing*.out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "# WARNING: cancels ALL SLURM jobs of the current user ($USER), not only the\n",
    "# jobs submitted from this notebook.\n",
    "scancel -u \"$USER\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pyten",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
