{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c7d4aad",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "import json\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "\n",
    "SEED = 42\n",
    "rng = np.random.default_rng(SEED)  # single seeded generator used for every shuffle\n",
    "\n",
    "# Every CSV in the working directory, in a deterministic (sorted) order.\n",
    "csv_paths = sorted(Path(\".\").glob(\"*.csv\"))\n",
    "\n",
    "# Read each file exactly once, keyed by its file name without the extension.\n",
    "dfs = {p.stem: pd.read_csv(p) for p in csv_paths}\n",
    "\n",
    "# Stack the already-loaded frames, tagging each row with its source file name.\n",
    "# `assign` returns a new frame, so the entries of `dfs` are left untouched.\n",
    "# `pd.concat` raises on an empty sequence, so fall back to an empty frame when\n",
    "# no CSV files are present.\n",
    "df_all = (\n",
    "    pd.concat(\n",
    "        (dfs[p.stem].assign(__file__=p.name) for p in csv_paths),\n",
    "        ignore_index=True,\n",
    "    )\n",
    "    if csv_paths\n",
    "    else pd.DataFrame(columns=[\"__file__\"])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1af672c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_listlike(x):\n",
    "    \"\"\"Return ``x`` as a list, or ``None`` when it cannot be read as one.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x : object\n",
    "        A list (returned unchanged) or a string holding either a JSON array\n",
    "        or a Python list literal. Any other value counts as having no list.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list or None\n",
    "        The list, or ``None`` for missing values, non-string inputs,\n",
    "        unparseable strings and strings that parse to something other\n",
    "        than a list.\n",
    "    \"\"\"\n",
    "    if isinstance(x, list):\n",
    "        return x\n",
    "    # Only strings can be parsed. NaN, None, pd.NA and every other type end up\n",
    "    # as None here, so no pd.isna() call is needed; calling it on a tuple or\n",
    "    # array would raise an ambiguous-truth-value error instead.\n",
    "    if not isinstance(x, str):\n",
    "        return None\n",
    "\n",
    "    s = x.strip()\n",
    "\n",
    "    # Strict JSON first (double quotes, true/false/null).\n",
    "    try:\n",
    "        v = json.loads(s)\n",
    "    except (ValueError, RecursionError):\n",
    "        v = None\n",
    "    if isinstance(v, list):\n",
    "        return v\n",
    "\n",
    "    # Fall back to Python literal syntax (single quotes, True/False/None).\n",
    "    try:\n",
    "        v = ast.literal_eval(s)\n",
    "    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):\n",
    "        v = None\n",
    "    return v if isinstance(v, list) else None\n",
    "\n",
    "def shuffle_row(row):\n",
    "    \"\"\"Shuffle one row's options and keep its answer fields aligned.\n",
    "\n",
    "    The ``options`` field (a list, or a string holding one) is reordered with a\n",
    "    random permutation drawn from the module-level ``rng`` and stored back as a\n",
    "    JSON string. Two related fields are updated when they are usable:\n",
    "\n",
    "    * ``answer_keys``: if it parses to a list with one entry per option, it is\n",
    "      reordered with the same permutation and stored as a JSON string.\n",
    "    * ``answer``: if it converts to an integer index within range, it is\n",
    "      replaced by the new position of that option.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    row : pandas.Series or dict-like\n",
    "        One record; it must support ``.get`` and ``.copy``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The input ``row`` itself when it has no usable options, otherwise a\n",
    "    shuffled copy. The input object is never modified.\n",
    "    \"\"\"\n",
    "    opts = parse_listlike(row.get(\"options\"))\n",
    "    if not isinstance(opts, list) or len(opts) == 0:\n",
    "        return row\n",
    "\n",
    "    # Work on a copy: pandas does not support functions that mutate the object\n",
    "    # handed to them by DataFrame.apply.\n",
    "    row = row.copy()\n",
    "\n",
    "    n = len(opts)\n",
    "    perm = rng.permutation(n)  # perm[new_position] == old_position\n",
    "\n",
    "    new_opts = [opts[i] for i in perm]\n",
    "    row[\"options\"] = json.dumps(new_opts, ensure_ascii=False)\n",
    "\n",
    "    # Reorder answer_keys only when it lines up one-to-one with the options.\n",
    "    ak = parse_listlike(row.get(\"answer_keys\"))\n",
    "    if isinstance(ak, list) and len(ak) == n:\n",
    "        new_ak = [ak[i] for i in perm]\n",
    "        row[\"answer_keys\"] = json.dumps(new_ak, ensure_ascii=False)\n",
    "\n",
    "    ans = row.get(\"answer\")\n",
    "    if pd.notna(ans):\n",
    "        try:\n",
    "            old_idx = int(ans)\n",
    "        except (TypeError, ValueError, OverflowError):\n",
    "            # Not an integer-like value; leave the answer as it is.\n",
    "            old_idx = None\n",
    "        if old_idx is not None and 0 <= old_idx < n:\n",
    "            # The new index is the position at which the old index sits in perm.\n",
    "            new_idx = int(np.where(perm == old_idx)[0][0])\n",
    "            row[\"answer\"] = new_idx\n",
    "\n",
    "    return row\n",
    "\n",
    "def process_df(df):\n",
    "    \"\"\"Apply ``shuffle_row`` to every row of ``df`` and return what ``DataFrame.apply`` produces.\"\"\"\n",
    "    shuffled = df.apply(shuffle_row, axis=\"columns\")\n",
    "    return shuffled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8e50fb0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shuffle the options of every CSV in the working directory and overwrite the file.\n",
    "for csv_path in sorted(Path(\".\").glob(\"*.csv\")):\n",
    "    df = pd.read_csv(csv_path)\n",
    "    if \"options\" not in df.columns:\n",
    "        # Nothing to shuffle, so leave the file untouched.\n",
    "        print(f\"skipped -> {csv_path.name} (no 'options' column)\")\n",
    "        continue\n",
    "\n",
    "    # The list columns are deliberately not pre-parsed here: shuffle_row parses\n",
    "    # each cell through parse_listlike, which accepts both JSON and Python\n",
    "    # literals and tolerates missing values. A direct ast.literal_eval would fail\n",
    "    # on NaN cells and on the JSON (true/false/null) that shuffle_row itself\n",
    "    # writes, breaking a second run over the same files.\n",
    "    df_out = process_df(df)\n",
    "\n",
    "    df_out.to_csv(csv_path, index=False, encoding=\"utf-8-sig\")\n",
    "    print(f\"updated -> {csv_path.name}, shape={df_out.shape}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "topo",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
