{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e99486a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "05b537dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of iterations: 50\n",
      "Average iteration time: 1914.357 ms\n",
      "Rank 2: Average allreduce time: 0.288 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 48.038 ms\n",
      "Rank 3: Average allreduce time: 0.286 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.603 ms\n",
      "Rank 6: Average allreduce time: 0.285 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.422 ms\n",
      "Rank 0: Average allreduce time: 0.286 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.648 ms\n",
      "Rank 1: Average allreduce time: 0.284 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.338 ms\n",
      "Rank 7: Average allreduce time: 0.285 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.537 ms\n",
      "Rank 4: Average allreduce time: 0.282 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.030 ms\n",
      "Rank 5: Average allreduce time: 0.284 ms, avg size: 288.000 MB, calls per iteration: 166.6, avg time per iteration: 47.236 ms\n",
      "Itr0: slowest: 6, 2nd slowest: 7, time: 47.461 ms, 2nd time: 47.702 ms, difference: 0.241 ms\n",
      "Itr1: slowest: 1, 2nd slowest: 4, time: 46.949 ms, 2nd time: 48.029 ms, difference: 1.080 ms\n",
      "Itr2: slowest: 1, 2nd slowest: 2, time: 46.719 ms, 2nd time: 47.544 ms, difference: 0.825 ms\n",
      "Itr3: slowest: 2, 2nd slowest: 1, time: 47.707 ms, 2nd time: 47.894 ms, difference: 0.187 ms\n",
      "Itr4: slowest: 6, 2nd slowest: 1, time: 47.133 ms, 2nd time: 48.154 ms, difference: 1.021 ms\n",
      "Itr5: slowest: 2, 2nd slowest: 4, time: 47.825 ms, 2nd time: 48.160 ms, difference: 0.335 ms\n",
      "Itr6: slowest: 7, 2nd slowest: 4, time: 48.562 ms, 2nd time: 48.962 ms, difference: 0.400 ms\n",
      "Itr7: slowest: 5, 2nd slowest: 0, time: 47.671 ms, 2nd time: 47.796 ms, difference: 0.125 ms\n",
      "Itr8: slowest: 0, 2nd slowest: 4, time: 46.993 ms, 2nd time: 47.150 ms, difference: 0.157 ms\n",
      "Itr9: slowest: 4, 2nd slowest: 5, time: 47.170 ms, 2nd time: 47.545 ms, difference: 0.375 ms\n",
      "Itr10: slowest: 4, 2nd slowest: 1, time: 47.423 ms, 2nd time: 48.162 ms, difference: 0.739 ms\n",
      "Itr11: slowest: 1, 2nd slowest: 0, time: 47.216 ms, 2nd time: 47.412 ms, difference: 0.196 ms\n",
      "Itr12: slowest: 5, 2nd slowest: 6, time: 47.825 ms, 2nd time: 48.024 ms, difference: 0.199 ms\n",
      "Itr13: slowest: 4, 2nd slowest: 0, time: 46.899 ms, 2nd time: 47.200 ms, difference: 0.301 ms\n",
      "Itr14: slowest: 4, 2nd slowest: 5, time: 47.050 ms, 2nd time: 47.725 ms, difference: 0.675 ms\n",
      "Itr15: slowest: 4, 2nd slowest: 0, time: 47.511 ms, 2nd time: 47.862 ms, difference: 0.351 ms\n",
      "Itr16: slowest: 4, 2nd slowest: 1, time: 47.030 ms, 2nd time: 47.287 ms, difference: 0.257 ms\n",
      "Itr17: slowest: 5, 2nd slowest: 6, time: 47.029 ms, 2nd time: 47.313 ms, difference: 0.284 ms\n",
      "Itr18: slowest: 5, 2nd slowest: 0, time: 47.093 ms, 2nd time: 47.110 ms, difference: 0.017 ms\n",
      "Itr19: slowest: 6, 2nd slowest: 0, time: 47.172 ms, 2nd time: 47.248 ms, difference: 0.076 ms\n",
      "Itr20: slowest: 4, 2nd slowest: 5, time: 46.818 ms, 2nd time: 47.014 ms, difference: 0.196 ms\n",
      "Itr21: slowest: 5, 2nd slowest: 0, time: 46.919 ms, 2nd time: 47.058 ms, difference: 0.139 ms\n",
      "Itr22: slowest: 4, 2nd slowest: 1, time: 47.211 ms, 2nd time: 47.391 ms, difference: 0.180 ms\n",
      "Itr23: slowest: 4, 2nd slowest: 3, time: 47.401 ms, 2nd time: 47.640 ms, difference: 0.239 ms\n",
      "Itr24: slowest: 5, 2nd slowest: 4, time: 47.433 ms, 2nd time: 48.019 ms, difference: 0.586 ms\n",
      "Itr25: slowest: 6, 2nd slowest: 0, time: 48.289 ms, 2nd time: 48.401 ms, difference: 0.112 ms\n",
      "Itr26: slowest: 4, 2nd slowest: 7, time: 46.771 ms, 2nd time: 47.179 ms, difference: 0.408 ms\n",
      "Itr27: slowest: 4, 2nd slowest: 5, time: 46.943 ms, 2nd time: 47.066 ms, difference: 0.123 ms\n",
      "Itr28: slowest: 0, 2nd slowest: 4, time: 47.513 ms, 2nd time: 47.517 ms, difference: 0.004 ms\n",
      "Itr29: slowest: 5, 2nd slowest: 3, time: 47.288 ms, 2nd time: 47.541 ms, difference: 0.253 ms\n",
      "Itr30: slowest: 7, 2nd slowest: 1, time: 47.301 ms, 2nd time: 47.765 ms, difference: 0.464 ms\n",
      "Itr31: slowest: 6, 2nd slowest: 2, time: 47.155 ms, 2nd time: 47.837 ms, difference: 0.682 ms\n",
      "Itr32: slowest: 5, 2nd slowest: 0, time: 46.680 ms, 2nd time: 46.957 ms, difference: 0.277 ms\n",
      "Itr33: slowest: 4, 2nd slowest: 5, time: 46.562 ms, 2nd time: 47.066 ms, difference: 0.504 ms\n",
      "Itr34: slowest: 4, 2nd slowest: 3, time: 47.368 ms, 2nd time: 47.409 ms, difference: 0.041 ms\n",
      "Itr35: slowest: 3, 2nd slowest: 5, time: 47.365 ms, 2nd time: 47.560 ms, difference: 0.195 ms\n",
      "Itr36: slowest: 4, 2nd slowest: 7, time: 46.482 ms, 2nd time: 46.610 ms, difference: 0.128 ms\n",
      "Itr37: slowest: 7, 2nd slowest: 5, time: 47.484 ms, 2nd time: 47.743 ms, difference: 0.259 ms\n",
      "Itr38: slowest: 6, 2nd slowest: 4, time: 47.343 ms, 2nd time: 47.507 ms, difference: 0.164 ms\n",
      "Itr39: slowest: 7, 2nd slowest: 5, time: 47.210 ms, 2nd time: 47.324 ms, difference: 0.114 ms\n",
      "Itr40: slowest: 5, 2nd slowest: 0, time: 46.801 ms, 2nd time: 46.933 ms, difference: 0.132 ms\n",
      "Itr41: slowest: 3, 2nd slowest: 1, time: 47.335 ms, 2nd time: 47.390 ms, difference: 0.055 ms\n",
      "Itr42: slowest: 4, 2nd slowest: 7, time: 46.707 ms, 2nd time: 47.035 ms, difference: 0.328 ms\n",
      "Itr43: slowest: 1, 2nd slowest: 5, time: 46.648 ms, 2nd time: 46.769 ms, difference: 0.121 ms\n",
      "Itr44: slowest: 7, 2nd slowest: 3, time: 46.919 ms, 2nd time: 46.968 ms, difference: 0.049 ms\n",
      "Itr45: slowest: 6, 2nd slowest: 4, time: 46.577 ms, 2nd time: 46.788 ms, difference: 0.211 ms\n",
      "Itr46: slowest: 7, 2nd slowest: 5, time: 47.393 ms, 2nd time: 47.714 ms, difference: 0.321 ms\n",
      "Itr47: slowest: 4, 2nd slowest: 6, time: 46.720 ms, 2nd time: 47.273 ms, difference: 0.553 ms\n",
      "Itr48: slowest: 5, 2nd slowest: 1, time: 46.910 ms, 2nd time: 47.401 ms, difference: 0.491 ms\n",
      "Itr49: slowest: 0, 2nd slowest: 0, time: 0.000 ms, 2nd time: 0.000 ms, difference: 0.000 ms\n",
      "defaultdict(<class 'int'>, {6: 7, 1: 4, 2: 2, 7: 6, 5: 10, 0: 3, 4: 16, 3: 2})\n"
     ]
    }
   ],
   "source": [
    "# Read log from file\n",
    "with open('log_tp_8_h100_0.txt', 'r') as f:\n",
    "    log_lines = f.readlines()\n",
    "\n",
    "NUM_RANK = 8\n",
    "\n",
    "# Parse allreduce times per rank\n",
    "allreduce_times = defaultdict(float)\n",
    "allreduce_cnt = defaultdict(int)\n",
    "allreduce_size = defaultdict(float)\n",
    "\n",
    "allreduce_times_per_iter = defaultdict(list)\n",
    "\n",
    "pattern = re.compile(r'Rank (\\d+): All reduce time: ([\\d.]+) ms \\| All reduce size: ([\\d.]+) KB')\n",
    "\n",
    "# Count the number of iterations by counting occurrences of the iteration completion line for any rank\n",
    "iteration_pattern = re.compile(r'Rank 0: Iter \\d+ complete \\| Forward: [\\d.]+ ms \\| Backward: [\\d.]+ ms \\| Iteration: [\\d.]+ ms')\n",
    "num_iterations = sum(1 for line in log_lines if iteration_pattern.search(line))\n",
    "print(f\"Number of iterations: {num_iterations}\")\n",
    "\n",
    "# Parse iteration times\n",
    "iteration_times = []\n",
    "iteration_time_pattern = re.compile(r'Rank 0: Iter \\d+ complete \\| Forward: [\\d.]+ ms \\| Backward: [\\d.]+ ms \\| Iteration: ([\\d.]+) ms')\n",
    "\n",
    "for line in log_lines:\n",
    "    match = iteration_time_pattern.search(line)\n",
    "    if match:\n",
    "        iteration_times.append(float(match.group(1)))\n",
    "\n",
    "# Calculate average iteration time\n",
    "if iteration_times:\n",
    "    avg_iteration_time = sum(iteration_times) / len(iteration_times)\n",
    "    print(f\"Average iteration time: {avg_iteration_time:.3f} ms\")\n",
    "else:\n",
    "    print(\"No iteration times found.\")\n",
    "\n",
    "\n",
    "current_iter = -1\n",
    "\n",
    "for line in log_lines:\n",
    "    if iteration_pattern.search(line):\n",
    "        current_iter += 1\n",
    "    match = pattern.search(line)\n",
    "    if match and current_iter >= 0:\n",
    "        rank = int(match.group(1))\n",
    "        time = float(match.group(2))\n",
    "        allreduce_times[rank] += time\n",
    "        allreduce_cnt[rank] += 1\n",
    "        allreduce_size[rank] += float(match.group(3))\n",
    "        \n",
    "        if rank not in allreduce_times_per_iter:\n",
    "            allreduce_times_per_iter[rank] = [0 for _ in range(num_iterations)]\n",
    "\n",
    "        allreduce_times_per_iter[rank][current_iter] += time\n",
    "\n",
    "# Print total allreduce time per rank\n",
    "# for rank, total_time in allreduce_times.items():\n",
    "#     print(f\"Rank {rank}: Total allreduce time: {total_time:.3f} ms over {allreduce_cnt[rank]} calls\")\n",
    "\n",
    "# Calculate average allreduce time per rank\n",
    "for rank, total_time in allreduce_times.items():\n",
    "    if allreduce_cnt[rank] > 0:\n",
    "        avg_time = total_time / allreduce_cnt[rank]\n",
    "        avg_size = allreduce_size[rank] / allreduce_cnt[rank]\n",
    "        print(f\"Rank {rank}: Average allreduce time: {avg_time:.3f} ms, avg size: {avg_size / 1024:.3f} MB, calls per iteration: {allreduce_cnt[rank] / num_iterations:}, avg time per iteration: {total_time / num_iterations:.3f} ms\")\n",
    "\n",
    "slowest_rank_count = defaultdict(int)\n",
    "\n",
    "for itr in range(num_iterations):\n",
    "    iter_time = [allreduce_times_per_iter[rank][itr] for rank in range(NUM_RANK)]\n",
    "    \n",
    "    slowest_iter_time, second_slowest_iter_time = sorted(iter_time)[0], sorted(iter_time)[1]\n",
    "    second_slowest_iter_index = iter_time.index(second_slowest_iter_time)\n",
    "    slowest_iter_index = iter_time.index(slowest_iter_time)\n",
    "    print(f\"Itr{itr}: slowest: {slowest_iter_index}, 2nd slowest: {second_slowest_iter_index}, time: {slowest_iter_time:.3f} ms, 2nd time: {second_slowest_iter_time:.3f} ms, difference: {second_slowest_iter_time - slowest_iter_time:.3f} ms\")\n",
    "\n",
    "    # Count the occurrences of each rank being the slowest\n",
    "    \n",
    "    slowest_rank_count[slowest_iter_index] += 1\n",
    "\n",
    "    # Calculate and print the percentage of each rank being the slowest\n",
    "print(slowest_rank_count)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50fadcb0",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dead369a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dfcbf7e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of iterations: 3\n"
     ]
    }
   ],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "meshsolver",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "undefined.undefined.undefined"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
