{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from vllm import LLM, SamplingParams\n",
    "# import os\n",
    "# import gc\n",
    "# modelp=\"/volume/ailab4sci/txie/ydl/short_ablation2/ShortRL-logic1e-6-200-1/actor/global_step_1686\"\n",
    "# kimip=\"/volume/ailab4sci/txie/ydl/short_ablation2/ShortRL-kk_kimi-0.01/actor/global_step_1686\"\n",
    "# batch_size = 4\n",
    "\n",
    "# llm = LLM(model=modelp,gpu_memory_utilization=0.3)\n",
    "# import gc\n",
    "# import torch\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "datap=\"/volume/ailab4sci/txie/ydl/Short-RL/Logic-RL/data/kk/instruct/5ppl/test.parquet\"\n",
    "#read parquet file\n",
    "import pandas as pd\n",
    "data=pd.read_parquet(datap)\n",
    "# prompt=data.iloc[0]['prompt'][0]['content']\n",
    "# sampling_params = SamplingParams(temperature=0.7, n=batch_size,max_tokens=1000)\n",
    "# text_list = []\n",
    "# outputs = llm.generate(prompt, sampling_params)\n",
    "for output in outputs:\n",
    "    prompt = output.prompt\n",
    "    for i in range(batch_size):\n",
    "        generated_text = output.outputs[i].text\n",
    "        print(f\"Generated text: {generated_text!r}\")\n",
    "        text_list.append(generated_text)\n",
    "kimitext_list = []\n",
    "outputs = kimi.generate(prompt, sampling_params)\n",
    "for output in outputs:\n",
    "    prompt = output.prompt\n",
    "    for i in range(batch_size):\n",
    "        generated_text = output.outputs[i].text\n",
    "        print(f\"Generated text: {generated_text!r}\")\n",
    "        kimitext_list.append(generated_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100\n"
     ]
    }
   ],
   "source": [
    "print(data.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "driver_worker",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m/volume/ailab4sci/txie/ydl/Short-RL/diversity-eval/test.ipynb Cell 2\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> <a href='vscode-notebook-cell://console.scitix.ai/volume/ailab4sci/txie/ydl/Short-RL/diversity-eval/test.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39mdel\u001b[39;00m llm\u001b[39m.\u001b[39mllm_engine\u001b[39m.\u001b[39mmodel_executor\u001b[39m.\u001b[39mdriver_worker\n\u001b[1;32m      <a href='vscode-notebook-cell://console.scitix.ai/volume/ailab4sci/txie/ydl/Short-RL/diversity-eval/test.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=1'>2</a>\u001b[0m gc\u001b[39m.\u001b[39mcollect()\n\u001b[1;32m      <a href='vscode-notebook-cell://console.scitix.ai/volume/ailab4sci/txie/ydl/Short-RL/diversity-eval/test.ipynb#X14sdnNjb2RlLXJlbW90ZQ%3D%3D?line=2'>3</a>\u001b[0m torch\u001b[39m.\u001b[39mcuda\u001b[39m.\u001b[39mempty_cache()\n",
      "\u001b[0;31mAttributeError\u001b[0m: driver_worker"
     ]
    }
   ],
   "source": [
    "del llm.llm_engine.model_executor.driver_worker\n",
    "gc.collect()\n",
    "torch.cuda.empty_cache()\n",
    "# kimi= LLM(model=kimip,gpu_memory_utilization=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n",
      "1.0\n",
      "bert\n",
      "['0.151', '0.180', '0.339', '0.337', '0.293', '0.601']\n",
      "-0.3168333333333333\n",
      "['0.151', '0.180', '0.339', '0.337', '0.293', '0.601']\n",
      "-0.3168333333333333\n",
      "syn\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4/4 [00:00<00:00,  9.25it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.80751777\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 4/4 [00:00<00:00, 30.34it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.80751777\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "from diversity_metrics import DistinctNgrams,SentBert,SyntacticDiversity\n",
    "batch_size=4\n",
    "inpp=\"/volume/ailab4sci/txie/ydl/Short-RL/diversity-eval\"\n",
    "text_list = [\"How are you\",\"How are you\",\"How are you\",\"How are you\"]\n",
    "text_list = [\"ABC\",\"I love it\",\"Do you have me?\",\"What about this one\"]\n",
    "kimitext_list=text_list\n",
    "config = {'n': batch_size,'num_sets':1,'samples_per_set':batch_size,'input_path':inpp}\n",
    "div_metric=DistinctNgrams(config)\n",
    "diversity=div_metric(text_list)\n",
    "print(diversity)\n",
    "div_metric=DistinctNgrams(config)\n",
    "kimidiversity=div_metric(kimitext_list)\n",
    "print(kimidiversity)\n",
    "\n",
    "\n",
    "print(\"bert\")\n",
    "config = {'n': batch_size,'num_sets':1,'samples_per_set':batch_size,'input_path':inpp}\n",
    "div_metric=SentBert(config)\n",
    "\n",
    "diversity=div_metric(0,text_list)\n",
    "print(diversity)\n",
    "\n",
    "kimidiversity=div_metric(0,kimitext_list)\n",
    "print(kimidiversity)\n",
    "\n",
    "div_metric=SyntacticDiversity()\n",
    "print(\"syn\")\n",
    "diversity=div_metric(text_list)\n",
    "print(diversity)\n",
    "\n",
    "kimidiversity=div_metric(kimitext_list)\n",
    "print(kimidiversity)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Short-RL:   0%|          | 0/12 [00:00<?, ?it/s]/tmp/ipykernel_210490/2210840948.py:48: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
      "  diversity_trend_short_rl = pd.concat([diversity_trend_short_rl, pd.DataFrame({'steps': [steps], 'diversity_score': [ppl5_diversity_score_avg]})], ignore_index=True)\n",
      "Short-RL: 100%|██████████| 12/12 [00:00<00:00, 3927.86it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "5ppl diversity score: 0.1\n",
      "   steps  diversity_score\n",
      "0     50              0.1\n",
      "1    100              0.1\n",
      "2    150              0.1\n",
      "3    200              0.1\n",
      "4    250              0.1\n",
      "5    300              0.1\n",
      "6    350              0.1\n",
      "7    400              0.1\n",
      "8    450              0.1\n",
      "9    500              0.1\n",
      "10   550              0.1\n",
      "11   562              0.1\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "kimitext_list=['observation.</think><answer>\\n\\n\\n(1) Amelia is a knight\\n(2) Penelope is a knight\\n(3) Evelyn is a knight </answer>']*4"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
