{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Placeholder only; the configuration cell further down assigns the real model id.\n",
    "modelName = \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import openai\n",
    "\n",
    "def rate_fluency(text_list):\n",
    "    \"\"\"Request a 1-5 fluency rating for every text in ``text_list``.\n",
    "\n",
    "    Each text is embedded in a rating prompt and sent, together with a fixed\n",
    "    system message, through ``outputModel_Managed_LLM`` using the\n",
    "    ``GPT_4_O_API_ENPOINT`` / ``GPT_4_O_MODEL_NAME`` settings.\n",
    "\n",
    "    Args:\n",
    "        text_list: Iterable of texts to rate.\n",
    "\n",
    "    Returns:\n",
    "        A list with one single-entry dict per text, mapping the text to\n",
    "        whatever the model helper returned for it (not parsed or validated).\n",
    "    \"\"\"\n",
    "\n",
    "    def _ask_for_rating(text):\n",
    "        # Fresh message dicts per call, so each request is independent.\n",
    "        conversation = [\n",
    "            {\"role\": \"system\", \"content\": \"You are a text evaluation assistant.\"},\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": f'Please rate the fluency of the following text on a scale of 1 to 5, where 1 is least fluent and 5 is most fluent: \"{text}\". Provide only the number.',\n",
    "            },\n",
    "        ]\n",
    "        return outputModel_Managed_LLM(GPT_4_O_API_ENPOINT, GPT_4_O_MODEL_NAME, conversation)\n",
    "\n",
    "    return [{text: _ask_for_rating(text)} for text in text_list]\n",
    "\n",
    "# Smoke-test inputs mixing well-formed and ungrammatical sentences\n",
    "# NOTE(review): no earlier cell defines outputModel_Managed_LLM or the GPT_4_O_* names that\n",
    "# rate_fluency uses; confirm they are in scope before running this cell on a fresh kernel.\n",
    "texts = [\n",
    "    \"This is a simple sentence.\",\n",
    "    \"I are not good at speaking English.\",\n",
    "    \"The quick brown fox jumps over the lazy dog.\",\n",
    "    \"Grammar mistake sentence writing bad.\",\n",
    "]\n",
    "\n",
    "# Rate every sample, then show one {text: rating} entry per line\n",
    "fluency_results = rate_fluency(texts)\n",
    "print(*fluency_results, sep=\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run configuration consumed by the rating cell below (passed to getInfo there)\n",
    "modelName = 'Qwen/Qwen2-1.5B-Instruct'\n",
    "task = 'toxicity'\n",
    "scale = False\n",
    "\n",
    "# Name the results file after the final path component of the model id, so that ids\n",
    "# without an organisation prefix (no '/') do not raise IndexError.\n",
    "modelShortName = modelName.rsplit('/', 1)[-1]\n",
    "saveFilePath = f'{modelShortName}_{task}_scaling_{scale}_fluency_ratings.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The star import stays first so the standard-library imports below win on any name clash.\n",
    "from Utils.utils import *\n",
    "import json\n",
    "import os\n",
    "import pickle\n",
    "\n",
    "layers, fileName = getInfo(modelName, task, scale)\n",
    "\n",
    "# First run: create an empty results file so the load below always succeeds.\n",
    "# json must be imported unconditionally, because the load runs even when this branch is skipped.\n",
    "if not os.path.exists(saveFilePath):\n",
    "    with open(saveFilePath, 'w') as f:\n",
    "        json.dump([], f)\n",
    "    print(f'File created: {saveFilePath}')\n",
    "\n",
    "with open(saveFilePath, 'r') as f:\n",
    "    data = json.load(f)\n",
    "print(f'File loaded: {saveFilePath}')\n",
    "print(f'Previous data length: {len(data)}')\n",
    "\n",
    "# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.\n",
    "with open(f'{fileName}', 'rb') as f:\n",
    "    texts = pickle.load(f)\n",
    "\n",
    "# Resume after the entries already saved and rewrite the file after every rating,\n",
    "# so an interrupted run loses at most the text in progress.\n",
    "for i in range(len(data), len(texts)):\n",
    "    print(f'{i+1}/{len(texts)}')\n",
    "    text = texts[i]\n",
    "    fluency_ratings = rate_fluency([text])\n",
    "    # rate_fluency already returns {text: rating}; the extra {text: ...} wrapper is kept because\n",
    "    # the averaging cell reads each rating as data[i][key][key].\n",
    "    data.append({text: fluency_ratings[0]})\n",
    "    with open(saveFilePath, 'w') as f:\n",
    "        json.dump(data, f)\n",
    "    print(f'Fluency rating for text {i+1}: {fluency_ratings[0]}')\n",
    "\n",
    "# Reload from disk so data reflects exactly what was persisted.\n",
    "with open(saveFilePath, 'r') as f:\n",
    "    data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "# NOTE(review): hard-coded path, independent of saveFilePath from the configuration cell;\n",
    "# confirm this is the ratings file that should be summarised below.\n",
    "ratings_path = 'BaysianOptimization/Meta-Llama-3-8B-Instruct_toxicity_scaling_False_fluency_ratings.json'\n",
    "with open(ratings_path, mode='r') as f:\n",
    "    data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the first saved entry to confirm the record layout\n",
    "data[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average the integer fluency ratings stored as {text: {text: rating}} entries\n",
    "rating_total = 0\n",
    "count = 0\n",
    "skipped = 0\n",
    "for entry in data:\n",
    "    try:\n",
    "        key = list(entry.keys())[0]\n",
    "        rating_value = int(entry[key][key])\n",
    "    except (AttributeError, IndexError, KeyError, TypeError, ValueError):\n",
    "        # Entry has an unexpected shape or a non-integer rating; leave it out of the average.\n",
    "        skipped += 1\n",
    "        continue\n",
    "    rating_total += rating_value\n",
    "    count += 1\n",
    "\n",
    "print(count)\n",
    "if skipped:\n",
    "    print(f'Skipped {skipped} entries without a usable rating')\n",
    "if count:\n",
    "    print(rating_total / count)\n",
    "else:\n",
    "    # Avoid ZeroDivisionError when nothing could be parsed.\n",
    "    print('No valid fluency ratings to average')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ALTI",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
