{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/lidong1/miniconda3/envs/llama_factory/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed  \n",
    "import os\n",
    "import pandas as pd\n",
    "from gpt4o import Openai, API_INFOS\n",
    "from datasets import load_dataset, Dataset\n",
    "from tqdm import tqdm\n",
    "\n",
    "\n",
    "## system prompt\n",
    "system_template = \\\n",
    "\"\"\"\n",
    "You are an impartial judge tasked with evaluating the textual aesthetics of responses provided by two AI assistants to the user prompt displayed below. Your goal is to determine which response is more aesthetically pleasing and easier to read and understand.  \n",
    "  \n",
    "Begin your evaluation by considering the following aspects for each response:  \n",
    "  \n",
    "1. **Readability**: Is the text easy to read and understand? Are the sentences of appropriate length and complexity?  \n",
    "2. **Visual Organization**: Is the text visually organized in a logical manner? Are there appropriate headings, subheadings, lists, and other formatting elements?  \n",
    "3. **Consistency**: Does the text maintain a consistent style and format throughout?  \n",
    "4. **Overall Structure**: Are the paragraphs well-structured and logically connected? Is there appropriate spacing between paragraphs?  \n",
    "  \n",
    "Follow these steps for your evaluation:  \n",
    "1. **Analyze each response**: Carefully read and analyze both responses based on the criteria provided.  \n",
    "2. **Compare both responses**: Determine which response excels in textual aesthetics considering all aspects.  \n",
    "3. **Make a final decision**: Choose the response that is better in terms of textual aesthetics and justify your choice.  \n",
    "  \n",
    "You must output only one of the following choices as your final verdict with a label:  \n",
    "1. Assistant A is significantly better: [[A>>B]]  \n",
    "2. Assistant A is slightly better: [[A>B]]  \n",
    "3. Tie, relatively the same: [[A=B]]  \n",
    "4. Assistant B is slightly better: [[B>A]]  \n",
    "5. Assistant B is significantly better: [[B>>A]]  \n",
    "  \n",
    "Example output: \"My final verdict is Assistant A is slightly better: [[A>B]].\"  \n",
    "\"\"\"\n",
    "\n",
    "## user prompt\n",
    "user_template  = \\\n",
    "\"\"\"<|User Prompt|>{instruction}\n",
    "<|The Start of Assistant A's Answer|>\n",
    "{answer_1}\n",
    "<|The End of Assistant A's Answer|>\n",
    "\n",
    "<|The Start of Assistant B's Answer|>\n",
    "{answer_2}\n",
    "<|The End of Assistant B's Answer|>\"  \n",
    "\"\"\"\n",
    "\n",
    "def get_data():\n",
    "    ds = load_dataset(\"HuggingFaceH4/ultrafeedback_binarized\", split=\"train_prefs\")\n",
    "    def get_instruct_response(item):\n",
    "        # item[\"instruction\"] = item['chosen'][0][\"content\"]\n",
    "        item[\"response\"] = item['chosen'][1][\"content\"]\n",
    "        return item\n",
    "    ds = ds.map(get_instruct_response, batch_size=1024, num_proc=8)\n",
    "    export_data = ds.select_columns([\"prompt\", \"response\"])\n",
    "    return export_data\n",
    "\n",
    "def get_revised_text(client, instruction, completion, user_template, system_template, max_tokens=2048):  \n",
    "    # 格式化用户模板，插入指令和完成的文本  \n",
    "    content = user_template.format(instruction=instruction, completion=completion)  \n",
    "      \n",
    "    # 从客户端获取响应  \n",
    "    gpt_answer = client.get_response(content=content, system=system_template, max_tokens=max_tokens)  \n",
    "      \n",
    "    if gpt_answer is None:  \n",
    "        gpt_answer = \"\"  \n",
    "    gpt_answer = gpt_answer.strip()  \n",
    "      \n",
    "    # 确定是否需要修改  \n",
    "    need_modification = \"Y\" if \"**Does it need modification**: [[Y]]\" in gpt_answer else \"N\"  \n",
    "      \n",
    "    # 提取修改后的文本  \n",
    "    if need_modification == \"Y\":  \n",
    "        revised_text_start = gpt_answer.find(\"<|Revised Content Start|>\") + len(\"<|Revised Content Start|>\")  \n",
    "        revised_text_end = gpt_answer.find(\"<|Revised Content End|>\", revised_text_start)  \n",
    "        revised_text = gpt_answer[revised_text_start:revised_text_end].strip()  \n",
    "    else:  \n",
    "        revised_text = \"\"  \n",
    "      \n",
    "    return need_modification, revised_text, gpt_answer  \n",
    "\n",
    "  \n",
    "    return need_modification, revised_text, gpt_answer  \n",
    " \n",
    "def process_row(index, client, row, user_template, system_template, max_tokens=2048, output_file=\"output.jsonl\"):  \n",
    "    prompt = row['prompt']  \n",
    "    response = row['response']  \n",
    "    need_modification, revised_text, gpt_answer = get_revised_text(client, prompt, response, user_template, system_template, max_tokens=max_tokens)  \n",
    "    # print(f\"index {index}\")\n",
    "    result = {  \n",
    "        'index': index,  \n",
    "        'prompt': prompt,  \n",
    "        'response': response,  \n",
    "        'does_it_need_modification': need_modification,  \n",
    "        'revised_text': revised_text,  \n",
    "        'gpt_answer': gpt_answer  \n",
    "    }  \n",
    "    # Write the result to a JSONL file  \n",
    "    with open(output_file, 'a') as f:  \n",
    "        f.write(json.dumps(result) + \"\\n\")  \n",
    "    return result  \n",
    "def main():  \n",
    "    # Initialize multiple clients  \n",
    "    clients = [Openai(apis=[API_INFOS[i]]) for i in range(len(API_INFOS))]  \n",
    "    print(f\"clients number: {len(clients)}\")\n",
    "    export_data = get_data()\n",
    "    sample_data = export_data.select(range(100))\n",
    "    # sample_data = export_data # all\n",
    "    # user_template = \"User: {instruction}\\nCompletion: {completion}\"  \n",
    "    # system_template = \"You are a helpful assistant.\"  \n",
    "    max_tokens = 2048  \n",
    "    cur_dir = os.path.dirname(os.path.abspath(__file__))\n",
    "    # data_path = os.path.join(cur_dir, \"revised_data/output_sorted.jsonl\")\n",
    "    output_file = \"revised_data/output_v2.jsonl\"  \n",
    "    output_file = os.path.join(cur_dir, output_file)\n",
    "  \n",
    "    # Clear the output file before starting  \n",
    "    if os.path.exists(output_file):  \n",
    "        os.remove(output_file)  \n",
    "  \n",
    "    revised_data = []  \n",
    "  \n",
    "    with ThreadPoolExecutor(max_workers=len(clients)) as executor:  \n",
    "        # Create a future for each row in the dataset  \n",
    "        futures = [executor.submit(process_row, i, clients[i % len(clients)], row, user_template, system_template, max_tokens, output_file) for i, row in enumerate(sample_data)]  \n",
    "  \n",
    "        # Collect the results as they complete  \n",
    "        for future in tqdm(as_completed(futures), total=len(futures)):  \n",
    "            revised_data.append(future.result())  \n",
    "\n",
    "  \n",
    "    # Load results from JSONL file and ensure the order is preserved  \n",
    "    with open(output_file, 'r') as f:  \n",
    "        revised_data = [json.loads(line) for line in f]  \n",
    "  \n",
    "    # Sort by the original index  \n",
    "    revised_dataset = revised_data.sort(key=lambda x: x['index'])  \n",
    "  \n",
    "    # Create a new Dataset  \n",
    "    revised_dataset = Dataset.from_pandas(pd.DataFrame(revised_data))  \n",
    "    sorted_output_path = os.path.join(cur_dir, \"revised_data/output_sorted_v2.jsonl\")\n",
    "    revised_dataset.to_json(sorted_output_path) \n",
    "if __name__ == \"__main__\":\n",
    "    main()\n",
    "    # from generate_res import generate_res\n",
    "    # generate_res()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from utils import read_jsonl\n",
    "def extract_final_verdict(llm_output):  \n",
    "    \"\"\"  \n",
    "    Extracts the final verdict from the LLM output.  \n",
    "  \n",
    "    Parameters:  \n",
    "    llm_output (str): The output string from the LLM.  \n",
    "  \n",
    "    Returns:  \n",
    "    str: The final verdict in the format [[A>>B]], [[A>B]], [[A=B]], [[B>A]], or [[B>>A]].  \n",
    "    \"\"\"  \n",
    "    # Define the regex pattern to match the final verdict  \n",
    "    pattern = r'\\[\\[A>>B\\]\\]|\\[\\[A>B\\]\\]|\\[\\[A=B\\]\\]|\\[\\[B>A\\]\\]|\\[\\[B>>A\\]\\]'  \n",
    "  \n",
    "    # Search for the pattern in the LLM output  \n",
    "    match = re.search(pattern, llm_output)  \n",
    "  \n",
    "    if match:  \n",
    "        return match.group(0)  \n",
    "    else:  \n",
    "        return None \n",
    "    \n",
    "def get_judged_answer(client, instruction, answer_1, answer_2, user_template, system_template, max_tokens=2048):  \n",
    "    # 格式化用户模板，插入指令和完成的文本  \n",
    "    content = user_template.format(instruction=instruction, answer_1=answer_1, answer_2=answer_2)  \n",
    "      \n",
    "    # 从客户端获取响应  \n",
    "    gpt_answer = client.get_response(content=content, system=system_template, max_tokens=max_tokens)  \n",
    "      \n",
    "    if gpt_answer is None:  \n",
    "        gpt_answer = \"\"  \n",
    "    gpt_answer = gpt_answer.strip()  \n",
    "    \n",
    "    score = extract_final_verdict(gpt_answer)\n",
    "\n",
    "    return score, gpt_answer  \n",
    "\n",
    "def get_judge(client, row, user_template, system_template, max_tokens=2048, output_file=\"judges.jsonl\"):  \n",
    "    # prompt = row['prompt']  \n",
    "    # response = row['response']  \n",
    "    # need_modification, revised_text, gpt_answer = get_revised_text(client, prompt, response, user_template, system_template, max_tokens=max_tokens)  \n",
    "    # print(f\"index {index}\")\n",
    "    prompt = row['prompt'] \n",
    "    answer_1 = row['response']\n",
    "    answer_2 = row['revised_text']\n",
    "    score, judgment = get_judged_answer(client, prompt, answer_1, answer_2, user_template, system_template, max_tokens=2048) \n",
    "    result = row\n",
    "    result['judge'] = judgment\n",
    "    result['score'] = score    \n",
    "    with open(output_file, 'a') as f:  \n",
    "        f.write(json.dumps(result) + \"\\n\")  \n",
    "    return result \n",
    "def main():  \n",
    "    clients = [Openai(apis=[API_INFOS[i]]) for i in range(len(API_INFOS))]  \n",
    "    print(f\"clients number: {len(clients)}\")\n",
    "    # Initialize multiple clients  \n",
    "    revised_data = read_jsonl(\"revised_data/output_sorted.jsonl\")\n",
    "    sample_data = revised_data.select(range(100))\n",
    "    # sample_data = export_data # all\n",
    "    # user_template = \"User: {instruction}\\nCompletion: {completion}\"  \n",
    "    # system_template = \"You are a helpful assistant.\"  \n",
    "    max_tokens = 2048  \n",
    "    cur_dir = os.path.dirname(os.path.abspath(__file__))\n",
    "    # data_path = os.path.join(cur_dir, \"revised_data/output_sorted.jsonl\")\n",
    "    output_file = \"revised_data/output_judge.jsonl\"  \n",
    "    output_file = os.path.join(cur_dir, output_file)\n",
    "  \n",
    "    # Clear the output file before starting  \n",
    "    if os.path.exists(output_file):  \n",
    "        os.remove(output_file)  \n",
    "  \n",
    "    revised_data = []  \n",
    "  \n",
    "    with ThreadPoolExecutor(max_workers=len(clients)) as executor:  \n",
    "        # Create a future for each row in the dataset  \n",
    "        futures = [executor.submit(process_row, i, clients[i % len(clients)], row, user_template, system_template, max_tokens, output_file) for i, row in enumerate(sample_data)]  \n",
    "  \n",
    "        # Collect the results as they complete  \n",
    "        for future in tqdm(as_completed(futures), total=len(futures)):  \n",
    "            revised_data.append(future.result())  \n",
    "\n",
    "  \n",
    "    # Load results from JSONL file and ensure the order is preserved  \n",
    "    with open(output_file, 'r') as f:  \n",
    "        revised_data = [json.loads(line) for line in f]  \n",
    "  \n",
    "    # Sort by the original index  \n",
    "    revised_dataset = revised_data.sort(key=lambda x: x['index'])  \n",
    "  \n",
    "    # Create a new Dataset  \n",
    "    revised_dataset = Dataset.from_pandas(pd.DataFrame(revised_data))  \n",
    "    sorted_output_path = os.path.join(cur_dir, \"revised_data/output_sorted_v2.jsonl\")\n",
    "    revised_dataset.to_json(sorted_output_path) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'index': 0,\n",
       "  'prompt': 'how can i develop a habit of drawing daily',\n",
       "  'response': \"Developing a daily habit of drawing can be challenging but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\\n\\n1. Set a specific time: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\\n2. Set a specific duration: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\\n3. Start small and simple: Don't try to create a masterpiece every day, start with simple and easy-to-do sketches. Focus on improving your skills gradually.\\n4. Use a variety of tools and mediums: Experiment with different tools like pencils, pens, markers, and different mediums like paper, canvas, or digital apps to keep your drawing practice interesting and engaging.\\n5. Take breaks and rest: Taking breaks and resting after some time of drawing can help you avoid burnout and stay motivated.\\n6. Challenge yourself: Set challenges like drawing objects from memory or a specific subject to improve your skills and keep your drawing practice interesting.\\n7. Track your progress: Keep a record of your daily drawing practice and track your progress. This can be a source of motivation and help you see how far you've come.\\n\\nRemember, developing a habit takes time and patience. Stay consistent with your drawing practice, be flexible and open to trying new things, and with time, you'll develop a habit of daily drawing that brings you joy and satisfaction.\",\n",
       "  'does_it_need_modification': 'Y',\n",
       "  'revised_text': \"Developing a daily habit of drawing can be challenging, but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\\n\\n1. **Set a specific time**: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\\n\\n2. **Set a specific duration**: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\\n\\n3. **Start small and simple**: Don't try to create a masterpiece every day. Start with simple and easy-to-do sketches. Focus on improving your skills gradually.\\n\\n4. **Use a variety of tools and mediums**: Experiment with different tools like pencils, pens, markers, and different mediums like paper, canvas, or digital apps to keep your drawing practice interesting and engaging.\\n\\n5. **Take breaks and rest**: Taking breaks and resting after some time of drawing can help you avoid burnout and stay motivated.\\n\\n6. **Challenge yourself**: Set challenges like drawing objects from memory or a specific subject to improve your skills and keep your drawing practice interesting.\\n\\n7. **Track your progress**: Keep a record of your daily drawing practice and track your progress. This can be a source of motivation and help you see how far you've come.\\n\\nRemember, developing a habit takes time and patience. Stay consistent with your drawing practice, be flexible and open to trying new things, and with time, you'll develop a habit of daily drawing that brings you joy and satisfaction.\",\n",
       "  'gpt_answer': \"**Textual Aesthetic Analysis**:  \\nThe text is generally clear and well-organized, but it could benefit from some formatting improvements to enhance readability. Specifically, using bullet points for the list of strategies and adding some emphasis on key points would make the text more engaging and easier to follow.\\n\\n**Does it need modification**: [[Y]]\\n\\n**Revised Text**:  \\n<|Revised Content Start|>  \\nDeveloping a daily habit of drawing can be challenging, but with consistent practice and a few tips, it can become an enjoyable and rewarding part of your daily routine. Here are some strategies to help you develop the habit of drawing daily:\\n\\n1. **Set a specific time**: Allocate a specific time of the day to draw. It could be in the morning, afternoon, or evening. Make drawing a part of your daily routine.\\n\\n2. **Set a specific duration**: Determine the amount of time you want to spend on drawing each day. It can be as little as 10 minutes or as long as an hour. Be consistent with the duration to help build the habit.\\n\\n3. **Start small and simple**: Don't try to create a masterpiece every day. Start with simple and easy-to-do sketches. Focus on improving your skills gradually.\\n\\n4. **Use a variety of tools and mediums**: Experiment with different tools like pencils, pens, markers, and different mediums like paper, canvas, or digital apps to keep your drawing practice interesting and engaging.\\n\\n5. **Take breaks and rest**: Taking breaks and resting after some time of drawing can help you avoid burnout and stay motivated.\\n\\n6. **Challenge yourself**: Set challenges like drawing objects from memory or a specific subject to improve your skills and keep your drawing practice interesting.\\n\\n7. **Track your progress**: Keep a record of your daily drawing practice and track your progress. This can be a source of motivation and help you see how far you've come.\\n\\nRemember, developing a habit takes time and patience. Stay consistent with your drawing practice, be flexible and open to trying new things, and with time, you'll develop a habit of daily drawing that brings you joy and satisfaction.  \\n<|Revised Content End|>\"},\n",
       " {'index': 1,\n",
       "  'prompt': \"how can I transform the getPosition method of antv/g's group in zrender?\",\n",
       "  'response': \"It is not recommended to modify built-in methods as it can lead to unexpected results and potential bugs. You may consider developing a new method or exploring other methods to achieve your desired outcome. Alternatively, you can search for other libraries or modules that offer similar functionalities or reach out to the library's support team for assistance.\",\n",
       "  'does_it_need_modification': 'Y',\n",
       "  'revised_text': \"Modifying built-in methods is generally not recommended as it can lead to unexpected results and potential bugs. Instead, consider the following alternatives:\\n\\n1. **Develop a New Method**: Create a new method that achieves your desired outcome without altering the existing built-in methods.\\n2. **Explore Other Methods**: Look into other methods provided by the library that might offer the functionality you need.\\n3. **Use Other Libraries**: Search for other libraries or modules that provide similar functionalities.\\n4. **Seek Support**: Reach out to the library's support team or community for assistance and guidance.\\n\\nBy following these alternatives, you can achieve your goals while maintaining the stability and reliability of your code.\",\n",
       "  'gpt_answer': \"**Textual Aesthetic Analysis**:  \\nThe text provides a clear and concise recommendation against modifying built-in methods, suggesting alternative approaches. However, it could benefit from improved structure and formatting to enhance readability and comprehension.\\n\\n**Does it need modification**: [[Y]]\\n\\n**Revised Text**:  \\n<|Revised Content Start|>  \\nModifying built-in methods is generally not recommended as it can lead to unexpected results and potential bugs. Instead, consider the following alternatives:\\n\\n1. **Develop a New Method**: Create a new method that achieves your desired outcome without altering the existing built-in methods.\\n2. **Explore Other Methods**: Look into other methods provided by the library that might offer the functionality you need.\\n3. **Use Other Libraries**: Search for other libraries or modules that provide similar functionalities.\\n4. **Seek Support**: Reach out to the library's support team or community for assistance and guidance.\\n\\nBy following these alternatives, you can achieve your goals while maintaining the stability and reliability of your code.\\n<|Revised Content End|>\"}]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from utils import read_jsonl\n",
    "revised_data = read_jsonl(\"revised_data/output_sorted.jsonl\")\n",
    "revised_data[:2]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
