{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw train splits; each non-blank line is expected to hold one JSON object.\n",
    "# json.loads replaces eval here: eval executes arbitrary code read from the file\n",
    "# and is not a JSON parser (it fails on true/false/null and mishandles escaped\n",
    "# surrogate pairs).\n",
    "target_data_path = \"./processed_train.jsonl\"\n",
    "harmless_data_list = []\n",
    "helpful_data_list = []\n",
    "with open(\"./harmless-base/train.jsonl/train.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        if line.strip():  # skip blank lines such as a trailing newline\n",
    "            harmless_data_list.append(json.loads(line))\n",
    "with open(\"./helpful-base/train.jsonl/train.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        if line.strip():  # skip blank lines such as a trailing newline\n",
    "            helpful_data_list.append(json.loads(line))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the first human turn out of each \"chosen\" transcript and write one\n",
    "# {\"query\", \"source\"} JSON object per line to target_data_path.\n",
    "human_tag = \"\\n\\nHuman: \"\n",
    "assistant_tag = \"\\n\\nAssistant: \"\n",
    "target_list = []\n",
    "# One loop over (label, records) pairs replaces two copy-pasted loops.\n",
    "for source, data_list in (\n",
    "    (\"harmless-base\", harmless_data_list),\n",
    "    (\"helpful-base\", helpful_data_list),\n",
    "):\n",
    "    for data in data_list[:400]:  # only the first 400 records of each source\n",
    "        # Take the segment after the first human tag and cut it at the first assistant tag.\n",
    "        query = data['chosen'].split(human_tag)[1].split(assistant_tag)[0]\n",
    "        target_list.append((query, source))\n",
    "# Write to the path held in target_data_path instead of repeating the literal,\n",
    "# so the output file follows whichever split was loaded last.\n",
    "with open(target_data_path, \"w\") as f:\n",
    "    for query, source in target_list:\n",
    "        f.write(json.dumps({\"query\": query, \"source\": source}))\n",
    "        f.write(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw test splits; each non-blank line is expected to hold one JSON object.\n",
    "# json.loads replaces eval here: eval executes arbitrary code read from the file\n",
    "# and is not a JSON parser (it fails on true/false/null and mishandles escaped\n",
    "# surrogate pairs).\n",
    "target_data_path = \"./processed_test.jsonl\"\n",
    "harmless_data_list = []\n",
    "helpful_data_list = []\n",
    "with open(\"./harmless-base/test.jsonl/test.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        if line.strip():  # skip blank lines such as a trailing newline\n",
    "            harmless_data_list.append(json.loads(line))\n",
    "with open(\"./helpful-base/test.jsonl/test.jsonl\", \"r\", encoding=\"utf-8\") as f:\n",
    "    for line in f:\n",
    "        if line.strip():  # skip blank lines such as a trailing newline\n",
    "            helpful_data_list.append(json.loads(line))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "explanation",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
