{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import jsonlines\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import json\n",
    "import copy\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "prompt_template = \"The point of interest has following attributes: \\n name is <NAME>; category is <CATEGORY>; type is <TYPE>; open status is <OPEN>; review count is <COUNT>; city is <CITY>; average score is <STARS>.\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = json.load(open(\"./handled/item2attributes.json\", \"r\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"PzOqRohWw7F7YEPBz6AubA\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_data = {}\n",
    "for key, value in tqdm(data.items()):\n",
    "    item_str = copy.deepcopy(prompt_template)\n",
    "    item_str = item_str.replace(\"<NAME>\", value[\"name\"])\n",
    "    cate_str = \"\"\n",
    "    for cate in value[\"categories\"]:\n",
    "        cate_str += (cate + \" \")\n",
    "    item_str = item_str.replace(\"<CATEGORY>\", cate_str)\n",
    "    item_str = item_str.replace(\"<TYPE>\", value[\"type\"])\n",
    "    item_str = item_str.replace(\"<OPEN>\", str(value[\"open\"]))\n",
    "    item_str = item_str.replace(\"<COUNT>\", str(value[\"review_count\"]))\n",
    "    item_str = item_str.replace(\"<CITY>\", value[\"city\"])\n",
    "    item_str = item_str.replace(\"<STARS>\", str(value[\"stars\"]))\n",
    "    \n",
    "    item_data[key] = item_str"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_data['PzOqRohWw7F7YEPBz6AubA']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "json.dump(item_data, open(\"./handled/item_str.json\", \"w\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_data = json.load(open(\"./handled/item_str.json\", \"r\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import jsonlines\n",
    "\n",
    "def save_data(data_path, data):\n",
    "    '''write all_data list to a new jsonl'''\n",
    "    with jsonlines.open(\"./handled/\"+ data_path, \"w\") as w:\n",
    "        for meta_data in data:\n",
    "            w.write(meta_data)\n",
    "\n",
    "id_map = json.load(open(\"./handled/id_map.json\", \"r\"))[\"item2id\"]\n",
    "json_data = []\n",
    "for key, value in item_data.items():\n",
    "    json_data.append({\"input\": value, \"target\": \"\", \"item\": key, \"item_id\": id_map[key]})\n",
    "\n",
    "save_data(\"item_str.jsonline\", json_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "url = \"\"\n",
    "\n",
    "payload = json.dumps({\n",
    "   \"model\": \"text-embedding-ada-002\",\n",
    "   \"input\": \"The food was delicious and the waiter...\"\n",
    "})\n",
    "headers = {\n",
    "   'Authorization': '',\n",
    "   'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',\n",
    "   'Content-Type': 'application/json'\n",
    "}\n",
    "\n",
    "response = requests.request(\"POST\", url, headers=headers, data=payload)\n",
    "\n",
    "print(response.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "re_json = json.loads(response.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(re_json[\"data\"][0][\"embedding\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_response(prompt):\n",
    "    url = \"\"\n",
    "\n",
    "    payload = json.dumps({\n",
    "    \"model\": \"text-embedding-ada-002\",\n",
    "    \"input\": prompt\n",
    "    })\n",
    "    headers = {\n",
    "    'Authorization': '',\n",
    "    'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',\n",
    "    'Content-Type': 'application/json'\n",
    "    }\n",
    "\n",
    "    response = requests.request(\"POST\", url, headers=headers, data=payload)\n",
    "    re_json = json.loads(response.text)\n",
    "\n",
    "    return re_json[\"data\"][0][\"embedding\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_emb = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "while 1:    # avoid broken due to internet connection\n",
    "    try:\n",
    "        for key, value in tqdm(item_data.items()):\n",
    "            if key not in item_emb.keys():\n",
    "                item_emb[key] = get_response(value)\n",
    "    except:\n",
    "        continue\n",
    "    if len(item_emb) == len(item_data):\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(item_emb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "id_map = json.load(open(\"./handled/id_map.json\", \"r\"))[\"id2item\"]\n",
    "emb_list = []\n",
    "for id in range(1, len(item_emb)+1):\n",
    "    meta_emb = item_emb[id_map[str(id)]]\n",
    "    emb_list.append(meta_emb)\n",
    "\n",
    "emb_list = np.array(emb_list)\n",
    "pickle.dump(emb_list, open(\"./handled/itm_emb_np.pkl\", \"wb\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "llm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
