{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==Model:  gpt4o ==\n",
      "==Task:  toolUsePlans ==\n",
      "day: 1, morning: [{'name': 'Please Touch Museum', 'address': '4231 Avenue of the Republic, Philadelphia, PA 19131'}], afternoon: [{'name': 'Shofuso Japanese House & Garden', 'address': 'Horticultural Dr and Lansdowne Dr, Philadelphia, PA 19131'}, {'name': 'Philadelphia Zoo', 'address': '3400 W Girard Ave, Philadelphia, PA 19104'}], night: [{'name': 'Spruce Street Harbor Park', 'address': '121 N Columbus Blvd, Philadelphia, PA 19106'}], hotel: {'name': 'The Rittenhouse Hotel', 'address': '210 W Rittenhouse Sq, Philadelphia, PA 19103'}\n",
      "day: 2, morning: [{'name': 'Franklin Square', 'address': '200 N 6th St, Philadelphia, PA 19106'}], afternoon: [{'name': 'Museum of the American Revolution', 'address': '101 S 3rd St, Philadelphia, PA 19106'}, {'name': 'The Franklin Institute Science Museum', 'address': '222 N 20th St, Philadelphia, PA 19103'}], night: [{'name': 'Rittenhouse Square Park', 'address': '1800 Walnut St, Philadelphia, PA 19103'}], hotel: {'name': 'The Rittenhouse Hotel', 'address': '210 W Rittenhouse Sq, Philadelphia, PA 19103'}\n",
      "the optimized order is:  [2, 1, 3, 8, 6, 7, 4, 9]\n",
      "the total distance gap is:  0.25539354378296625 the total cluster jump is:  1.0 the attraction arranged for each day on average is:  4.0\n",
      "the clustering information is:  {'Cluster_0': [['Shane Confectionery'], ['The Constitutional Walking Tour'], ['Franklin Square'], ['Franklin Square Mini Golf'], ['Founding Footsteps'], ['The Big Bus Company'], ['The Betsy Ross House'], ['Museum of the American Revolution'], ['National Constitution Center'], ['National Liberty Museum'], ['National Museum of American Jewish History'], ['The African American Museum in Philadelphia'], ['Old City District'], ['Independence After Hours'], ['Independence Park Hotel, Bw Premier Collection'], ['Philadelphia Segway Tours By Wheel Fun Rentals'], ['Lokal Hotel'], ['Cherry Street Pier'], ['Philadelphia Sightseeing Tours & Transportation'], ['Bow Tie Tours'], ['Philadelphia Urban Adventures'], [\"Spirits of '76 Ghost Tours\"], ['Benjamin Franklin Museum'], ['Philly Tour Hub'], ['Powel House'], [\"Penn's View Hotel\"], ['The Franklin Residences'], ['Tattooed Mom'], ['Historic Philadelphia, Inc']], 'Cluster_1': [['Morris Arboretum'], ['Harry Potter Festival'], ['Woodmere Art Museum']], 'Cluster_10': [['Awbury Arboretum']], 'Cluster_11': [['Birchtree Catering']], 'Cluster_12': [['Fort Mifflin']], 'Cluster_13': [['Independence Seaport Museum'], ['Rizzo Ice Rink'], ['RiverLink Ferry System'], ['South Street Headhouse District'], ['Spirit of Philadelphia'], ['Benjamin Franklin Bridge'], [\"Penn's Landing\"], ['Patriot Harbor Lines'], ['Spruce Street Harbor Park']], 'Cluster_14': [['Philly Official Guide'], ['Seger Dog Park'], ['Bury The Hatchet'], ['Bloktoberfest'], [\"Fante's Kitchen Shop\"]], 'Cluster_15': [['Greensgrow Farms'], ['Suraya'], ['Neon Museum of Philadelphia'], ['Chinatown'], ['Pizza Brain']], 'Cluster_16': [['The Franklin Institute Science Museum'], ['The Oval'], ['Fairmount Water Works'], ['Cira Green'], ['Philly Bike Tour'], ['Eastern State Penitentiary Historic Site'], ['Parks On Tap-Azalea Gardens'], ['Philadelphia Museum of Art']], 'Cluster_17': [['Please Touch Museum'], ['Shofuso Japanese House & Garden']], 'Cluster_18': [['The Inn at Penn, a Hilton Hotel'], ['University Family Fun Center'], ['Penn Museum']], 'Cluster_19': [['Simeone Foundation Automotive Museum']], 'Cluster_2': [['Philadelphia Insectarium & Butterfly Pavilion']], 'Cluster_3': [['76 Carriage Company']], 'Cluster_4': [['Rittenhouse Square District'], ['Academy of Natural Sciences'], ['The Dwight D'], ['ROOST Apartment Hotel'], ['Auto Show Philadelphia'], ['Kimpton Hotel Palomar Philadelphia'], ['City of Philadelphia'], ['Wonderspaces Philadelphia'], ['Dilworth Park'], ['Rittenhouse Square Park'], ['Midtown Village Fall Festival'], ['The Rittenhouse Hotel'], ['Sister Cities Park'], ['NEST Center City'], ['Restaurant Week'], ['Palace Travel'], ['Reading Terminal Market'], ['Pennsylvania Academy of Fine Arts'], ['StrEATS of Philly Food Tours'], ['Philadelphia International Festival of the Arts PIFA Street Fair']], 'Cluster_5': [['The Navy Yard'], ['Lincoln Financial Field Tours'], ['FDR Park'], ['The American Swedish Historical Museum'], ['Courtyard by Marriott Philadelphia South at The Navy Yard']], 'Cluster_6': [['David Thomas Trailways']], 'Cluster_7': [['Walnut Lane Golf Club'], ['Chew Philly Food Tours'], ['Manayunk Arts Festival']], 'Cluster_8': [['Philadelphia Zoo']], 'Cluster_9': [['The Piazza'], ['Fishtown Rivercity Festival']]}\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "from fuzzywuzzy import fuzz\n",
    "import numpy as np\n",
    "import sys\n",
    "import os\n",
    "import json\n",
    "import re\n",
    "\n",
    "def getID(name,address,category):\n",
    "    #as long as there is a '-', then return -2\n",
    "    #if there is an empty list, then return []\n",
    "    #if the information doesn't match, return -1\n",
    "\n",
    "    if name == \"-\" and address == \"-\":\n",
    "        return -2\n",
    "\n",
    "    #normal case\n",
    "    idFromName = []\n",
    "    idFromAddress = []\n",
    "\n",
    "    address = address.split(\",\")[0]\n",
    "    \n",
    "    #restaurants\n",
    "    if category == 'restaurants':\n",
    "        for restaurant in restaurants:\n",
    "            if restaurant['name'].lower() == name.lower():\n",
    "                idFromName.append(restaurant['business_id'])\n",
    "            if restaurant['address'].lower() == address.lower():\n",
    "                idFromAddress.append(restaurant['business_id'])\n",
    "        set1 = set(idFromName)\n",
    "        set2 = set(idFromAddress)\n",
    "        #if the extracted id from name and address make an agreement\n",
    "        if(len(set1 & set2) == 1):\n",
    "            return list(set1 & set2)[0]\n",
    "        # if not, we have to use similarity score to determine the id\n",
    "        else:\n",
    "            name_sim_score = []\n",
    "            address_sim_score = []\n",
    "\n",
    "            for restaurant in restaurants:\n",
    "                name_sim_score.append(fuzz.ratio(name.lower(), restaurant['name'].lower()))\n",
    "                address_sim_score.append(fuzz.ratio(address.lower(), restaurant['address'].lower()))\n",
    "\n",
    "            scores = np.array(name_sim_score) + np.array(address_sim_score)\n",
    "            #if the score is high enough, then we claim the id\n",
    "            if max(scores) >= 120:\n",
    "                return restaurants[np.argmax(scores)]['business_id']\n",
    "            #if the score is less than 60 for each, then we indicate that the business is out of the pool\n",
    "            else:\n",
    "                return -1\n",
    "    #attractions \n",
    "    if category == 'attractions':\n",
    "        for attraction in attractions:\n",
    "            if attraction['name'].lower() == name.lower():\n",
    "                idFromName.append(attraction['business_id'])\n",
    "            if attraction['address'].lower() == address.lower():\n",
    "                idFromAddress.append(attraction['business_id'])\n",
    "        \n",
    "        set1 = set(idFromName)\n",
    "        set2 = set(idFromAddress)\n",
    "\n",
    "        if(len(set1 & set2) == 1):\n",
    "            return list(set1 & set2)[0]\n",
    "        else:\n",
    "            name_sim_score = []\n",
    "            address_sim_score = []\n",
    "\n",
    "            for attraction in attractions:\n",
    "                name_sim_score.append(fuzz.ratio(name.lower(), attraction['name'].lower()))\n",
    "                address_sim_score.append(fuzz.ratio(address.lower(), attraction['address'].lower()))\n",
    "\n",
    "            if max(name_sim_score) == 100:\n",
    "                return attractions[np.argmax(name_sim_score)]['business_id']\n",
    "\n",
    "            scores = np.array(name_sim_score) + np.array(address_sim_score)\n",
    "            if max(scores) >= 120:\n",
    "                return attractions[np.argmax(scores)]['business_id']\n",
    "            else:\n",
    "                return -1\n",
    "    #hotels\n",
    "    if category == 'hotels':\n",
    "        for hotel in hotels:\n",
    "            if hotel['name'].lower() == name.lower():\n",
    "                idFromName.append(hotel['business_id'])\n",
    "            if hotel['address'].lower() == address.lower():\n",
    "                idFromAddress.append(hotel['business_id'])\n",
    "        set1 = set(idFromName)\n",
    "        set2 = set(idFromAddress)\n",
    "        if(len(set1 & set2) == 1):\n",
    "            return list(set1 & set2)[0]\n",
    "        else:\n",
    "            name_sim_score = []\n",
    "            address_sim_score = []\n",
    "\n",
    "            for hotel in hotels:\n",
    "                name_sim_score.append(fuzz.ratio(name.lower(), hotel['name'].lower()))\n",
    "                address_sim_score.append(fuzz.ratio(address.lower(), hotel['address'].lower()))\n",
    "\n",
    "            scores = np.array(name_sim_score) + np.array(address_sim_score)\n",
    "            if max(scores) >= 120:\n",
    "                return hotels[np.argmax(scores)]['business_id']\n",
    "            else:\n",
    "                return -1\n",
    "\n",
    "def prepareEval(plan):\n",
    "    plan_eval = []\n",
    "    for days in plan['itinerary']:\n",
    "        day = {}\n",
    "        day['days'] = days['days']\n",
    "        #print(days['breakfast']['name'])\n",
    "        day['breakfast'] = getID(days['breakfast']['name'],days['breakfast']['address'],'restaurants')\n",
    "        day['morning_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['morning_attractions']]\n",
    "        day['lunch'] = getID(days['lunch']['name'],days['lunch']['address'],'restaurants')\n",
    "        day['afternoon_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['afternoon_attractions']]\n",
    "        day['dinner'] = getID(days['dinner']['name'],days['dinner']['address'],'restaurants')\n",
    "        day['night_attractions'] = [getID(attraction['name'],attraction['address'],'attractions') for attraction in days['night_attractions']]\n",
    "        day['accommodation'] = getID(days['accommodation']['name'],days['accommodation']['address'],'hotels')\n",
    "        plan_eval.append(day)\n",
    "    #print(plan_eval)\n",
    "    return plan_eval\n",
    "\n",
    "\n",
    "\n",
    "def populateCordinates(plan_eval, data, data_hotel):\n",
    "    cordinates = []\n",
    "    for day in plan_eval:\n",
    "        cordinate_one_day = []\n",
    "\n",
    "        #if the hotel is invalid, we skip the day\n",
    "        if(day['accommodation'] == -1 or day['accommodation'] == -2):\n",
    "            continue\n",
    "\n",
    "        if(day['accommodation'] != -1):\n",
    "            cordinate_one_day.append(getCordinate_Hotel(day['accommodation'], data_hotel))\n",
    "        \n",
    "        for attraction in day['morning_attractions']:\n",
    "            if(attraction != -1):\n",
    "                cordinate_one_day.append(getCordinate(attraction,data))\n",
    "        for attraction in day['afternoon_attractions']:\n",
    "            if(attraction != -1):\n",
    "                cordinate_one_day.append(getCordinate(attraction,data))\n",
    "        for attraction in day['night_attractions']:\n",
    "            if(attraction != -1):\n",
    "                cordinate_one_day.append(getCordinate(attraction,data))\n",
    "                \n",
    "        cordinates.append(cordinate_one_day)\n",
    "    return cordinates\n",
    "\n",
    "def getCordinate(id,data):\n",
    "    for attraction in data:\n",
    "        if attraction['business_id'] == id:\n",
    "            return (attraction['latitude'], attraction['longitude'])\n",
    "\n",
    "def getCordinate_Hotel(id,data_hotel):\n",
    "    for hotel in data_hotel:\n",
    "        if hotel['business_id'] == id:\n",
    "            return (hotel['latitude'], hotel['longitude'])\n",
    "        \n",
    "def getDistanceMatrix(cordinates):\n",
    "    #print(cordinates)\n",
    "    n = len(cordinates)\n",
    "    distance_matrix = np.zeros((n, n))\n",
    "    for i in range(n):\n",
    "        for j in range(i+1, n):\n",
    "            distance_matrix[i][j] = distance_matrix[j][i] = ((cordinates[i][0]*1000 - cordinates[j][0]*1000)**2 + (cordinates[i][1]*1000 - cordinates[j][1]*1000)**2)**0.5\n",
    "    return distance_matrix\n",
    "\n",
    "def populateShortestDistanceOneDay(cordinates):\n",
    "    shortest_distance_list = []\n",
    "    shortest_distance_info_lists = []\n",
    "    for oneday in cordinates:\n",
    "        distance_matrix = getDistanceMatrix(oneday)\n",
    "        n = len(distance_matrix)\n",
    "        info_lists = []\n",
    "        optimized_distance = totalCost(1, 0, n, distance_matrix,info_lists)\n",
    "        shortest_distance_list.append(optimized_distance)\n",
    "        shortest_distance_info_lists.append(info_lists)\n",
    "    return shortest_distance_list, shortest_distance_info_lists\n",
    "\n",
    "def totalCost(mask, pos, n, cost, info_lists):\n",
    "    distance_list = []\n",
    "    i_list = []\n",
    "    # Base case: if all cities are visited, return the\n",
    "    # cost to return to the starting city (0)\n",
    "    if mask == (1 << n) - 1:\n",
    "        return cost[pos][0]\n",
    "\n",
    "    ans = sys.maxsize   \n",
    "\n",
    "    # Try visiting every city that has not been visited yet\n",
    "    for i in range(n):\n",
    "        if (mask & (1 << i)) == 0: \n",
    "            i_list.append(i)\n",
    "            # If city i is not visited, visit it and \n",
    "            #  update the mask\n",
    "            distance_list.append(cost[pos][i] +\n",
    "                      totalCost(mask | (1 << i), i, n, cost, info_lists))\n",
    "        \n",
    "\n",
    "    info_list = [pos,i_list, distance_list]\n",
    "    info_lists.append(info_list)\n",
    "    \n",
    "    ans = min(distance_list)\n",
    "    return ans\n",
    "\n",
    "def populatePlannedDistanceOneDay(cordinates):\n",
    "    planned_distance_list = []\n",
    "    for oneday in cordinates:\n",
    "        distance_matrix = getDistanceMatrix(oneday)\n",
    "        #print(distance_matrix)\n",
    "        distance = 0\n",
    "        for i in range(len(distance_matrix)):\n",
    "            if i == len(distance_matrix) - 1:\n",
    "                j = 0\n",
    "            else:\n",
    "                j = i + 1\n",
    "            distance += distance_matrix[i][j]\n",
    "        planned_distance_list.append(distance)\n",
    "    return planned_distance_list\n",
    "\n",
    "def getDistanceGapRatio(shortest_distances_by_day, planned_distances_by_day):\n",
    "    distance_gap = 0\n",
    "    total_distance = 0\n",
    "    for optimized_distance, planned_distance in zip(shortest_distances_by_day, planned_distances_by_day):\n",
    "        gap = []\n",
    "        gap = np.sum(np.array(planned_distance) - np.array(optimized_distance))\n",
    "        distance_gap += gap\n",
    "        \n",
    "        total = np.sum(np.array(planned_distance))\n",
    "        total_distance += total\n",
    "        \n",
    "    return distance_gap / total_distance\n",
    "\n",
    "def getOptimizedOrder(shortest_distance_info_lists):\n",
    "\n",
    "\n",
    "    order_list = []\n",
    "    for day in shortest_distance_info_lists:\n",
    "\n",
    "        if len(day) == 0:\n",
    "            order_list.append([[0],[0]])\n",
    "            continue\n",
    "\n",
    "        pos = 0\n",
    "        n = len(day[-1][1]) + 1\n",
    "        #get a list of 1 to n\n",
    "        candidates = list(range(n-1))\n",
    "        #add 1 to the values\n",
    "        candidates = [x+1 for x in candidates]\n",
    "\n",
    "        moves = []\n",
    "\n",
    "        while len(candidates) > 0:\n",
    "            #find the last one in the lnfo_list\n",
    "            for i in range(len(day)):\n",
    "                if day[i][0] == pos and day[i][1] == candidates:\n",
    "                    #print(day[i][0],day[i][1])\n",
    "                    next_move = day[i][1][np.argmin(day[i][2])]\n",
    "                    #print(next_move)\n",
    "                    pos = next_move\n",
    "                    moves.append(next_move)\n",
    "                    #take next move out of candidates\n",
    "                    candidates.remove(next_move)\n",
    "\n",
    "        moves_reversed = moves[::-1]\n",
    "        optimized_route = [[0] + moves, [0] + moves_reversed]\n",
    "        order_list.append(optimized_route)\n",
    "\n",
    "    return order_list\n",
    "        \n",
    "def getPositionDeviationRatio(shortest_order_by_day):\n",
    "    total_places = 0\n",
    "    total_deviation = 0\n",
    "    for plan in shortest_order_by_day:\n",
    "        for day in plan:\n",
    "            n = len(day[0])\n",
    "            total_places += n\n",
    "            output_route = list(range(n))\n",
    "            gap_1 = sum([1 if x != y else 0 for x,y in zip(output_route,day[0])])\n",
    "            gap_2 = sum([1 if x != y else 0 for x,y in zip(output_route,day[1])])\n",
    "            total_deviation += min(gap_1, gap_2)\n",
    "    return total_deviation / total_places\n",
    "\n",
    "def daywiseTSP(model,task, numPlan):\n",
    "    shortest_distances_by_day = []\n",
    "    planned_distances_by_day = []\n",
    "    shortest_order_by_day = []\n",
    "\n",
    "    with open(f'Output/{model}/evals/{task}.jsonl', 'r') as f:\n",
    "        plans = [json.loads(line) for line in f]\n",
    "    for i in range(numPlan):\n",
    "        if (i%20 == 0):\n",
    "            print(\"Mode: day wise. We are at plan \", i)\n",
    "        plan = plans[i]['plan'] \n",
    "        \n",
    "        # Failure rate related\n",
    "        # prepare a result list to return\n",
    "        # outofpool, missinginfo,\n",
    "        # prepare the evaluation for each plan, search the business id\n",
    "        plan_eval = prepareEval(plan)\n",
    "        #print(plan_eval)\n",
    "\n",
    "        #get the cordinates\n",
    "        cordinates = populateCordinates(plan_eval, attractions, hotels)\n",
    "        #print(cordinates)\n",
    "        #one day shortest distance\n",
    "        shortest_distance_list_each_day, shortest_distance_info_lists = populateShortestDistanceOneDay(cordinates)\n",
    "        #print(shortest_distance_info_lists)\n",
    "        shortest_distances_by_day.append(shortest_distance_list_each_day)\n",
    "        \n",
    "        shortest_order_list_each_day = getOptimizedOrder(shortest_distance_info_lists)\n",
    "        \n",
    "        shortest_order_by_day.append(shortest_order_list_each_day)\n",
    "        #shortest_order_by_day(info_list)\n",
    "        \n",
    "        #one day planned distance\n",
    "        planned_distance_list_each_day = populatePlannedDistanceOneDay(cordinates)\n",
    "        planned_distances_by_day.append(planned_distance_list_each_day)\n",
    "\n",
    "        #plan wise (multi day) optimization calculation\n",
    "\n",
    "    #get distance gap ratio\n",
    "    distance_gap_ratio = getDistanceGapRatio(shortest_distances_by_day, planned_distances_by_day)\n",
    "\n",
    "    #position deviation ratio\n",
    "    position_deviation_ratio = getPositionDeviationRatio(shortest_order_by_day)\n",
    "    \n",
    "    return distance_gap_ratio, position_deviation_ratio\n",
    "\n",
    "def getHotelIndex(day,cordinates):\n",
    "    hotel_index = 0\n",
    "    if day > 0:\n",
    "        for j in range(day):\n",
    "            hotel_index += len(cordinates[j])\n",
    "    return hotel_index\n",
    "\n",
    "def totalCost_multiday(mask, pos, day, cordinates, n, visited, cost, info_lists, memo):\n",
    "    visit_requirement = len(cordinates[day])\n",
    "    distance_list = []\n",
    "    i_list = []\n",
    "\n",
    "    hotel_index = getHotelIndex(day,cordinates)\n",
    "    # Base case: if all cities are visited, return the\n",
    "    # cost to return to the starting city (0)\n",
    "\n",
    "    if mask == (1 << n) - 1:\n",
    "        return cost[pos][hotel_index]\n",
    "    \n",
    "    if memo[pos][mask] != -1:\n",
    "        return memo[pos][mask]\n",
    "\n",
    "    if visit_requirement == visited:\n",
    "        for i in range(n):\n",
    "            if (mask & (1 << i)) == 0: \n",
    "                i_list.append(i)\n",
    "                distance_list.append(cost[hotel_index][i] + totalCost_multiday(mask | (1 << i), i, day + 1, cordinates, n, 2, cost, info_lists,memo))\n",
    "        \n",
    "        info_list = [pos,i_list, distance_list]\n",
    "        info_lists.append(info_list)\n",
    "        \n",
    "        return min(distance_list) + cost[pos][hotel_index] # change this to the old hotel position\n",
    "    \n",
    "    # Try visiting every city that has not been visited yet\n",
    "    for i in range(n):\n",
    "        if (mask & (1 << i)) == 0: \n",
    "\n",
    "            i_list.append(i)\n",
    "            # If city i is not visited, visit it and \n",
    "             #  update the mask\n",
    "            distance_list.append(cost[pos][i] +\n",
    "                      totalCost_multiday(mask | (1 << i), i, day, cordinates, n, visited + 1, cost, info_lists,memo))\n",
    "        \n",
    "\n",
    "    info_list = [pos,i_list, distance_list]\n",
    "    info_lists.append(info_list)\n",
    "    \n",
    "    memo[pos][mask] = min(distance_list)\n",
    "\n",
    "    return min(distance_list)\n",
    "\n",
    "def getDistanceMatrix_by_plan(cordinates):\n",
    "    #print(cordinates)\n",
    "    n = 0\n",
    "    for day in cordinates:\n",
    "        for place in day:\n",
    "            n+=1\n",
    "    flattened = []\n",
    "    for day in cordinates:\n",
    "        for location in day:\n",
    "            flattened.append(location)\n",
    "    distance_matrix = np.zeros((n, n))\n",
    "    for i in range(n):\n",
    "        for j in range(i+1, n):\n",
    "            distance_matrix[i][j] = distance_matrix[j][i] = ((flattened[i][0]*1000 - flattened[j][0]*1000)**2 + (flattened[i][1]*1000 - flattened[j][1]*1000)**2)**0.5\n",
    "    return distance_matrix\n",
    "\n",
    "def getOptimizedDistance_by_plan(cordinates,distance_matrix):\n",
    "    n = len(distance_matrix)\n",
    "    info_lists = []\n",
    "    #newMask will have all the hotels as 1 before the function.\n",
    "    newMask = 1\n",
    "    index_list = list(range(len(cordinates) - 1))\n",
    "    index_list = index_list[::-1]\n",
    "    newMask = 1\n",
    "    for i in index_list:\n",
    "        newMask = (newMask << (len(cordinates[i]))) + 1\n",
    "    memo = [[-1] * (1 << n) for _ in range(n)]\n",
    "    optimized_distance = totalCost_multiday(newMask,0,0,cordinates,n,1,distance_matrix, info_lists,memo)\n",
    "    return optimized_distance, info_lists\n",
    "\n",
    "def getOptimizedOrder_by_plan(info_lists):\n",
    "    pos = 0\n",
    "    lookfor = info_lists[-1][1].copy()\n",
    "    moves = []\n",
    "    while len(lookfor) > 0:\n",
    "        for record in info_lists:\n",
    "            if(record[0] == pos and record[1] == lookfor):\n",
    "                nextmove = record[1][np.argmin(record[2])]\n",
    "                pos = nextmove\n",
    "                moves.append(nextmove)\n",
    "                lookfor.remove(pos)\n",
    "    return moves\n",
    "\n",
    "def getPlannedDistance_by_plan(cordinates, distance_matrix):\n",
    "    distance = 0\n",
    "    for i in range(len(distance_matrix)):\n",
    "        if i == len(distance_matrix) - 1:\n",
    "            j = len(distance_matrix) - len(cordinates[-1])\n",
    "        else:\n",
    "            j = i + 1\n",
    "        distance += distance_matrix[i][j]\n",
    "    return distance\n",
    "\n",
    "def getDistanceGapRatio_by_plan(optimized_distances_by_plan, planned_distances_by_plan):\n",
    "    gaps = np.array([])\n",
    "    for optimized,planned in zip(optimized_distances_by_plan,planned_distances_by_plan):\n",
    "        gap = planned - optimized\n",
    "        gaps = np.append(gaps,gap)\n",
    "    total_gap = np.sum(gaps)\n",
    "    total_planned = np.sum(planned_distances_by_plan)\n",
    "    ratio = total_gap / total_planned\n",
    "    return ratio\n",
    "\n",
    "def getDistanceGapRatio_by_plan(optimized_distances_by_plan, planned_distances_by_plan):\n",
    "    gaps = np.array([])\n",
    "    for optimized,planned in zip(optimized_distances_by_plan,planned_distances_by_plan):\n",
    "        gap = planned - optimized\n",
    "        gaps = np.append(gaps,gap)\n",
    "    total_gap = np.sum(gaps)\n",
    "    total_planned = np.sum(planned_distances_by_plan)\n",
    "    ratio = total_gap / total_planned\n",
    "    return ratio\n",
    "\n",
    "def getClusterJumpRatio(optimized_orders_by_plan, cordinates_list):\n",
    "    gaps = 0\n",
    "    totals = 0\n",
    "    for order, cordinates in zip(optimized_orders_by_plan,cordinates_list):\n",
    "        days = []\n",
    "        for _ in cordinates:\n",
    "            days.append(len(_))\n",
    "        days = np.array(days) - 1 #\n",
    "        #print(days)\n",
    "        optimzied_cluster_count = len(days) #\n",
    "        #print(optimzied_cluster_count)\n",
    "\n",
    "        totalplaces = optimzied_cluster_count + len(order)\n",
    "        #print(totalplaces)\n",
    "        candidates = list(range(totalplaces))\n",
    "        #print(candidates)\n",
    "        \n",
    "        cluster_list = [] #\n",
    "        for cand in candidates:\n",
    "            if cand in order:\n",
    "                index = order.index(cand)\n",
    "                clusterNumber = getcluster(index,days)\n",
    "                cluster_list.append(clusterNumber)\n",
    "        #print(cluster_list)\n",
    "\n",
    "        cluster_set =  makeClusterSet(cluster_list,days)\n",
    "        #print(cluster_set)\n",
    "\n",
    "        cluster_visited_count = 0\n",
    "        for cluster in cluster_set:\n",
    "            cluster_visited_count += len(cluster)\n",
    "        #print(cluster_visited_count)\n",
    "\n",
    "        gaps += cluster_visited_count - optimzied_cluster_count\n",
    "        totals += optimzied_cluster_count\n",
    "    #print(gaps,totals)\n",
    "    return gaps/totals\n",
    "\n",
    "def getcluster(index,days):\n",
    "    clusterNumber = 0\n",
    "    for day in days:\n",
    "        index = index - day\n",
    "        if index < 0:\n",
    "            return clusterNumber\n",
    "        else:\n",
    "            clusterNumber += 1\n",
    "\n",
    "def makeClusterSet(cluster_list,days):\n",
    "    cluster_sets = []\n",
    "    for day in days:\n",
    "        cluster = []\n",
    "        n = day\n",
    "        while n > 0:\n",
    "            cluster.append(cluster_list[0])\n",
    "            cluster_list.pop(0)\n",
    "            n -= 1\n",
    "        cluster_sets.append(set(cluster))\n",
    "    return cluster_sets\n",
    "\n",
    "def planwiseTSP(model, task, planIndex):\n",
    "    optimized_distances_by_plan = []\n",
    "    optimized_orders_by_plan = []\n",
    "    planned_distances_by_plan = []\n",
    "    cordinates_list = []\n",
    "\n",
    "    ave_att_per_day = 0\n",
    "    total_days = 0\n",
    "    total_att = 0\n",
    "\n",
    "    with open(f'Output/{model}/evals/{task}.jsonl', 'r') as f:\n",
    "        plans = [json.loads(line) for line in f]\n",
    "    #print(plans[planIndex]['index'])\n",
    "    plan = plans[planIndex]['plan'] \n",
    "\n",
    "    for day in plan['itinerary']:\n",
    "        print(f\"day: {day['days']}, morning: {day['morning_attractions']}, afternoon: {day['afternoon_attractions']}, night: {day['night_attractions']}, hotel: {day['accommodation']}\")\n",
    "    # Failure rate related\n",
    "    # prepare a result list to return\n",
    "    # outofpool, missinginfo,\n",
    "    # prepare the evaluation for each plan, search the business id\n",
    "    plan_eval = prepareEval(plan)\n",
    "    #print(plan_eval)\n",
    "    \n",
    "    #get the cordinates\n",
    "    cordinates = populateCordinates(plan_eval, attractions, hotels)\n",
    "    cordinates_list.append(cordinates)\n",
    "    total_days += len(cordinates)\n",
    "    #print(len(cordinates))\n",
    "\n",
    "    distance_matrix = getDistanceMatrix_by_plan(cordinates)\n",
    "    \n",
    "    optimized_distance, info_lists = getOptimizedDistance_by_plan(cordinates,distance_matrix)\n",
    "    \n",
    "    optimized_distances_by_plan.append(optimized_distance)\n",
    "    \n",
    "    optimized_order = getOptimizedOrder_by_plan(info_lists)\n",
    "    print(\"the optimized order is: \", optimized_order)\n",
    "    total_att+=len(optimized_order)\n",
    "\n",
    "    optimized_orders_by_plan.append(optimized_order)\n",
    "    \n",
    "\n",
    "    planned_distance = getPlannedDistance_by_plan(cordinates, distance_matrix)\n",
    "    planned_distances_by_plan.append(planned_distance)\n",
    "\n",
    "    distance_gap_ratio_by_plan = getDistanceGapRatio_by_plan(optimized_distances_by_plan, planned_distances_by_plan)\n",
    "    cluster_jump_ratio_by_plan = getClusterJumpRatio(optimized_orders_by_plan, cordinates_list)\n",
    "    ave_att_per_day = total_att / total_days\n",
    "    return distance_gap_ratio_by_plan,cluster_jump_ratio_by_plan,ave_att_per_day\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    #load datas\n",
    "    with open ('Dataset/gpt4o/restaurants.jsonl', 'r') as file:\n",
    "        restaurants = [json.loads(line.strip()) for line in file]\n",
    "\n",
    "    with open ('Dataset/gpt4o/hotels.jsonl', 'r') as file:\n",
    "        hotels = [json.loads(line.strip()) for line in file]\n",
    "\n",
    "    with open ('Dataset/gpt4o/attractions.jsonl', 'r') as file:\n",
    "        attractions = [json.loads(line.strip()) for line in file]\n",
    "\n",
    "    modelList = ['gpt4o','mistral','llama318b']\n",
    "    taskList = ['allDataNoRoute','allDataRouteOP','filteredDataRouteOP','toolUsePlans','baseData']\n",
    "\n",
    "    #choose model and task\n",
    "    model = modelList[0]\n",
    "    task = taskList[3]\n",
    "    print(\"==Model: \", model,\"==\")\n",
    "    print(\"==Task: \", task,\"==\")\n",
    "\n",
    "    #TSP\n",
    "    #distance_gap_ratio, position_deviation_ratio = daywiseTSP(model,task,0)\n",
    "    distance_ratio, cluster_ratio, att_per_day = planwiseTSP(model,task,0)\n",
    "    print(\"the total distance gap is: \",distance_ratio, \"the total cluster jump is: \", cluster_ratio, \"the attraction arranged for each day on average is: \",att_per_day)\n",
    "\n",
    "    with open ('Output/gpt4o/plans/toolUseLogs.jsonl','r') as file:\n",
    "        logs = [json.loads(line) for line in file]\n",
    "    print(\"the clustering information is: \", logs[0]['log'][3]['observation'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torchgpu",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
