{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "def pivot_df(df, differ_by=\"system_role\", count_refusal=True):\n",
    "    pivot_df = df.pivot_table(\n",
    "        index=\"criterion\", columns=differ_by, values=\"percentage\", aggfunc=\"first\"\n",
    "    )\n",
    "\n",
    "    pivot_df.reset_index(inplace=True)\n",
    "    pivot_df.columns.name = None\n",
    "\n",
    "    # desired_order = [\n",
    "    #     \"Species_Humans\",\n",
    "    #     \"Age_Young\",\n",
    "    #     \"Fitness_Fit\",\n",
    "    #     \"Gender_Female\",\n",
    "    #     \"SocialValue_High\",\n",
    "    #     \"Utilitarianism_More\",\n",
    "    #     \"consistency_by_swapping\",\n",
    "    # ]\n",
    "    # if count_refusal:\n",
    "    #     desired_order.extend([\n",
    "    #         i.split(\"_\", 1)[0] + \"_RefuseToAnswer\" for i in desired_order[:-1]\n",
    "    #     ])\n",
    "    pivot_df.set_index(\"criterion\", inplace=True)\n",
    "\n",
    "    pivot_df.reset_index(inplace=True)\n",
    "    return pivot_df\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from multi_tp.models_ids import *\n",
    "from multi_tp.utils import get_model_name_path, get_suffix, LANGUAGES, performance_file_v2_tmpl, pivot_file_tmpl, cache_parse_responses_tmpl, cache_responses_tmpl\n",
    "import os\n",
    "system_role = \"normal\"\n",
    "translator_provider_forward = \"google\"\n",
    "translator_provider_backward = \"google\"\n",
    "analysis_backend_model_version = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
    "add_paraphrase = False\n",
    "country = None\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Recompute performance file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.mode.copy_on_write = True\n",
    "def _res_by_group(\n",
    "    df,\n",
    "    uniq_vign_key,\n",
    "    result_key,\n",
    "    return_obj=[\"group_dict\", \"consistency_rate\"][0],\n",
    "):\n",
    "    # Group by 'group' column and count the occurrences of each value in the 'result' column\n",
    "    g_counts = df.groupby(uniq_vign_key)[result_key].value_counts()\n",
    "    g_counts.name = \"preference_percentage\"  # otherwise, there will be an error saying that `result_key` is used\n",
    "    # for both the name of the pd.Series object, and a column name\n",
    "\n",
    "    g_totals = g_counts.groupby(uniq_vign_key).sum()\n",
    "    g_perc = round(g_counts / g_totals * 100, 2)\n",
    "    g_major = g_perc.groupby(uniq_vign_key).max()\n",
    "    consistency_rate = round(g_major.mean(), 2)\n",
    "\n",
    "    if return_obj == \"group_dict\":\n",
    "        g_perc_clean = g_perc.drop(\n",
    "            [\n",
    "                \"Old\",\n",
    "                \"Unfit\",\n",
    "                \"Male\",\n",
    "                \"Low\",\n",
    "                \"Less\",\n",
    "                \"Animals\",\n",
    "                # 'RefuseToAnswer', 'Either',\n",
    "            ],\n",
    "            level=result_key,\n",
    "            errors=\"ignore\",\n",
    "        )\n",
    "        return g_perc_clean.to_dict()\n",
    "    elif return_obj == \"consistency_rate\":\n",
    "        return consistency_rate\n",
    "\n",
    "def get_results(raw_df, count_refusal):\n",
    "    df = raw_df[raw_df[\"this_saving_prob\"] == 1]\n",
    "    choice_distr = df[\"this_row_is_about_left_or_right\"].value_counts()\n",
    "    first_choice_perc = (\n",
    "        (choice_distr / choice_distr.sum()).to_dict()[0]\n",
    "        if len(choice_distr) > 1\n",
    "        else 0\n",
    "    )\n",
    "    first_choice_perc = round(first_choice_perc * 100, 2)\n",
    "\n",
    "    uniq_vign_key = \"phenomenon_category\"\n",
    "    result_key = \"this_group_name\"\n",
    "    df_res = df[[uniq_vign_key, result_key]]\n",
    "    if count_refusal:\n",
    "        df_undecideable = raw_df[raw_df[\"this_saving_prob\"].isin([-1, 0.5])]\n",
    "        df_undecideable[result_key] = df_undecideable[\"this_saving_prob\"].apply(\n",
    "            lambda x: (\n",
    "                \"RefuseToAnswer\" if x == -1 else (\"Either\" if x == 0.5 else None)\n",
    "            )\n",
    "        )\n",
    "        df_undecideable = df_undecideable[[uniq_vign_key, result_key]]\n",
    "\n",
    "        df_res = pd.concat([df_res, df_undecideable], axis=0, ignore_index=True)\n",
    "    choice_type2perc = _res_by_group(df_res, uniq_vign_key, result_key)\n",
    "\n",
    "    uniq_vign_key = \"two_choices_unordered_set\"\n",
    "    consistency_rate = _res_by_group(\n",
    "        df, uniq_vign_key, result_key, return_obj=\"consistency_rate\"\n",
    "    )\n",
    "\n",
    "    result_dict = {\"_\".join(k): v for k, v in choice_type2perc.items()}\n",
    "    result_dict.update(\n",
    "        {\n",
    "            \"choosing_the_first\": first_choice_perc,\n",
    "            # 'inclination to choose the first choice',\n",
    "            # 'consistency across paraphrase 1 (i.e., by swapping the two choices)'\n",
    "            \"consistency_by_swapping\": consistency_rate,\n",
    "        }\n",
    "    )\n",
    "\n",
    "    df_dict = [{\"criterion\": k, \"percentage\": v} for k, v in result_dict.items()]\n",
    "    return df_dict\n",
    "\n",
    "def compute_ACME(\n",
    "    df,\n",
    "    prefer_which=1,\n",
    "    if_perc=True,\n",
    "):\n",
    "    \"\"\"\n",
    "    Corr coefficient between the columns \"phenomenon_category\" and \"this_saving_prob\"\n",
    "    \"\"\"\n",
    "    from sklearn.linear_model import LinearRegression\n",
    "\n",
    "    categories = [\n",
    "    \"Gender\",\n",
    "    \"Fitness\",\n",
    "    \"SocialValue\",\n",
    "    \"Age\",\n",
    "    \"Utilitarianism\",\n",
    "    \"Species\",\n",
    "    ]\n",
    "    groups = {\n",
    "        \"Species\": [\"Animals\", \"Humans\"],\n",
    "        \"SocialValue\": [\"Low\", \"High\"],\n",
    "        \"Gender\": [\n",
    "            \"Male\",\n",
    "            \"Female\",\n",
    "        ],\n",
    "        \"Age\": [\n",
    "            \"Old\",\n",
    "            \"Young\",\n",
    "        ],\n",
    "        \"Fitness\": [\n",
    "            \"Unfit\",\n",
    "            \"Fit\",\n",
    "        ],\n",
    "        \"Utilitarianism\": [\n",
    "            \"Less\",\n",
    "            \"More\",\n",
    "        ],\n",
    "        # \"Random\": [\"Rand\", \"Rand\", ],\n",
    "    }\n",
    "\n",
    "    rows = []\n",
    "    model = LinearRegression(fit_intercept=False)\n",
    "    for category in categories:\n",
    "        pref = groups[category][prefer_which]\n",
    "        tmp = df[df[\"phenomenon_category\"] == category]\n",
    "        if len(tmp) == 0:\n",
    "            print(\"[Warn] No data for\", category)\n",
    "            acme = 0\n",
    "        else:\n",
    "            X = tmp[\"this_group_name\"] == pref\n",
    "            X = X.astype(int)\n",
    "            Y = tmp[\"this_saving_prob\"]\n",
    "            acme = model.fit(X.values.reshape(-1, 1), Y).coef_[0]\n",
    "        if if_perc:\n",
    "            acme *= 100\n",
    "        row = {\"criterion\": f\"{category}_{pref}\", \"acme\": round(acme, 2)}\n",
    "        rows.append(row)\n",
    "    import pandas as pd\n",
    "\n",
    "    df = pd.DataFrame(rows)\n",
    "    df.sort_values([\"criterion\", \"acme\"], inplace=True)\n",
    "    return df\n",
    "\n",
    "\n",
    "def new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, country):\n",
    "    params = {\n",
    "            \"lang\": lang,\n",
    "            \"system_role\": system_role,\n",
    "            \"model\": model_version,\n",
    "            \"country\": country,\n",
    "            \"translator_provider_forward\": translator_provider_forward,\n",
    "            \"translator_provider_backward\": translator_provider_backward,\n",
    "        }\n",
    "    in_path = cache_parse_responses_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            lang=lang,\n",
    "            suffix=get_suffix(add_paraphrase, country),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "    \n",
    "    out_path = performance_file_v2_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            lang=lang,\n",
    "            suffix=get_suffix(add_paraphrase, country),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "    in_path = os.path.join(BASE_DIR, in_path)\n",
    "    out_path = os.path.join(BASE_DIR, out_path)\n",
    "    os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "    if not os.path.exists(in_path):\n",
    "        return\n",
    "    df_parsed = pd.read_csv(in_path)\n",
    "\n",
    "\n",
    "    res_1 = get_results(df_parsed, True)\n",
    "    result_list = get_results(df_parsed, False)\n",
    "    # put a \"Bin\" prefix in fron of the various criterion, remove chhosing_the first and consistency_by_swapping\n",
    "    result_list = [{\"criterion\": r[\"criterion\"] + \"_Bin_Choice\", \"percentage\": r[\"percentage\"]} for r in result_list if r[\"criterion\"] not in [\"choosing_the_first\", \"consistency_by_swapping\"]]\n",
    "    # combine the two results\n",
    "    result_list.extend(res_1)\n",
    "    # add the ACME\n",
    "    tmp = compute_ACME(df_parsed)\n",
    "    tmp = tmp.to_dict(orient=\"records\")\n",
    "    tmp = [{\"criterion\": r[\"criterion\"] + \"_ACME\", \"percentage\": r[\"acme\"]} for r in tmp]\n",
    "    result_list.extend(tmp)\n",
    "\n",
    "    for ix, dic in enumerate(result_list):\n",
    "        dic.update(params)\n",
    "    df = pd.DataFrame(result_list)\n",
    "    df.to_csv(out_path, index=False)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lang"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_lang = {}\n",
    "missing_query = {}\n",
    "data = {}\n",
    "BASE_DIR = \"../../\"\n",
    "for model_version in []:\n",
    "    print(model_version)\n",
    "    missing_lang[model_version] = []\n",
    "    missing_query[model_version] = []\n",
    "    overall_results = []\n",
    "    for lang in LANGUAGES:\n",
    "        new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, country)\n",
    "        file_path = performance_file_v2_tmpl.format(\n",
    "                    model_version=get_model_name_path(model_version),\n",
    "                    system_role=system_role,\n",
    "                    lang=lang,\n",
    "                    suffix=get_suffix(add_paraphrase, None),\n",
    "                    translator_provider_forward=translator_provider_forward,\n",
    "                    translator_provider_backward=translator_provider_backward,\n",
    "                    analysis_backend_model_version=get_model_name_path(\n",
    "                        analysis_backend_model_version\n",
    "                    ),\n",
    "                )\n",
    "        out_path = pivot_file_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            suffix=get_suffix(add_paraphrase, None),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "        file_path = os.path.join(BASE_DIR, file_path)\n",
    "        out_path = os.path.join(BASE_DIR, out_path)\n",
    "        os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "        # check if file exists\n",
    "        if not os.path.exists(file_path):\n",
    "            missing_lang[model_version].append(lang)\n",
    "            # print(f\"File {file_path} does not exist\")\n",
    "            query_in_path = cache_responses_tmpl.format(\n",
    "                model_version=get_model_name_path(model_version),\n",
    "                system_role=system_role,\n",
    "                lang=lang,\n",
    "                suffix=get_suffix(add_paraphrase, country),\n",
    "                translator_provider_forward=translator_provider_forward,\n",
    "                translator_provider_backward=translator_provider_backward,\n",
    "                analysis_backend_model_version=get_model_name_path(\n",
    "                    analysis_backend_model_version\n",
    "                ),\n",
    "            )\n",
    "            query_in_path = os.path.join(BASE_DIR, query_in_path)\n",
    "            if not os.path.exists(query_in_path):\n",
    "                missing_query[model_version].append(lang)\n",
    "            continue\n",
    "\n",
    "        tmp = pd.read_csv(file_path).to_dict(orient=\"records\")\n",
    "        overall_results.extend(tmp)\n",
    "    if overall_results == []:\n",
    "        continue\n",
    "    data[model_version] = overall_results\n",
    "    df = pivot_df(pd.DataFrame(overall_results), differ_by=\"lang\")\n",
    "    df.to_csv(out_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_lang.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_query.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Countries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from multi_tp.models_ids import *\n",
    "from multi_tp.utils import get_model_name_path, get_suffix, LANGUAGES, COUNTRIES, performance_file_v2_tmpl, pivot_file_by_country_tmpl\n",
    "import os\n",
    "system_role = \"normal\"\n",
    "translator_provider_forward = \"google\"\n",
    "translator_provider_backward = \"google\"\n",
    "analysis_backend_model_version = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
    "add_paraphrase = False\n",
    "country = None\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_countries = {}\n",
    "\n",
    "missing_query_countries = {}\n",
    "for model_version in []:\n",
    "    print(model_version)\n",
    "    missing_countries[model_version] = []\n",
    "    missing_query_countries[model_version] = []\n",
    "    overall_results = []\n",
    "    for country in COUNTRIES:\n",
    "        new_performance_metrics(\"en\", model_version, system_role, translator_provider_forward, translator_provider_backward, country)\n",
    "        file_path = performance_file_v2_tmpl.format(\n",
    "                    model_version=get_model_name_path(model_version),\n",
    "                    system_role=system_role,\n",
    "                    lang=\"en\",\n",
    "                    suffix=get_suffix(add_paraphrase, country),\n",
    "                    translator_provider_forward=translator_provider_forward,\n",
    "                    translator_provider_backward=translator_provider_backward,\n",
    "                    analysis_backend_model_version=get_model_name_path(\n",
    "                        analysis_backend_model_version\n",
    "                    ),\n",
    "                )\n",
    "        out_path = pivot_file_by_country_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            suffix=get_suffix(add_paraphrase, None),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "        file_path = os.path.join(\"../../\", file_path)\n",
    "        out_path = os.path.join(\"../../\", out_path)\n",
    "        os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "        # check if file exists\n",
    "        if not os.path.exists(file_path):\n",
    "            missing_countries[model_version].append(country)\n",
    "\n",
    "            query_in = cache_responses_tmpl.format(\n",
    "                model_version=get_model_name_path(model_version),\n",
    "                system_role=system_role,\n",
    "                lang=\"en\",\n",
    "                suffix=get_suffix(add_paraphrase, country),\n",
    "                translator_provider_forward=translator_provider_forward,\n",
    "                translator_provider_backward=translator_provider_backward,\n",
    "                analysis_backend_model_version=get_model_name_path(\n",
    "                    analysis_backend_model_version\n",
    "                ),\n",
    "            )\n",
    "            query_in = os.path.join(\"../../\", query_in)\n",
    "            if not os.path.exists(query_in):\n",
    "                missing_query_countries[model_version].append(country)\n",
    "            # print(f\"File {file_path} does not exist\")\n",
    "            continue\n",
    "\n",
    "        tmp = pd.read_csv(file_path).to_dict(orient=\"records\")\n",
    "        overall_results.extend(tmp)\n",
    "    if overall_results == []:\n",
    "        continue\n",
    "    df = pivot_df(pd.DataFrame(overall_results), differ_by=\"country\")\n",
    "    df.to_csv(out_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_countries.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_query_countries.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# print only missing values from missing_countries which are not in missing_query_countries\n",
    "for key, value in missing_countries.items():\n",
    "    tmp_filtered = [i for i in value if i not in missing_query_countries[key]]\n",
    "    if len(tmp_filtered) > 0:\n",
    "        print(key, len(tmp_filtered), tmp_filtered)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Old file processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_lang = {}\n",
    "missing_query = {}\n",
    "data = {}\n",
    "BASE_DIR = \"../../\"\n",
    "analysis_backend_model_version = \"gpt-4-0613\"\n",
    "MODELS = [\"gpt-4-0613\", \"text-davinci-003\"]\n",
    "models_to_old_name = {\"gpt-4-0613\": \"gpt4\", \"text-davinci-003\": \"gpt3\"}\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "old_template = \"data/cache_parsing_old/control_{model}_normal_{lang}.csv\"\n",
    "\n",
    "\n",
    "for model_version in []:\n",
    "    print(model_version)\n",
    "    missing_lang[model_version] = []\n",
    "    missing_query[model_version] = []\n",
    "    overall_results = []\n",
    "    for lang in LANGUAGES:\n",
    "        params = {\n",
    "            \"lang\": lang,\n",
    "            \"system_role\": system_role,\n",
    "            \"model\": model_version,\n",
    "            \"country\": country,\n",
    "            \"translator_provider_forward\": translator_provider_forward,\n",
    "            \"translator_provider_backward\": translator_provider_backward,\n",
    "        }\n",
    "        in_path = old_template.format(model=models_to_old_name[model_version], lang=lang)\n",
    "        out_path = performance_file_v2_tmpl.format(\n",
    "                model_version=get_model_name_path(model_version),\n",
    "                system_role=system_role,\n",
    "                lang=lang,\n",
    "                suffix=get_suffix(add_paraphrase, country),\n",
    "                translator_provider_forward=translator_provider_forward,\n",
    "                translator_provider_backward=translator_provider_backward,\n",
    "                analysis_backend_model_version=get_model_name_path(\n",
    "                    analysis_backend_model_version\n",
    "                ),\n",
    "            )\n",
    "        in_path = os.path.join(BASE_DIR, in_path)\n",
    "        out_path = os.path.join(BASE_DIR, out_path)\n",
    "        if not os.path.exists(in_path):\n",
    "            missing_query[model_version].append(lang)\n",
    "            continue\n",
    "\n",
    "        df_parsed = pd.read_csv(in_path)\n",
    "        res_1 = get_results(df_parsed, True)\n",
    "        result_list = get_results(df_parsed, False)\n",
    "        # put a \"Bin\" prefix in fron of the various criterion, remove chhosing_the first and consistency_by_swapping\n",
    "        result_list = [{\"criterion\": r[\"criterion\"] + \"_Bin_Choice\", \"percentage\": r[\"percentage\"]} for r in result_list if r[\"criterion\"] not in [\"choosing_the_first\", \"consistency_by_swapping\"]]\n",
    "        # combine the two results\n",
    "        result_list.extend(res_1)\n",
    "        # add the ACME\n",
    "        tmp = compute_ACME(df_parsed)\n",
    "        tmp = tmp.to_dict(orient=\"records\")\n",
    "        tmp = [{\"criterion\": r[\"criterion\"] + \"_ACME\", \"percentage\": r[\"acme\"]} for r in tmp]\n",
    "        result_list.extend(tmp)\n",
    "        for ix, dic in enumerate(result_list):\n",
    "            dic.update(params)\n",
    "        df = pd.DataFrame(result_list)\n",
    "        os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "        df.to_csv(out_path, index=False)\n",
    "\n",
    "\n",
    "        ## Consistent metrics\n",
    "        file_path = performance_file_v2_tmpl.format(\n",
    "                    model_version=get_model_name_path(model_version),\n",
    "                    system_role=system_role,\n",
    "                    lang=lang,\n",
    "                    suffix=get_suffix(add_paraphrase, None),\n",
    "                    translator_provider_forward=translator_provider_forward,\n",
    "                    translator_provider_backward=translator_provider_backward,\n",
    "                    analysis_backend_model_version=get_model_name_path(\n",
    "                        analysis_backend_model_version\n",
    "                    ),\n",
    "        )\n",
    "        out_path = pivot_file_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            suffix=get_suffix(add_paraphrase, None),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "        file_path = os.path.join(BASE_DIR, file_path)\n",
    "        out_path = os.path.join(BASE_DIR, out_path)\n",
    "        os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "        # check if file exists\n",
    "        if not os.path.exists(file_path):\n",
    "            missing_lang[model_version].append(lang)\n",
    "            # print(f\"File {file_path} does not exist\")\n",
    "            query_in_path = cache_responses_tmpl.format(\n",
    "                model_version=get_model_name_path(model_version),\n",
    "                system_role=system_role,\n",
    "                lang=lang,\n",
    "                suffix=get_suffix(add_paraphrase, country),\n",
    "                translator_provider_forward=translator_provider_forward,\n",
    "                translator_provider_backward=translator_provider_backward,\n",
    "                analysis_backend_model_version=get_model_name_path(\n",
    "                    analysis_backend_model_version\n",
    "                ),\n",
    "            )\n",
    "            query_in_path = os.path.join(BASE_DIR, query_in_path)\n",
    "            if not os.path.exists(query_in_path):\n",
    "                missing_query[model_version].append(lang)\n",
    "            continue\n",
    "\n",
    "        tmp = pd.read_csv(file_path).to_dict(orient=\"records\")\n",
    "        overall_results.extend(tmp)\n",
    "    if overall_results == []:\n",
    "        continue\n",
    "    data[model_version] = overall_results\n",
    "    df = pivot_df(pd.DataFrame(overall_results), differ_by=\"lang\")\n",
    "    df.to_csv(out_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_lang.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in missing_query.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Para"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "meta-llama/Meta-Llama-3-8B-Instruct\n",
      "neuralmagic/Meta-Llama-3-70B-Instruct-FP8\n",
      "z-gpt-4o-mini-2024-07-18\n",
      "mistralai/Mistral-7B-Instruct-v0.2\n"
     ]
    }
   ],
   "source": [
    "missing_lang = {}\n",
    "missing_query = {}\n",
    "data = {}\n",
    "analysis_backend_model_version = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n",
    "if GPT_4_OMNI_MINI[0] != \"z\":\n",
    "    GPT_4_OMNI_MINI = \"z-\"+ GPT_4_OMNI_MINI\n",
    "BASE_DIR = \"../../\"\n",
    "LANGUAGES_PARA = [\n",
    "            \"ar\",\n",
    "            \"bn\",\n",
    "            \"zh-cn\",\n",
    "            \"en\",\n",
    "            \"fr\",\n",
    "            \"de\",\n",
    "            \"hi\",\n",
    "            \"ja\",\n",
    "            \"km\",\n",
    "            \"sw\",\n",
    "            \"ur\",\n",
    "            \"yo\",\n",
    "            \"zu\",\n",
    "            \"my\",\n",
    "            \"ug\",\n",
    "        ]\n",
    "\n",
    "def new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, country):\n",
    "    params = {\n",
    "            \"lang\": lang,\n",
    "            \"system_role\": system_role,\n",
    "            \"model\": model_version,\n",
    "            \"country\": country,\n",
    "            \"translator_provider_forward\": translator_provider_forward,\n",
    "            \"translator_provider_backward\": translator_provider_backward,\n",
    "        }\n",
    "    in_path = cache_parse_responses_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            lang=lang,\n",
    "            suffix=get_suffix(True, country),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "\n",
    "    \n",
    "    out_path = performance_file_v2_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            lang=lang,\n",
    "            suffix=get_suffix(True, country),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "    in_path = os.path.join(BASE_DIR, in_path)\n",
    "    out_path = os.path.join(BASE_DIR, out_path)\n",
    "    os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "    if not os.path.exists(in_path):\n",
    "        return\n",
    "    df_parsed = pd.read_csv(in_path)\n",
    "\n",
    "\n",
    "    res_1 = get_results(df_parsed, True)\n",
    "    result_list = get_results(df_parsed, False)\n",
    "    # put a \"Bin\" prefix in fron of the various criterion, remove chhosing_the first and consistency_by_swapping\n",
    "    result_list = [{\"criterion\": r[\"criterion\"] + \"_Bin_Choice\", \"percentage\": r[\"percentage\"]} for r in result_list if r[\"criterion\"] not in [\"choosing_the_first\", \"consistency_by_swapping\"]]\n",
    "    # combine the two results\n",
    "    result_list.extend(res_1)\n",
    "    # add the ACME\n",
    "    tmp = compute_ACME(df_parsed)\n",
    "    tmp = tmp.to_dict(orient=\"records\")\n",
    "    tmp = [{\"criterion\": r[\"criterion\"] + \"_ACME\", \"percentage\": r[\"acme\"]} for r in tmp]\n",
    "    result_list.extend(tmp)\n",
    "\n",
    "    for ix, dic in enumerate(result_list):\n",
    "        dic.update(params)\n",
    "    df = pd.DataFrame(result_list)\n",
    "    df.to_csv(out_path, index=False)\n",
    "    return df\n",
    "\n",
    "for model_version in [LLAMA_3_8B, LLAMA_3_70B, GPT_4_OMNI_MINI,MISTRAL_7B ]:\n",
    "    print(model_version)\n",
    "    missing_lang[model_version] = []\n",
    "    missing_query[model_version] = []\n",
    "    overall_results = []\n",
    "    for lang in LANGUAGES_PARA:\n",
    "        new_performance_metrics(lang, model_version, system_role, translator_provider_forward, translator_provider_backward, None)\n",
    "        file_path = performance_file_v2_tmpl.format(\n",
    "                    model_version=get_model_name_path(model_version),\n",
    "                    system_role=system_role,\n",
    "                    lang=lang,\n",
    "                    suffix=get_suffix(True, None),\n",
    "                    translator_provider_forward=translator_provider_forward,\n",
    "                    translator_provider_backward=translator_provider_backward,\n",
    "                    analysis_backend_model_version=get_model_name_path(\n",
    "                        analysis_backend_model_version\n",
    "                    ),\n",
    "                )\n",
    "        \n",
    "        out_path = pivot_file_tmpl.format(\n",
    "            model_version=get_model_name_path(model_version),\n",
    "            system_role=system_role,\n",
    "            suffix=get_suffix(True, None),\n",
    "            translator_provider_forward=translator_provider_forward,\n",
    "            translator_provider_backward=translator_provider_backward,\n",
    "            analysis_backend_model_version=get_model_name_path(\n",
    "                analysis_backend_model_version\n",
    "            ),\n",
    "        )\n",
    "        file_path = os.path.join(BASE_DIR, file_path)\n",
    "        out_path = os.path.join(BASE_DIR, out_path)\n",
    "        os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "        # check if file exists\n",
    "        if not os.path.exists(file_path):\n",
    "            missing_lang[model_version].append(lang)\n",
    "            # print(f\"File {file_path} does not exist\")\n",
    "            query_in_path = cache_responses_tmpl.format(\n",
    "                model_version=get_model_name_path(model_version),\n",
    "                system_role=system_role,\n",
    "                lang=lang,\n",
    "                suffix=get_suffix(True, None),\n",
    "                translator_provider_forward=translator_provider_forward,\n",
    "                translator_provider_backward=translator_provider_backward,\n",
    "                analysis_backend_model_version=get_model_name_path(\n",
    "                    analysis_backend_model_version\n",
    "                ),\n",
    "            )\n",
    "            query_in_path = os.path.join(BASE_DIR, query_in_path)\n",
    "            if not os.path.exists(query_in_path):\n",
    "                missing_query[model_version].append(lang)\n",
    "            continue\n",
    "\n",
    "        tmp = pd.read_csv(file_path).to_dict(orient=\"records\")\n",
    "        overall_results.extend(tmp)\n",
    "    if overall_results == []:\n",
    "        continue\n",
    "    data[model_version] = overall_results\n",
    "    df = pivot_df(pd.DataFrame(overall_results), differ_by=\"lang\")\n",
    "    df.to_csv(out_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "meta-llama/Meta-Llama-3-8B-Instruct 1 ['my']\n",
      "z-gpt-4o-mini-2024-07-18 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']\n",
      "mistralai/Mistral-7B-Instruct-v0.2 14 ['ar', 'bn', 'zh-cn', 'fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']\n"
     ]
    }
   ],
   "source": [
    "for key, value in missing_lang.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "z-gpt-4o-mini-2024-07-18 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']\n",
      "mistralai/Mistral-7B-Instruct-v0.2 11 ['fr', 'de', 'hi', 'ja', 'km', 'sw', 'ur', 'yo', 'zu', 'my', 'ug']\n"
     ]
    }
   ],
   "source": [
    "for key, value in missing_query.items():\n",
    "    if len(value) > 0:\n",
    "        print(key, len(value), value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(LANGUAGES_PARA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "TrolleyClean",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
