{
  "results": {
    "acp_app_gen": {
      "alias": "acp_app_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_areach_gen": {
      "alias": "acp_areach_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_just_gen": {
      "alias": "acp_just_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_land_gen": {
      "alias": "acp_land_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_nexta_gen": {
      "alias": "acp_nexta_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_prog_gen": {
      "alias": "acp_prog_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_reach_gen": {
      "alias": "acp_reach_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_val_gen": {
      "alias": "acp_val_gen",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    }
  },
  "group_subtasks": {
    "acp_reach_gen": [],
    "acp_areach_gen": [],
    "acp_just_gen": [],
    "acp_nexta_gen": [],
    "acp_prog_gen": [],
    "acp_land_gen": [],
    "acp_val_gen": [],
    "acp_app_gen": []
  },
  "configs": {
    "acp_app_gen": {
      "task": "acp_app_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/applicable_actions/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the actions. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_areach_gen": {
      "task": "acp_areach_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/action_reachability/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide one action or None. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_just_gen": {
      "task": "acp_just_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/justification/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_land_gen": {
      "task": "acp_land_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/landmarks/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Provide only the ground proposition or None. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_nexta_gen": {
      "task": "acp_nexta_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/next_action/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the action. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_prog_gen": {
      "task": "acp_prog_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/progression/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Provide only the two lists with the ground propositions. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_reach_gen": {
      "task": "acp_reach_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/reachability/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Provide one proposition or None. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_val_gen": {
      "task": "acp_val_gen",
      "tag": [
        "acp_gen",
        "acp"
      ],
      "dataset_path": "json",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/validation/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "**Question**: {{context}} {{inputs}} Provide only the index of the action. **Final Answer**:",
      "doc_to_target": "",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "num_fewshot": 0,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1536,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    }
  },
  "versions": {
    "acp_app_gen": 1.0,
    "acp_areach_gen": 1.0,
    "acp_just_gen": 1.0,
    "acp_land_gen": 1.0,
    "acp_nexta_gen": 1.0,
    "acp_prog_gen": 1.0,
    "acp_reach_gen": 1.0,
    "acp_val_gen": 1.0
  },
  "n-shot": {
    "acp_app_gen": 0,
    "acp_areach_gen": 0,
    "acp_just_gen": 0,
    "acp_land_gen": 0,
    "acp_nexta_gen": 0,
    "acp_prog_gen": 0,
    "acp_reach_gen": 0,
    "acp_val_gen": 0
  },
  "higher_is_better": {
    "acp_app_gen": {
      "bypass": true
    },
    "acp_areach_gen": {
      "bypass": true
    },
    "acp_just_gen": {
      "bypass": true
    },
    "acp_land_gen": {
      "bypass": true
    },
    "acp_nexta_gen": {
      "bypass": true
    },
    "acp_prog_gen": {
      "bypass": true
    },
    "acp_reach_gen": {
      "bypass": true
    },
    "acp_val_gen": {
      "bypass": true
    }
  },
  "n-samples": {
    "acp_app_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_val_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_land_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_prog_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_nexta_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_just_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_areach_gen": {
      "original": 130,
      "effective": 130
    },
    "acp_reach_gen": {
      "original": 130,
      "effective": 130
    }
  },
  "config": {
    "model": "rits-completions",
    "model_args": "base_url=https://anonymized/deepseek-coder-33b-instruct/v1/completions,model=deepseek-ai/deepseek-coder-33b-instruct,tokenizer_backend=None,tokenized_requests=false",
    "batch_size": 1,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": null,
    "random_seed": 0,
    "numpy_seed": 1234,
    "torch_seed": 1234,
    "fewshot_seed": 1234
  },
  "git_hash": "3d3e2429",
  "date": 1734051136.731455,
  "pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Red Hat Enterprise Linux release 8.9 (Ootpa) (x86_64)\nGCC version: (GCC) 8.5.0 20210514 (Red Hat 8.5.0-20)\nClang version: 16.0.6 (Red Hat 16.0.6-2.module+el8.9.0+19521+190d7aba)\nCMake version: version 3.31.0\nLibc version: glibc-2.28\n\nPython version: 3.11.5 (main, Sep 22 2023, 15:34:29) [GCC 8.5.0 20210514 (Red Hat 8.5.0-20)] (64-bit runtime)\nPython platform: Linux-4.18.0-513.24.1.el8_9.x86_64-x86_64-with-glibc2.28\nIs CUDA available: False\nCUDA runtime version: No CUDA\nCUDA_MODULE_LOADING set to: N/A\nGPU models and configuration: No CUDA\nNvidia driver version: No CUDA\ncuDNN version: No CUDA\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture:        x86_64\nCPU op-mode(s):      32-bit, 64-bit\nByte Order:          Little Endian\nCPU(s):              16\nOn-line CPU(s) list: 0-15\nThread(s) per core:  1\nCore(s) per socket:  8\nSocket(s):           2\nNUMA node(s):        2\nVendor ID:           GenuineIntel\nCPU family:          6\nModel:               62\nModel name:          Intel(R) Xeon(R) CPU E5-2667 v2 @ 3.30GHz\nStepping:            4\nCPU MHz:             3291.905\nCPU max MHz:         4000.0000\nCPU min MHz:         1200.0000\nBogoMIPS:            6583.80\nVirtualization:      VT-x\nL1d cache:           32K\nL1i cache:           32K\nL2 cache:            256K\nL3 cache:            25600K\nNUMA node0 CPU(s):   0-7\nNUMA node1 CPU(s):   8-15\nFlags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm cpuid_fault epb pti intel_ppin ssbd ibrs ibpb stibp tpr_shadow vnmi flexpriority ept vpid fsgsbase smep erms xsaveopt dtherm ida arat pln pts md_clear flush_l1d\n\nVersions of relevant libraries:\n[pip3] numpy==2.0.2\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] blas                      1.0                         mkl  \n[conda] mkl                       2019.3                      199  \n[conda] mkl-service               1.1.2            py37he904b0f_5  \n[conda] mkl_fft                   1.0.10           py37ha843d7b_0  \n[conda] mkl_random                1.0.2            py37hd81dba3_0  \n[conda] numpy                     1.16.2           py37h7e9f1db_0  \n[conda] numpy-base                1.16.2           py37hde5b4d6_0  \n[conda] numpydoc                  0.8.0                    py37_0  ",
  "transformers_version": "4.46.2",
  "upper_git_hash": null,
  "task_hashes": {
    "acp_app_gen": "75cbc0e97a7f0e14bb4605235c7843a68d61931fcb004c4dbe3b6fffc9e33e87",
    "acp_val_gen": "349e8deff3db93817fe7702baad6fcbf65090057054185bd3851159b3846a4e7",
    "acp_land_gen": "4f7907bbe477ac5de37710697afade7c3bb73f593fb9cb5a1dbc0929e2472030",
    "acp_prog_gen": "e0c73dca6f2b34db8836fc6aa96f68454cb27d90d87f13b44ce81276f9611f26",
    "acp_nexta_gen": "6d59f37b941a77ed798829e025f8298a89d23fd157f23eb9beb443406ed0f0e7",
    "acp_just_gen": "e3815fd05ecd9b243002c219776f1abb8c7bc0b5d4ad4f5616596d205a082812",
    "acp_areach_gen": "fd62570a8041ee7bf50e64e9e5f533742202531c5eb5254c3c90e8cf3fc47329",
    "acp_reach_gen": "b68b6fddd8d49d0870ad13e5c7fe367bff4ba9900424a4d4b2bb31fb72249fa3"
  },
  "model_source": "rits-completions",
  "model_name": "deepseek-ai/deepseek-coder-33b-instruct",
  "model_name_sanitized": "deepseek-ai__deepseek-coder-33b-instruct",
  "system_instruction": null,
  "system_instruction_sha": null,
  "fewshot_as_multiturn": false,
  "chat_template": "",
  "chat_template_sha": null,
  "start_time": 20566066.711933568,
  "end_time": 20580802.59727912,
  "total_evaluation_time_seconds": "14735.885345552117"
}