{
  "results": {
    "acp_app_gen_2shot_chat": {
      "alias": "acp_app_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_areach_gen_2shot_chat": {
      "alias": "acp_areach_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_just_gen_2shot_chat": {
      "alias": "acp_just_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_land_gen_2shot_chat": {
      "alias": "acp_land_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_nexta_gen_2shot_chat": {
      "alias": "acp_nexta_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_prog_gen_2shot_chat": {
      "alias": "acp_prog_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_reach_gen_2shot_chat": {
      "alias": "acp_reach_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    },
    "acp_val_gen_2shot_chat": {
      "alias": "acp_val_gen_2shot_chat",
      "bypass,none": 999,
      "bypass_stderr,none": "N/A"
    }
  },
  "group_subtasks": {
    "acp_reach_gen_2shot_chat": [],
    "acp_areach_gen_2shot_chat": [],
    "acp_just_gen_2shot_chat": [],
    "acp_nexta_gen_2shot_chat": [],
    "acp_prog_gen_2shot_chat": [],
    "acp_land_gen_2shot_chat": [],
    "acp_val_gen_2shot_chat": [],
    "acp_app_gen_2shot_chat": []
  },
  "configs": {
    "acp_app_gen_2shot_chat": {
      "task": "acp_app_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_app_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/applicable_actions/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the actions.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0.  Currently, the robot is at position f3-2f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-0 is at position f2-2f. Key key0-1 is at position f1-3f. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with key ?key of shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey at the current position place ?curpos and loose the key ?oldkey being held, and (putdown ?curpos ?key) - put down key ?key at current position place ?curpos.",
            "inputs": "Generate the list of all ground actions that are applicable in this state.",
            "answer": "[(move f3-2f f3-1f), (move f3-2f f2-2f), (move f3-2f f3-3f)]"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-0 and l0-1 are in c0; l1-0 and l1-1 are in c1. Currently, t1, p2, and p3 are at l1-0, a0 is at l0-0, t0 is at l0-1, p1 and p0 are in t1. The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load object ?obj into airplane ?airplane at location ?loc, (unload-truck ?obj ?truck ?loc) - unload object ?obj from truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - remove the object ?obj from the airplane ?airplane and place it on the location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from its current location ?loc-from in city ?city to the new location ?loc-to within the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly airplane ?airplane from airport ?loc-from to airport ?loc-to.",
            "inputs": "Generate the list of all ground actions that are applicable in this state.",
            "answer": "[(drive-truck t1 l1-0 l1-0 c1), (drive-truck t0 l0-1 l0-0 c0), (load-truck p2 t1 l1-0), (unload-truck p0 t1 l1-0), (drive-truck t0 l0-1 l0-1 c0), (fly-airplane a0 l0-0 l1-0), (fly-airplane a0 l0-0 l0-0), (unload-truck p1 t1 l1-0), (drive-truck t1 l1-0 l1-1 c1), (load-truck p3 t1 l1-0)]"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_areach_gen_2shot_chat": {
      "task": "acp_areach_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_areach_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/action_reachability/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide one action or None.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 0 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0.  Currently, the robot is at position f2-2f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-0 is at position f1-2f. Key key0-1 is at position f1-3f. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with key ?key of shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - move from place ?curpos to place ?nextpos, (pickup ?curpos ?key) - retrieve the key ?key from its current position ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey from the current position ?curpos and loose the key ?oldkey which is being held, and (putdown ?curpos ?key) - put the key ?key at the current position place ?curpos.",
            "inputs": "What action can never become applicable, in any state reachable from the current state?",
            "answer": "(unlock f0-3f f0-4f key0-0 shape0)"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, t1 is at l1-1, a0 is at l1-0, p0 is at l0-0, t0 is at l0-1, p2 is in a0, p1 is in t1, p3 is in t0. The available actions are: (load-truck ?obj ?truck ?loc) - load the object ?obj from location ?loc into the truck ?truck, (load-airplane ?obj ?airplane ?loc) - load object ?obj into airplane ?airplane at location ?loc, (unload-truck ?obj ?truck ?loc) - offload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - offload the object ?obj from the airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from location ?loc-from in city ?city to location ?loc-to in the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - operate the airplane ?airplane from airport ?loc-from to airport ?loc-to.",
            "inputs": "What action can never become applicable, in any state reachable from the current state?",
            "answer": "(drive-truck t0 l1-1 l0-0 c0)"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_just_gen_2shot_chat": {
      "task": "acp_just_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_just_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/justification/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}}",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0.  Currently, the robot is at position f3-3f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock, f2-0f has shape0 shaped lock. Key key0-0 is at position f2-2f. Key key0-1 is at position f1-3f. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock place ?lockpos with key ?key of shape ?shape from current position place ?curpos, (move ?curpos ?nextpos) - move from ?curpos to ?nextpos, (pickup ?curpos ?key) - retrieve the key ?key from its current position ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up key ?newkey at current position place ?curpos and loose key ?oldkey being held, and (putdown ?curpos ?key) - put down the key ?key at the current position ?curpos. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location.",
            "inputs": "Simplify the plan [(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (move f2-2f f2-1f), (putdown f2-1f key0-0), (pickup f2-1f key0-0), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0)] by removing either a single action or a pair of consecutive actions, while still maintaining a valid plan. Provide the resulting simplified plan.",
            "answer": "[(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (move f2-2f f2-1f), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0)]"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-0 and l0-1 are in c0. Currently, p2, p1, and p3 are at l1-0, p0 and t1 are at l1-1, t0 is at l0-1, a0 is at l0-0. The available actions are: (load-truck ?obj ?truck ?loc) - load the object ?obj from location ?loc into the truck ?truck, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - remove the object ?obj from the airplane ?airplane and place it on the location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - drive truck ?truck from location ?loc-from in city ?city to location ?loc-to in the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly the airplane ?airplane from location ?loc-from to location ?loc-to. The goal is to reach a state where the following facts hold: p3 is at l0-1, p2 is at l1-0, p0 is at l0-0, and p1 is at l1-0.",
            "inputs": "Simplify the plan [(fly-airplane a0 l0-0 l1-0), (fly-airplane a0 l1-0 l0-0), (load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (load-airplane p0 a0 l1-0), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)] by removing either a single action or a pair of consecutive actions, while still maintaining a valid plan. Provide the resulting simplified plan.",
            "answer": "[(load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (load-airplane p0 a0 l1-0), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)]"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_land_gen_2shot_chat": {
      "task": "acp_land_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_land_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/landmarks/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Provide only the ground proposition or None.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0.  Currently, the robot is at position f3-0f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f3-0f. Key key0-1 is at position f1-3f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty.",
            "inputs": "Generate a non-trivial fact landmark, one that does not hold in the initial state or goal.",
            "answer": "(holding key0-0)"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-0 and l1-1 are in c1; l0-1 and l0-0 are in c0. Currently, a0 and p2 are at l1-0, t0 is at l0-0, t1 is at l1-1, p3 and p1 are in a0, p0 is in t1. The goal is to reach a state where the following facts hold: p0 is at l0-0, p2 is at l1-0, p1 is at l1-0, and p3 is at l0-1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2.",
            "inputs": "Generate a non-trivial fact landmark, one that does not hold in the initial state or goal.",
            "answer": "(in p3 t0)"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_nexta_gen_2shot_chat": {
      "task": "acp_nexta_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_nexta_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/next_action/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Each action starts with an opening parenthesis and ends with closing parenthesis. Provide only the action.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty. There are 2 keys in 1 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0. Currently, the robot is at position f4-0f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f3-0f. Key key0-1 is at position f1-3f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock place ?lockpos with key ?key of shape ?shape from current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey at the current position place ?curpos and loose the key ?oldkey being held, and (putdown ?curpos ?key) - put down the key ?key at the current position ?curpos.",
            "inputs": "What is the next action that takes us towards the goal?",
            "answer": "(move f4-0f f3-0f)"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-1 and l1-0 are in c1. Currently, t0 is at l0-1, a0 is at l0-0, t1 and p1 are at l1-0, p2, p0, and p3 are in t1. The goal is to reach a state where the following facts hold: p3 is at l0-1, p2 is at l1-0, p1 is at l1-0, and p0 is at l0-0. The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - unload object ?obj from airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - drive the truck ?truck in city ?city from location ?loc-from to location ?loc-to, and (fly-airplane ?airplane ?loc-from ?loc-to) - operate the airplane ?airplane from airport ?loc-from to airport ?loc-to.",
            "inputs": "What is the next action that takes us towards the goal?",
            "answer": "(drive-truck t0 l0-1 l0-0 c0)"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_prog_gen_2shot_chat": {
      "task": "acp_prog_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_prog_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/progression/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Provide only the two lists with the ground propositions.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. \nThe grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  \nThere are 2 keys in 0 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0.  \nCurrently, the robot is at position f0-1f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-1 is at position f1-3f. Key key0-0 is at position f0-1f. The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty.",
            "inputs": "Break down the outcomes of performing the action \"retrieve the key key0-0 from its current position f0-1f\" into two lists, positive effects and negative effects. Positive effects are the propositions that are false in the current state but will become true after performing the action. Negative effects are the propositions that are true in the current state and will become false after performing the action.",
            "answer": "[(holding key0-0)] [(arm-empty), (at key0-0 f0-1f)]"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l1-1 and l1-0 are in c1; l0-1 and l0-0 are in c0. Currently, p2, t1, p1, p3, a0, and p0 are at l1-0, t0 is at l0-1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2.",
            "inputs": "Break down the outcomes of performing the action \"load object p3 into truck t1 at location l1-0\" into two lists, positive effects and negative effects. Positive effects are the propositions that are false in the current state but will become true after performing the action. Negative effects are the propositions that are true in the current state and will become false after performing the action.",
            "answer": "[(in p3 t1)] [(at p3 l1-0)]"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_reach_gen_2shot_chat": {
      "task": "acp_reach_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_reach_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/reachability/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Provide one proposition or None.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 0 different shapes: Key key0-1 is of shape shape0, Key key0-0 is of shape shape0.  Currently, the robot is at position f1-2f and its arm is empty. All the positions are open except the following: f4-2f has shape0 shaped lock. Key key0-0 is at position f1-0f. Key key0-1 is at position f1-3f. The available propositions are: (at ?r ?x) - Key ?r is at ?x location, (at-robot ?x) - Robot is at ?x location, (locked ?x) - Location ?x is locked, (holding ?k) - Robot is holding ?k, (open ?x) - Location ?x is open, and (arm-empty) - Robot's arm is empty.",
            "inputs": "What proposition can never hold in any potentially reachable state?",
            "answer": "(locked f3-1f)"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-0 and l0-1 are in c0; l1-0 and l1-1 are in c1. Currently, a0, p2, and t1 are at l1-0, p3 and p0 are at l0-0, t0 is at l0-1, p1 is in t1. The available propositions are: (at ?obj ?loc) - ?obj is at ?loc and (in ?obj1 ?obj2) - ?obj1 is in ?obj2.",
            "inputs": "What proposition can never hold in any potentially reachable state?",
            "answer": "(at t0 l1-1)"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    },
    "acp_val_gen_2shot_chat": {
      "task": "acp_val_gen_2shot_chat",
      "tag": [
        "acp_gen_2shot_chat"
      ],
      "dataset_path": "json",
      "dataset_name": "acp_val_gen_2shot",
      "dataset_kwargs": {
        "data_files": {
          "test": "./anonymized/validation/test.gen.json.gz"
        }
      },
      "test_split": "test",
      "doc_to_text": "{{context}} {{inputs}} Provide only the index of the action.",
      "doc_to_target": "{{answer}}",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": [
          {
            "context": "A robot is in a grid and can only move to places that are connected to its current position. The grid size is 5x5, and the locations are of the form fi-jf (e.g., f3-2f or f0-1f). The grid cells are connected to their neighbors (e.g., f1-2f is connected to the four neighbors f0-2f, f2-2f, f1-1f, and f1-3f). Some positions on the grid are locked and can be opened with a key of a matching shape. The robot has an arm that can pick up a key when the key is in same location as the robot and the arm is empty.  There are 2 keys in 1 different shapes: Key key0-0 is of shape shape0, Key key0-1 is of shape shape0.  Currently, the robot is at position f3-3f and its arm is empty. All the positions are open except the following: f2-0f has shape0 shaped lock, f4-2f has shape0 shaped lock. Key key0-1 is at position f1-3f. Key key0-0 is at position f2-2f. The goal is to reach a state where the following facts hold: Key key0-0 is at f2-0f location and Key key0-1 is at f1-3f location. The available actions are: (unlock ?curpos ?lockpos ?key ?shape) - unlock the place ?lockpos with the key ?key of the shape ?shape from the current position place ?curpos, (move ?curpos ?nextpos) - travel from the current position ?curpos to the next position ?nextpos, (pickup ?curpos ?key) - pick up key ?key from place ?curpos, (pickup-and-loose ?curpos ?newkey ?oldkey) - pick up the key ?newkey from the current position ?curpos and loose the key ?oldkey which is being held, and (putdown ?curpos ?key) - put down key ?key at current position place ?curpos.",
            "inputs": "What is the first inapplicable action in the next sequence of actions: [(move f3-3f f3-2f), (move f3-2f f2-2f), (pickup f2-2f key0-0), (pickup-and-loose f4-0f key0-0 key0-1), (unlock f2-1f f2-0f key0-0 shape0), (move f2-1f f2-0f), (putdown f2-0f key0-0), (move f2-0f f2-1f)]?",
            "answer": "3"
          },
          {
            "context": "There are several cities, each containing several locations, some of which are airports. There are also trucks, which can drive within a single city, and airplanes, which can fly between airports. The goal is to get some packages from various locations to various new locations. There are 2 trucks and 1 airplane, as well as 4 packages. There are 4 locations across 2 cities. The locations are in cities as follows: l0-1 and l0-0 are in c0; l1-1 and l1-0 are in c1. Currently, t1 and p0 are at l1-1, t0 is at l0-1, p3, p2, and p1 are at l1-0, a0 is at l0-0. The goal is to reach a state where the following facts hold: p2 is at l1-0, p3 is at l0-1, p0 is at l0-0, and p1 is at l1-0. The available actions are: (load-truck ?obj ?truck ?loc) - load object ?obj into truck ?truck at location ?loc, (load-airplane ?obj ?airplane ?loc) - load the object ?obj from location ?loc onto the airplane ?airplane, (unload-truck ?obj ?truck ?loc) - unload the object ?obj from the truck ?truck at location ?loc, (unload-airplane ?obj ?airplane ?loc) - unload object ?obj from airplane ?airplane at location ?loc, (drive-truck ?truck ?loc-from ?loc-to ?city) - navigate the truck ?truck from its current location ?loc-from in city ?city to the new location ?loc-to within the same city, and (fly-airplane ?airplane ?loc-from ?loc-to) - fly the airplane ?airplane from location ?loc-from to location ?loc-to.",
            "inputs": "What is the first inapplicable action in the next sequence of actions: [(load-truck p0 t1 l1-1), (drive-truck t1 l1-1 l1-0 c1), (unload-truck p0 t1 l1-0), (fly-airplane a0 l0-0 l1-0), (unload-truck p3 t0 l0-1), (load-airplane p3 a0 l1-0), (fly-airplane a0 l1-0 l0-0), (unload-airplane p0 a0 l0-0), (unload-airplane p3 a0 l0-0), (drive-truck t0 l0-1 l0-0 c0), (load-truck p3 t0 l0-0), (drive-truck t0 l0-0 l0-1 c0), (unload-truck p3 t0 l0-1)]?",
            "answer": "4"
          }
        ]
      },
      "num_fewshot": 2,
      "metric_list": [
        {
          "metric": "bypass"
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "\n\n\n\n",
          "**Question**:",
          "**Question:**",
          "Q:"
        ],
        "do_sample": false,
        "max_gen_toks": 1000,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 1.0
      }
    }
  },
  "versions": {
    "acp_app_gen_2shot_chat": 1.0,
    "acp_areach_gen_2shot_chat": 1.0,
    "acp_just_gen_2shot_chat": 1.0,
    "acp_land_gen_2shot_chat": 1.0,
    "acp_nexta_gen_2shot_chat": 1.0,
    "acp_prog_gen_2shot_chat": 1.0,
    "acp_reach_gen_2shot_chat": 1.0,
    "acp_val_gen_2shot_chat": 1.0
  },
  "n-shot": {
    "acp_app_gen_2shot_chat": 2,
    "acp_areach_gen_2shot_chat": 2,
    "acp_just_gen_2shot_chat": 2,
    "acp_land_gen_2shot_chat": 2,
    "acp_nexta_gen_2shot_chat": 2,
    "acp_prog_gen_2shot_chat": 2,
    "acp_reach_gen_2shot_chat": 2,
    "acp_val_gen_2shot_chat": 2
  },
  "higher_is_better": {
    "acp_app_gen_2shot_chat": {
      "bypass": true
    },
    "acp_areach_gen_2shot_chat": {
      "bypass": true
    },
    "acp_just_gen_2shot_chat": {
      "bypass": true
    },
    "acp_land_gen_2shot_chat": {
      "bypass": true
    },
    "acp_nexta_gen_2shot_chat": {
      "bypass": true
    },
    "acp_prog_gen_2shot_chat": {
      "bypass": true
    },
    "acp_reach_gen_2shot_chat": {
      "bypass": true
    },
    "acp_val_gen_2shot_chat": {
      "bypass": true
    }
  },
  "n-samples": {
    "acp_app_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_val_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_land_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_prog_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_nexta_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_just_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_areach_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    },
    "acp_reach_gen_2shot_chat": {
      "original": 130,
      "effective": 130
    }
  },
  "config": {
    "model": "azure-openai-chat-completions",
    "model_args": "base_url=https://anonymized//openai/deployments/gpt-4o-2024-08-06/chat/completions,tokenizer_backend=None,tokenized_requests=false,api_version=2024-08-01-preview",
    "batch_size": 1,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": null,
    "bootstrap_iters": 100000,
    "gen_kwargs": null,
    "random_seed": 0,
    "numpy_seed": 1234,
    "torch_seed": 1234,
    "fewshot_seed": 1234
  },
  "git_hash": "bf59855f",
  "date": 1735595358.2390833,
  "pretty_env_info": "PyTorch version: 2.5.1+cu124\nIs debug build: False\nCUDA used to build PyTorch: 12.4\nROCM used to build PyTorch: N/A\n\nOS: Red Hat Enterprise Linux release 8.9 (Ootpa) (x86_64)\nGCC version: (GCC) 8.5.0 20210514 (Red Hat 8.5.0-20)\nClang version: Could not collect\nCMake version: version 3.26.5\nLibc version: glibc-2.28\n\nPython version: 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:06:46) [GCC 10.3.0] (64-bit runtime)\nPython platform: Linux-4.18.0-513.11.1.el8_9.x86_64-x86_64-with-glibc2.28\nIs CUDA available: False\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: N/A\nGPU models and configuration: No devices found.\nNvidia driver version: Could not collect\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture:        x86_64\nCPU op-mode(s):      32-bit, 64-bit\nByte Order:          Little Endian\nCPU(s):              128\nOn-line CPU(s) list: 0-127\nThread(s) per core:  1\nCore(s) per socket:  64\nSocket(s):           2\nNUMA node(s):        2\nVendor ID:           AuthenticAMD\nCPU family:          25\nModel:               1\nModel name:          AMD EPYC 7763 64-Core Processor\nStepping:            1\nCPU MHz:             3523.623\nCPU max MHz:         3529.0520\nCPU min MHz:         1500.0000\nBogoMIPS:            4900.00\nVirtualization:      AMD-V\nL1d cache:           32K\nL1i cache:           32K\nL2 cache:            512K\nL3 cache:            32768K\nNUMA node0 CPU(s):   0-63\nNUMA node1 CPU(s):   64-127\nFlags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd amd_ppin brs arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm\n\nVersions of relevant libraries:\n[pip3] numpy==2.2.0\n[pip3] torch==2.5.1\n[pip3] triton==3.1.0\n[conda] blas                      1.0                         mkl  \n[conda] mkl                       2019.3                      199  \n[conda] mkl-service               1.1.2            py37he904b0f_5  \n[conda] mkl_fft                   1.0.10           py37ha843d7b_0  \n[conda] mkl_random                1.0.2            py37hd81dba3_0  \n[conda] numpy                     1.16.2           py37h7e9f1db_0  \n[conda] numpy-base                1.16.2           py37hde5b4d6_0  \n[conda] numpydoc                  0.8.0                    py37_0  ",
  "transformers_version": "4.47.0",
  "upper_git_hash": null,
  "task_hashes": {
    "acp_app_gen_2shot_chat": "e1cc80dff34fc7867138fec7bcca61aea05449bf7e41889deaef8e1e662ede44",
    "acp_val_gen_2shot_chat": "7c3198f2a0ca47b8aad29b932750cbfbf8e82f9414758a844e44f48b3f6daceb",
    "acp_land_gen_2shot_chat": "c2024df82d4f3cbdc8569c056f7c7810a9de96fab6cd11e3f181d71e31718afb",
    "acp_prog_gen_2shot_chat": "b570e3f8b2e10d26cedc4650ef4dfe4395ea1338d7df5007ae00f8a2885b84fe",
    "acp_nexta_gen_2shot_chat": "310158295723072e1c21f92efa74dc63a52919914a058c42cb4084c862556019",
    "acp_just_gen_2shot_chat": "767f10ae2fbe69a9321080986872ec26c2748be80b8f8f06399e1adece729734",
    "acp_areach_gen_2shot_chat": "b0f01bdd1a710c6a92ce647e03cf55535d25cf8791d4ffcd4c1424e00946ca2a",
    "acp_reach_gen_2shot_chat": "951462d994a36d2a6b0a7ce9ddca96f7a3e1e96b83c0ceb8edd0e1f9ab54b4c3"
  },
  "model_source": "azure-openai-chat-completions",
  "model_name": "",
  "model_name_sanitized": "",
  "system_instruction": null,
  "system_instruction_sha": null,
  "fewshot_as_multiturn": true,
  "chat_template": "",
  "chat_template_sha": null,
  "start_time": 27549055.71723679,
  "end_time": 27550409.084676187,
  "total_evaluation_time_seconds": "1353.3674393966794"
}