{
  "results": {
    "minerva_math": {
      "exact_match,none": 0.2857142857142857,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.2857142857142857,
      "math_verify_stderr,none": "N/A",
      "alias": "minerva_math"
    },
    "minerva_math_algebra": {
      "alias": " - minerva_math_algebra",
      "exact_match,none": 1.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 1.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_counting_and_prob": {
      "alias": " - minerva_math_counting_and_prob",
      "exact_match,none": 1.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 1.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_geometry": {
      "alias": " - minerva_math_geometry",
      "exact_match,none": 0.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_intermediate_algebra": {
      "alias": " - minerva_math_intermediate_algebra",
      "exact_match,none": 0.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_num_theory": {
      "alias": " - minerva_math_num_theory",
      "exact_match,none": 0.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_prealgebra": {
      "alias": " - minerva_math_prealgebra",
      "exact_match,none": 0.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.0,
      "math_verify_stderr,none": "N/A"
    },
    "minerva_math_precalc": {
      "alias": " - minerva_math_precalc",
      "exact_match,none": 0.0,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.0,
      "math_verify_stderr,none": "N/A"
    }
  },
  "groups": {
    "minerva_math": {
      "exact_match,none": 0.2857142857142857,
      "exact_match_stderr,none": "N/A",
      "math_verify,none": 0.2857142857142857,
      "math_verify_stderr,none": "N/A",
      "alias": "minerva_math"
    }
  },
  "group_subtasks": {
    "minerva_math": [
      "minerva_math_algebra",
      "minerva_math_counting_and_prob",
      "minerva_math_geometry",
      "minerva_math_intermediate_algebra",
      "minerva_math_num_theory",
      "minerva_math_prealgebra",
      "minerva_math_precalc"
    ]
  },
  "configs": {
    "minerva_math_algebra": {
      "task": "minerva_math_algebra",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "algebra",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e546cc2c0>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_counting_and_prob": {
      "task": "minerva_math_counting_and_prob",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "counting_and_probability",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e5469c900>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_geometry": {
      "task": "minerva_math_geometry",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "geometry",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e5462cc20>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_intermediate_algebra": {
      "task": "minerva_math_intermediate_algebra",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "intermediate_algebra",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e5464f920>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_num_theory": {
      "task": "minerva_math_num_theory",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "number_theory",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e5464c360>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_prealgebra": {
      "task": "minerva_math_prealgebra",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "prealgebra",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e5472db20>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    },
    "minerva_math_precalc": {
      "task": "minerva_math_precalc",
      "tag": [
        "math_word_problems"
      ],
      "dataset_path": "EleutherAI/hendrycks_math",
      "dataset_name": "precalculus",
      "dataset_kwargs": {
        "trust_remote_code": true
      },
      "training_split": "train",
      "test_split": "test",
      "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n    def _process_doc(doc: dict) -> dict:\n        out_doc = {\n            \"problem\": doc[\"problem\"],\n            \"solution\": doc[\"solution\"],\n            \"answer\": normalize_final_answer(\n                remove_boxed(last_boxed_only_string(doc[\"solution\"]))\n            ),\n        }\n        if getattr(doc, \"few_shot\", None) is not None:\n            out_doc[\"few_shot\"] = True\n        return out_doc\n\n    return dataset.map(_process_doc)\n",
      "doc_to_text": "def doc_to_text(doc: dict) -> str:\n    return \"Problem:\" + \"\\n\" + doc[\"problem\"] + \"\\n\\n\" + \"Solution:\"\n",
      "doc_to_target": "{{answer if few_shot is undefined else solution}}",
      "unsafe_code": false,
      "process_results": "def process_results(doc: dict, results: List[str]) -> Dict[str, int]:\n    candidates = results[0]\n\n    unnormalized_answer = get_unnormalized_answer(candidates)\n    answer = normalize_final_answer(unnormalized_answer)\n\n    if is_equiv(answer, doc[\"answer\"]):\n        retval = 1\n    else:\n        retval = 0\n\n    # math_verify\n    res = verify(parse(doc[\"answer\"]), parse(candidates))\n    mathval = 1 if res else 0\n\n    results = {\n        \"exact_match\": retval,\n        \"math_verify\": mathval,\n    }\n    return results\n",
      "description": "",
      "target_delimiter": " ",
      "fewshot_delimiter": "\n\n",
      "fewshot_config": {
        "sampler": "first_n",
        "samples": "<function list_fewshot_samples at 0x7f8e546f4220>"
      },
      "num_fewshot": 4,
      "metric_list": [
        {
          "metric": "exact_match",
          "aggregation": "mean",
          "higher_is_better": true
        },
        {
          "metric": "math_verify",
          "aggregation": "mean",
          "higher_is_better": true
        }
      ],
      "output_type": "generate_until",
      "generation_kwargs": {
        "until": [
          "Problem:"
        ],
        "do_sample": false,
        "temperature": 0.0
      },
      "repeats": 1,
      "should_decontaminate": false,
      "metadata": {
        "version": 2.0
      }
    }
  },
  "versions": {
    "minerva_math": 1.0,
    "minerva_math_algebra": 2.0,
    "minerva_math_counting_and_prob": 2.0,
    "minerva_math_geometry": 2.0,
    "minerva_math_intermediate_algebra": 2.0,
    "minerva_math_num_theory": 2.0,
    "minerva_math_prealgebra": 2.0,
    "minerva_math_precalc": 2.0
  },
  "n-shot": {
    "minerva_math_algebra": 4,
    "minerva_math_counting_and_prob": 4,
    "minerva_math_geometry": 4,
    "minerva_math_intermediate_algebra": 4,
    "minerva_math_num_theory": 4,
    "minerva_math_prealgebra": 4,
    "minerva_math_precalc": 4
  },
  "higher_is_better": {
    "minerva_math": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_algebra": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_counting_and_prob": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_geometry": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_intermediate_algebra": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_num_theory": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_prealgebra": {
      "exact_match": true,
      "math_verify": true
    },
    "minerva_math_precalc": {
      "exact_match": true,
      "math_verify": true
    }
  },
  "n-samples": {
    "minerva_math_algebra": {
      "original": 1187,
      "effective": 1
    },
    "minerva_math_counting_and_prob": {
      "original": 474,
      "effective": 1
    },
    "minerva_math_geometry": {
      "original": 479,
      "effective": 1
    },
    "minerva_math_intermediate_algebra": {
      "original": 903,
      "effective": 1
    },
    "minerva_math_num_theory": {
      "original": 540,
      "effective": 1
    },
    "minerva_math_prealgebra": {
      "original": 871,
      "effective": 1
    },
    "minerva_math_precalc": {
      "original": 546,
      "effective": 1
    }
  },
  "config": {
    "model": "llada_dist",
    "model_args": "model_path=GSAI-ML/LLaDA-8B-Instruct,gen_length=1024,steps=1024,block_length=8,remasking=low_confidence",
    "batch_size": 1,
    "batch_sizes": [],
    "device": null,
    "use_cache": null,
    "limit": 1.0,
    "bootstrap_iters": 100000,
    "gen_kwargs": null,
    "random_seed": 0,
    "numpy_seed": 1234,
    "torch_seed": 1234,
    "fewshot_seed": 1234
  },
  "git_hash": "9b919e8",
  "date": 1753294197.4011095,
  "pretty_env_info": "PyTorch version: 2.4.0\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.35\n\nPython version: 3.12.9 | packaged by conda-forge | (main, Mar  4 2025, 22:48:41) [GCC 13.3.0] (64-bit runtime)\nPython platform: Linux-5.15.0-210.163.7.el8uek.x86_64-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: GPU 0: NVIDIA A100-SXM4-80GB\nNvidia driver version: 560.35.03\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn_train.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_infer.so.8.9.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops_train.so.8.9.0\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture:                         x86_64\nCPU op-mode(s):                       32-bit, 64-bit\nAddress sizes:                        48 bits physical, 48 bits virtual\nByte Order:                           Little Endian\nCPU(s):                               256\nOn-line CPU(s) list:                  0-254\nOff-line CPU(s) list:                 255\nVendor ID:                            AuthenticAMD\nModel name:                           AMD EPYC 7J13 64-Core Processor\nCPU family:                           25\nModel:                                1\nThread(s) per core:                   2\nCore(s) per socket:                   64\nSocket(s):                            2\nStepping:                             1\nFrequency boost:                      enabled\nCPU max MHz:                          3673.0950\nCPU min MHz:                          0.0000\nBogoMIPS:                             4900.16\nFlags:                                fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local nt_good clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin brs arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm\nVirtualization:                       AMD-V\nL1d cache:                            4 MiB (128 instances)\nL1i cache:                            4 MiB (128 instances)\nL2 cache:                             64 MiB (128 instances)\nL3 cache:                             512 MiB (16 instances)\nNUMA node(s):                         8\nNUMA node0 CPU(s):                    0-15,128-143\nNUMA node1 CPU(s):                    16-31,144-159\nNUMA node2 CPU(s):                    32-47,160-175\nNUMA node3 CPU(s):                    48-63,176-191\nNUMA node4 CPU(s):                    64-79,192-207\nNUMA node5 CPU(s):                    80-95,208-223\nNUMA node6 CPU(s):                    96-111,224-239\nNUMA node7 CPU(s):                    112-127,240-254\nVulnerability Gather data sampling:   Not affected\nVulnerability Itlb multihit:          Not affected\nVulnerability L1tf:                   Not affected\nVulnerability Mds:                    Not affected\nVulnerability Meltdown:               Not affected\nVulnerability Mmio stale data:        Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed:               Not affected\nVulnerability Spec rstack overflow:   Mitigation; safe RET\nVulnerability Spec store bypass:      Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1:             Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2:             Mitigation; Retpolines; IBPB conditional; IBRS_FW; STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds:                  Not affected\nVulnerability Tsx async abort:        Not affected\n\nVersions of relevant libraries:\n[pip3] mypy-extensions==1.0.0\n[pip3] numpy==2.0.2\n[pip3] torch==2.4.0\n[pip3] torchaudio==2.4.0\n[pip3] torchvision==0.19.0\n[pip3] triton==3.0.0\n[conda] blas                      2.116                       mkl    conda-forge\n[conda] blas-devel                3.9.0            16_linux64_mkl    conda-forge\n[conda] libblas                   3.9.0            16_linux64_mkl    conda-forge\n[conda] libcblas                  3.9.0            16_linux64_mkl    conda-forge\n[conda] liblapack                 3.9.0            16_linux64_mkl    conda-forge\n[conda] liblapacke                3.9.0            16_linux64_mkl    conda-forge\n[conda] libopenvino-pytorch-frontend 2025.0.0             h5888daf_1    conda-forge\n[conda] mkl                       2022.1.0           h84fe81f_915    conda-forge\n[conda] mkl-devel                 2022.1.0           ha770c72_916    conda-forge\n[conda] mkl-include               2022.1.0           h84fe81f_915    conda-forge\n[conda] numpy                     2.0.2           py312h58c1407_1    conda-forge\n[conda] pytorch                   2.4.0           py3.12_cuda12.1_cudnn9.1.0_0    pytorch\n[conda] pytorch-cuda              12.1                 ha16c6d3_6    pytorch\n[conda] pytorch-mutex             1.0                        cuda    pytorch\n[conda] torchaudio                2.4.0               py312_cu121    pytorch\n[conda] torchtriton               3.0.0                     py312    pytorch\n[conda] torchvision               0.19.0              py312_cu121    pytorch",
  "transformers_version": "4.49.0",
  "upper_git_hash": null,
  "tokenizer_pad_token": [
    "<|endoftext|>",
    "126081"
  ],
  "tokenizer_eos_token": [
    "<|endoftext|>",
    "126081"
  ],
  "tokenizer_bos_token": [
    "<|startoftext|>",
    "126080"
  ],
  "eot_token_id": null,
  "max_length": 4096,
  "task_hashes": {},
  "model_source": "llada_dist",
  "model_name": "GSAI-ML/LLaDA-8B-Instruct",
  "model_name_sanitized": "GSAI-ML__LLaDA-8B-Instruct",
  "system_instruction": null,
  "system_instruction_sha": null,
  "fewshot_as_multiturn": false,
  "chat_template": null,
  "chat_template_sha": null,
  "start_time": 7937309.349413365,
  "end_time": 7938004.803751687,
  "total_evaluation_time_seconds": "695.4543383214623"
}