{
  "grader": {
    "type": "python",
    "name": "katago_pv_reward",
    "source": "\nimport json\nimport math\nimport re\n\nTOP_MOVE_SCORES = {\n    1: 1.00,\n    2: 0.85,\n    3: 0.70,\n    4: 0.50,\n    5: 0.50,\n}\n\nMOVE_RE = re.compile(r\"^(?:PASS|[A-HJ-T](?:[1-9]|1[0-9]))$\")\n\n\ndef _as_dict(sample):\n    if isinstance(sample, dict):\n        if isinstance(sample.get(\"output_json\"), dict):\n            return sample[\"output_json\"]\n        if isinstance(sample.get(\"json\"), dict):\n            return sample[\"json\"]\n        for key in (\"output_text\", \"text\", \"content\"):\n            value = sample.get(key)\n            if isinstance(value, str):\n                try:\n                    parsed = json.loads(value)\n                    if isinstance(parsed, dict):\n                        return parsed\n                except Exception:\n                    pass\n        return sample\n    if isinstance(sample, str):\n        try:\n            parsed = json.loads(sample)\n            if isinstance(parsed, dict):\n                return parsed\n        except Exception:\n            pass\n    return {}\n\n\ndef _valid_move(move):\n    return isinstance(move, str) and MOVE_RE.fullmatch(move.upper()) is not None\n\n\ndef _valid_pv(pv):\n    return isinstance(pv, list) and all(_valid_move(move) for move in pv)\n\n\ndef move_reward(pred_move, ref_top_moves):\n    try:\n        rank = ref_top_moves.index(pred_move) + 1\n        return TOP_MOVE_SCORES.get(rank, 0.0)\n    except ValueError:\n        return 0.0\n\n\ndef pv_reward(pred_pv, ref_pv, alpha=0.85, max_len=12):\n    T = min(len(pred_pv), len(ref_pv), max_len)\n    if T == 0:\n        return 0.0\n    num = 0.0\n    den = 0.0\n    for t in range(T):\n        w = alpha ** t\n        den += w\n        if pred_pv[t] == ref_pv[t]:\n            num += w\n        else:\n            break\n    return num / den if den > 0 else 0.0\n\n\ndef bounded_linear_reward(error, tol):\n    return max(0.0, 1.0 - error / tol)\n\n\ndef grade(sample, item):\n    \"\"\"\n    sample: model output dict or wrapper containing output_json/output_text\n    item: reference dict with precomputed KataGo values\n    \"\"\"\n    sample = _as_dict(sample)\n    try:\n        explanation = str(sample[\"explanation\"])\n        pred_move = str(sample[\"best_move\"]).upper()\n        pred_pv = [str(move).upper() for move in sample[\"pv_top1\"]]\n        pred_wr = float(sample[\"winrate_black\"])\n        pred_score = float(sample[\"score_lead_black\"])\n    except Exception:\n        return 0.0\n\n    ref_top_moves = [str(move).upper() for move in item[\"reference\"][\"top_moves\"]]\n    ref_pv = [str(move).upper() for move in item[\"reference\"][\"pv_top1\"]]\n    ref_wr = float(item[\"reference\"][\"winrate_black\"])\n    ref_score = float(item[\"reference\"][\"score_lead_black\"])\n\n    if not _valid_move(pred_move) or not _valid_pv(pred_pv):\n        return 0.0\n\n    explanation_words = len(re.findall(r\"\\S+\", explanation))\n    if explanation_words == 0:\n        return 0.0\n    r_format = 1.0 if explanation_words <= 150 else 0.5\n\n    r_move = move_reward(pred_move, ref_top_moves)\n    r_pv = pv_reward(pred_pv, ref_pv)\n    r_wr = bounded_linear_reward(abs(pred_wr - ref_wr), tol=15.0)\n    r_score = bounded_linear_reward(abs(pred_score - ref_score), tol=10.0)\n\n    reward = (\n        0.35 * r_move +\n        0.30 * r_pv +\n        0.15 * r_wr +\n        0.15 * r_score +\n        0.05 * r_format\n    )\n    return float(max(0.0, min(1.0, reward)))\n"
  },
  "response_format": {
    "type": "json_schema",
    "json_schema": {
      "name": "katago_pv_prediction",
      "strict": true,
      "schema": {
        "type": "object",
        "properties": {
          "best_move": {
            "type": "string",
            "description": "Predicted best move in GTP coordinate form, e.g. R15, or PASS."
          },
          "explanation": {
            "type": "string",
            "description": "Brief explanation, max 150 words, of key position features and why the best move is correct."
          },
          "pv_top1": {
            "type": "array",
            "items": {
              "type": "string"
            },
            "minItems": 1,
            "maxItems": 12,
            "description": "Predicted principal variation for the best move, up to 12 plies."
          },
          "winrate_black": {
            "type": "number",
            "description": "Black winrate as a percentage from 0 to 100."
          },
          "score_lead_black": {
            "type": "number",
            "description": "KataGo score lead from Black's perspective, in points."
          }
        },
        "required": [
          "explanation",
          "best_move",
          "pv_top1",
          "winrate_black",
          "score_lead_black"
        ],
        "additionalProperties": false
      }
    }
  },
  "model": "o4-mini-2025-04-16",
  "train_file_path": "openairft/rft_katago/katago_rft_train.jsonl",
  "validation_file_path": "openairft/rft_katago/katago_rft_validation.jsonl",
  "grader_validation": {
    "grader": {
      "type": "python",
      "name": "katago_pv_reward",
      "source": "\nimport json\nimport math\nimport re\n\nTOP_MOVE_SCORES = {\n    1: 1.00,\n    2: 0.85,\n    3: 0.70,\n    4: 0.50,\n    5: 0.50,\n}\n\nMOVE_RE = re.compile(r\"^(?:PASS|[A-HJ-T](?:[1-9]|1[0-9]))$\")\n\n\ndef _as_dict(sample):\n    if isinstance(sample, dict):\n        if isinstance(sample.get(\"output_json\"), dict):\n            return sample[\"output_json\"]\n        if isinstance(sample.get(\"json\"), dict):\n            return sample[\"json\"]\n        for key in (\"output_text\", \"text\", \"content\"):\n            value = sample.get(key)\n            if isinstance(value, str):\n                try:\n                    parsed = json.loads(value)\n                    if isinstance(parsed, dict):\n                        return parsed\n                except Exception:\n                    pass\n        return sample\n    if isinstance(sample, str):\n        try:\n            parsed = json.loads(sample)\n            if isinstance(parsed, dict):\n                return parsed\n        except Exception:\n            pass\n    return {}\n\n\ndef _valid_move(move):\n    return isinstance(move, str) and MOVE_RE.fullmatch(move.upper()) is not None\n\n\ndef _valid_pv(pv):\n    return isinstance(pv, list) and all(_valid_move(move) for move in pv)\n\n\ndef move_reward(pred_move, ref_top_moves):\n    try:\n        rank = ref_top_moves.index(pred_move) + 1\n        return TOP_MOVE_SCORES.get(rank, 0.0)\n    except ValueError:\n        return 0.0\n\n\ndef pv_reward(pred_pv, ref_pv, alpha=0.85, max_len=12):\n    T = min(len(pred_pv), len(ref_pv), max_len)\n    if T == 0:\n        return 0.0\n    num = 0.0\n    den = 0.0\n    for t in range(T):\n        w = alpha ** t\n        den += w\n        if pred_pv[t] == ref_pv[t]:\n            num += w\n        else:\n            break\n    return num / den if den > 0 else 0.0\n\n\ndef bounded_linear_reward(error, tol):\n    return max(0.0, 1.0 - error / tol)\n\n\ndef grade(sample, item):\n    \"\"\"\n    sample: model output dict or wrapper containing output_json/output_text\n    item: reference dict with precomputed KataGo values\n    \"\"\"\n    sample = _as_dict(sample)\n    try:\n        explanation = str(sample[\"explanation\"])\n        pred_move = str(sample[\"best_move\"]).upper()\n        pred_pv = [str(move).upper() for move in sample[\"pv_top1\"]]\n        pred_wr = float(sample[\"winrate_black\"])\n        pred_score = float(sample[\"score_lead_black\"])\n    except Exception:\n        return 0.0\n\n    ref_top_moves = [str(move).upper() for move in item[\"reference\"][\"top_moves\"]]\n    ref_pv = [str(move).upper() for move in item[\"reference\"][\"pv_top1\"]]\n    ref_wr = float(item[\"reference\"][\"winrate_black\"])\n    ref_score = float(item[\"reference\"][\"score_lead_black\"])\n\n    if not _valid_move(pred_move) or not _valid_pv(pred_pv):\n        return 0.0\n\n    explanation_words = len(re.findall(r\"\\S+\", explanation))\n    if explanation_words == 0:\n        return 0.0\n    r_format = 1.0 if explanation_words <= 150 else 0.5\n\n    r_move = move_reward(pred_move, ref_top_moves)\n    r_pv = pv_reward(pred_pv, ref_pv)\n    r_wr = bounded_linear_reward(abs(pred_wr - ref_wr), tol=15.0)\n    r_score = bounded_linear_reward(abs(pred_score - ref_score), tol=10.0)\n\n    reward = (\n        0.35 * r_move +\n        0.30 * r_pv +\n        0.15 * r_wr +\n        0.15 * r_score +\n        0.05 * r_format\n    )\n    return float(max(0.0, min(1.0, reward)))\n"
    }
  },
  "grader_smoke": {
    "reward": 0.0,
    "metadata": {
      "name": "katago_pv_reward",
      "type": "python",
      "errors": {
        "formula_parse_error": false,
        "sample_parse_error": false,
        "sample_parse_error_details": null,
        "truncated_observation_error": false,
        "unresponsive_reward_error": false,
        "invalid_variable_error": false,
        "invalid_variable_error_details": null,
        "other_error": false,
        "python_grader_server_error": true,
        "python_grader_server_error_type": "unauthorized_error",
        "python_grader_runtime_error": false,
        "python_grader_runtime_error_details": null,
        "model_grader_server_error": false,
        "model_grader_refusal_error": false,
        "model_grader_refusal_error_details": null,
        "model_grader_parse_error": false,
        "model_grader_parse_error_details": null,
        "model_grader_exceeded_max_tokens_error": false,
        "model_grader_server_error_details": null,
        "endpoint_grader_internal_error": false,
        "endpoint_grader_internal_error_details": null,
        "endpoint_grader_server_error": false,
        "endpoint_grader_server_error_details": null,
        "endpoint_grader_safety_check_error": false
      },
      "execution_time": 7.124865531921387,
      "metadata": {},
      "scores": {},
      "token_usage": null,
      "sampled_model_name": null
    },
    "sub_rewards": {},
    "model_grader_token_usage_per_model": {}
  }
}