{
  "timestamp": "2025-09-14T22:13:52.365996",
  "benchmark_size": 10,
  "prompt_type": "direct_answer",
  "models_evaluated": [
    "huggingface/results/phase2/models/stage6_hard/merged_model_fixed"
  ],
  "results_summary": {
    "huggingface/results/phase2/models/stage6_hard/merged_model_fixed": {
      "accuracy": 0.0,
      "avg_response_time": 78.52125198841095,
      "results_by_operation": {
        "subtraction": {
          "total": 4,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.42172884941101
        },
        "addition": {
          "total": 3,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.44728994369507
        },
        "exponentiation": {
          "total": 1,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 79.23960447311401
        },
        "logarithm": {
          "total": 2,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.47206509113312
        }
      },
      "results_by_difficulty": {
        "easy": {
          "total": 1,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.46129584312439
        },
        "hard": {
          "total": 6,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.55475989977519
        },
        "medium": {
          "total": 3,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 78.47422154744466
        }
      },
      "metadata": {
        "prompt_type": "direct_answer",
        "prompt_description": "Direct numerical answer only"
      }
    }
  }
}