{
  "timestamp": "2025-09-15T04:05:04.548175",
  "benchmark_size": 211,
  "prompt_type": "direct_answer",
  "models_evaluated": [
    "huggingface/results/phase2/models/stage6_hard/merged_model"
  ],
  "results_summary": {
    "huggingface/results/phase2/models/stage6_hard/merged_model": {
      "accuracy": 0.052132701421800945,
      "avg_response_time": 108.49803927385412,
      "results_by_operation": {
        "multiplication": {
          "total": 25,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 109.32365671157837
        },
        "division": {
          "total": 25,
          "correct": 1,
          "accuracy": 0.04,
          "avg_response_time": 108.86885780334472
        },
        "trigonometry": {
          "total": 10,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.96455557346344
        },
        "subtraction": {
          "total": 40,
          "correct": 1,
          "accuracy": 0.025,
          "avg_response_time": 108.15847190022468
        },
        "complex": {
          "total": 1,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.89791750907898
        },
        "exponentiation": {
          "total": 25,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.5280823802948
        },
        "logarithm": {
          "total": 25,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.25943026542663
        },
        "addition": {
          "total": 60,
          "correct": 9,
          "accuracy": 0.15,
          "avg_response_time": 108.22838764190674
        }
      },
      "results_by_difficulty": {
        "medium": {
          "total": 100,
          "correct": 11,
          "accuracy": 0.11,
          "avg_response_time": 108.54214522123337
        },
        "hard": {
          "total": 86,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.3695412929668
        },
        "easy": {
          "total": 25,
          "correct": 0,
          "accuracy": 0.0,
          "avg_response_time": 108.76364853858948
        }
      },
      "metadata": {
        "prompt_type": "direct_answer",
        "prompt_description": "Direct numerical answer only"
      }
    }
  }
}