#!/usr/bin/env python3
"""Look up HMMT scores that are missing from the embedded benchmark table.

The script parses the CSV table stored in ``fig1_csv``, collects every
experiment whose ``HMMT`` cell is empty, and asks the project database helper
``scoresearch_string`` for an ``HMMT_accuracy_avg`` value for those
experiments. It then prints the non-zero scores that were found and lists the
experiments that are still missing one.
"""
import io
import logging
import os
import sys
import traceback

# Make the directory containing this script importable before the
# project-local import below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

import pandas as pd

from eval.scripts.db_utils import scoresearch_string

# Configure logging
logging.basicConfig(level=logging.INFO)

# Extract model names from simple_all_benchmarks.py
# NOTE(review): the last header column "veCodeBenchv5" looks truncated
# (possibly "LiveCodeBenchv5"); it is not read by this script -- confirm
# against the source table before relying on it.
fig1_csv = """Experiments,Domain,DatasetSize,AvgAll,AIME24,AMC23,MATH500,HMMT,AvgMath,JEEBench,GPQAD,AvgSci,LCBv2,CodeElo,CodeForces,AvgCode,AIME25,HLE,veCodeBenchv5
openthoughts3,Other,1200000.0,55.3,69.0,93.5,90.0,,69.7,72.4,53.7,63.0,64.5,31.0,32.2,44.8,53.3,10.2,51.7
openthoughts3_300k,300k,316000.0,57.4,61.0,90.5,89.2,32.0,80.2,70.3,51.0,60.6,48.2,24.0,25.2,32.5,39.7,10.6,34.0
openthoughts3_100k,100k,100000.0,54.1,54.3,86.8,89.0,8.7,76.7,61.8,51.0,56.4,43.7,21.5,24.4,29.9,41.0,9.8,31.5
openthoughts3_30k,30k,31600.0,49.8,40.7,83.2,87.4,4.0,70.4,58.2,48.0,53.1,45.1,16.6,19.1,26.9,34.7,10.9,31.0
openthoughts3_10k,10k,10000.0,43.8,32.0,75.2,82.6,1.0,63.3,45.7,48.7,47.2,40.0,11.1,15.2,22.1,28.3,1.4,26.8
openthoughts3_3k,3k,3160.0,41.0,30.7,66.5,81.0,0.0,59.4,51.7,48.1,49.9,32.6,7.1,10.7,16.8,23.0,1.9,20.0
openthoughts3_1k,1k,1000.0,34.3,18.0,58.5,78.6,0.3,51.7,46.0,39.1,42.6,22.4,5.1,6.4,11.3,17.7,0.3,18.2
openthoughts3_0.3k,0.3k,316.0,33.7,17.3,56.5,79.8,,51.2,45.2,39.2,42.2,19.7,4.5,7.5,10.6,16.0,5.4,11.2
s1,Other,1000.0,33.6,20.0,60.2,77.8,,52.7,39.1,40.2,39.7,23.2,3.4,5.0,10.5,14.7,0.7,13.5
s1_0.3k,0.3k,316.0,32.4,18.3,55.5,75.6,,49.8,39.0,40.2,39.6,22.2,3.5,4.4,10.0,14.0,3.8,14.6
mo,Other,817.0,29.7,15.3,56.7,76.6,,49.5,39.1,29.8,34.4,14.8,2.1,2.9,6.6,14.0,0.1,8.1
mo_0.3k,0.3k,316.0,30.7,15.3,56.8,74.2,,48.8,38.1,35.4,36.8,18.8,2.9,4.0,8.6,12.7,1.1,11.7
am,Other,1400000.0,51.0,28.3,82.2,87.4,19.0,66.0,61.1,48.3,54.7,54.5,21.0,24.8,33.4,28.7,9.5,40.3
am_100k,100k,100000.0,36.1,22.0,62.0,77.6,,53.9,46.4,34.7,40.6,31.7,5.3,9.1,15.4,20.3,10.7,21.5
am_300k,300k,316000.0,44.0,23.7,73.2,84.0,,60.3,56.3,42.8,49.6,44.6,11.5,15.7,23.9,26.3,9.1,31.4
am_30k,30k,31600.0,33.5,15.3,58.2,77.4,,50.3,40.2,37.5,38.8,27.9,4.1,7.3,13.1,16.3,11.5,19.3
am_10k,10k,10000.0,29.9,15.7,52.8,74.2,,47.6,34.9,26.8,30.8,25.4,3.5,5.8,11.6,13.7,9.2,15.4
am_3k,3k,3160.0,29.1,15.3,50.3,71.0,,45.5,33.5,28.6,31.0,24.9,3.7,5.7,11.4,13.0,6.2,15.3
am_1k,1k,1000.0,26.0,10.0,46.0,69.2,,41.7,29.9,24.7,27.3,18.7,3.0,6.7,9.5,9.3,2.7,11.4
am_0.3k,0.3k,316.0,28.1,10.7,50.5,67.2,,42.8,34.3,31.6,33.0,20.5,3.9,5.7,10.0,8.0,0.9,12.1
nemo_nano_1000k,Nano,1000000.0,57.2,55.0,87.0,86.8,24.7,76.3,61.0,52.9,57.0,58.0,28.6,28.3,38.3,41.3,2.1,42.2
nemo_nano_300k,Nano,316000.0,52.9,44.3,84.2,85.8,,71.4,60.1,52.9,56.5,50.6,22.2,22.7,31.8,33.0,0.5,36.6
nemo_nano_100k,Nano,100000.0,42.7,28.7,69.0,83.6,,60.4,48.2,47.0,47.6,37.8,12.2,15.5,21.8,26.7,0.4,26.5
nemo_nano_30k,Nano,31600.0,33.5,19.0,56.0,71.2,,48.7,34.2,38.9,36.6,32.2,7.6,9.2,16.3,12.3,0.5,22.2
nemo_nano_10k,Nano,10000.0,31.6,16.7,50.5,72.4,,46.5,31.6,39.9,35.8,27.5,5.7,8.1,13.8,11.0,0.3,16.2
nemo_nano_3k,Nano,3160.0,27.3,9.7,43.2,61.4,,38.1,30.3,37.0,33.6,24.3,5.3,7.2,12.3,6.0,0.1,13.9
nemo_nano_1k,Nano,1000.0,23.9,8.3,35.2,63.6,,35.7,25.4,30.8,28.1,19.0,3.3,5.8,9.4,8.3,0.0,10.0
nemo_nano_0.3k,Nano,316.0,28.4,12.0,51.0,67.4,,43.5,31.9,32.2,32.0,23.0,4.2,5.4,10.9,8.7,0.1,11.4
"""

# Convert string to dataframe to get model names and identify missing HMMT scores
df = pd.read_csv(io.StringIO(fig1_csv))

# Identify models with missing HMMT scores. read_csv parses empty fields as
# NaN, so a single isna() mask covers every blank HMMT cell.
missing_hmmt_models = df.loc[df["HMMT"].isna(), "Experiments"].tolist()

print(f"Found {len(missing_hmmt_models)} models with missing HMMT scores:")
for name in missing_hmmt_models:
    print(f" {name}")

# Search for HMMT scores in the database for these models
if missing_hmmt_models:
    print(
        f"\nSearching database for HMMT scores for "
        f"{len(missing_hmmt_models)} models..."
    )

    # Define just the HMMT benchmark
    benchmarks = ["HMMT_accuracy_avg"]

    class Args:
        """Minimal stand-in for the options object passed to scoresearch_string."""

        def __init__(self):
            # NOTE(review): attribute meanings are defined by
            # scoresearch_string, which is not visible here -- confirm there.
            self.comma_separated_bstrings = True
            self.output = "missing_hmmt_scores"

    args = Args()

    try:
        # Presumably returns a DataFrame plus the path of a CSV it wrote;
        # the code below only relies on DataFrame-style access.
        results, csv_file = scoresearch_string(
            missing_hmmt_models, benchmarks=benchmarks, args=args
        )

        if not results.empty and "HMMT" in results.columns:
            print("\nFound HMMT scores in database:")

            # Show model name, HMMT score, and dataset size
            display_cols = ["Experiments", "DatasetSize", "HMMT"]
            hmmt_results = results[display_cols].copy()

            # Filter out rows where HMMT is NaN or 0
            has_score = hmmt_results["HMMT"].notna() & (hmmt_results["HMMT"] > 0)
            hmmt_results = hmmt_results[has_score]

            if not hmmt_results.empty:
                print(hmmt_results.to_string(index=False))
                print(f"\nDetailed results saved to: {csv_file}")

                # Show which models still have missing HMMT scores
                found_models = set(hmmt_results["Experiments"].tolist())
                still_missing = [
                    m for m in missing_hmmt_models if m not in found_models
                ]
                if still_missing:
                    print(
                        f"\nModels still missing HMMT scores "
                        f"({len(still_missing)}):"
                    )
                    for model in still_missing:
                        print(f" {model}")
            else:
                print("No non-zero HMMT scores found in database for these models")
        else:
            print("No HMMT scores found in database for these models")
            if not results.empty:
                # Help diagnose a renamed or missing HMMT column.
                print(f"Available columns in results: {list(results.columns)}")

    except Exception as e:
        # Top-level boundary of a one-off script: report the failure with a
        # full traceback instead of aborting silently.
        print(f"Error searching for HMMT scores: {e}")
        traceback.print_exc()
else:
    print("All models already have HMMT scores")