#!/usr/bin/env python3
"""Look up benchmark scores for the 30k and 300k model sets and report gaps.

Every model name in ``models_30k`` and ``models_300k`` is passed to
``scoresearch_string`` together with the benchmark list. The script then
prints a sample of the returned table and lists the models for which no row
was returned.
"""

import logging
import os
import sys
import traceback

import pandas as pd

# Make this script's own directory importable before the project import below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from eval.scripts.db_utils import scoresearch_string  # noqa: E402

# Configure logging
logging.basicConfig(level=logging.INFO)

# NOTE(review): several identifiers and strings in this file look truncated
# (e.g. "relts", "no_pipene_*", "veCodeBenchv5", "comma_separated_bstrings").
# They are kept exactly as found because they are runtime values or external
# names; confirm the intended spellings against the database / db_utils.

# Model names to search for, grouped by dataset size.
models_30k = [
    "nemo_nano_30k",
    "am_30k",
    "openthoughts3_30k",
    "openmathreasoning_30k",
    "no_pipene_math_30k",
    "nemo_nano_math_30k",
    "openthoughts2_math_30k",
    "openthoughts3_math_30k",
    "opencodereasoning_30k",
    "no_pipene_code_30k",
    "nemo_nano_code_30k",
    "no_pipene_science_30k",
    "nemo_nano_science_30k",
    "no_pipene_30k",
    "openthoughts2_30k",
]

models_300k = [
    "nemo_nano_300k",
    "am_300k",
    "openthoughts3_300k",
    "openmathreasoning_300k",
    "no_pipene_math_300k",
    "nemo_nano_math_300k",
    "openthoughts2_math_300k",
    "openthoughts3_math_300k",
    "opencodereasoning_300k",
    "no_pipene_code_300k",
    "nemo_nano_code_300k",
    "no_pipene_science_300k",
    "nemo_nano_science_300k",
    "no_pipene_300k",
    "openthoughts2_300k",
]

# De-duplicated, sorted list of every model name. The original called an
# undefined name here (NameError); sorting also makes the order deterministic.
all_models = sorted(set(models_30k + models_300k))

print(f"Searching for {len(all_models)} models...")
for model in all_models:
    print(f" {model}")

# Define all benchmarks we need
benchmarks = [
    "AIME24_accuracy_avg",
    "AMC23_accuracy_avg",
    "MATH500_accuracy_avg",
    "HMMT_accuracy_avg",
    "JEEBench_accuracy_avg",
    "GPQAD_accuracy_avg",
    "LCBv2_accuracy_avg",
    "CodeElo_accuracy_avg",
    "CodeForces_accuracy_avg",
    "AIME25_accuracy_avg",
    "HLE_accuracy_avg",
    "veCodeBenchv5_accuracy_avg",
]


class Args:
    """Minimal options object handed to ``scoresearch_string`` as ``args``."""

    def __init__(self):
        # Attribute names are consumed by scoresearch_string; their exact
        # meaning is defined there — TODO confirm against db_utils.
        self.comma_separated_bstrings = True
        self.output = "30k_300k_comparison"


def _print_missing(size_label, expected, found):
    """Print the names in *expected* that are absent from *found*, if any."""
    missing = [name for name in expected if name not in found]
    if missing:
        print(f"\nMissing {size_label} models ({len(missing)}):")
        for name in missing:
            print(f" {name}")


args = Args()

# Top-level boundary of the script: any failure in the search or in the
# reporting below is printed with its traceback instead of propagating.
try:
    results, csv_file = scoresearch_string(
        all_models, benchmarks=benchmarks, args=args
    )
    print(f"\nFound {len(results)} total relts")
    print(f"Relts saved to: {csv_file}")

    # `results` is used as a pandas DataFrame from here on (.empty, column
    # selection, .unique()).
    if not results.empty:
        # Show some sample rows
        print("\nSample relts:")
        print(
            results[
                ['Experiments', 'DatasetSize', 'AIME24', 'MATH500', 'HMMT']
            ].head(10)
        )

        # Check which models we found data for
        found_models = set(results['Experiments'].unique())
        print(f"\nFound data for {len(found_models)} unique models")

        _print_missing("30k", models_30k, found_models)
        _print_missing("300k", models_300k, found_models)
    else:
        print("No relts found")
except Exception as e:
    print(f"Error searching for models: {e}")
    traceback.print_exc()