#!/usr/bin/env python3
"""
Query database for latest evaluation results to update domain scaling plot.

This script searches for all model families and their scaling experiments,
merges the per-query result tables, de-duplicates them, and writes a single
combined CSV.

NOTE(review): this file looks like it went through a filter that stripped
character pairs from identifiers and strings ("relts", "scang", "pipene",
"veCodeBench", ...). Names that can be checked against the standard library
and pandas (``pathlib``, ``drop_duplicates(subset=...)``, ``Series.sum``) have
been restored. Project-side names (``scoresearch_string``, its ``bstrings``
keyword, ``Args.comma_separated_bstrings``) and every runtime string are left
exactly as found -- confirm them against ``eval/scripts/db_utils.py`` and the
database before relying on this script.
"""

import logging
import os
import sys
from pathlib import Path

import pandas as pd

# Make this file's directory importable (presumably the project root) so the
# ``eval`` package below resolves when the script is run directly.
sys.path.insert(0, str(Path(__file__).parent))

# NOTE(review): function name kept verbatim; it may be garbled -- verify.
from eval.scripts.db_utils import scoresearch_string  # noqa: E402

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define the standard benchmarks used in domain scaling
# NOTE(review): "veCodeBench_accuracy_avg" is possibly a garbled
# "LiveCodeBench_accuracy_avg" -- confirm against the database column names.
BENCHMARKS = [
    "AIME24_accuracy_avg",
    "AMC23_accuracy_avg",
    "MATH500_accuracy",
    "JEEBench_accuracy_avg",
    "GPQADiamond_accuracy_avg",
    "veCodeBench_accuracy_avg",
    "CodeElo_accuracy_avg",
    "CodeForces_accuracy_avg",
]


class Args:
    """Mock args class for database utility function."""

    def __init__(self):
        # NOTE(review): attribute name kept verbatim; presumably the helper
        # reads ``comma_separated_substrings`` -- verify against db_utils.
        self.comma_separated_bstrings = False
        self.output = None


def _search(term, output_name, args):
    """Run one database search for *term* and return its result table.

    The helper is called with a single-element search list and the shared
    ``BENCHMARKS``. It returns a pair; only the first element (the table) is
    used by this script, the second is discarded.
    """
    # NOTE(review): keyword ``bstrings`` kept verbatim (possibly "substrings").
    df, _csv_file = scoresearch_string(
        bstrings=[term],
        benchmarks=BENCHMARKS,
        output_file=output_name,
        args=args,
    )
    return df


def _combine(frames):
    """Concatenate *frames*, keep the last row per experiment, sort by score."""
    combined = pd.concat(frames, ignore_index=True)

    # The same experiment can match several search terms; keep one row each.
    combined = combined.drop_duplicates(subset=['Experiments'], keep='last')

    # Sort by AvgAll score descending
    if 'AvgAll' in combined.columns:
        combined = combined.sort_values('AvgAll', ascending=False)
    return combined


def _print_summary(combined_df):
    """Print summary statistics and the first ten rows of *combined_df*."""
    columns = combined_df.columns

    print("\n=== MMARY ===")
    print(f"Total models found: {len(combined_df)}")

    if 'DatasetSize' in columns:
        sizes = combined_df['DatasetSize']
        print(f"Dataset sizes range: {sizes.min():.0f} to {sizes.max():.0f}")
        # Summing the boolean mask counts the rows that have a size.
        print(f"Models with dataset sizes: {sizes.notna().sum()}")

    if 'Domain' in columns:
        print(f"Domains found: {combined_df['Domain'].value_counts().to_dict()}")

    if 'AvgAll' in columns:
        scores = combined_df['AvgAll']
        print(f"Performance range: {scores.min():.1f}% to {scores.max():.1f}%")

    # Show top 10 models (rows are already sorted by AvgAll when present)
    print("\n=== TOP 10 MODELS ===")
    if 'AvgAll' in columns:
        for _, row in combined_df.head(10).iterrows():
            dataset_size = row.get('DatasetSize')
            if pd.notna(dataset_size):
                size = f"({dataset_size:.0f})"
            else:
                size = "(Unknown size)"
            print(f"{row['Experiments']} {size}: {row['AvgAll']:.1f}%")


def main():
    """Query all model families and combine results.

    Returns:
        ``(combined_df, output_file)`` where ``output_file`` is the path string
        of the written CSV, or ``(None, None)`` when no query returned rows.
    """
    # Model families to search for
    # NOTE(review): "no_pipene_*" is possibly a garbled "no_pipeline_*".
    model_families = [
        "openthoughts3_math",
        "openthoughts3_code",
        "openthoughts3_science",
        "no_pipene_math",
        "no_pipene_code",
        "no_pipene_science",
        "nemo_nano_math",
        "nemo_nano_code",
        "nemo_nano_science",
    ]

    # Search for models with common naming patterns and large scales
    additional_searches = [
        "100k",
        "300k",
        "1M",
        "_1000k",
        "_316k",
    ]

    frames = []
    args = Args()

    # Query each model family. Failures are logged and skipped on purpose so
    # one bad query does not abort the whole run.
    for family in model_families:
        logging.info("Querying %s models...", family)
        try:
            df = _search(family, f"{family}_scang", args)
            if not df.empty:
                logging.info("Found %s models for %s", len(df), family)
                frames.append(df)
            else:
                logging.warning("No relts found for %s", family)
        except Exception as e:
            logging.error("Error querying %s: %s", family, e)

    # Also search for any new large-scale models that might be available
    logging.info("Searching for additional large-scale models...")
    for search_term in additional_searches:
        try:
            logging.info("Searching for models containing '%s'...", search_term)
            df = _search(search_term, f"large_scale_{search_term}", args)
            if not df.empty:
                logging.info("Found %s additional models with '%s'", len(df), search_term)
                frames.append(df)
        except Exception as e:
            logging.warning("Error searching for '%s': %s", search_term, e)

    if not frames:
        logging.error("No relts found for any model family!")
        return None, None

    # Combine all results
    logging.info("Combining all relts...")
    combined_df = _combine(frames)

    # Save combined results
    # NOTE(review): "eval/relts" is possibly a garbled "eval/results".
    results_dir = "eval/relts"
    os.makedirs(results_dir, exist_ok=True)
    output_file = f"{results_dir}/domain_scang_updated.csv"
    combined_df.to_csv(output_file, index=False)

    logging.info("Combined relts saved to %s", output_file)
    logging.info("Total models found: %s", len(combined_df))

    _print_summary(combined_df)
    return combined_df, output_file


if __name__ == "__main__":
    main()