"""Plot benchmark accuracy versus training-set size for several model families.

The script parses an embedded CSV table, draws one log-x panel per benchmark
with one curve per model family plus horizontal baseline lines, and writes the
figure to disk as PNG and PDF.
"""

import io
import os

import matplotlib.pyplot as plt
import pandas as pd

# Embedded results table: one row per experiment, scores in percent.
fig1_ot2_csv = """Experiments,Domain,DatasetSize,AvgAll,AIME24,AMC23,MATH500,AvgMath,JEEBench,GPQAD,AvgSci,LCBv2,CodeElo,CodeForces,AvgCode
openthoughts3_300k,300k,316000.0,57.7,61.7,88.8,88.8,79.8,69.2,51.0,60.1,51.7,24.9,25.8,34.1
openthoughts3_100k,100k,100000.0,54.1,54.3,88.8,88.4,77.2,61.1,49.0,55.0,44.0,22.5,24.5,30.3
openthoughts3_30k,30k,31600.0,49.8,40.7,83.2,87.4,70.4,58.2,48.0,53.1,45.1,16.6,19.1,26.9
openthoughts3_10k,10k,10000.0,43.8,32.0,75.2,82.6,63.3,45.7,48.7,47.2,40.0,11.1,15.2,22.1
openthoughts3_3k,3k,3160.0,41.0,30.7,66.5,81.0,59.4,51.7,48.1,49.9,32.6,7.1,10.7,16.8
openthoughts3_1k,1k,1000.0,34.3,18.0,58.5,78.6,51.7,46.0,39.1,42.6,22.4,5.1,6.4,11.3
openthoughts3_0.3k,0.3k,316.0,33.7,17.3,56.5,79.8,51.2,45.2,39.2,42.2,19.7,4.5,7.5,10.6
s1,Other,1000.0,33.6,20.0,60.2,77.8,52.7,39.1,40.2,39.7,23.2,3.4,5.0,10.5
s1_0.3k,0.3k,316.0,32.4,18.3,55.5,75.6,49.8,39.0,40.2,39.6,22.2,3.5,4.4,10.0
mo,Other,817.0,29.7,15.3,56.7,76.6,49.5,39.1,29.8,34.4,14.8,2.1,2.9,6.6
mo_0.3k,0.3k,316.0,30.7,15.3,56.8,74.2,48.8,38.1,35.4,36.8,18.8,2.9,4.0,8.6
am,Other,1400000.0,51.0,28.3,82.2,87.4,66.0,61.1,48.3,54.7,54.5,21.0,24.8,33.4
am_300k,300k,316000.0,44.0,23.7,73.2,84.0,60.3,56.3,42.8,49.6,44.6,11.5,15.7,23.9
am_30k,30k,31600.0,33.5,15.3,58.2,77.4,50.3,40.2,37.5,38.8,27.9,4.1,7.3,13.1
am_10k,10k,10000.0,29.9,15.7,52.8,74.2,47.6,34.9,26.8,30.8,25.4,3.5,5.8,11.6
am_3k,3k,3160.0,29.1,15.3,50.3,71.0,45.5,33.5,28.6,31.0,24.9,3.7,5.7,11.4
am_1k,1k,1000.0,26.0,10.0,46.0,69.2,41.7,29.9,24.7,27.3,18.7,3.0,6.7,9.5
am_0.3k,0.3k,316.0,28.1,10.7,50.5,67.2,42.8,34.3,31.6,33.0,20.5,3.9,5.7,10.0
nemo_nano,Nano,4350000.0,64.6,62.0,94.0,90.4,82.1,72.2,59.6,65.9,66.8,36.3,35.3,46.1
nemo_nano_1000k,Nano,1000000.0,57.2,55.0,87.0,86.8,76.3,61.0,52.9,57.0,58.0,28.6,28.3,38.3
nemo_nano_300k,Nano,316000.0,52.9,44.3,84.2,85.8,71.4,60.1,52.9,56.5,50.6,22.2,22.7,31.8
nemo_nano_100k,Nano,100000.0,42.7,28.7,69.0,83.6,60.4,48.2,47.0,47.6,37.8,12.2,15.5,21.8
nemo_nano_30k,Nano,31600.0,33.5,19.0,56.0,71.2,48.7,34.2,38.9,36.6,32.2,7.6,9.2,16.3
nemo_nano_10k,Nano,10000.0,31.6,16.7,50.5,72.4,46.5,31.6,39.9,35.8,27.5,5.7,8.1,13.8
nemo_nano_3k,Nano,3160.0,27.3,9.7,43.2,61.4,38.1,30.3,37.0,33.6,24.3,5.3,7.2,12.3
nemo_nano_1k,Nano,1000.0,23.9,8.3,35.2,63.6,35.7,25.4,30.8,28.1,19.0,3.3,5.8,9.4
nemo_nano_0.3k,Nano,316.0,28.4,12.0,51.0,67.4,43.5,31.9,32.2,32.0,23.0,4.2,5.4,10.9
openthoughts2,Other,1040000.0,54.1,53.7,86.0,87.6,75.8,62.2,48.8,55.5,52.6,18.8,22.9,31.4
openthoughts2_1000k,1000k,1000000.0,56.1,57.0,88.8,87.6,77.8,64.4,49.7,57.0,55.3,21.5,24.6,33.8
openthoughts2_300k,300k,316000.0,50.0,41.0,84.2,86.0,70.4,58.7,46.8,52.8,48.8,14.3,19.7,27.6
openthoughts2_100k,100k,100000.0,43.0,32.3,73.5,82.8,62.9,52.6,42.3,47.4,37.3,9.3,13.9,20.2
openthoughts2_30k,30k,31600.0,37.8,24.7,60.7,81.0,55.5,46.5,37.7,42.1,33.9,6.6,11.6,17.4
openthoughts2_10k,10k,10000.0,35.0,16.7,59.2,79.2,51.7,44.3,42.9,43.6,25.9,4.4,7.7,12.7
openthoughts2_3k,3k,3160.0,31.0,15.7,55.5,73.0,48.1,40.7,37.1,38.9,22.6,5.1,5.7,11.1
openthoughts2_1k,1k,1000.0,28.7,13.3,50.0,72.8,45.4,35.3,32.2,33.8,20.0,2.7,3.0,8.6
openthoughts2_0.3k,0.3k,316.0,31.2,14.0,53.5,70.8,46.1,38.6,43.4,41.0,21.9,3.7,4.0,9.9
"""

# Parse the embedded table and index it by experiment name.
df = pd.read_csv(io.StringIO(fig1_ot2_csv))
df = df.set_index("Experiments")

# Store scores as fractions so they share a scale with the baseline values
# below; they are converted back to percent at plot time.
score_columns = [col for col in df.columns if col not in ("Domain", "DatasetSize")]
df[score_columns] = df[score_columns] / 100.0

FONTSIZE = 20
MARKERSIZE = 10
LINEWIDTH = 3
FIGSIZE = (12.25, 6)  # not referenced below; the figure size is derived from the panel count

plt.rcParams.update({
    "font.size": FONTSIZE,
    "axes.titlesize": FONTSIZE,
    "axes.labelsize": FONTSIZE,
    "xtick.labelsize": FONTSIZE,
    "ytick.labelsize": FONTSIZE,
    "legend.fontsize": FONTSIZE,
})

# Benchmarks to plot, one panel each. "AIME25" and "veCodeBenchv5" have
# baseline values below but no column in the CSV, so they are not plotted.
domains = [
    "AIME24",
    "LCBv2",
    "GPQAD",
    "AMC23",
    "MATH500",
    "JEEBench",
    "CodeElo",
    "CodeForces",
]
titles = domains

# One row of panels with independent y-axes. squeeze=False keeps the return
# value a 2-D array even when only a single benchmark is configured.
fig, axes_grid = plt.subplots(
    1,
    len(domains),
    figsize=(len(domains) * 6, 6),
    sharey=False,
    squeeze=False,
)
axes = axes_grid[0]

# Model families keyed by experiment-name prefix; each value becomes a list of
# (dataset_size, experiment_name) tuples.
# NOTE(review): the "mo" key matches the CSV rows as written; several names in
# this file look truncated, so confirm the intended family name.
models = {
    "nemo_nano": [],
    "am": [],
    "openthoughts3": [],
    "s1": [],
    "mo": [],
    "openthoughts2": [],
}

# Assign every experiment to the first family whose prefix it starts with.
for model in df.index:
    for prefix in models:
        if model.startswith(prefix):
            models[prefix].append((df.loc[model, "DatasetSize"], model))
            break

# Order each family by dataset size so the curves run left to right.
for members in models.values():
    members.sort(key=lambda entry: entry[0])

# Line colour per model family.
colors = {
    "nemo_nano": "blue",
    "am": "green",
    "openthoughts3": "red",
    "s1": "purple",
    "mo": "orange",
    "openthoughts2": "black",
}

# Legend label per model family.
display_names = {
    "am": "AM",
    "s1": "s1.1",
    "mo": "MO",
    "openthoughts3": "OpenThoughts3",
    "openthoughts2": "OpenThoughts2",
    "nemo_nano": "Nemotron Nano",
}

# Reference models drawn as horizontal lines; scores are fractions in [0, 1].
baselines = {
    "Qwen-2.5-7B-Instruct": {
        "AIME24": 0.15,
        "LCBv2": 0.33,
        "GPQAD": 0.237,
        "AMC23": 0.535,
        "MATH500": 0.706,
        "JEEBench": 0.336,
        "CodeElo": 0.051,
        "CodeForces": 0.099,
        "AIME25": 0.08,
        "veCodeBenchv5": 0.172,
        "color": "black",
        "linestyle": "--",
    },
    "DeepSeek-R1-Distill-Qwen-7B": {
        "AIME24": 0.567,
        "LCBv2": 0.478,
        "GPQAD": 0.481,
        "AMC23": 0.882,
        "MATH500": 0.886,
        "JEEBench": 0.504,
        "CodeElo": 0.199,
        "CodeForces": 0.211,
        "AIME25": 0.39,
        "veCodeBenchv5": 0.351,
        "color": "red",
        "linestyle": "--",
    },
}

for i, (ax, metric) in enumerate(zip(axes, domains)):
    ax.set_title(titles[i], fontsize=FONTSIZE)
    ax.set_xlabel("Dataset Size", fontsize=FONTSIZE)
    # Only the left-most panel carries the y-axis label.
    if i == 0:
        ax.set_ylabel("Accuracy (%)", fontsize=FONTSIZE)
    ax.grid(True, linestyle="--", alpha=0.25)

    # One curve per model family, in percent.
    for prefix, members in models.items():
        if not members:
            continue
        sizes = [size for size, _ in members]
        scores = [df.loc[name, metric] * 100 for _, name in members]
        ax.plot(
            sizes,
            scores,
            "o-",
            label=display_names[prefix],
            color=colors[prefix],
            linewidth=LINEWIDTH,
            markersize=MARKERSIZE,
        )

    # Horizontal reference line for every baseline that reports this metric.
    for baseline_name, baseline_data in baselines.items():
        if metric in baseline_data:
            ax.axhline(
                y=baseline_data[metric] * 100,
                color=baseline_data["color"],
                linestyle=baseline_data["linestyle"],
                linewidth=LINEWIDTH - 1,
                alpha=0.5,
                label=baseline_name,
            )

    ax.set_xscale("log")
    ax.set_xticks([1000, 10000, 100000, 1000000])
    ax.set_xticklabels(["1K", "10K", "100K", "1M"], fontsize=FONTSIZE - 2)

    # Per-panel y-limits: span every plotted score and baseline, padded by
    # 5 percentage points and clamped to [0, 100].
    panel_values = list(df[metric].values)
    panel_values.extend(
        baseline_data[metric]
        for baseline_data in baselines.values()
        if metric in baseline_data
    )
    panel_values = [value * 100 for value in panel_values]
    min_y = max(0, min(panel_values) - 5)
    max_y = min(100, max(panel_values) + 5)
    ax.set_ylim(min_y, max_y)

# Collect one legend handle per unique label across all panels, keeping
# first-seen order.
legend_entries = {}
for ax in axes:
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        legend_entries.setdefault(label, handle)

plt.tight_layout(rect=[0.1, 0.25, 1, 1])
fig.subplots_adjust(wspace=0.1)  # horizontal spacing between panels

# The OpenThoughts3 entry gets its own bold legend; it is selected by label
# rather than by position so the split does not depend on plotting order.
highlight_label = display_names["openthoughts3"]
regular_labels = [label for label in legend_entries if label != highlight_label]
fig.legend(
    [legend_entries[label] for label in regular_labels],
    regular_labels,
    loc=(0.1, 0.03),
    ncol=3,
    fontsize=FONTSIZE - 2,
    frameon=False,
    fancybox=True,
    framealpha=0.7,
)
if highlight_label in legend_entries:
    fig.legend(
        [legend_entries[highlight_label]],
        [r"$\mathbf{" + highlight_label + "}$"],
        loc=(0.35, 0.19),
        ncol=1,
        fontsize=FONTSIZE,
        frameon=False,
        fancybox=True,
        framealpha=0.7,
    )

# NOTE(review): the directory name looks truncated (possibly "eval/results");
# kept as written so the output location does not change — confirm.
results_dir = "eval/relts"
os.makedirs(results_dir, exist_ok=True)
plt.savefig(f"{results_dir}/simple_fig1_ot2.png", dpi=300, bbox_inches="tight")
plt.savefig(f"{results_dir}/simple_fig1_ot2.pdf", format="pdf", bbox_inches="tight")