"""Draw one bar chart per domain (math, code, science) from embedded scores.

The benchmark table is hard-coded below as CSV text. Scores are converted
from percent to fractions, each model gets a color (by model family) and a
hatch pattern, and a 1x3 figure is saved as PNG and PDF.
"""
import io
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Hardcoded CSV data. Scores are percentages; an empty cell means the model
# has no value for that benchmark.
compare_model_csv = """Experiments,Domain,DatasetSize,AvgAll,AIME24,AMC23,MATH500,AvgMath,JEEBench,GPQAD,AvgSci,LCBv2,CodeElo,CodeForces,AvgCode
DeepSeek-R1-Distill-Qwen-7B,Other,,62.6,51.3,92.0,88.0,77.1,,33.2,33.2,48.7,,,48.7
Llama-3.1-Nemotron-Nano-8B-v1,Other,,62.6,62.0,94.0,89.4,81.8,70.7,52.9,61.8,68.0,30.9,32.9,43.9
AceMath-RL-Nemotron-7B,Other,,58.1,70.7,95.5,90.6,85.6,55.4,55.7,55.6,51.3,22.8,22.9,32.3
OpenThinker2-7B,Other,,56.7,58.3,89.8,87.6,78.6,65.1,47.0,56.0,56.2,22.8,26.6,35.2
OpenMath-Nemotron-7B,Other,,46.9,73.7,95.8,90.4,86.6,53.9,33.8,43.8,19.1,4.3,4.4,9.3
SYNTHETIC-1-SFT-7B,Other,,44.8,36.7,80.0,87.2,68.0,55.8,44.4,50.1,31.9,8.6,14.1,18.2
OpenR1-Qwen-7B,Other,,43.7,52.7,85.2,86.8,74.9,64.4,46.5,55.4,9.8,2.8,1.6,4.7
OlympicCoder-7B,Other,,42.6,21.7,59.7,73.6,51.7,40.3,43.6,42.0,56.0,21.7,24.2,34.0
OpenThinker-7B,Other,,38.6,,,,,,38.6,38.6,,,,
Bespoke-Stratos-7B,Other,,31.7,14.3,54.2,77.6,48.7,40.2,31.8,36.0,27.4,3.2,4.8,11.8
Qwen2.5-7B-Instruct,Other,,24.6,,,,,,24.6,24.6,,,,
OpenCodeReasoning-Nemotron-7B,Other,,24.3,1.3,8.8,19.2,9.8,10.0,28.6,19.3,63.3,30.3,32.7,42.1
MR,Other,,14.7,14.0,32.2,38.6,28.3,16.7,12.1,14.4,3.0,0.6,0.4,1.3"""

# Columns that are never rescaled from percent to fraction.
NON_SCORE_COLUMNS = ["Domain", "DatasetSize", "🏆 (All)", "🏆 (Domain)"]

# Convert string to dataframe, indexed by model name when that column exists.
df = pd.read_csv(io.StringIO(compare_model_csv))
if "Experiments" in df.columns:
    df = df.set_index("Experiments")

# Convert percentage values to fractions so the y-axis can use '%' formatting.
for col in df.columns:
    if col not in NON_SCORE_COLUMNS:
        df[col] = df[col] / 100.0

# Metrics to plot (one subplot each) and the title shown above each subplot.
metrics_to_plot = ["AvgMath", "AvgCode", "AvgSci"]
title_mapping = {
    "AvgMath": "Math",
    "AvgCode": "Code",
    "AvgSci": "Science",
}

# Set plotting style
plt.rcParams.update({
    "font.size": 14,
    "axes.titlesize": 16,
    "axes.labelsize": 14,
    "xtick.labelsize": 12,
    "ytick.labelsize": 12,
    "legend.fontsize": 12,
    "figure.titlesize": 18,
})

# Create figure with 3 subplots (one for each domain)
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# Substrings that identify a model family by name.
nvidia_patterns = ["Nemotron"]
openthoughts_bespoke_patterns = ["OpenThinker", "Bespoke"]
openr1_patterns = ["OpenR1", "OlympicCoder"]

# Maps model name -> {'color': ..., 'hatch': ...}
model_styles = {}

unique_models = list(df.index)

# Hatches cycled within a family so same-colored bars stay distinguishable.
hatches = ['', '//', '\\\\', 'xx', '++', '**', 'oo', 'OO', '...', '---']

# Group models by family. The checks are ordered: the first match wins.
nvidia_models = []
openthoughts_bespoke_models = []
openr1_models = []
qwen_models = []
other_models = []

for model_name in unique_models:
    if "Qwen2.5-7B-Instruct" in model_name:
        qwen_models.append(model_name)
    elif any(pattern in model_name for pattern in nvidia_patterns):
        nvidia_models.append(model_name)
    elif any(pattern in model_name for pattern in openthoughts_bespoke_patterns):
        openthoughts_bespoke_models.append(model_name)
    elif any(pattern in model_name for pattern in openr1_patterns):
        openr1_models.append(model_name)
    else:
        other_models.append(model_name)

# Families that share one color and differ by hatch:
# NVIDIA - green, OpenThoughts & Bespoke - red, OpenR1 - orange.
for family_models, family_color in (
    (nvidia_models, '#2ca02c'),
    (openthoughts_bespoke_models, '#d62728'),
    (openr1_models, '#ff7f0e'),
):
    for i, model in enumerate(family_models):
        model_styles[model] = {
            'color': family_color,
            'hatch': hatches[i % len(hatches)],
        }

# Qwen baseline model - gray, no hatch
for model in qwen_models:
    model_styles[model] = {'color': '#7f7f7f', 'hatch': ''}

# Other models - one distinct color each (cycled), no hatch
other_colors = ['#1f77b4', '#9467bd', '#8c564b', '#e377c2',
                '#bcbd22', '#17becf', '#006ba4', '#ababab']
for i, model in enumerate(other_models):
    model_styles[model] = {
        'color': other_colors[i % len(other_colors)],
        'hatch': '',
    }

# (substring, short label) pairs checked in order; the first match wins.
SHORT_LABEL_RULES = (
    ("AceMath-RL-Nemotron-7B", "AceMath"),
    ("OpenCodeReasoning-Nemotron-7B", "OpenCodeReasoning"),
    ("OpenThinker2-7B", "OpenThinker2"),
    ("Llama-3.1-Nemotron-Nano-8B", "Llama-3.1-Nemotron"),
)


def get_color_for_model(model_name):
    """Return the bar color assigned to ``model_name``, or gray if unstyled."""
    style = model_styles.get(model_name)
    return style['color'] if style else "#7f7f7f"


def get_hatch_for_model(model_name):
    """Return the hatch assigned to ``model_name``, or '' if unstyled."""
    style = model_styles.get(model_name)
    return style['hatch'] if style else ""


def shorten_model_name(model_name):
    """Return the x-axis tick label for ``model_name``.

    Names containing '/' are reduced to the part after the last '/'.
    Otherwise the first matching rule in ``SHORT_LABEL_RULES`` supplies the
    label, and names matching no rule are returned unchanged.
    """
    if "/" in model_name:
        return model_name.split("/")[-1]
    for needle, short_label in SHORT_LABEL_RULES:
        if needle in model_name:
            return short_label
    return model_name


bar_width = 0.8

# Plot each metric as a bar chart
for ax, metric in zip(axes, metrics_to_plot):
    if metric not in df.columns:
        # Handle case where metric isn't in the dataframe
        ax.text(0.5, 0.5, f"Metric '{metric}' not found",
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes)
        continue

    # Sort by the current metric, best first (rows with no value sort last).
    sorted_df = df.sort_values(metric, ascending=False)
    x_positions = np.arange(len(sorted_df.index))

    # Bars are drawn one at a time because each has its own color and hatch.
    for x, (model_name, value) in zip(x_positions, sorted_df[metric].items()):
        ax.bar(x, value, width=bar_width,
               color=get_color_for_model(model_name),
               edgecolor='black', linewidth=0.5,
               hatch=get_hatch_for_model(model_name),
               label=model_name)

    # Set the x-ticks to match the bar positions
    ax.set_xticks(x_positions)

    # Set labels and title
    ax.set_title(title_mapping.get(metric, metric), fontsize=14, fontweight='bold')
    ax.set_ylabel('Score')

    # Format y-axis as percentage
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0%}'.format(y)))

    # Apply shortened labels and rotate them
    short_labels = [shorten_model_name(name) for name in sorted_df.index]
    ax.set_xticklabels(short_labels, rotation=45, ha='right', rotation_mode='anchor')

    # Add padding at the bottom for labels
    ax.tick_params(axis='x', which='major', pad=8)

    # Add grid for readability
    ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Set y-axis limits: 15% headroom above the tallest bar, capped at 100%.
    # Series.max() skips missing values, so this does not rely on sort order.
    ax.set_ylim(0, min(1.0, sorted_df[metric].max() * 1.15))

# Adjust layout for the rotated x-axis labels
plt.tight_layout(rect=[0, 0.08, 1, 0.96])
fig.subplots_adjust(wspace=0.3, bottom=0.2)

# Create output directory if it doesn't exist.
# NOTE(review): "eval/relts" looks like a truncated "eval/results" - confirm
# before renaming; kept as-is so existing output locations do not move.
relts_dir = "eval/relts"
os.makedirs(relts_dir, exist_ok=True)

# Save as PNG and PDF
plt.savefig(f"{relts_dir}/simple_fig1_bar_chart.png", dpi=300, bbox_inches='tight')
print(f"Bar charts saved to {relts_dir}/simple_fig1_bar_chart.png")

plt.savefig(f"{relts_dir}/simple_fig1_bar_chart.pdf", format='pdf', bbox_inches='tight')
print(f"Bar charts saved to {relts_dir}/simple_fig1_bar_chart.pdf")