"""Plot per-domain accuracy against dataset size for several model families.

Reads ``eval/results/domain_scaling_final.csv`` (indexed by its
``Experiments`` column), draws one panel for each of the ``AvgMath``,
``AvgCode`` and ``AvgSci`` columns, overlays horizontal baseline lines, and
saves the figure as PNG and PDF under ``eval/results``.
"""

import io
import os
from itertools import zip_longest

import matplotlib.pyplot as plt
import pandas as pd

# Load updated data from the CSV file generated from the database query.
# Values are kept as percentages (not converted to decimals).
df = pd.read_csv("eval/results/domain_scaling_final.csv")
df = df.set_index("Experiments")

# Create figure with 3 subplots
plt.rcParams.update({
    "font.size": 16,
    "axes.titlesize": 20,
    "axes.labelsize": 18,
    "xtick.labelsize": 16,
    "ytick.labelsize": 16,
    "legend.fontsize": 14,
})
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# Model groups, keyed by the name prefix that identifies family + domain.
# Each value becomes a list of (dataset_size, model_name) tuples.
models = {
    "openthoughts3_math": [],
    "openthoughts3_code": [],
    "openthoughts3_science": [],
    "no_pipeline_math": [],
    "no_pipeline_code": [],
    "no_pipeline_science": [],
    "nemo_nano_math": [],
    "nemo_nano_code": [],
    "nemo_nano_science": [],
}

# Size tags looked for in a model name when the DatasetSize column is empty.
# Checked in this order; the first tag found in the name wins.
_SIZE_BY_NAME_TAG = (
    ("_100k", 100000.0),
    ("_300k", 316000.0),  # Use 316k to match other 300k models
    ("_30k", 31600.0),
    ("_10k", 10000.0),
    ("_3k", 3160.0),
    ("_1k", 1000.0),
    ("_0.3k", 316.0),
)


def _infer_dataset_size(model_name):
    """Return the dataset size implied by a size tag in *model_name*.

    Returns 0 when the name carries no recognised size tag.
    """
    for tag, size in _SIZE_BY_NAME_TAG:
        if tag in model_name:
            return size
    return 0  # Default for models without a clear size indicator


# Group models by prefix and collect their dataset sizes
for model in df.index:
    # Skip mixed domain models
    if df.loc[model, "Domain"] == "Mixed":
        continue
    # Skip the large nemo_nano_science model (709k samples)
    if model == "nemo_nano_science":
        continue
    # A model joins the first group whose prefix it starts with.
    for prefix in models:
        if model.startswith(prefix):
            dataset_size = df.loc[model, "DatasetSize"]
            if pd.isna(dataset_size):
                # Fall back to the size encoded in the model name.
                dataset_size = _infer_dataset_size(model)
            models[prefix].append((dataset_size, model))
            break

# Sort each model group by dataset size
for prefix in models:
    models[prefix].sort(key=lambda entry: entry[0])

# Line colour per model family (same colour across the three domains)
colors = {
    "openthoughts3_math": "red",
    "openthoughts3_code": "red",
    "openthoughts3_science": "red",
    "no_pipeline_math": "green",
    "no_pipeline_code": "green",
    "no_pipeline_science": "green",
    "nemo_nano_math": "blue",
    "nemo_nano_code": "blue",
    "nemo_nano_science": "blue",
}

# Marker shapes for the different domains
markers = {
    "math": "o",  # circle
    "code": "s",  # square
    "science": "^",  # triangle
}

# Display names used as line labels
display_names = {
    "openthoughts3_math": "OpenThoughts3 (Math)",
    "openthoughts3_code": "OpenThoughts3 (Code)",
    "openthoughts3_science": "OpenThoughts3 (Science)",
    "no_pipeline_math": "No Pipeline (Math)",
    "no_pipeline_code": "No Pipeline (Code)",
    "no_pipeline_science": "No Pipeline (Science)",
    "nemo_nano_math": "Nemotron Nano (Math)",
    "nemo_nano_code": "Nemotron Nano (Code)",
    "nemo_nano_science": "Nemotron Nano (Science)",
}

# Baseline models with their performance metrics (in percentage)
baselines = {
    "Qwen-2.5-7B-Instruct": {
        "AvgMath": 45.0,
        "AvgCode": 25.0,
        "AvgSci": 30.0,
        "color": "black",
        "linestyle": "--",
    },
    # "DeepSeek-R1-Distill-Qwen-7B": {
    #     "AvgMath": 75.0,
    #     "AvgCode": 48.0,
    #     "AvgSci": 55.0,
    #     "color": "red",
    #     "linestyle": "--"
    # }
}

# Plot domains: one panel per metric column
domains = ["AvgMath", "AvgCode", "AvgSci"]
titles = ["Math", "Code", "Science"]

for i, metric in enumerate(domains):
    ax = axes[i]
    ax.set_title(f"{titles[i]} Average", fontsize=28)
    ax.set_xlabel("Dataset Size", fontsize=26)
    if i == 0:
        # Only the left-most panel carries the y-axis label.
        ax.set_ylabel("Accuracy (%)", fontsize=26)
    ax.grid(True, linestyle='--', alpha=0.25)

    # Determine which prefixes to plot for this domain.
    # "AvgSci" -> "sci", which is a substring of the "..._science" prefixes.
    domain_suffix = metric.lower().replace("avg", "")
    relevant_prefixes = [p for p in models if domain_suffix in p]

    # Add all relevant prefixes first (bold lines)
    for prefix in relevant_prefixes:
        x_values = []
        series_values = []
        for size, model_name in models[prefix]:
            if model_name in df.index and metric in df.columns:
                x_values.append(size)
                series_values.append(df.loc[model_name, metric])

        if x_values and series_values:
            domain_type = prefix.split("_")[-1]
            marker = markers.get(domain_type, "o")
            ax.plot(x_values, series_values, '-',
                    label=display_names[prefix],
                    color=colors[prefix],
                    linewidth=3,
                    marker=marker,
                    markersize=12)

    # Add baseline horizontal lines
    for baseline_name, baseline_data in baselines.items():
        if metric in baseline_data:
            ax.axhline(y=baseline_data[metric],
                       color=baseline_data["color"],
                       linestyle=baseline_data["linestyle"],
                       linewidth=2,
                       alpha=0.5,
                       label=baseline_name)

    ax.set_xscale('log')
    ax.set_xticks([316, 1000, 10000, 100000, 1000000])
    # The tick at 316 is kept but left unlabelled.
    ax.set_xticklabels(['', '1K', '10K', '100K', '1M'])

    # Set y-axis limits with padding (for percentage values).
    # Uses every non-missing value in the column, not only the plotted models.
    y_values = [v for v in df[metric].values if not pd.isna(v)]
    # Add baseline values to the list for limit calculation
    for baseline_data in baselines.values():
        if metric in baseline_data:
            y_values.append(baseline_data[metric])

    if y_values:
        min_y = max(0, min(y_values) - 5)
        max_y = min(100.0, max(y_values) + 5)
        ax.set_ylim(min_y, max_y)

    # Remove top and right spines
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

# Create legend with proper ordering: model families on row 1, shapes on row 2
legend_handles = []
legend_labels = []

# First, add the items for row 1 (model families + baseline)
for family, color in [("OpenThoughts3", "red"),
                      ("No Pipeline", "green"),
                      ("Nemotron Nano", "blue")]:
    line_handle = plt.Line2D([0], [0], color=color, linewidth=3, label=family)
    legend_handles.append(line_handle)
    legend_labels.append(family)

# Add baseline to complete first row (at most one new "Qwen" entry per panel)
for ax in axes:
    handles, labels = ax.get_legend_handles_labels()
    for handle, label in zip(handles, labels):
        if "Qwen" in label and label not in legend_labels:
            legend_handles.append(handle)
            legend_labels.append(label)
            break

top_row = list(zip(legend_handles, legend_labels))

# Then add the 3 items for row 2 (domain symbols)
domain_symbols = [
    ("Math subset", "o"),  # circle
    ("Code subset", "s"),  # square
    ("Science subset", "^"),  # triangle
]
bottom_row = []
for label, marker in domain_symbols:
    symbol_handle = plt.Line2D([0], [0], color='black', marker=marker,
                               linestyle='None', markersize=8, label=label)
    bottom_row.append((symbol_handle, label))

# The legend fills its columns top-to-bottom, so interleave the two rows
# (top, bottom, top, bottom, ...) to get families above and symbols below.
# zip_longest tolerates rows of unequal length, e.g. if no baseline was found.
ordered_entries = [
    entry
    for pair in zip_longest(top_row, bottom_row)
    for entry in pair
    if entry is not None
]
all_handles = [handle for handle, _ in ordered_entries]
all_labels = [label for _, label in ordered_entries]

# Create a simplified legend with no border (4 columns to create 2 rows)
fig.legend(all_handles, all_labels, loc='lower center',
           bbox_to_anchor=(0.5, 0.02), ncol=4,
           fontsize=16, frameon=False, fancybox=True, framealpha=0.7,
           columnspacing=1.0)

# Save plots
results_dir = "eval/results"
os.makedirs(results_dir, exist_ok=True)

# Adjust bottom margin for legend (reduced since legend was moved up)
plt.tight_layout(rect=[0.04, 0.18, 0.98, 0.96])
fig.subplots_adjust(wspace=0.25)

# Save as PNG and PDF
plt.savefig(f"{results_dir}/simple_domain_scaling.png", dpi=300, bbox_inches='tight')
plt.savefig(f"{results_dir}/simple_domain_scaling.pdf", format='pdf', bbox_inches='tight')

print(f"Plots saved to {results_dir}/simple_domain_scaling.png and {results_dir}/simple_domain_scaling.pdf")