{# Experiment report: the experiment name, then one section per dataset and one subsection per metric. #}{{ experiment_name }}

{% for ds_name, ds_metrics in experiment_data.items() %}{# outer loop: every (dataset name, per-metric mapping) pair in experiment_data #}

{{ ds_name }}

{% for metric_name, metric_info in ds_metrics.items() %}{# inner loop: every (metric name, metric payload) pair for this dataset #}

{{ metric_name }}

{{ metric_name }} fraction of prompts where perturbing alpha behaves as expected

{# scaling_fracs is marked `safe`, so autoescaping (if enabled) is skipped. NOTE(review): presumably pre-rendered markup produced by the pipeline; confirm it never carries untrusted text. #}{{ metric_info.scaling_fracs | safe }}

{# NOTE(review): the "(separate)" / plain caption pair below appears twice; presumably each caption accompanies content not visible in this template text. Confirm before deduplicating. #}average {{ metric_name }} (separate)

average {{ metric_name }}

average {{ metric_name }} (separate)

average {{ metric_name }}
{% endfor %} {% endfor %}