{# Report header: the experiment's name. #}{{ experiment_name }}
{# Outer loop: one section per (dataset name, per-metric mapping) pair in experiment_data. #}{% for dataset_name, metrics_by_name in experiment_data.items() %}
{{ dataset_name }}
{# Inner loop: one subsection per (metric name, metric results) pair for this dataset. #}{% for metric_name, metric_results in metrics_by_name.items() %}
{{ metric_name }}
{{ metric_name }} fraction of prompts where perturbing alpha behaves as expected
{# Emitted unescaped via |safe. NOTE(review): presumably pre-rendered trusted markup -- confirm against the caller. #}{{ metric_results.scaling_fracs | safe }}
{# NOTE(review): the "(separate)" caption occurs three times below; confirm each one labels a distinct plot. #}average {{ metric_name }} (separate)
average {{ metric_name }} (separate)
average {{ metric_name }} (same plot)
average {{ metric_name }} (separate)
{% endfor %}{# end of per-metric loop #} {% endfor %}{# end of per-dataset loop #}