{
    "family": "san",
    "model_name": "gpt2",
    "train_steps": 1000000,
    "n_dims": 15,
    "n_embd": 256,
    "n_layer": 12,
    "n_head": 8,
    "learning_rate": 0.0001,
    "gpu": [
        1
    ],
    "curriculum_points_start": 120,
    "curriculum_points_end": 120,
    "name": "mix4-linear_regression-quadratic_regression-relu_regression-sparse_parity_15_3",
    "task_list": [
        {
            "task": "linear_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "y_format": 0,
            "exp_name": "linear_regression"
        },
        {
            "task": "quadratic_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "y_format": 0,
            "exp_name": "quadratic_regression"
        },
        {
            "task": "relu_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "y_format": 0,
            "exp_name": "relu_regression"
        },
        {
            "task": "sparse_parity",
            "data": "boolean",
            "k": 3,
            "l": 455,
            "batch_size": 64,
            "y_format": 0,
            "exp_name": "sparse_parity_15_3"
        }
    ]
}