{
    "family": "san",
    "model_name": "gpt2",
    "train_steps": 1000000,
    "n_dims": 10,
    "n_embd": 256,
    "n_layer": 12,
    "n_head": 8,
    "learning_rate": 0.0001,
    "gpu": [
        3, 4
    ],
    "curriculum_points_start": 120,
    "curriculum_points_end": 120,
    "name": "mix6-disjunction-linear_regression-quadratic_regression-relu_regression-sparse_linear_regression-parity_10",


    "task_list": [
        {
            "task": "sparse_linear_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "sparsity": 3,
            "valid_coords": 10,
            "y_format": 0,
            "exp_name": "sparse_linear_regression"
        },
        {
            "task": "quadratic_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "y_format": 0,
            "exp_name": "quadratic_regression"
        },
        {
            "task": "relu_regression",
            "data": "gaussian",
            "batch_size": 64,
            "mu": 0,
            "scale": 1,
            "y_format": 0,
            "exp_name": "relu_regression"
        },
        {
            "task": "conjunction",
            "data": "boolean",
            "batch_size": 64,
            "y_format": 0,
            "exp_name": "conjunction"
        },
        {
            "task": "disjunction",
            "data": "boolean",
            "batch_size": 64,
            "y_format": 0,
            "exp_name": "disjunction"
        },
        {
            "task": "parity",
            "data": "boolean",
            "batch_size": 64,
            "y_format": 0,
            "exp_name": "parity_10"
        }
    ]
}