{
    "exp_name": "transformer-polynomial",
    "task": "expansion",
    "batch_size": 128,
    "epochs": 1,
    "data_format": "polynomial_basis",
    "group": "sos_sweep",
    "test_batch_size": 2,
    "seed": 42,
    "training_size": 990000,
    "test_size": 1000,
    "train_test_split": false,
    "max_degree": 20,
    "weight_decay": 0.0001,
    "train_sample_skimming": true,
    "max_sequence_length": 2048,
    "model": "custom_bart",
    "embedding_type": "standard",
    "position_encoding_type": "learned",
    "coeff_encoding": "prefix",
    "learning_target": "k-leading-terms",
    "monomial_embedding": true,
    "save_wandb_artifact": true,
    "field": "RR",
    "num_variables": 6,
    "data_path": "n6_sparse_uniform_sparse_d6_m30",
    "gradient_accumulation_steps": 4,
    "num_workers": 8,
    "num_leading_terms": 1,
    "tags": [
        "sos-coefficients"
    ],
    "config": "config/experiments/sweep_expansion.yaml",
    "experiment": "sos_coefficients",
    "save_path": "./dumped",
    "exp_id": "",
    "d_model": 512,
    "dim_feedforward": 2048,
    "num_encoder_layers": 6,
    "num_decoder_layers": 6,
    "nhead": 8,
    "dropout": 0.1,
    "attention_dropout": 0,
    "encoding_method": "standard",
    "use_advanced_expander": false,
    "expander_type": "linear",
    "use_classification": true,
    "use_regression": true,
    "classification_weight": 1.0,
    "regression_weight": 1.0,
    "rational_coefficients": null,
    "max_coefficient": 1000,
    "learning_rate": 0.0001,
    "warmup_ratio": 0.05,
    "optimizer": "adamw_torch",
    "resume_from_checkpoint": false,
    "wandb_id": null,
    "dryrun": false,
    "split_coeff_exp": false,
    "token_type_position_encoding": false,
    "monomial_type_position_encoding": false,
    "monomial_id_embedding": false,
    "token_expander": "mlp2",
    "coeff_token_size": 1,
    "aware_of_padding": false
}