---
# Flat run configuration: every setting is a top-level key.
# Keys are arranged into stanzas by apparent purpose. The grouping is inferred
# from key names only -- NOTE(review): confirm against the consuming code.
# Mapping order carries no meaning in YAML, so the arrangement is cosmetic.

# --- Run identity, tracking and output ---
config: config/experiments/sweep_expansion.yaml
experiment: sos_coefficients
task: expansion
group: sos_sweep
exp_name: transformer-polynomial
exp_id: ''  # explicitly the empty string, not null
wandb_id: null
tags:
  - sos-coefficients
save_path: ./dumped
save_wandb_artifact: true
resume_from_checkpoint: false
dryrun: false
seed: 42

# --- Dataset and problem definition ---
data_path: n6_sparse_uniform_sparse_d6_m30
data_format: polynomial_basis
field: RR
num_variables: 6  # presumably matches the "n6" in data_path -- TODO confirm
max_degree: 20
max_coefficient: 1000
rational_coefficients: null
learning_target: k-leading-terms
num_leading_terms: 1
training_size: 990000
test_size: 1000
train_test_split: false
train_sample_skimming: true
num_workers: 8

# --- Model architecture ---
model: custom_bart
d_model: 512
nhead: 8
num_encoder_layers: 6
num_decoder_layers: 6
dim_feedforward: 2048
dropout: 0.1
attention_dropout: 0  # integer literal here, whereas dropout is a float
max_sequence_length: 2048

# --- Tokenisation, embedding and positional encoding ---
coeff_encoding: prefix
coeff_token_size: 1
split_coeff_exp: false
encoding_method: standard
embedding_type: standard
monomial_embedding: true
monomial_id_embedding: false
position_encoding_type: learned
monomial_type_position_encoding: false
token_type_position_encoding: false
expander_type: linear
token_expander: mlp2
use_advanced_expander: false
aware_of_padding: false

# --- Training objective ---
# Both heads are switched on, each with weight 1.0.
use_classification: true
classification_weight: 1.0
use_regression: true
regression_weight: 1.0

# --- Optimisation schedule ---
optimizer: adamw_torch
learning_rate: 0.0001
weight_decay: 0.0001
warmup_ratio: 0.05
epochs: 1
batch_size: 128
gradient_accumulation_steps: 4
test_batch_size: 2
