args:
    # Run-level switches. Booleans use the canonical lowercase spelling.
    device: 'cuda:0'
    DEBUG: false
    transfer: true

    # Path of the pretrained-model directory (presumably where checkpoints are
    # stored -- confirm in the consumer). Alternatives stay commented out; keep
    # exactly one `pretrained_model_path` active, because a repeated mapping key
    # is invalid YAML and most loaders silently keep only the last occurrence.
    #pretrained_model_path: './front_attention_both_conditioning_SECOND_FINAL_FACTFORMER_PRETRAINED_MODELS/'
    #pretrained_model_path: './back_attention_both_conditioning_SECOND_FINAL_FACTFORMER_PRETRAINED_MODELS/'
    pretrained_model_path: './both_attention_both_conditioning_SECOND_FINAL_FACTFORMER_PRETRAINED_MODELS/'

    # Results directory for the current run. Earlier experiment targets are kept
    # for reference; the same single-active-key rule applies to `results_dir`.
    #results_dir: './clip_vit/'
    #results_dir: './new_pretrained_fullgrad_clip_comparison/'
    #results_dir: './FINAL_PAPER_RESULTS/'
    #results_dir: './finding_zeros___FINAL_PAPER_RESULTS_5step/'
    #results_dir: './finding_zeros_attention__FINAL_PAPER_RESULTS_5step/'
    #results_dir: './front_attention_FINAL_PAPER_RESULTS/'
    #results_dir: './back_attention_FINAL_PAPER_RESULTS/'
    #results_dir: './both_attention_FINAL_PAPER_RESULTS/'
    #results_dir: './ablation_ATTENTION_FINAL_PAPER_RESULTS/'
    #results_dir: './ATTENTION_FINAL_PAPER_RESULTS/'
    #results_dir: './ATTENTION_FINAL_PAPER_RESULTS_5step/'
    #results_dir: './finetune_ATTENTION_FINAL_PAPER_RESULTS/'
    results_dir: './FINAL_RUN_TEST/'

    #
    # Dataset selection and pretraining objective.
    dataset: 'all'
    #pretraining_loss: 'weightedclip'
    pretraining_loss: 'clip'
    clip: true

    normalize: false

    # Feature flags that are currently switched on.
    # NOTE(review): their exact effect is not visible in this file -- confirm
    # against the code that reads them.
    bcs: true
    eq_coeff: true
    qualitative: true

    # Feature flags that are currently switched off.
    coeff: false
    time: false
    sentence: false

    # Mixing variant. The commented-out line is the alternative value; keep only
    # one `mixing` key active at a time.
    #mixing: 'convolutional'
    mixing: 'attention'
    patch_size: 8

    pushforward: 1
    t_bundle: 1

    # Training style. 'fixed_future' is the commented-out alternative.
    train_style: 'next_step'
    #train_style: 'fixed_future'
    # NOTE(review): presumably the data-loader worker count (0 = load in the
    # main process) -- confirm against the consumer.
    num_workers: 0

    # Batch sizes. `batch_size` and `pretraining_batch_size` are separate keys;
    # previously used values are kept commented out beneath each one.
    batch_size: 64
    #batch_size: 128
    #batch_size: 256

    pretraining_batch_size: 128
    #pretraining_batch_size: 256

    # Active value first, then alternatives that have been tried.
    initial_step: 1
    #initial_step: 5
    #initial_step: 20
    #initial_step: 41

    # NOTE(review): units of `t_train` and the meaning of `validate: 1` are not
    # visible in this file -- confirm against the training script.
    t_train: 200
    validate: 1
    # Data file. Alternatives stay commented out; keep one `data_name` active.
    #data_name: '2d_ns_1s_256_4eq.h5'
    #data_name: '2d_ns_30s_256_370eq.h5'
    #data_name: 'hba'
    data_name: '2d_electric_100_60.h5'
    # Placeholder path -- point this at the local data directory before running.
    base_path: '/home/PATH_TO_DATA/'
    return_text: true

    # Reduction factors.
    # NOTE(review): the names suggest subsampling strides for space, time and
    # batch -- confirm against the data loader.
    reduced_resolution: 4
    reduced_resolution_t: 1
    reduced_batch: 1

    # Accepted values: 'initial_condition' or 'equation'.
    #split_style: 'equation'
    split_style: 'initial_condition'

    # The active value is the quoted string 'None', not a YAML null.
    # NOTE(review): confirm the consumer compares against the string.
    #embedding: 'clip'
    #embedding: 'llm'
    #embedding: 'oformerllm'
    embedding: 'None'

    # Language-model identifier; other candidates are kept commented out.
    #llm: 'all-mpnet-base-v2'
    #llm: 'all-distilroberta-v1'
    #llm: 'meta-llama/Meta-Llama-3.1-8B'
    llm: 'all-MiniLM-L6-v2'

    load_pretrained: false

    # CLIP settings. Smaller `embed_dim` values that were tried are kept
    # commented out above the active one.
    #embed_dim: 32
    #embed_dim: 128
    embed_dim: 256
    downsample: 2
    detach: false

    # Optimizer
    # NOTE(review): the floats below are written with a dot in the mantissa
    # (`1.e-4`). Keep the dot: YAML 1.1 loaders such as PyYAML resolve `1e-4`
    # as a string rather than a float. Confirm which loader reads this file.
    learning_rate: 1.e-4
    #learning_rate: 5.e-4
    finetune_learning_rate: 1.e-4
    #finetune_learning_rate: 5.e-5
    #weight_decay: 1.e-7
    weight_decay: 1.e-8
    # NOTE(review): `scheduler_step` (20) is larger than `epochs` (10) below. If
    # the scheduler advances once per epoch, the `scheduler_gamma` decay never
    # triggers within a run -- confirm the unit of `scheduler_step`.
    scheduler_step: 20
    scheduler_gamma: 0.5

    epochs: 10
    #epochs: 1

    #epochs: 500
    num_seeds: 3

    # Pretraining-stage learning rate, weight decay and epoch count; these are
    # separate keys from the ones above.
    pretraining_learning_rate: 1.e-3
    pretraining_weight_decay: 1.e-5
    #pretraining_epochs: 1
    #pretraining_epochs: 10
    #pretraining_epochs: 200
    pretraining_epochs: 1000

    # Sim samples
    #img_size: 128
    img_size: 64
    #img_size: 16
    num_t: 100
    # NOTE(review): 100 x 60 matches the `100_60` in the active `data_name`
    # ('2d_electric_100_60.h5'); presumably these must be changed together with
    # the data file -- confirm.
    #num_x: 64
    #num_y: 64
    num_x: 100
    num_y: 60

    # Active value first, then alternatives that have been tried.
    sim_time: 21
    #sim_time: 51
    #sim_time: 101
    #sim_time: 80
    #sim_time: 120
    #sim_time: 999

    num_samples: 100
    #num_samples: 500
    #num_samples: 1000
    #num_samples: 2000
    # NOTE(review): 0 pretraining samples while `pretraining_epochs` is 1000 --
    # confirm this combination is intended.
    pretraining_num_samples: 0
    #pretraining_num_samples: 50
    #pretraining_num_samples: 100
    #pretraining_num_samples: 300

    samples_per_equation: 1
    #samples_per_equation: 5

    # FactFormer
    depth: 1

    # Active width first; smaller widths that were tried are commented out.
    dim: 128
    #dim: 64
    #dim: 32

    # heads * dim_head = 256.
    # NOTE(review): how this product relates to `dim` (128) depends on the
    # model code, which is not visible here -- confirm.
    dim_head: 64
    heads: 4
    in_dim: 1
    out_dim: 1
    pos_in_dim: 2
    pos_out_dim: 2
    kernel_multiplier: 2
    # Written as a float (2.0), unlike the integer `kernel_multiplier` above.
    latent_multiplier: 2.0
    max_latent_steps: 4

    dropout: 0.01

    # Tracking
    # NOTE(review): the unit of both frequencies (steps vs. epochs) is not
    # visible in this file -- confirm against the training loop.
    log_freq: 50
    progress_plot_freq: 200

