# Top-level run identifiers for this config.
experiment_name: 'all_stages'
# `None` is not a YAML null: a plain `None` loads as the string 'None'.
# Use `null` so the loader yields a real null value.
# NOTE(review): confirm the consumer does not rely on the string 'None'.
trial_name: null

# Stage 1 settings. The model section configures only a text encoder
# (there is no `vision` or `transforms` section in this stage).
# Values of ' ' (a single space) are placeholders; presumably they are
# filled in by the launching code or a per-run override -- TODO confirm.
stage1:
    experiment_name: ' '
    phase: ' '

    lightning:
        # NOTE(review): these keys look like PyTorch Lightning Trainer
        # arguments, except `lr`, which is not a Trainer argument;
        # presumably the consumer reads it separately -- confirm.
        trainer:
            devices: 2
            max_epochs: 2
            accelerator: 'gpu'
            strategy: 'ddp'
            gradient_clip_val: 0.25
            lr: 0.00005
            precision: 16
            accumulate_grad_batches: 2
            sync_batchnorm: true
            enable_progress_bar: true
            log_every_n_steps: 50
        checkpoint_callback:
            monitor: 'val_loss'
            dirpath: ' '
            save_last: true
            mode: 'min'
            save_top_k: 10
        # NOTE(review): patience (10) is larger than max_epochs (2) above;
        # if the monitored metric is checked once per epoch, early stopping
        # cannot trigger in this stage -- confirm this is intended.
        early_stopping_callback:
            monitor: 'val_loss'
            min_delta: 0.00
            patience: 10
            verbose: false
            mode: 'min'
        logger:
            logger_type: 'CSVLogger'
            save_dir: ' '

    model:
        text:
            bert_type: ' '
            last_n_layers: 4
            aggregate_method: 'sum'
            norm: false
            embedding_dim: 768
            freeze_bert: false
            agg_tokens: true
        num_prototypes: 64
        temperature: 0.1
        reconstruction_weight: 1.0
        use_compile: true

    data:
        dataset: 'evidence'
        evidence_csv_path: ' '
        max_length: 128

    train:
        batch_size: 128
        num_workers: 16
        persistent_workers: true
        prefetch_factor: 4
        optimizer:
            name: 'Adam'
            # Written with an explicit mantissa dot: YAML 1.1 loaders such
            # as PyYAML resolve a bare `1e-6` as the string '1e-6', while
            # `1.0e-6` is a float under both YAML 1.1 and 1.2.
            weight_decay: 1.0e-6
            betas: [0.9, 0.999]
        scheduler:
            name: 'plateau'
            monitor: 'val_loss'
            interval: 'epoch'
            frequency: 1
            factor: 0.5
            patience: 3

    output_dir: ' '


# Stage 2 settings. The model section configures a text encoder and a
# vision encoder, and references a stage-1 checkpoint path.
# Values of ' ' (a single space) are placeholders; presumably they are
# filled in by the launching code or a per-run override -- TODO confirm.
stage2:
    experiment_name: ' '
    phase: ' '

    lightning:
        # NOTE(review): these keys look like PyTorch Lightning Trainer
        # arguments, except `lr`, which is not a Trainer argument;
        # presumably the consumer reads it separately -- confirm.
        trainer:
            devices: 2
            max_epochs: 4
            accelerator: 'gpu'
            strategy: 'ddp'
            gradient_clip_val: 0.25
            lr: 0.0001
            precision: 16
            accumulate_grad_batches: 2
            sync_batchnorm: true
            enable_progress_bar: true
            log_every_n_steps: 50
        checkpoint_callback:
            monitor: 'val_loss'
            dirpath: ' '
            save_last: true
            mode: 'min'
            save_top_k: 10
        # NOTE(review): patience (10) is larger than max_epochs (4) above;
        # if the monitored metric is checked once per epoch, early stopping
        # cannot trigger in this stage -- confirm this is intended.
        early_stopping_callback:
            monitor: 'val_loss'
            min_delta: 0.00
            patience: 10
            verbose: false
            mode: 'min'
        logger:
            logger_type: ' '
            project: ' '

    model:
        text:
            bert_type: ' '
            last_n_layers: 4
            aggregate_method: 'sum'
            norm: false
            embedding_dim: 768
            # Set to true in this stage.
            freeze_bert: true
            agg_tokens: true
        vision:
            model_name: 'vit_base_patch16_224'
            freeze_cnn: false
            pretrained: true
        num_prototypes: 64
        temperature: 0.1
        num_queries: 64
        distillation_weight: 1.0
        consistency_weight: 1.0
        diversity_weight: 1.0
        consistency_k: 5
        stage1_checkpoint_path: ' '
        use_compile: true

    data:
        dataset: 'xx'
        evidence_csv_path: ' '
        paired_csv_path: ' '
        sample_ratio: 0.1
        max_length: 128
        image:
            imsize: 224

    train:
        batch_size: 64
        num_workers: 16
        persistent_workers: true
        prefetch_factor: 4
        optimizer:
            name: 'AdamW'
            # Written with an explicit mantissa dot: YAML 1.1 loaders such
            # as PyYAML resolve a bare `1e-6` as the string '1e-6', while
            # `1.0e-6` is a float under both YAML 1.1 and 1.2.
            weight_decay: 1.0e-6
            betas: [0.9, 0.999]
        scheduler:
            name: 'plateau'
            monitor: 'val_loss'
            interval: 'epoch'
            frequency: 1
            factor: 0.5
            patience: 3

    # Every augmentation entry is null; only `norm` carries a value.
    # NOTE(review): presumably null means "not applied" -- verify against
    # the code that builds the transforms.
    transforms:
        random_crop: null
        random_horizontal_flip: null
        random_affine: null
        color_jitter: null
        norm: 'half'

    output_dir: ' '

# Stage 3 settings. The model section configures a text encoder and a
# vision encoder, and references both stage-1 and stage-2 checkpoint paths.
# Values of ' ' (a single space) are placeholders; presumably they are
# filled in by the launching code or a per-run override -- TODO confirm.
stage3:
    experiment_name: ' '
    phase: ' '

    lightning:
        # NOTE(review): these keys look like PyTorch Lightning Trainer
        # arguments, except `lr`, which is not a Trainer argument;
        # presumably the consumer reads it separately -- confirm.
        trainer:
            devices: 2
            max_epochs: 5
            accelerator: 'gpu'
            strategy: 'ddp'
            gradient_clip_val: 0.25
            lr: 0.0001
            precision: 16
            accumulate_grad_batches: 2
            sync_batchnorm: true
            enable_progress_bar: true
            log_every_n_steps: 50
        checkpoint_callback:
            monitor: 'val_loss'
            dirpath: ' '
            save_last: true
            mode: 'min'
            save_top_k: 10
        # NOTE(review): patience (10) is larger than max_epochs (5) above;
        # if the monitored metric is checked once per epoch, early stopping
        # cannot trigger in this stage -- confirm this is intended.
        early_stopping_callback:
            monitor: 'val_loss'
            min_delta: 0.00
            patience: 10
            verbose: false
            mode: 'min'
        logger:
            logger_type: ' '
            project: ' '

    model:
        text:
            bert_type: ' '
            last_n_layers: 4
            aggregate_method: 'sum'
            norm: false
            embedding_dim: 768
            freeze_bert: false
            agg_tokens: true
        vision:
            model_name: 'vit_base_patch16_224'
            freeze_cnn: false
            pretrained: true
        num_prototypes: 64
        temperature: 0.1
        num_queries: 64
        # NOTE(review): the two `stage3_*` values below are null while
        # `use_stage3_queries` is false; presumably they are only read when
        # it is true -- confirm against the model code.
        use_stage3_queries: false
        stage3_num_queries: null
        stage3_num_attention_heads: null
        propagation_alpha: 0.5
        propagation_steps: 2
        info_nce_temperature: 0.07
        infonce_weight: 1.0
        normalize_entropy: true
        stage1_checkpoint_path: ' '
        stage2_checkpoint_path: ' '
        use_compile: true

    data:
        dataset: 'xx'
        evidence_csv_path: ' '
        paired_csv_path: ' '
        stage2_sample_ratio: 0.1
        pair_ratio: 0.3
        max_length: 128
        image:
            imsize: 224

    train:
        batch_size: 64
        num_workers: 16
        persistent_workers: true
        prefetch_factor: 4
        optimizer:
            name: 'AdamW'
            # Written with an explicit mantissa dot: YAML 1.1 loaders such
            # as PyYAML resolve a bare `1e-6` as the string '1e-6', while
            # `1.0e-6` is a float under both YAML 1.1 and 1.2.
            weight_decay: 1.0e-6
            betas: [0.9, 0.999]
        scheduler:
            name: 'plateau'
            monitor: 'val_loss'
            interval: 'epoch'
            frequency: 1
            factor: 0.5
            patience: 3

    # Every augmentation entry is null; only `norm` carries a value.
    # NOTE(review): presumably null means "not applied" -- verify against
    # the code that builds the transforms.
    transforms:
        random_crop: null
        random_horizontal_flip: null
        random_affine: null
        color_jitter: null
        norm: 'half'

    output_dir: ' '

