_base_ = '../mae/mae_vit-base-p16_8xb512-amp-coslr-300e_in1k.py'

randomness = dict(seed=2, diff_rank_seed=True)

# dataset config
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='ToPIL', to_rgb=True),
    dict(type='torchvision/Resize', size=224),
    dict(
        type='torchvision/RandomCrop',
        size=224,
        padding=4,
        padding_mode='reflect'),
    dict(type='torchvision/RandomHorizontalFlip', p=0.5),
    dict(type='ToNumpy', to_bgr=True),
    dict(type='PackInputs')
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))

# model config
model = dict(
    type='MFF', backbone=dict(type='MFFViT', out_indices=[0, 2, 4, 6, 8, 11]))
