# Training-time preprocessing: one ordered list of transforms per observation key.
# Every list entry names a class through `_target_`; sibling keys are passed as
# its arguments (presumably via Hydra instantiation -- confirm against the loader).
train:
  # Static-camera RGB: resize, random shift, scale, then per-channel normalize.
  rgb_static:
    - _target_: torchvision.transforms.Resize
      size: 224
      antialias: true
    - _target_: mdt.utils.transforms.RandomShiftsAug
      pad: 10
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      # NOTE(review): these constants match the published OpenAI CLIP image
      # mean/std -- confirm they suit the vision encoder in use.
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  # Static-camera image at the resolution given by `gen_img_res`; no shift
  # augmentation is applied to this key.
  gen_static:
    - _target_: torchvision.transforms.Resize
      size: ${gen_img_res}
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  # Gripper-camera RGB: same stages as rgb_static with a smaller size and pad.
  rgb_gripper:
    - _target_: torchvision.transforms.Resize
      size: 84
      antialias: true
    - _target_: mdt.utils.transforms.RandomShiftsAug
      pad: 4
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  # Gripper-camera counterpart of gen_static, at a fixed size of 112.
  gen_gripper:
    - _target_: torchvision.transforms.Resize
      size: 112
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  # Static-camera depth: resize, then two noise transforms.
  depth_static:
    - _target_: torchvision.transforms.Resize
      size: 200
      antialias: true
    - _target_: mdt.utils.transforms.AddDepthNoise
      shape: [1000.0]
      rate: [1000.0]
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]

  # Gripper-camera depth: resize and Gaussian noise only.
  # NOTE(review): this Resize sets no `antialias`, unlike the rgb_*/gen_*/
  # depth_static entries above -- confirm that is intentional.
  depth_gripper:
    - _target_: torchvision.transforms.Resize
      size: 84
    # Disabled depth-noise stage, kept for reference.
    # NOTE(review): the target below uses the `calvin.` package prefix while
    # every active entry uses `mdt.`; adjust the path if this is re-enabled.
    # - _target_: calvin.utils.transforms.AddDepthNoise
    #   shape: [1000.0]
    #   rate: [1000.0]
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]

  # Tactile RGB: resize to 70, random 64 crop, scale, normalize, add noise.
  # NOTE(review): this Resize also sets no `antialias` -- confirm.
  rgb_tactile:
    - _target_: torchvision.transforms.Resize
      size: 70
    - _target_: torchvision.transforms.RandomCrop
      size: 64
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.5]
      std: [0.5]
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]

  # Tactile depth: resize, single-value normalize, add noise.
  depth_tactile:
    - _target_: torchvision.transforms.Resize
      size: 64
      antialias: true
    - _target_: torchvision.transforms.Normalize
      mean: [0.1]
      std: [0.2]
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]

  # Vector observations: normalize, then add Gaussian noise.
  robot_obs:
    - _target_: mdt.utils.transforms.NormalizeVector
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]
  scene_obs:
    - _target_: mdt.utils.transforms.NormalizeVector
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]

  # Language input: Gaussian noise only.
  language:
    - _target_: mdt.utils.transforms.AddGaussianNoise
      mean: [0.0]
      std: [0.01]


# Validation-time preprocessing: one ordered list of transforms per observation
# key. No shift, crop-jitter, or noise stage is listed here, so each validation
# sample is preprocessed the same way on every pass.
val:
  # Static-camera RGB: resize, scale, per-channel normalize.
  rgb_static:
    - _target_: torchvision.transforms.Resize
      size: 224
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      # NOTE(review): these constants match the published OpenAI CLIP image
      # mean/std -- confirm they suit the vision encoder in use.
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  # Gripper-camera RGB.
  rgb_gripper:
    - _target_: torchvision.transforms.Resize
      size: 84
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  gen_static:
    - _target_: torchvision.transforms.Resize
      # Uses the same `gen_img_res` interpolation as train.gen_static so that
      # overriding the resolution cannot make the two splits diverge.
      size: ${gen_img_res}
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  gen_gripper:
    - _target_: torchvision.transforms.Resize
      size: 112
      antialias: true
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.48145466, 0.4578275, 0.40821073]
      std: [0.26862954, 0.26130258, 0.27577711]

  depth_static:
    - _target_: torchvision.transforms.Resize
      size: 200
      # Set explicitly to match train.depth_static, so both splits resample
      # identically regardless of the installed torchvision default.
      antialias: true

  # NOTE(review): no `antialias` here, mirroring train.depth_gripper -- confirm.
  depth_gripper:
    - _target_: torchvision.transforms.Resize
      size: 84

  # Tactile RGB: resize to 70, then a fixed central 64 crop.
  # NOTE(review): no `antialias` on this Resize, mirroring train.rgb_tactile.
  rgb_tactile:
    - _target_: torchvision.transforms.Resize
      size: 70
    # CenterCrop instead of RandomCrop: a random crop would make validation
    # inputs (and therefore metrics) vary from run to run.
    - _target_: torchvision.transforms.CenterCrop
      size: 64
    - _target_: mdt.utils.transforms.ScaleImageTensor
    - _target_: torchvision.transforms.Normalize
      mean: [0.5]
      std: [0.5]

  depth_tactile:
    - _target_: torchvision.transforms.Resize
      size: 64
      # Set explicitly to match train.depth_tactile.
      antialias: true
    - _target_: torchvision.transforms.Normalize
      mean: [0.1]
      std: [0.2]

  # Vector observations: normalization only.
  robot_obs:
    - _target_: mdt.utils.transforms.NormalizeVector
  scene_obs:
    - _target_: mdt.utils.transforms.NormalizeVector
