I20230906 17:26:32 19118 dinov2 config.py:59] git:
  sha: N/A, status: clean, branch: N/A

I20230906 17:26:32 19118 dinov2 config.py:60] batch_size: 128
classifier_fpath: None
config_file: dinov2/configs/eval/vitg14_pretrain.yaml
epoch_length: 1250
epochs: 10
eval_period_iterations: 1250
learning_rates: [1e-05, 2e-05, 5e-05, 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1]
no_resume: False
num_workers: 8
opts: ['train.output_dir=/Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output']
output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
pretrained_weights: dinov2/pretrain/dinov2_vitg14_pretrain.pth
save_checkpoint_frequency: 20
test_class_mapping_fpaths: [None]
test_dataset_strs: None
test_metric_types: None
train_dataset_str: ImageNet:split=TRAIN:root=imagenet/one/:extra=imagenet/two/
val_class_mapping_fpath: None
val_dataset_str: ImageNer:split=VAL:root=imagenet/one/:extra=imagenet/two/
val_metric_type: mean_accuracy
I20230906 17:26:32 19118 dinov2 config.py:26] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230906 17:26:32 19118 dinov2 config.py:33] MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: centering
student:
  arch: vit_giant2
  patch_size: 14
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: swiglufused
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004
  lr: 0.001
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 518
  local_crops_size: 98
evaluation:
  eval_period_iterations: 12500

I20230906 17:54:07 20285 dinov2 config.py:59] git:
  sha: N/A, status: clean, branch: N/A

I20230906 17:54:07 20285 dinov2 config.py:60] batch_size: 128
classifier_fpath: None
config_file: dinov2/configs/eval/vitg14_pretrain.yaml
epoch_length: 1250
epochs: 10
eval_period_iterations: 1250
learning_rates: [1e-05, 2e-05, 5e-05, 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1]
no_resume: False
num_workers: 8
opts: ['train.output_dir=/Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output']
output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
pretrained_weights: dinov2/pretrain/dinov2_vitg14_pretrain.pth
save_checkpoint_frequency: 20
test_class_mapping_fpaths: [None]
test_dataset_strs: None
test_metric_types: None
train_dataset_str: ImageNet:split=TRAIN:root=imagenet/one/:extra=imagenet/two/
val_class_mapping_fpath: None
val_dataset_str: ImageNer:split=VAL:root=imagenet/one/:extra=imagenet/two/
val_metric_type: mean_accuracy
I20230906 17:54:07 20285 dinov2 config.py:26] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230906 17:54:07 20285 dinov2 config.py:33] MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: centering
student:
  arch: vit_giant2
  patch_size: 14
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: swiglufused
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004
  lr: 0.001
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 518
  local_crops_size: 98
evaluation:
  eval_period_iterations: 12500

I20230906 17:54:07 20285 dinov2 vision_transformer.py:112] using SwiGLU layer as FFN
I20230906 17:58:09 20285 dinov2 utils.py:33] Pretrained weights found at dinov2/pretrain/dinov2_vitg14_pretrain.pth and loaded with msg: <All keys matched successfully>
I20230906 18:41:49 22216 dinov2 config.py:59] git:
  sha: N/A, status: clean, branch: N/A

I20230906 18:41:49 22216 dinov2 config.py:60] batch_size: 128
classifier_fpath: None
config_file: dinov2/configs/eval/vitg14_pretrain.yaml
epoch_length: 1250
epochs: 10
eval_period_iterations: 1250
learning_rates: [1e-05, 2e-05, 5e-05, 0.0001, 0.0002, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1]
no_resume: False
num_workers: 8
opts: ['train.output_dir=/Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output']
output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
pretrained_weights: dinov2/pretrain/dinov2_vitg14_pretrain.pth
save_checkpoint_frequency: 20
test_class_mapping_fpaths: [None]
test_dataset_strs: None
test_metric_types: None
train_dataset_str: ImageNet:split=TRAIN:root=imagenet/one/:extra=imagenet/two/
val_class_mapping_fpath: None
val_dataset_str: ImageNer:split=VAL:root=imagenet/one/:extra=imagenet/two/
val_metric_type: mean_accuracy
I20230906 18:41:49 22216 dinov2 config.py:26] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230906 18:41:49 22216 dinov2 config.py:33] MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: /Data20T/data20t/data20t/envs/shigz1/code/dinov2-main/dinov2-main/dinov2/output
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: centering
student:
  arch: vit_giant2
  patch_size: 14
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: swiglufused
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004
  lr: 0.001
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 518
  local_crops_size: 98
evaluation:
  eval_period_iterations: 12500

I20230906 18:41:49 22216 dinov2 vision_transformer.py:112] using SwiGLU layer as FFN
I20230906 18:44:59 22216 dinov2 utils.py:33] Pretrained weights found at dinov2/pretrain/dinov2_vitg14_pretrain.pth and loaded with msg: <All keys matched successfully>
I20230906 18:45:09 22216 dinov2 loaders.py:84] using dataset: "ImageNet:split=TRAIN:root=imagenet/one/:extra=imagenet/two/"
I20230906 18:45:09 22216 dinov2 loaders.py:89] # of dataset samples: 624
