# Model selection for this run.
model_args:
  # `owner/name` identifier of the checkpoint to load
  # (looks like a Hugging Face Hub model id — confirm against the loader).
  model_name_or_path: roneneldan/TinyStories-1M

# Dataset selection and size limits.
data_args:
  # `owner/name` identifier of the dataset
  # (looks like a Hugging Face Hub dataset id — confirm against the loader).
  dataset_name: roneneldan/TinyStories
  # No explicit dataset configuration name is given.
  dataset_config_name: null
  # Both sample caps are set to 2, so only a handful of examples are used
  # (presumably to keep this run as a quick smoke test — confirm).
  max_train_samples: 2
  max_eval_samples: 2
  streaming: true
# Training-loop settings. Most keys look like Hugging Face `TrainingArguments`
# fields — NOTE(review): confirm against the consuming script.
training_args:
  # Run identity and output location.
  run_name: test
  output_dir: output_test
  overwrite_output_dir: true

  # Run both the training and the evaluation phase.
  do_train: true
  do_eval: true

  # Step budget. `save_steps` (10) is larger than `max_steps` (3), so a
  # step-interval checkpoint would presumably never trigger — confirm this is
  # intended.
  max_steps: 3
  save_steps: 10

  per_device_train_batch_size: 1
  per_device_eval_batch_size: 1

  # Evaluate and log on every step.
  evaluation_strategy: steps
  eval_steps: 1
  logging_steps: 1

  # This is the string "none", not YAML null; quoted to make that explicit.
  report_to: "none"
  use_cpu: true

  # The keys below do not look like stock Trainer options; presumably they are
  # project-specific extensions — verify against the arguments class.
  train_model_params: true
  model_lr_factor: 0.1
  codebook_reg_p: null
  codebook_weight_decay: 0.01

# Codebook configuration. The meaning of each option is defined by the
# consuming code, which is not visible here; notes below are hedged.
codebook_args:
  codebook_type: group
  num_codes: 100
  # NOTE(review): -1 looks like a sentinel value rather than a real count —
  # confirm what the consumer does with it.
  num_codebooks: -1
  # Flow list containing the single layer index 0.
  layers_to_snap: [0]
  similarity_metric: inner_product
  codebook_at: attn_preproj
  loss: aeloss
  k_codebook: 3

  # k-means initialisation is switched off and no path is supplied; the
  # remaining `kmeans_*` values are presumably ignored in that case — verify.
  kmeans_init: false
  kmeans_path: null
  kmeans_init_examples: 1000
  kmeans_kwargs:
    n_init: 1
    batch_size: 1024
    verbose: 1

  # No extra codebook keyword arguments.
  codebook_kwargs: null
  replace_codes: true

# No scheduler keyword arguments are supplied.
k_scheduler_kwargs: null

# Run-level switches; both are disabled and no pretrained path is given.
enable_logging: false
pretrained_path: null
get_baseline: false

# Every entry of `tag_keys` matches a key under `codebook_args`; presumably
# their values are turned into run tags alongside `tags` — confirm against the
# consumer.
tag_keys: [num_codes, loss, codebook_type]
tags: ["test", "TGPT"]
project: gpt2-codebook
