# device / run settings
mode: train_unseen
# GPU ids
gpu_ids:
  - 0
# batch size; each item denotes one story
batch_size: 2
# number of workers
num_workers: 4
# number of CPU cores
num_cpu_cores: -1
# gradient accumulation steps
grad_accumulation_steps: 1
# random seed
seed: 0
# checkpoint directory
ckpt_dir: ./ckpts/
# name for this run
run_name: flintstones_run
# cache for the current character
cur_char: null
# cache for history characters; do not fill this field
history_char: null
# cache for unseen character tokens
char_tokens: null
# whether to inject LoRA
inject_lora: false
use_reference_image: true
load_parallel: true
num_permute: 0
# NOTE(review): presumably a weight for a distillation term — confirm
distillation: 0.25
# whether to replace character occurrences with the unique token
prompt_modification: true
# NOTE(review): presumably a weight for an adversarial term — confirm
adversarial: 0.75
# NOTE(review): exponent form without a decimal point; some YAML 1.1 loaders
# read this as a string — confirm the consumer parses it as a float
discriminator_lr: 1e-4
start_g_adv: 0
D_net: simple
D_loss_type: bce
mapping_json: desc

# task
# Dataset name. Options listed by the original author: pororo, flintstones,
# vistsis, vistdii, flintstones_unseen. A `pororo_unseen` section also exists
# in this file — presumably it is a valid choice too; confirm against the loader.
dataset: flintstones_unseen
# one of: continuation, visualization
task: visualization

# train
# NOTE(review): exponent form without a decimal point; some YAML 1.1 loaders
# read this as a string — confirm the consumer parses it as a float
init_lr: 1e-5
# warmup epochs
warmup_epochs: 5
# max epochs
max_epochs: 100
# model file to resume from; leave null to train from scratch
train_model_file: null
# whether to freeze CLIP
freeze_clip: true
# whether to freeze BLIP
freeze_blip: true
# whether to freeze ResNet
freeze_resnet: false
# save frequency
save_freq: 100

# sample
# model file for test
test_model_file: ''
# whether to calculate FID scores
calculate_fid: false
# whether to calculate text CLIP score
calculate_text_clipscore: false
# whether to calculate visual CLIP score; requires reference images
calculate_visual_clipscore: false
# one of: ddim, pndm
scheduler: ddim
# guidance scale
guidance_scale: 6
# number of inference steps
num_inference_steps: 100
# output directory
sample_output_dir: ./ckpts/output_images
# custom prompts file; only used for custom_sample
custom_prompts: ./datasets/fs_special_token.txt
# set to sample only a few stories; null leaves it unset
stop_sample_early: null
# resolution for sampling; keep the same as the training resolution
resolution: 256

# model config
unet_model:
  # left unset (null) here
  tuning: null
  low_cpu_mem_usage: false

# Settings for the `flintstones_unseen` dataset option.
flintstones_unseen:
  # target character names
  target_chars:
    - slaghoople
    - texarock_2
    - gazoo
    - helmet_police
    - piano_man
    - rockzilla
    - texarock_1
    - theft
    - seal
  data_dir: ./data/flintstones_data
  target_dir: ./data/flintstones_data/target_chars
  hdf5_file: ./data/flintstones_rare-char_removed.h5
  max_length: 91
  new_tokens:
    - fred
    - barney
    - wilma
    - betty
    - dino
    - slate
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49412
  blip_embedding_tokens: 30525
  # original note: "using manual followings" — exact meaning is not visible
  # from this file; confirm against the code that reads this flag
  use_handpick: true

# Settings for the `pororo_unseen` dataset option.
pororo_unseen:
  # target character names
  target_chars:
    - popo
    - pipi
    - whale
    - shark
    - harry
    - tutu
  data_dir: ./data/pororo_png
  target_dir: ./data/pororo_png/target_chars
  # NOTE(review): the `pororo` section in this file points at
  # ./data/pororo_char_removed.h5 — confirm the different filename is intended
  hdf5_file: ./data/pororo_removed.h5
  max_length: 85
  new_tokens:
    - pororo
    - loopy
    - eddy
    - poby
    - tongtong
    - crong
    - rody
    - petty
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49416
  blip_embedding_tokens: 30530
  # original note: "using manual followings" — exact meaning is not visible
  # from this file; confirm against the code that reads this flag
  use_handpick: true

# Settings for the `flintstones` dataset option.
flintstones:
  hdf5_file: ./data/flintstones_rare-char_removed.h5
  max_length: 91
  new_tokens:
    - fred
    - barney
    - wilma
    - betty
    - dino
    - slate
  # left unset (null) here
  adapt_tokens: null
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49412
  blip_embedding_tokens: 30525

# Settings for the `pororo` dataset option.
pororo:
  hdf5_file: ./data/pororo_char_removed.h5
  max_length: 85
  new_tokens:
    - pororo
    - loopy
    - eddy
    - poby
    - tongtong
    - crong
    - rody
    - petty
  # left unset (null) here
  adapt_tokens: null
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49416
  blip_embedding_tokens: 30530

# Settings for the `vistsis` dataset option.
vistsis:
  # placeholder path — replace with the real location of the VIST hdf5 file
  hdf5_file: /path/to/vist.h5
  max_length: 100
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

# Settings for the `vistdii` dataset option.
vistdii:
  # placeholder path — replace with the real location of the VIST hdf5 file
  hdf5_file: /path/to/vist.h5
  max_length: 65
  # NOTE(review): presumably token-embedding table sizes for CLIP / BLIP — confirm
  clip_embedding_tokens: 49408
  blip_embedding_tokens: 30524

# Hydra runtime settings.
hydra:
  run:
    # run directory is set to the current directory
    dir: .
  # no output subdirectory is configured
  output_subdir: null
# NOTE(review): the two entries below are ordinary top-level keys literally
# named "hydra/job_logging" and "hydra/hydra_logging"; they are not children of
# the `hydra:` mapping above. Hydra config-group choices are normally made in a
# `defaults` list — confirm these lines actually disable logging as intended.
hydra/job_logging: disabled
hydra/hydra_logging: disabled