# --- Environment bootstrap ---------------------------------------------------
# Resolve the directory containing this script and its parent directory (the
# project root). Assignment is kept separate from `export` so that a failing
# command substitution is not hidden behind `export`'s own exit status.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
PROJECT_DIR="$(cd -- "$(dirname -- "$SCRIPT_DIR")" &>/dev/null && pwd)"
export SCRIPT_DIR PROJECT_DIR

# Run everything from the project root. Abort if that is impossible instead of
# silently continuing in whatever directory the caller happened to be in.
# `return` covers the case where this file is sourced; `exit` the case where it
# is executed.
cd -- "$PROJECT_DIR" || {
  printf 'error: cannot cd to project dir: %s\n' "$PROJECT_DIR" >&2
  return 1 2>/dev/null || exit 1
}

# Append the project root to PYTHONPATH without leaving a leading ':' (an empty
# path entry) when PYTHONPATH was previously unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PROJECT_DIR}"

# Flags exported to child processes via LIBTPU_INIT_ARGS. Listed one per line
# for reviewability and joined with single spaces below.
_libtpu_flags=(
  --xla_tpu_megacore_fusion_allow_ags=false
  --xla_enable_async_collective_permute=true
  --xla_tpu_enable_ag_backward_pipelining=true
  --xla_tpu_enable_data_parallel_all_reduce_opt=true
  --xla_tpu_data_parallel_opt_different_sized_ops=true
  --xla_tpu_enable_async_collective_fusion=true
  --xla_tpu_enable_async_collective_fusion_multiple_steps=true
  --xla_tpu_overlap_compute_collective_tc=true
  --xla_enable_async_all_gather=true
)
# IFS is pinned inside the subshell so the join is a single space regardless of
# the caller's IFS.
LIBTPU_INIT_ARGS="$(IFS=' '; printf '%s' "${_libtpu_flags[*]}")"
export LIBTPU_INIT_ARGS
unset _libtpu_flags

# --- Run configuration --------------------------------------------------------
# Tokenizer model file; handed to the trainer as --tokenizer.vocab_file.
export llama_tokenizer_path="/home/t-sye/World-Model/checkpoints/lwm_checkpoints/tokenizer.model"

# Dataset manifests (relative paths). Earlier training sets kept for reference:
# export dataset_path='data/0808_multiobject_sink_llava_5hz_action_preprocessed.jsonl'
# export dataset_path='data/0809_multiobject_sink_llava_256.jsonl'
# NOTE(review): train, eval and unseen-eval all point at the same *_val file;
# presumably intentional for this debug configuration -- confirm.
export dataset_path='data/0731_multi_200_5hz_val.jsonl'
export eval_dataset_path="data/0731_multi_200_5hz_val.jsonl"
export unseen_eval_dataset_path="data/0731_multi_200_5hz_val.jsonl"

# Output directory comes from AMLT_OUTPUT_DIR; it is the empty string when that
# variable is not set.
export output_dir="${AMLT_OUTPUT_DIR:-}"

# Weights & Biases credentials must come from the caller's environment; secrets
# do not belong in version control.
# NOTE(review): an API key used to be hard-coded on this line. Treat it as
# leaked and revoke/rotate it in the W&B account settings.
if [[ -n "${WANDB_API_KEY:-}" ]]; then
  export WANDB_API_KEY
else
  printf '%s\n' \
    'warning: WANDB_API_KEY is not set; online W&B logging will rely on' \
    '         previously stored credentials (e.g. from `wandb login`).' >&2
fi

# Identifiers forwarded to the trainer's logger.* flags.
export project_id='lwm_debug'
export experiment_note='world-model'
export experiment_id='debug'

# --- Launch training ----------------------------------------------------------
# Arguments are collected in an array (one flag per element, original order
# preserved) so they can carry comments and are passed to python3 verbatim,
# without any word splitting.
train_args=(
  # Run-level settings.
  --modality='vision,action,delta'
  # mesh_dim: dp, fsdp, tp, sp
  --mesh_dim='!-1,1,1,1'
  --dtype='bf16'
  --total_steps=500
  --log_freq=1
  --delta_tokens=1
  --eval_steps=1
  --save_model_freq=0
  --eval_log_freq=1
  --save_milestone_freq=0

  # Model configuration.
  --load_llama_config='200m'
  --update_llama_config="dict(action_vocab_size=256,delta_vocab_size=8,theta=50000000,max_sequence_length=2048,use_flash_attention=True,scan_attention=True,scan_query_chunk_size=512,scan_key_chunk_size=1024,remat_attention='nothing_saveable',scan_mlp=True,scan_mlp_chunk_size=8192,remat_mlp='nothing_saveable',remat_block='nothing_saveable',scan_layers=True)"
  --tokenizer.vocab_file="$llama_tokenizer_path"
  --optimizer.type='adamw'
  --llama.action_vocab_size=256
  --llama.delta_vocab_size=8

  # Optimizer: lr and end_lr are equal and warmup is 0 steps.
  --optimizer.accumulate_gradient_steps=1
  --optimizer.adamw_optimizer.weight_decay=0
  --optimizer.adamw_optimizer.lr=2e-5
  --optimizer.adamw_optimizer.end_lr=2e-5
  --optimizer.adamw_optimizer.lr_warmup_steps=0
  --optimizer.adamw_optimizer.lr_decay_steps=3000
  --use_data_sharded_loader=True

  # Training dataset.
  --train_dataset.type='json_vision_delta_action'
  --train_dataset.delta_vision_action_processor.fields_from_example='fields'
  --train_dataset.delta_vision_action_processor.n_tokens_per_action=7
  --train_dataset.delta_vision_action_processor.n_tokens_per_delta=4
  --train_dataset.delta_vision_action_processor.img_aug=False
  --train_dataset.delta_vision_action_processor.vqgan_checkpoint_path='/home/t-sye/World-Model/checkpoints/lwm_checkpoints/vqgan'
  --train_dataset.delta_vision_action_processor.image_absolute_path='/home/t-sye/World-Model/finetune_data/'
  --train_dataset.delta_vision_action_processor.max_n_frames=1
  --train_dataset.json_delta_action_dataset.mode="pad"
  --train_dataset.json_delta_action_dataset.path="$dataset_path"
  --train_dataset.json_delta_action_dataset.seq_length=384
  --train_dataset.json_delta_action_dataset.batch_size=2
  --train_dataset.json_delta_action_dataset.tokenizer_processes=1
  --train_dataset.json_delta_action_dataset.tokenizer_parallel_chunk_size=2
  --train_dataset.json_delta_action_dataset.tokenizer_parallel_batch_size=2
  --train_dataset.json_delta_action_dataset.use_data_sharded_loader=True

  # Evaluation dataset.
  --eval_dataset.type='json_vision_delta_action'
  --eval_dataset.delta_vision_action_processor.fields_from_example='fields'
  --eval_dataset.delta_vision_action_processor.n_tokens_per_action=7
  --eval_dataset.delta_vision_action_processor.n_tokens_per_delta=4
  # NOTE(review): this flag was previously spelled
  # `--eval_dataset.vision_action_processor.max_n_frames`. Every other processor
  # flag for this dataset, and the matching train_dataset flag, targets
  # delta_vision_action_processor, so it has been aligned. Confirm against the
  # lwm dataset configuration.
  --eval_dataset.delta_vision_action_processor.max_n_frames=1
  --eval_dataset.json_delta_action_dataset.mode="pad"
  --eval_dataset.json_delta_action_dataset.path="$eval_dataset_path"
  --eval_dataset.json_delta_action_dataset.seq_length=384
  --eval_dataset.json_delta_action_dataset.batch_size=2
  --eval_dataset.json_delta_action_dataset.tokenizer_processes=1
  --eval_dataset.json_delta_action_dataset.tokenizer_parallel_chunk_size=2
  --eval_dataset.json_delta_action_dataset.tokenizer_parallel_batch_size=2
  --eval_dataset.json_delta_action_dataset.use_data_sharded_loader=True

  # "Unseen" evaluation dataset.
  --unseen_eval_dataset.type='json_vision_delta_action'
  --unseen_eval_dataset.delta_vision_action_processor.fields_from_example='fields'
  --unseen_eval_dataset.delta_vision_action_processor.n_tokens_per_action=7
  --unseen_eval_dataset.delta_vision_action_processor.n_tokens_per_delta=4
  # NOTE(review): same alignment as for eval_dataset; previously
  # `--unseen_eval_dataset.vision_action_processor.max_n_frames`. Confirm.
  --unseen_eval_dataset.delta_vision_action_processor.max_n_frames=1
  --unseen_eval_dataset.json_delta_action_dataset.mode="pad"
  --unseen_eval_dataset.json_delta_action_dataset.path="$unseen_eval_dataset_path"
  --unseen_eval_dataset.json_delta_action_dataset.seq_length=384
  --unseen_eval_dataset.json_delta_action_dataset.batch_size=2
  --unseen_eval_dataset.json_delta_action_dataset.tokenizer_processes=1
  --unseen_eval_dataset.json_delta_action_dataset.tokenizer_parallel_chunk_size=2
  --unseen_eval_dataset.json_delta_action_dataset.tokenizer_parallel_batch_size=2
  --unseen_eval_dataset.json_delta_action_dataset.use_data_sharded_loader=True

  # Checkpointing and logging.
  --checkpointer.save_optimizer_state=False
  --autoresume=False
  --logger.append_uuid=False
  --logger.online=True
  --logger.project_id="$project_id"
  --logger.experiment_id="$experiment_id"
  --logger.experiment_note="$experiment_note"
  --logger.output_dir="$output_dir"
  --logger.wandb_dir="$HOME/experiment_output/$project_id"
)

# -u: unbuffered stdout/stderr so log lines appear immediately.
python3 -u -m lwm.train "${train_args[@]}"


