#!/bin/bash


#! ======================================== 
#! ================= Trained Model =================
#! ========================================
# Directory holding the stage-2 (t2i / prefix) inference configs. The path is
# relative, so the script has to be started from the repository root.
config_dir='dev/configs/infer/module/stage2/t2i/prefix'
# Load the settings of the trained model; the file name spells out the
# training recipe it belongs to.
# NOTE(review): presumably this file provides the model-related variables the
# launch command reads but this script never assigns (e.g.
# denoising_unet_path, motion_module_path, model_type, suffix, epoch) -- confirm.
source "${config_dir}/all-modules0_2-lipsw_5-prefix_4_0.95-noff_0.05-trained_aud_18w-audw_1.2-ipas_0.8.sh"


#! ======================================== 
#! ================= LoRA Loading =================
#! ========================================
# Checkpoint file forwarded to the launcher through --dreambooth_path.
dreambooth_path='/root/models/DreamBooth/insaneRealisticV10_v10.safetensors'
# Short label for that checkpoint; it becomes the last component of the
# output directory name.
dblora_type='InsaneRealistic'


#! ======================================== 
#! ================= Inference Configs =================
#! ========================================
# Inference settings consumed by the launch command further down.
# Trailing "# <value>" notes are carried over unchanged.
# NOTE(review): they look like reference/default values -- confirm.

# --- IP-Adapter ------------------------------------------------------------
ip_ckpt="model_ckpts/ip_adapter/ip_faceid_decoupled.pth"   # --ip_ckpt
ip_mode="faceid-decoupled"                                 # --ip_mode, also part of the output file name
# The two values of each pair are passed together: --num_tokens / --ipa_scale.
num_tokens_1=16 num_tokens_2=24
ipa_scale_1=1.0 ipa_scale_2=0.3 # 1.0 / 0.3

# --- Auxiliary model locations -----------------------------------------------
image_encoder_path='/root/models/IP-Adapter/models/image_encoder/'
vae_path='/dockerdata/models/sd-vae-ft-mse/'
# Not referenced by the visible part of the launch command.
neg_embed_path='/root/models/neg_embeds/realisticvision-negative-embedding.pt'

# --- Output resolution -------------------------------------------------------
image_height=512 image_width=512 # 512 x 512

# --- Sampling ----------------------------------------------------------------
gs=5                  # 5    (--guidance_scale)
mimic_scale=3.5       # 3.5
eta=0.4               # 0.4
infer_step=30         # 30   (--num_inference_steps)
align_color_alpha=0.4

# --- Conditioning weights ----------------------------------------------------
ac_scale=1.0          # 1.0  (--t2i_adapter_conditioning_scale)
txtweight=1.0         # 1.0  (--text_attention_weight)
audweight=1.8         # 0.6  (--audio_attention_weight)
# Not referenced by the visible part of the launch command, which passes
# ipa_scale_1 / ipa_scale_2 instead.
ipa_scale=1.0         # 1.2

# --- Retargeting and audio -----------------------------------------------------
# Accepted values: no_retarget, fix_face, offset_retarget, naive_retarget
retarget_strategy='no_retarget'
num_pad_audio_frames=2
aud_depth=4

# --- Temporal context ----------------------------------------------------------
context_frames=12
context_overlap=0
n_motion_frames=4

# --- b1 / b2 / s1 / s2 / threshold ---------------------------------------------
b1=1.0 b2=1.2 # 1.0 / 1.2
s1=0.9 s2=0.6 # 0.9 / 0.6
thres=1       # 1    (--threshold)

# --- Keypoint control ----------------------------------------------------------
ctrl_scale=0.3        # 0.2  (--controlnet_conditioning_scale)
ctrl_kps=1            # 1 adds the "-kps_ctrl_thin" tag to the output file name

# --- Device --------------------------------------------------------------------
gpu_id=1              # exported as CUDA_VISIBLE_DEVICES

#! ======================================== 
#! ================= DATA -- ID =================
#! ======================================== 
# Identity data: enable exactly one of the configs below.
# NOTE(review): presumably the chosen file sets the identity-side inputs of
# the launch command (reference_image_path, kps_path, PROMPT) -- confirm.
# source ./configs/data/all_ids/short_case_10.sh
# source dev/configs/data/all_ids/short_case_tys.sh
source 'dev/configs/data/all_ids/short_case_adriana.sh'
# source dev/configs/data/all_ids/talk_emotion.sh

#! ======================================== 
#! ================= DATA -- AUD =================
#! ======================================== 
# Audio data: enable exactly one of the configs below.
# NOTE(review): the active audio case (tys) differs from the active identity
# case (adriana) -- confirm this pairing is intended.
# source dev/configs/data/aud/short_case/short_case_10.sh
source 'dev/configs/data/aud/short_case/short_case_tys.sh'
# source dev/configs/data/aud/short_case/talk_emotion.sh
# source dev/configs/data/aud/short_case/talk_mola.sh
# source dev/configs/data/aud/long_case/talk_kara.sh
# Self-assignment: keeps whatever value output_dir already has (set elsewhere,
# presumably by the audio config above) and marks the spot where an override
# of the output root would go.
output_dir=${output_dir}


#! ======================================== 
#! ================= Save Config =================
#! ========================================
# Build the location of the rendered video from the active settings so that
# every run lands in a path that describes how it was produced.
#
# Reads : output_dir, model_type, retarget_strategy, dblora_type, data_suffix,
#         b1, b2, s1, s2, thres, align_color_alpha, ctrl_kps, ip_mode,
#         context_frames, context_overlap, n_motion_frames, gs, ac_scale,
#         ipa_scale_1, ipa_scale_2, txtweight, suffix, epoch, audweight, eta,
#         ctrl_scale, infer_step
# Writes: dir_name, data_suffix (tags appended), output_path

# Root directory of this experiment family.
dir_name="${output_dir}/stage2-prefix-meanvar-face/${model_type}/${retarget_strategy}/${dblora_type}"
# dir_name="$output_dir/$stage1_dir/stage2/$model_type/$retarget_strategy/"

# Extend the incoming data_suffix with the tags that go into the file name.
data_suffix+="-b1_${b1}-b2_${b2}-s1_${s1}-s2_${s2}-thres_${thres}"
data_suffix+="-alpha_${align_color_alpha}"

# Quoted and defaulted so an empty or unset ctrl_kps simply means "off"
# instead of making `[` fail with "unary operator expected".
if [ "${ctrl_kps:-0}" -eq 1 ]; then
    data_suffix+="-kps_ctrl_thin"
fi

data_suffix+="-${ip_mode}"

# One statement per path component; no line continuations inside a quoted
# string, so re-indenting these lines cannot leak whitespace into the path.
output_path="${dir_name}"
output_path+="/cfs_${context_frames}-cop_${context_overlap}-prefix_${n_motion_frames}"
output_path+="/gs_${gs}-acs_${ac_scale}-ipa_${ipa_scale_1}-${ipa_scale_2}-txtw_${txtweight}"
output_path+="/${suffix}-${epoch}-audw_${audweight}"
output_path+="-eta_${eta}-ctrls_${ctrl_scale}-infer_${infer_step}"
output_path+="-${data_suffix}.mp4"

# Restrict the launched process to the GPU selected by gpu_id.
CUDA_VISIBLE_DEVICES="${gpu_id}"
export CUDA_VISIBLE_DEVICES
python inference_prefix_meanvar_face.py \
    --denoising_unet_path $denoising_unet_path \
    --vae_path $vae_path \
    --unet_config_path $unet_config_path \
    --sd_model_name $sd_model_name \
    --motion_module_path $motion_module_path \
    --audio_projection_path $audio_projection_path \
    --image_width $image_width \
    --image_height $image_height \
    --num_pad_audio_frames $num_pad_audio_frames \
    --aud_depth $aud_depth \
    --prompt "$PROMPT" \
    --reference_image_path $reference_image_path \
    --kps_path $kps_path \
    --audio_path $audio_path \
    --output_path $output_path \
    --num_inference_steps $infer_step \
    --guidance_scale $gs \
    --mimic_scale $mimic_scale \
    --eta $eta \
    --align_color_alpha $align_color_alpha \
    --context_frames $context_frames \
    --context_overlap $context_overlap \
    --ip_ckpt $ip_ckpt \
    --ip_mode "$ip_mode" \
    --image_encoder_path $image_encoder_path \
    --retarget_strategy $retarget_strategy \
    --text_attention_weight $txtweight \
    --audio_attention_weight $audweight \
    --t2i_adapter_model_path $t2i_adapter_model_path \
    --t2i_adapter_conditioning_scale $ac_scale \
    --t2i_adapter_control_type $adapter_type \
    --disable_kps \
    --disable_audio 0 \
    --disable_motion 0 \
    --n_motion_frames $n_motion_frames \
    --save_clip 0 \
    --b1 $b1 --b2 $b2 --s1 $s1 --s2 $s2 --threshold $thres \
    --dreambooth_path $dreambooth_path \
    --controlnet_conditioning_scale $ctrl_scale \
    --ctrl_kps $ctrl_kps \
    --apply_t2i_adapter 1 \
    --num_tokens $num_tokens_1 $num_tokens_2 \
    --ipa_scale $ipa_scale_1 $ipa_scale_2 \