#!/usr/bin/env bash
#
# Run configuration for main.py.
#
# Selects the visible GPU, sets the run parameters, derives the dataset,
# model, output and log paths from them, and creates the output and log
# directories. The variables defined here are consumed by the launch
# command that follows this section. Nothing is read from the command
# line: edit the values below to change the run.

# Stop on command failure, on use of an unset variable, and on failure of
# any stage of a pipeline, so a broken setup never reaches the launch.
set -euo pipefail

# Only GPU 0 is visible to processes started from this script.
export CUDA_VISIBLE_DEVICES="0"

seed=42
mode='train' # train or test
watermark_algorithm='Unigram' # Unigram EXP SynthID EWD UPV DIP SWEET MorphMark SIR
attack_name='None' # None Word-D Word-S-DICT Word-S-BERT Copy-Paste Doc-P-GPT Translation Doc-P-Dipper-1 Doc-P-Dipper-2
unwatermarked_text_source='generated' # natural or generated

temperature=0.0
diffusion_steps=1
gen_length=128
block_length=16
remasking='entropy' # confidence or random or entropy or margin
watermark_type='V' # (V)anilla (R)ipple

# dataset
dataset_name="c4"

# Dataset names containing the letter "t" are read from a single
# un-suffixed file; every other dataset uses a file tagged with the
# dataset size and the seed.
# NOTE(review): this is a plain substring match on "t", which is very
# broad -- confirm which dataset names are meant to take the first branch.
if [[ "$dataset_name" == *t* ]]; then
    dataset_size=200
    dataset_path=./datasets/${dataset_name}/${dataset_name}.jsonl
else
    dataset_size=200
    dataset_path=./datasets/${dataset_name}/${dataset_name}_${dataset_size}_${seed}.jsonl
fi

# target_model_name='llama3-8b-instruct'
# target_model_name='LLaDA-8B-Instruct'
# target_model_name='Dream-v0-Instruct-7B'

target_model_name='LLaDA-8B-Base'
# target_model_name='Dream-v0-Base-7B'

target_model_path=/data/xxx/model/${target_model_name}
# NOTE(review): eval_model_name is never assigned in this script, so it is
# taken from the environment when present and is empty otherwise. The
# resulting path is not forwarded by the launch command in this file --
# confirm whether it is still needed.
eval_model_path=/data/xxx/model/${eval_model_name:-}

# Suffix shared by the output file name and the log file name.
run_tag=remasking_${remasking}_${dataset_size}_steps_${diffusion_steps}_length_${gen_length}_seed_${seed}

input_json_filename=${dataset_path}
output_json_filepath=./output/${watermark_algorithm}/${dataset_name}/${watermark_type}
output_json_filename=${output_json_filepath}/${target_model_name}_${run_tag}.jsonl

LOG_DIR=./log/${mode}/${watermark_algorithm}/${dataset_name}/${watermark_type}/

#######################################
# Print the log file path to use for a run mode.
# Globals:   LOG_DIR, target_model_name, unwatermarked_text_source,
#            attack_name, run_tag (all read)
# Arguments: $1 - run mode, 'train' or 'test'
# Outputs:   the log file path on stdout; an error message on stderr
# Returns:   0 on success, 1 when the mode is not recognised
#######################################
log_file_for_mode() {
    local run_mode=$1
    # LOG_DIR already ends in "/", so the joined path contains "//". That
    # is harmless and keeps existing log paths byte-identical.
    local log_prefix=${LOG_DIR}/${target_model_name}

    case "$run_mode" in
        train)
            printf '%s\n' "${log_prefix}_${run_tag}.log"
            ;;
        test)
            printf '%s\n' "${log_prefix}_${unwatermarked_text_source}_${attack_name}_${run_tag}.log"
            ;;
        *)
            printf "Invalid mode: '%s' (expected 'train' or 'test')\n" "$run_mode" >&2
            return 1
            ;;
    esac
}

# Resolve the log file first: this validates the mode before any directory
# is created, and stops the script instead of launching with no log file.
LOG_FILE=$(log_file_for_mode "$mode") || exit 1

mkdir -p -- "${LOG_DIR}"
mkdir -p -- "${output_json_filepath}"

if [[ "$mode" == 'test' ]]; then
    mkdir -p -- "./ppl/${watermark_algorithm}"
fi

# Launch main.py in the background, detached from the terminal with nohup,
# with stdout and stderr both captured in LOG_FILE.

# Refuse to launch without a log file: an empty redirect target would
# otherwise fail with an obscure "No such file or directory" error.
: "${LOG_FILE:?LOG_FILE is not set; check that mode is 'train' or 'test'}"

# Flags forwarded to main.py. Each value is quoted so that a setting
# containing whitespace or glob characters still arrives as one argument.
main_args=(
    --seed="${seed}"
    --mode="${mode}"
    --attack_name="${attack_name}"
    --remasking="${remasking}"
    --watermark_type="${watermark_type}"
    --diffusion_steps="${diffusion_steps}"
    --gen_length="${gen_length}"
    --block_length="${block_length}"
    --temperature="${temperature}"
    --dataset_name="${dataset_name}"
    --dataset_size="${dataset_size}"
    --dataset_path="${dataset_path}"
    --watermark_algorithm="${watermark_algorithm}"
    --target_model_name="${target_model_name}"
    --target_model_path="${target_model_path}"
    --input_json_filename="${input_json_filename}"
    --output_json_filename="${output_json_filename}"
    --unwatermarked_text_source="${unwatermarked_text_source}"
)

# To attach a debugger, replace the interpreter invocation below with:
#   nohup python -m debugpy --listen 6666 --wait-for-client main.py "${main_args[@]}"
nohup python main.py "${main_args[@]}" > "$LOG_FILE" 2>&1 &