###########
# Original Program synthesis experiment files
#########
# ../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_offspring_score.mixed.csv
# ../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_random.mixed.csv
# ../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_self_score.lle.csv
# ../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
# ../experiment_118_bandit_synthesis_claude_sonnet/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
# ../experiment_119_bandit_synthesis_claude_haiku/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
# ../experiment_120_bandit_synthesis_gpt3.5/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
# ../experiment_121_bandit_synthesis_llama3-8b/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
# ../experiment_122_bandit_synthesis_llama3-70b/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv
#########

#####
# Note on target models
####
# 117: gpt-4o-2024-05-13
# 118: claude-3-sonnet-20240229
# 119: claude-3-haiku-20240307
# 120: gpt-3.5-turbo
# 121: meta-llama/Meta-Llama-3-8B-Instruct
# 122: meta-llama/Meta-Llama-3-70B-Instruct
#####



# Number of harmful prompts to sample; also embedded in the names of the
# sampled/decorated/evaluated benchmark files below.
HARMFUL_PROMPT_SAMPLE_SIZE := 50
# Model passed to `decorate-csv` as --synthesis-model-name.
DECORATION_MODEL_NAME := gpt-4-turbo-2024-04-09
# Model passed to `evaluate` as --moderation-model.
MODERATION_MODEL_NAME := gpt-4-turbo-2024-04-09

##
# Stage 1: Copy synthesized programs from experiments 117, 118, 119, 120, 121, and 122
###
# Output directory for the per-target copies of the synthesized-program CSVs.
data/synthesized_programs/:
	mkdir -p data/synthesized_programs

###
# 117
###
# Copy experiment 117's offspring-score/mixed programs and insert a
# `synthesis_target` column holding the target model name.
# The output directory is an order-only prerequisite (after `|`): it must
# exist, but its mtime -- which changes every time a file is added to it --
# must not make this CSV look out of date.
data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv:\
	../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_offspring_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value gpt-4o-2024-05-13

# Remaining experiment-117 (gpt-4o) copies. Each rule inserts a
# `synthesis_target` column into the source CSV. The output directory is an
# order-only prerequisite so these targets can be built on a fresh checkout
# (or under `make -j`) without the directory's mtime forcing rebuilds.
data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv:\
	../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_random.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value gpt-4o-2024-05-13

data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv:\
	../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_self_score.lle.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value gpt-4o-2024-05-13

data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_117_bandit_synthesis_gpt4o/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value gpt-4o-2024-05-13
###
# 118
###
# Experiments 118-122: one self-score/mixed CSV per target model. Each rule
# inserts a `synthesis_target` column into the source CSV. The output
# directory is an order-only prerequisite so every target can be built on its
# own (or under `make -j`) without relying on another rule having created it.
data/synthesized_programs/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_118_bandit_synthesis_claude_sonnet/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value claude-3-sonnet-20240229

###
# 119
###
data/synthesized_programs/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_119_bandit_synthesis_claude_haiku/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value claude-3-haiku-20240307

###
# 120
###
data/synthesized_programs/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_120_bandit_synthesis_gpt3.5/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value gpt-3.5-turbo

###
# 121
###
# Note: the file name drops the `meta-llama/` organisation prefix, but the
# inserted column value keeps it.
data/synthesized_programs/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_121_bandit_synthesis_llama3-8b/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value meta-llama/Meta-Llama-3-8B-Instruct

###
# 122
###
data/synthesized_programs/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv:\
	../experiment_122_bandit_synthesis_llama3-70b/data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv \
	| data/synthesized_programs/
	python -m h4rm3l.csv_cmd insert-column \
		--input-file $< \
		--output-file $@ \
		--new-column-name synthesis_target \
		--new-column-value meta-llama/Meta-Llama-3-70B-Instruct




# Convenience target: build every stage-1 CSV.
# Declared phony because it names a command, not a file; without this a stray
# file called `all-synthesized-programs` would silently disable the target.
.PHONY: all-synthesized-programs
all-synthesized-programs: \
	data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
	data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
	data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
	data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv
	echo "done"




##
# Prefix selected program names with target LM
###
# Recipe fragment: affix the `program_name` column of $< using the
# `synthesis_target` column as prefix and `__` as separator, writing to $@.
# Must stay recursively expanded (`=`) so $< and $@ are resolved per rule.
AFFIX_CMD = \
	python -m h4rm3l.csv_cmd affix-column \
		--input-file $< \
		--output-file $@ \
		--column-name program_name \
		--prefix-column synthesis_target \
		--affix-separator __

# Every renamed CSV, each derived from the same relative path under
# data/synthesized_programs/.
# NOTE(review): no rule in this file produces the `sorted/` input consumed by
# the `sorted/...sorted.csv` entry, and it is not part of
# all-synthesized-programs-renamed -- it looks stale; confirm before removing.
RENAMED_PROGRAM_FILES := \
	data/synthesized_programs_renamed/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
	data/synthesized_programs_renamed/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/sorted/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.sorted.csv \
	data/synthesized_programs_renamed/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
	data/synthesized_programs_renamed/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
	data/synthesized_programs_renamed/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv

# Static pattern rule replacing ten identical hand-written rules.
# `mkdir -p $(@D)` makes each target buildable on its own (including the
# nested `sorted/` one) instead of depending on the aggregate target having
# created the directory first.
$(RENAMED_PROGRAM_FILES): data/synthesized_programs_renamed/%: data/synthesized_programs/%
	mkdir -p $(@D)
	$(AFFIX_CMD)


# Output directory for the renamed CSVs.
data/synthesized_programs_renamed:
	mkdir -p $@

# Convenience target: build every renamed CSV.
# Declared phony because it names a command, not a file.
.PHONY: all-synthesized-programs-renamed
all-synthesized-programs-renamed: data/synthesized_programs_renamed \
	data/synthesized_programs_renamed/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
	data/synthesized_programs_renamed/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
	data/synthesized_programs_renamed/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
	data/synthesized_programs_renamed/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_renamed/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv

##
# Synthesis Curves
###
# Synthesis curves comparing the gpt-4o synthesis conditions: every stage-1
# file whose name starts with `gpt-4o-2024-05-13.` (shell glob) is analysed.
# No prerequisites are declared, so the inputs must already exist.
results/prog-synthesis-conditions:
	python -m h4rm3l.cmd analyse-prog-synthesis \
		--evaluation-files data/synthesized_programs/gpt-4o-2024-05-13.* \
		--color-palette "#FFD700" "#7FFF00" "#00FFFF" "#FF69B4" "#FF8C00" \
		--plot-extension pdf \
		--output-dir $@

# Synthesis curves comparing target models: every stage-1 file ending in
# `bandit_self_score.mixed.csv` (shell glob) is analysed.
results/prog-synthesis-targets:
	python -m h4rm3l.cmd analyse-prog-synthesis \
		--evaluation-files data/synthesized_programs/*bandit_self_score.mixed.csv \
		--color-palette "#56B4E9" "#0072B2" "#77CC5A" "#009E73" "#D55E00" "#E69F00" \
		--plot-extension pdf \
		--output-dir $@


##
# program clustering Tools
###
# Recipe fragment: run `cluster_programs` on the CSV $< and write into the
# output directory $@. Recursively expanded so $< / $@ bind per rule.
PROG_CLUSTERING_CMD = \
	python -m h4rm3l.cmd cluster_programs \
		--input-file $< \
		--output-dir $@ \
		--cluster-model mchochlov/codebert-base-cd-ft \
		--program-name-column program \
		--asr-column syn_score

# One clustering output directory per stage-1 CSV; the directory name is the
# CSV name without its `.csv` extension.
PROG_CLUSTER_DIRS := $(addprefix results/prog-synthesis-clusters/, \
	Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed \
	claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed \
	claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed \
	Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed \
	gpt-3.5-turbo.syn_progs.bandit_self_score.mixed \
	gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed \
	gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle \
	gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed \
	gpt-4o-2024-05-13.syn_progs.bandit_random.mixed)

# Static pattern rule: each listed directory is built from the matching CSV.
$(PROG_CLUSTER_DIRS): results/prog-synthesis-clusters/%: data/synthesized_programs/%.csv
	$(PROG_CLUSTERING_CMD)


# Joint clustering of the self-score/mixed programs of all six target models.
# The input CSVs are passed explicitly and are not declared as prerequisites,
# so they must already exist.
results/prog-synthesis-clusters/by-target-model:
	python -m h4rm3l.cmd cluster_programs \
		--output-dir $@ \
		--cluster-model mchochlov/codebert-base-cd-ft \
		--program-name-column program \
		--asr-column syn_score \
		--color-palette "#56B4E9" "#0072B2" "#77CC5A" "#009E73" "#D55E00" "#E69F00" \
		--plot-extension pdf \
		--input-files \
		data/synthesized_programs/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
		data/synthesized_programs/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
		data/synthesized_programs/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
		data/synthesized_programs/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv \
		data/synthesized_programs/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
		data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv


# Joint clustering of the four gpt-4o synthesis conditions.
# The input CSVs are passed explicitly and are not declared as prerequisites,
# so they must already exist.
# The last recipe line deliberately has no trailing backslash: a dangling
# continuation would swallow whatever line is later added after the recipe.
results/prog-synthesis-clusters/by-synthesis-method:
	python -m h4rm3l.cmd cluster_programs \
		--output-dir $@ \
		--cluster-model mchochlov/codebert-base-cd-ft \
		--program-name-column program \
		--asr-column syn_score \
		--color-palette  "#FFD700" "#7FFF00" "#00FFFF" "#FF69B4" "#FF8C00" \
		--plot-extension pdf \
		--input-files \
			data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
			data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
			data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
			data/synthesized_programs/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv

##
# Selection of benchmark programs (select top k per target LM)
###
# Recipe fragment: sort $< by `syn_score` (numeric, descending) and keep the
# top 10 rows, writing to $@. Recursively expanded so $< / $@ bind per rule.
SORT_CMD = \
	python -m h4rm3l.csv_cmd sort \
		--input-file $< \
		--output-file $@ \
		--sort-field-name syn_score \
		--sort-descending \
		--sort-numbers \
		--keep-top-k 10

# Every top-k CSV, each derived from the same relative path under
# data/synthesized_programs_renamed/.
# NOTE(review): the `sorted/...sorted.csv` entry is not part of
# all-sorted-top-k and its input chain has no source rule in this file -- it
# looks stale; confirm before removing.
TOP_K_PROGRAM_FILES := \
	data/synthesized_programs_top_k/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
	data/synthesized_programs_top_k/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/sorted/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.sorted.csv \
	data/synthesized_programs_top_k/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
	data/synthesized_programs_top_k/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
	data/synthesized_programs_top_k/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv

# Static pattern rule replacing ten identical hand-written rules.
# `mkdir -p $(@D)` makes each target buildable on its own instead of
# depending on the aggregate target having created the directory first.
$(TOP_K_PROGRAM_FILES): data/synthesized_programs_top_k/%: data/synthesized_programs_renamed/%
	mkdir -p $(@D)
	$(SORT_CMD)


# Output directory for the top-k CSVs.
data/synthesized_programs_top_k:
	mkdir -p $@

# Convenience target: build every top-k CSV.
# Declared phony because it names a command, not a file.
.PHONY: all-sorted-top-k
all-sorted-top-k: data/synthesized_programs_top_k \
	data/synthesized_programs_top_k/Meta-Llama-3-8B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_self_score.lle.csv \
	data/synthesized_programs_top_k/claude-3-sonnet-20240229.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/claude-3-haiku-20240307.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_offspring_score.mixed.csv \
	data/synthesized_programs_top_k/Meta-Llama-3-70B-Instruct.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_random.mixed.csv \
	data/synthesized_programs_top_k/gpt-3.5-turbo.syn_progs.bandit_self_score.mixed.csv \
	data/synthesized_programs_top_k/gpt-4o-2024-05-13.syn_progs.bandit_self_score.mixed.csv



# Benchmark program set: concatenation of every top-k CSV found by the shell
# glob plus config/sota_programs.csv. No prerequisites are declared, so run
# `all-sorted-top-k` first; the file is only built when it does not exist.
data/benchmark/h4rm3l_benchmark_20240604.csv:
	mkdir -p $(@D)
	python -m h4rm3l.csv_cmd concatenate \
		--input-files data/synthesized_programs_top_k/*.csv config/sota_programs.csv \
		--output-file $@




######
# 2. Make harmful prompt dataset
####
# Sample $(HARMFUL_PROMPT_SAMPLE_SIZE) prompts (fixed seed 42) from the
# AdvBench CSV and prepend a header line.
# The sample is drawn first and the final file is assembled in `$@.tmp` and
# then renamed, so a failed or interrupted run never leaves a header-only or
# truncated target that make would consider up to date.
# The headerless sample is still left next to the target as `$@__no_header`.
data/sampled_harmful_prompts/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).csv: config/advbench_harmful_behaviors.csv
	mkdir -p $(@D)
	python -m h4rm3l.csv_cmd sample-csv-file \
		--input-file $< \
		--output-file $@__no_header \
		--seed 42 \
		--sample-count $(HARMFUL_PROMPT_SAMPLE_SIZE) \
		--fieldnames "harmful_prompt" "harmful_prompt_source"
	echo "harmful_prompt,harmful_prompt_source" > $@.tmp
	cat $@__no_header >> $@.tmp
	mv -f $@.tmp $@



######
# 3. Decorate benchmark dataset
####
# Apply every benchmark program to every sampled prompt.
# Both inputs are declared prerequisites: $< is the sampled-prompt CSV and
# $(word 2,$^) is the benchmark program CSV, so a missing or regenerated
# program set is no longer a hidden dependency of this rule.
# Requires API credentials at ../../keys.json.
data/decorated_prompts/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.csv: \
	data/sampled_harmful_prompts/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).csv \
	data/benchmark/h4rm3l_benchmark_20240604.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd decorate-csv \
		--input-prompts-file $< \
		--input-programs-file $(word 2,$^) \
		--input-programs-csv-field program \
		--program-description-field description \
		--decorator-syntax-version 2 \
		--synthesis-model-name $(DECORATION_MODEL_NAME) \
		--credentials ../../keys.json \
		--output-file $@



# Flags shared by every `evaluate` rule below.
# Recursively expanded (`=`) on purpose: $@ must resolve to the result file of
# whichever rule expands this variable. The decorated-prompt CSV is referenced
# by path here and is not a declared prerequisite of the evaluate rules, so it
# must already exist when they run.
COMMON_EVAL_PARAMS = \
	--input-file data/decorated_prompts/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.csv \
	--output-file $@ \
	--moderation-model $(MODERATION_MODEL_NAME) \
	--eval-prompt-fieldname decorated_harmful_prompt \
	--original-prompt-fieldname harmful_prompt \
	--credentials ../../keys.json


# 117: gpt-4o-2024-05-13
# 118: claude-3-sonnet-20240229
# 119: claude-3-haiku-20240307
# 120: gpt-3.5-turbo
# 121: meta-llama/Meta-Llama-3-8B-Instruct
# 122: meta-llama/Meta-Llama-3-70B-Instruct

######
# Evaluate models
####
# One rule per evaluated target model. Each writes its result CSV ($@) from
# the shared decorated-prompt file (see COMMON_EVAL_PARAMS) and logs to a
# per-model directory under logs/. None declares prerequisites, so a result
# file is only (re)built when it does not exist.

# gpt-4o
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_gpt-4o-2024-05-13.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model gpt-4o-2024-05-13 \
		--log-dir logs/eval-gpt-4o-2024-05-13

# claude-3-sonnet
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_claude-3-sonnet-20240229.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model claude-3-sonnet-20240229 \
		--log-dir logs/eval-claude-3-sonnet-20240229

# claude-3-haiku
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_claude-3-haiku-20240307.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model claude-3-haiku-20240307 \
		--log-dir logs/eval-claude-3-haiku-20240307

# gpt-3.5-turbo
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_gpt-3.5-turbo.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model gpt-3.5-turbo \
		--log-dir logs/eval-gpt-3.5-turbo

# gpt-4-turbo (not one of the synthesis targets listed at the top of the file)
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_gpt-4-turbo-2024-04-09.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model "gpt-4-turbo-2024-04-09" \
		--log-dir logs/eval-gpt-4-turbo-2024-04-09

# Llama-3-8B
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_Meta-Llama-3-8B-Instruct.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model meta-llama/Meta-Llama-3-8B-Instruct \
		--log-dir logs/eval-Meta-Llama-3-8B-Instruct

# Llama-3-70B
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_Meta-Llama-3-70B-Instruct.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model meta-llama/Meta-Llama-3-70B-Instruct \
		--log-dir logs/eval-Meta-Llama-3-70B-Instruct

# Mistral-7B (not a synthesis target). Its log directory has no `eval-`
# prefix, unlike the rules above.
results/benchmark-advbench-$(HARMFUL_PROMPT_SAMPLE_SIZE).decorated.evaluated_Mistral-7B-Instruct-v0.1.csv:
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate \
		$(COMMON_EVAL_PARAMS) \
		--target-model "mistralai/Mistral-7B-Instruct-v0.1" \
		--log-dir logs/Mistral-7B-Instruct-v0.1





# Program names passed to `analyse --program-names` (see results/analysis).
# Synthesized entries have the form <synthesis_target>__<program_name>, i.e.
# the naming produced by AFFIX_CMD (prefix column `synthesis_target`,
# separator `__`); the `meta-llama/` entries therefore contain a slash.
# There are 10 synthesized entries per target model, presumably the rows kept
# by `--keep-top-k 10` in SORT_CMD -- TODO confirm they match the current
# top-k files.
# NOTE(review): `sota_sota_ref_suppr` has a doubled `sota_` prefix; verify it
# against the program names in config/sota_programs.csv.
PROGRAM_NAMES=\
	identity \
	sota_AIM \
	sota_DAN \
	sota_PAP \
	sota_aff_prfx_inj \
	sota_b64 \
	sota_cipher \
	sota_combination_3 \
	sota_cot \
	sota_few_shots \
	sota_lr_translation \
	sota_obf_pyld_splitting \
	sota_sota_ref_suppr \
	sota_style_short \
	sota_uta_bard \
	sota_uta_gpt \
	sota_uta_llama \
	sota_wikipedia \
	handcrafted_02 \
	handcrafted_03 \
	handcrafted_04 \
	handcrafted_05 \
	handcrafted_06_persuasion \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_028_00536 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_036_00679 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_060_01137 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_066_01252 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_067_01272 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_070_01323 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_072_01358 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_073_01371 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_082_01552 \
	meta-llama/Meta-Llama-3-8B-Instruct__synth_bandit_self_score_mixed_iter_091_01713 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_034_00659 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_034_00665 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_034_00667 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_038_00734 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_039_00747 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_040_00764 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_044_00837 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_044_00848 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_052_00984 \
	meta-llama/Meta-Llama-3-70B-Instruct__synth_bandit_self_score_mixed_iter_067_01249 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_002_00060 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_035_00693 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_053_01044 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_061_01196 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_064_01254 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_065_01278 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_067_01313 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_078_01513 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_079_01539 \
	claude-3-haiku-20240307__synth_bandit_self_score_mixed_iter_088_01713 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_045_00851 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_046_00860 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_054_01013 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_066_01216 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_073_01353 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_080_01481 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_085_01565 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_086_01580 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_092_01700 \
	claude-3-sonnet-20240229__synth_bandit_self_score_mixed_iter_094_01728 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_040_00717 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_041_00725 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_041_00727 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_042_00734 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_042_00737 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_042_00743 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_043_00753 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_046_00803 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_089_01525 \
	gpt-3.5-turbo__synth_bandit_self_score_mixed_iter_089_01537 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_022_00439 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_028_00572 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_037_00739 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_038_00750 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_053_01041 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_053_01052 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_057_01130 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_080_01570 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_081_01592 \
	gpt-4o-2024-05-13__synth_bandit_self_score_mixed_iter_091_01776


# Values passed to `analyse --target-model-names` (see results/analysis).
# NOTE(review): these look like per-model success-rate column names (model id
# with `-`, `/` and `.` replaced by `_`, plus a `_success_rate` suffix) --
# confirm against the h4rm3l `analyse` command.
# gpt-4-turbo and Mistral-7B are evaluated above but not listed here.
TARGET_MODEL_NAMES=\
	meta_llama_Meta_Llama_3_8B_Instruct_success_rate \
	meta_llama_Meta_Llama_3_70B_Instruct_success_rate \
	claude_3_haiku_20240307_success_rate \
	claude_3_sonnet_20240229_success_rate \
	gpt_3_5_turbo_success_rate \
	gpt_4o_2024_05_13_success_rate


# Analyse every evaluation CSV matching results/benchmark-advbench-*.csv
# (shell glob) for the programs and target models listed above, writing into
# the directory $@. No prerequisites are declared, so the evaluation files
# must already exist.
results/analysis:
	python -m h4rm3l.cmd analyse \
		--evaluation-files results/benchmark-advbench-*.csv \
		--program-names $(PROGRAM_NAMES) \
		--target-model-names $(TARGET_MODEL_NAMES) \
		--output-dir $@



# SLURM cluster settings used by `submit-target-to-cluster`.
# The include is mandatory (no leading `-`), so make aborts for every target
# when ../cluster_info.mk is missing.
include ../cluster_info.mk
# The included file is expected to define, for example:
# SLURM_CLUSTER_ACCOUNT_NAME=cluster-account-name
# SLURM_CLUSTER_PARTITION_NAME=cluster-partition-name
# SLURM_CLUSTER_EXCLUDE_NODES=node1,node2,node3

# Submit `make $(TARGET)` as a SLURM batch job.
# Usage: make submit-target-to-cluster TARGET=<make-target>
# Writes slurm_job.sh in the current directory (activating ../../.venv38 and
# changing into this directory) and hands it to sbatch with the account,
# partition and excluded nodes from the SLURM_CLUSTER_* variables.
# The first recipe line aborts when TARGET is empty; otherwise the job would
# run a bare `make` and sbatch would receive an empty --job-name.
.PHONY: submit-target-to-cluster
submit-target-to-cluster:
	$(if $(strip $(TARGET)),,$(error TARGET is not set. Usage: make submit-target-to-cluster TARGET=<make-target>))
	echo "#!/bin/bash" > slurm_job.sh
	echo "source `pwd`/../../.venv38/bin/activate" >> slurm_job.sh
	echo "cd `pwd`" >> slurm_job.sh
	echo "make $(TARGET)" >> slurm_job.sh
	sbatch \
			--time=120:00:00 \
			--cpus-per-task=32 \
			--gres=gpu:0 \
			--mem 32G \
			--account $(SLURM_CLUSTER_ACCOUNT_NAME) \
			--partition $(SLURM_CLUSTER_PARTITION_NAME) \
			--job-name $(TARGET) \
			--exclude=$(SLURM_CLUSTER_EXCLUDE_NODES) \
			slurm_job.sh

# List the invoking user's SLURM jobs (`--me`) with a wide job-name column.
# Declared phony because it names a command, not a file.
.PHONY: show-jobs
show-jobs:
	squeue --format="%.18i %.12P %70j %.8T %.10M %.9l %.6D %R" --me

# %.8u is squeue's user-name field. NOTE(review): it appears to have been
# dropped from the format above, presumably because --me already limits the
# listing to one user -- confirm before re-adding.