# Dataset sizes.
# VALID_PROMPT_COUNT is passed as --sample-count when sampling
# data/valid_harmful_prompts.csv; TRAIN_PROMPT_COUNT is not referenced in the
# visible part of this file.
TRAIN_PROMPT_COUNT = 10
VALID_PROMPT_COUNT = 20

# Model used to synthesize programs and its token budget; spliced into
# COMMON_SYNTHESIS_PARAMS.
SYNTHESIS_MODEL_PARAMS  = --synthesis-model-name gpt-4-turbo-2024-04-09
SYNTHESIS_MODEL_PARAMS += --synthesis-model-max-tokens 4096

# Model identifiers used by the rules below:
#   DECORATION_MODEL_NAME  passed as --synthesis-model-name to decorate-csv
#   MODERATION_MODEL_NAME  passed as --moderation-model-name to synthesize
#   TARGET_MODEL_NAME      passed as --target-model-name to synthesize
DECORATION_MODEL_NAME = gpt-4-turbo-2024-04-09
MODERATION_MODEL_NAME = gpt-4-turbo-2024-04-09
TARGET_MODEL_NAME     = meta-llama/Meta-Llama-3-70B-Instruct


# Flags shared by every `h4rm3l.cmd synthesize` invocation in this file.
# Must stay recursively expanded (`=`): $@ only has a value inside a recipe.
COMMON_SYNTHESIS_PARAMS  = --output-file $@
COMMON_SYNTHESIS_PARAMS += --credentials ../../keys.json
COMMON_SYNTHESIS_PARAMS += --decorator-syntax-version 2
# Harmful-prompts CSV and the column read from it.
COMMON_SYNTHESIS_PARAMS += --harmful-prompts-csv data/train_harmful_prompts.csv
COMMON_SYNTHESIS_PARAMS += --harmful-prompts-csv-column harmful_prompt
# Field names for the program description and program text.
COMMON_SYNTHESIS_PARAMS += --program-description-field description
COMMON_SYNTHESIS_PARAMS += --program-text-field program
# Synthesis model and search settings.
COMMON_SYNTHESIS_PARAMS += $(SYNTHESIS_MODEL_PARAMS)
COMMON_SYNTHESIS_PARAMS += --synthesis-harmful-prompts-sample-size 5
COMMON_SYNTHESIS_PARAMS += --synthesis-examples-per-iteration 15
COMMON_SYNTHESIS_PARAMS += --synthesis-iteration-count 100
COMMON_SYNTHESIS_PARAMS += --synthesis-proposals-per-iteration 20
COMMON_SYNTHESIS_PARAMS += --synthesis-proposals-per-batch 30
# Target and moderation models, then logging verbosity.
COMMON_SYNTHESIS_PARAMS += --target-model-name $(TARGET_MODEL_NAME)
COMMON_SYNTHESIS_PARAMS += --moderation-model-name $(MODERATION_MODEL_NAME)
COMMON_SYNTHESIS_PARAMS += --logging-level INFO

# Synthesis inputs for the "hle" example type: primitives list and example programs.
HLE_PARAMS  = --primitives-file config/primitives_hle.txt
HLE_PARAMS += --program-examples-file config/program_examples_hle.csv
HLE_PARAMS += --synthesis-examples-type hle

# Synthesis inputs for the "lle" example type: primitives list and example programs.
LLE_PARAMS  = --primitives-file config/primitives_lle.txt
LLE_PARAMS += --program-examples-file config/program_examples_lle.csv
LLE_PARAMS += --synthesis-examples-type lle

# Synthesis inputs for the "mixed" example type.
# NOTE(review): this reuses the HLE primitives file (there is no
# primitives_mixed.txt reference here) -- confirm that is intended.
MIXED_PARAMS  = --primitives-file config/primitives_hle.txt
MIXED_PARAMS += --program-examples-file config/program_examples_mixed.csv
MIXED_PARAMS += --synthesis-examples-type mixed

# Extra flags added only by the bandit_self_score synthesis rules.
BANDIT_SELF_SCORE_PARAMS  = --synthesis-beta-score-scalar 2
BANDIT_SELF_SCORE_PARAMS += --synthesis-beta-score-scale-with-iterations

######
# Dataset of harmful prompts used to score programs
####
# Training prompts: a verbatim copy of the AdvBench harmful-behaviours CSV.
# The mkdir also creates data/, which must exist before the copy.
data/train_harmful_prompts.csv: ../experiment_003/data/harmful_prompts/advbench_harmful_behaviors.csv
	mkdir -p data/sampled_harmful_prompts/
	cp $< $@


######
# Sample dataset for synthesized program validation
####
# harmful_prompts/elicited_prompts_gtp-3.5-turbo.csv
# Validation prompts: a seeded sample of $(VALID_PROMPT_COUNT) rows from the
# AdvBench CSV, with a header line prepended.
# The source CSV is a declared prerequisite, so a missing input fails in make
# and a changed input triggers a re-sample.
# The result is assembled in $@.tmp and moved into place last, so a failed
# sampling step never leaves a header-only file that looks up to date.
data/valid_harmful_prompts.csv: ../experiment_003/data/harmful_prompts/advbench_harmful_behaviors.csv
	mkdir -p data/sampled_harmful_prompts/
	python -m h4rm3l.csv_cmd sample-csv-file \
		--input-file $< \
		--output-file $@.csv__no_header \
		--seed 6785900 \
		--sample-count $(VALID_PROMPT_COUNT) \
		--fieldnames "harmful_prompt" "harmful_prompt_source"
	echo "harmful_prompt,harmful_prompt_source" > $@.tmp
	cat $@.csv__no_header >> $@.tmp
	rm -f $@.csv__no_header
	mv -f $@.tmp $@



######
# Synthesize programs
####
#     parser.add_argument("--synthesis-beta-score-scalar", type=int)
#    parser.add_argument("--synthesis-beta-score-scale-with-iterations", type=int)


######
# bandit_random: Random sampling with sleeper arms (dynamic update of the example pool)
####
# bandit_random synthesis with the HLE primitives and examples.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_random.hle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(HLE_PARAMS) \
		--synthesis-algo bandit_random \
		--log-dir logs/synthesis-bandit_random.hle

# bandit_random synthesis with the LLE primitives and examples.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_random.lle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(LLE_PARAMS) \
		--synthesis-algo bandit_random \
		--log-dir logs/synthesis-bandit_random.lle

# bandit_random synthesis with the mixed example set.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_random.mixed.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(MIXED_PARAMS) \
		--synthesis-algo bandit_random \
		--log-dir logs/synthesis-bandit_random.mixed


######
# bandit_self_score
####
# bandit_self_score synthesis with the HLE primitives and examples, plus the
# beta-score flags from BANDIT_SELF_SCORE_PARAMS.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_self_score.hle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(HLE_PARAMS) \
		$(BANDIT_SELF_SCORE_PARAMS) \
		--synthesis-algo bandit_self_score \
		--log-dir logs/synthesis-bandit_self_score.hle

# bandit_self_score synthesis with the LLE primitives and examples, plus the
# beta-score flags from BANDIT_SELF_SCORE_PARAMS.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_self_score.lle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(LLE_PARAMS) \
		$(BANDIT_SELF_SCORE_PARAMS) \
		--synthesis-algo bandit_self_score \
		--log-dir logs/synthesis-bandit_self_score.lle

# bandit_self_score synthesis with the mixed example set, plus the
# beta-score flags from BANDIT_SELF_SCORE_PARAMS.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_self_score.mixed.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(MIXED_PARAMS) \
		$(BANDIT_SELF_SCORE_PARAMS) \
		--synthesis-algo bandit_self_score \
		--log-dir logs/synthesis-bandit_self_score.mixed



######
# bandit_offspring_score
####
# bandit_offspring_score synthesis with the HLE primitives and examples.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_offspring_score.hle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(HLE_PARAMS) \
		--synthesis-algo bandit_offspring_score \
		--log-dir logs/synthesis-bandit_offspring_score.hle

# bandit_offspring_score synthesis with the LLE primitives and examples.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_offspring_score.lle.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(LLE_PARAMS) \
		--synthesis-algo bandit_offspring_score \
		--log-dir logs/synthesis-bandit_offspring_score.lle

# bandit_offspring_score synthesis with the mixed example set.
# The training prompts are a prerequisite because COMMON_SYNTHESIS_PARAMS
# passes data/train_harmful_prompts.csv via --harmful-prompts-csv.
data/synthesized_programs/syn_progs.bandit_offspring_score.mixed.csv: data/train_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd synthesize \
		$(COMMON_SYNTHESIS_PARAMS) \
		$(MIXED_PARAMS) \
		--synthesis-algo bandit_offspring_score \
		--log-dir logs/synthesis-bandit_offspring_score.mixed



# Run the synthesis analysis over everything currently present in
# data/synthesized_programs/ (the shell expands the glob at recipe time).
# The rule has no prerequisites, so make only runs it while the output
# directory does not exist yet.
results/analysis/prog-synthesis:
	python -m h4rm3l.cmd analyse-prog-synthesis \
		--evaluation-files data/synthesized_programs/* \
		--output-dir $@





# data/eval_programs.csv:
# 	python -m h4rm3l.csv_cmd sort \
# 		--input-file data/synthesized_programs/syn_progs.n_shot_nobeam.csv \
# 		--output-file $@ \
# 		--sort-field-name syn_score \
# 		--sort-descending


######
## Manually selected synthesized programs for evaluation
## programs with score >= 0.6 for each run
## also included score >= 0.4 for lle synthesis
##### config/exp_116_selected_progs_claude.csv
###



######
# 1.3. Decorate dataset for scoring synthesized programs
####
# data/sampled_harmful_prompts/elicited_prompts-claude-10.csv data/synthesized_programs/syn100.0_shot.csv
# Decorate every validation prompt with each selected program.
# Prerequisites, in order:
#   1. the validation prompts               ($<)
#   2. the manually selected programs CSV   ($(word 2,$^))
# Both are read by the recipe, so both are declared: a missing file fails in
# make, and editing the selection triggers a re-decoration.
data/decorated_valid_harmful_prompts.csv: data/valid_harmful_prompts.csv config/exp_116_selected_progs_claude.csv
	mkdir -p data/decorated_harmful_prompts/
	python -m h4rm3l.cmd decorate-csv \
		--input-prompts-file $< \
		--input-programs-file $(word 2,$^) \
		--input-programs-csv-field program \
		--synthesis-model-name $(DECORATION_MODEL_NAME) \
		--credentials ../../keys.json \
		--decorator-syntax-version 2 \
		--output-file $@

######
# Validation Benchmark
####
# Flags shared by every `h4rm3l.cmd evaluate` invocation below.
# Must stay recursively expanded (`=`): $< and $@ only have values inside a
# recipe, where they name the rule's first prerequisite and its target.
COMMON_EVAL_PARAMS  = --input-file $<
COMMON_EVAL_PARAMS += --output-file $@
# NOTE(review): the moderation model is hard-coded here rather than taken
# from $(MODERATION_MODEL_NAME) -- confirm that is intended.
COMMON_EVAL_PARAMS += --moderation-model gpt-4
COMMON_EVAL_PARAMS += --eval-prompt-fieldname decorated_harmful_prompt
COMMON_EVAL_PARAMS += --original-prompt-fieldname harmful_prompt
COMMON_EVAL_PARAMS += --credentials ../../keys.json


# Evaluate the HLE-decorated validation prompts against gpt-3.5-turbo.
# NOTE(review): no rule in the visible part of this file builds the
# prerequisite CSV -- confirm where it comes from.
results/valid_eval_hle_gpt-3.5-turbo.csv: data/decorated_valid_harmful_prompts_hle.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate $(COMMON_EVAL_PARAMS) \
		--log-dir logs/gpt-3.5-turbo-hle \
		--target-model gpt-3.5-turbo

# Evaluate the LLE-decorated validation prompts against gpt-3.5-turbo.
# NOTE(review): no rule in the visible part of this file builds the
# prerequisite CSV -- confirm where it comes from.
results/valid_eval_lle_gpt-3.5-turbo.csv: data/decorated_valid_harmful_prompts_lle.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate $(COMMON_EVAL_PARAMS) \
		--log-dir logs/gpt-3.5-turbo-lle \
		--target-model gpt-3.5-turbo

# Evaluate the mixed-decorated validation prompts against gpt-3.5-turbo.
# NOTE(review): no rule in the visible part of this file builds the
# prerequisite CSV -- confirm where it comes from.
results/valid_eval_mixed_gpt-3.5-turbo.csv: data/decorated_valid_harmful_prompts_mixed.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate $(COMMON_EVAL_PARAMS) \
		--log-dir logs/gpt-3.5-turbo-mixed \
		--target-model gpt-3.5-turbo





# Evaluate the decorated validation prompts against gpt-4.
# NOTE(review): unlike the other evaluate rules in this file, no --log-dir is
# passed here -- confirm that is intended.
results/valid_eval_gpt-4.csv: data/decorated_valid_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate $(COMMON_EVAL_PARAMS) \
		--target-model gpt-4

# Evaluate the decorated validation prompts against claude-3-haiku-20240307.
results/valid_eval_claude-3-haiku-20240307.csv: data/decorated_valid_harmful_prompts.csv
	mkdir -p $(@D)
	python -m h4rm3l.cmd evaluate $(COMMON_EVAL_PARAMS) \
		--log-dir logs/claude-eval \
		--target-model claude-3-haiku-20240307


# Analyse the claude-3-haiku validation results into the directory $@.
# The evaluation CSV is a declared prerequisite, so it is built first and the
# analysis re-runs when the CSV is newer than the output directory.
results/analysis/: results/valid_eval_claude-3-haiku-20240307.csv
	python -m h4rm3l.cmd analyse \
		--evaluation-files $< \
		--output-dir $@



# Remove every generated artefact (datasets, results, logs).
# Declared phony so a file or directory named `clean` cannot make this rule
# look up to date.
.PHONY: clean
clean:
	rm -rf data results logs