---
# Experiment configuration.
#
# Alternative values for a key are kept as commented-out lines. Keep exactly
# one line per key active: duplicate keys are invalid YAML and most parsers
# silently keep only the last one.

# Input samples file (JSONL). Switch it together with `results_dir` further
# down; the active pair currently targets toxicity/real_toxicity_prompts.
# samples_path: 'artifacts/backdoor/sleeper_agent.jsonl'
# samples_path: 'artifacts/lie/llmscan_questions_1000_all.jsonl'
# samples_path: 'artifacts/lie/llmscan_wikidata.jsonl'
# samples_path: 'artifacts/lie/commonsense_qa.jsonl'
# samples_path: 'artifacts/lie/mathqa.jsonl'
# samples_path: 'artifacts/lie/sciq.jsonl'
# samples_path: 'artifacts/jailbreaking/llmscan_gcg.jsonl'
# samples_path: 'artifacts/toxicity/jigsaw_conversation_ai.jsonl'
# samples_path: 'artifacts/toxicity/surge_ai_toxicity.jsonl'
# samples_path: 'artifacts/toxicity/social_chem_101.jsonl'
samples_path: 'artifacts/toxicity/real_toxicity_prompts.jsonl'
# samples_path: 'artifacts/jailbreaking/pap.jsonl'
# samples_path: 'artifacts/jailbreaking/autodan.jsonl'
# samples_path: 'artifacts/backdoor/vpi.jsonl'
# samples_path: 'artifacts/backdoor/mtba.jsonl'

# Model identifier, written in `org/name` form.
# model_name: 'meta-llama/Llama-3.1-8B-Instruct'
model_name: 'meta-llama/Llama-3.2-3B-Instruct'
# model_name: 'meta-llama/Llama-4-Scout-17B-16E-Instruct'

# NOTE(review): presumably zero-based layer / attention-head indices for the
# selected model -- confirm against the code that reads this file. If so,
# 27 and 23 look like the last layer and last head of the 3B model, and both
# lists need revisiting whenever `model_name` changes. TODO confirm.
hidden_layers: [0, 14, 27]
heads: [0, 12, 23]
combine_causal_effects: true

# NOTE(review): `max_samples` is presumably an upper bound on how many
# entries of `samples_path` are processed -- verify in the consumer.
batch_size: 4
max_samples: 1000

# Output directory. Must match the dataset chosen in `samples_path` above.
# NOTE(review): `original_questions_1000` has no same-named `samples_path`
# alternative in this file -- check which input it belongs to.
# results_dir: 'results/backdoor/sleeper_agent/'
# results_dir: 'results/lie/questions_1000/'
# results_dir: 'results/lie/wikidata/'
# results_dir: 'results/lie/commonsense_qa/'
# results_dir: 'results/lie/mathqa/'
# results_dir: 'results/lie/original_questions_1000/'
# results_dir: 'results/lie/sciq/'
# results_dir: 'results/jailbreaking/llmscan_gcg/'
# results_dir: 'results/toxicity/jigsaw_conversation_ai/'
# results_dir: 'results/toxicity/surge_ai_toxicity/'
# results_dir: 'results/toxicity/social_chem_101/'
results_dir: 'results/toxicity/real_toxicity_prompts/'
# results_dir: 'results/jailbreaking/pap/'
# results_dir: 'results/jailbreaking/autodan/'
# results_dir: 'results/backdoor/vpi/'
# results_dir: 'results/backdoor/mtba/'

# Intervention switches. Only `use_layer_intervention` is set here; the two
# commented-out keys are absent from the parsed mapping, so the consumer's
# own defaults (not visible in this file) apply to them.
# use_microsaccade_intervention: true
# use_attention_intervention: false
use_layer_intervention: true