# Default values (can be overridden via environment or command line)
# Experiment directory: every path generated by this Makefile lives under it.
EXP_DIR ?= exp2_gsm8k_llama-3b-base
# Forwarded to gentraces.py as seed=...
SEED ?= 42
# Dataset prefix; the recipes append _holdout, _train or _test to build data_split.
DATASET ?= gsm8k
# Prefix of the distilled model names (<MODEL_NAME>_tau<tau>_lam<lam>).
MODEL_NAME ?= llama-3b-base
# Forwarded as slice=... when generating training traces. The single quotes
# are part of the value, so the recipe shell sees the brackets quoted.
SLICE ?= '[:2048]'
# Command prefix used to launch every Python script in the recipes.
PY ?= uv run accelerate launch --config_file acc_config.yaml

# Tau/lambda parameter pairs to sweep.
# Format: one <tau>_<lam> word per pair (underscore separated),
# e.g. 0.6_2.0e-4 means tau=0.6 and lam=2.0e-4.
# Alternative grid, currently disabled (the trailing backslash keeps the
# second line inside the same comment):
# TAU_LAMS ?= 0.3_0.0 0.8887_0.0 1.1923_0.0 1.4380_0.0 1.6424_0.0 2.0249_0.0 2.3499_0.0 2.5_0.0 \
#         #    0.6_0.0 0.6_1e-4 0.6_2e-4 0.6_3e-4 0.6_4e-4 0.6_5e-4 0.6_6e-4

TAU_LAMS ?= 1.6_0.0 1.7_0.0 1.8_0.0 1.9_0.0 2.0_0.0 2.1_0.0 \
           0.6_2.0e-4 0.6_3.0e-4 0.6_3.5e-4 0.6_4.0e-4 0.6_4.5e-4 0.6_5.0e-4 0.6_5.5e-4

# Split a "<tau>_<lam>" word into its two components.
#   $(call get_tau,0.6_1e-4)  ->  0.6
#   $(call get_lam,0.6_1e-4)  ->  1e-4
# Both stay recursively expanded (=) so that $1 is bound at call time.
get_tau = $(firstword $(subst _, ,$1))
get_lam = $(word 2,$(subst _, ,$1))

# Input files of each pipeline stage. They are listed as prerequisites of the
# stage rules, so editing one of them makes the dependent targets out of date.
# shared_src is the helper module common to all three stages.
shared_src := utils.py
TRACE_GEN_SRC := gentraces.py $(shared_src) gen_config.yaml
GRAD_SRC := save_grad.py $(shared_src)
DISTILL_SRC := distill.py $(shared_src) train_config.yaml

# Key sentinel files - using YAML config files
# These are the paths make tracks to decide whether the holdout-trace and
# gradient steps are up to date; both live under $(EXP_DIR).
HOLDOUT_CONFIG := $(EXP_DIR)/traces/holdout.yaml
GRAD_FILE := $(EXP_DIR)/student_grads.pt

# Per-pair sentinel targets derived from TAU_LAMS.
# pair_tags holds one "tau<tau>_lam<lam>" word for every entry of TAU_LAMS;
# the three lists below wrap each tag in the directory/prefix of its stage.
pair_tags := $(foreach pair,$(TAU_LAMS),tau$(call get_tau,$(pair))_lam$(call get_lam,$(pair)))
TRAIN_CONFIGS := $(patsubst %,$(EXP_DIR)/traces/%.yaml,$(pair_tags))
MODEL_CONFIGS := $(patsubst %,$(EXP_DIR)/models/$(MODEL_NAME)_%.yaml,$(pair_tags))
EVAL_CONFIGS := $(patsubst %,$(EXP_DIR)/traces/eval_%.yaml,$(pair_tags))

# Colored output
# ESC holds a real escape byte, produced once at parse time by printf. The
# colour variables therefore no longer rely on the recipe shell's `echo`
# translating the text "\033": some shells do (e.g. dash), others print the
# backslash sequence verbatim (e.g. bash's builtin echo by default).
ESC := $(shell printf '\033')
CYAN := $(ESC)[36m
GREEN := $(ESC)[32m
YELLOW := $(ESC)[33m
MAGENTA := $(ESC)[35m
RESET := $(ESC)[0m

# Entry points of the pipeline. None of these names is a file on disk, so
# they are all declared phony in one place.
.PHONY: all holdout grads train_traces distill evaluate

# Default target: running plain `make` builds everything up to evaluation.
all: evaluate

# Step 1: Generate holdout traces
# `holdout` is the phony alias for this step; the YAML config below is the
# file make actually tracks.
holdout: $(HOLDOUT_CONFIG)

# $(@D) is the traces directory of the sentinel and $(basename $(@F)) is its
# file name without the .yaml extension, i.e. the trace name "holdout".
$(HOLDOUT_CONFIG): $(TRACE_GEN_SRC)
	@echo "$(YELLOW)🌀 Generating holdout traces$(RESET)"
	mkdir -p $(EXP_DIR)/metadata/holdout $(@D)/holdout
	$(PY) gentraces.py \
		hydra.run.dir=$(EXP_DIR)/metadata/holdout \
		exp_dir=$(EXP_DIR) \
		seed=$(SEED) \
		data_split=$(DATASET)_holdout \
		trace_name=$(basename $(@F))
	@echo "$(GREEN)✅ Holdout traces generated$(RESET)"

# Step 2: Save student gradients
# Runs save_grad.py on the holdout config. $(GRAD_FILE) is the file make
# tracks for this step (presumably written by save_grad.py - confirm).
$(GRAD_FILE): $(GRAD_SRC) $(HOLDOUT_CONFIG)
	@echo "$(YELLOW)💾 Saving student gradients$(RESET)"
	mkdir -p $(dir $(GRAD_FILE))
	$(PY) save_grad.py $(HOLDOUT_CONFIG)
	@echo "$(GREEN)✅ Student gradients saved$(RESET)"

# Phony target for just the gradient step. `grads` is declared .PHONY and
# advertised by `make help`; without this rule `make grads` did nothing.
grads: $(GRAD_FILE)

# Step 3: Generate training traces for each tau-lambda pair
# Using YAML config files as sentinels.
#
# GNU Make treats only the first '%' of a target pattern as the wildcard, so
# this rule uses a single stem that covers "<tau>_lam<lam>" (for example
# "1.6_lam0.0"). Consequently:
#   tau$*                                  is the full tag tau<tau>_lam<lam>
#   $(call get_tau,$(subst _lam,_,$*))     is the tau value
#   $(call get_lam,$(subst _lam,_,$*))     is the lambda value
# Every command is its own recipe line, so a failing gentraces.py run fails
# the target instead of being hidden behind the exit status of a later echo.
# Quotes are stripped from SLICE and re-added here so the brackets stay quoted
# for the shell whether or not the caller's value carried its own quotes.
$(EXP_DIR)/traces/tau%.yaml: $(TRACE_GEN_SRC) $(GRAD_FILE)
	@echo "$(YELLOW)⚙️  Generating training traces for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"
	mkdir -p $(EXP_DIR)/traces/tau$* $(EXP_DIR)/metadata/train/tau$*
	$(PY) gentraces.py \
		hydra.run.dir=$(EXP_DIR)/metadata/train/tau$* \
		exp_dir=$(EXP_DIR) \
		seed=$(SEED) \
		data_split=$(DATASET)_train \
		slice='$(subst ',,$(SLICE))' \
		grad_path=$(GRAD_FILE) \
		tau=$(call get_tau,$(subst _lam,_,$*)) \
		lam=$(call get_lam,$(subst _lam,_,$*)) \
		trace_name=tau$*
	@echo "$(GREEN)✅ Training traces generated for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"

# Phony target for just the training traces step
train_traces: $(TRAIN_CONFIGS)
	@echo "$(GREEN)All training traces generated$(RESET)"

# Step 4: Distill model for each tau-lambda pair
# Using YAML config file as sentinel.
#
# GNU Make treats only the first '%' of a pattern as the wildcard, so the rule
# uses a single stem that covers "<tau>_lam<lam>" (for example "1.6_lam0.0").
# tau$* is therefore the full tau<tau>_lam<lam> tag, and the individual values
# are recovered with get_tau/get_lam after turning "_lam" back into "_".
# Every command is its own recipe line, so a failing distill.py run fails the
# target instead of being hidden behind the exit status of a later echo.
$(EXP_DIR)/models/$(MODEL_NAME)_tau%.yaml: $(DISTILL_SRC) $(EXP_DIR)/traces/tau%.yaml $(HOLDOUT_CONFIG)
	@echo "$(YELLOW)⚙️  Distilling model for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"
	mkdir -p $(EXP_DIR)/models/$(MODEL_NAME)_tau$*/final $(EXP_DIR)/metadata/distill/tau$*
	$(PY) distill.py \
		hydra.run.dir=$(EXP_DIR)/metadata/distill/tau$* \
		exp_dir=$(EXP_DIR) \
		train_traces=$(EXP_DIR)/traces/tau$* \
		holdout_traces=$(EXP_DIR)/traces/holdout \
		model_name=$(MODEL_NAME)_tau$*
	@echo "$(GREEN)✅ Distillation completed for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"

# Phony target for just the distillation step. `distill` is declared .PHONY
# and advertised by `make help` but previously had no rule. Naming the model
# configs here also makes them explicit targets, so make does not treat them
# as deletable intermediate files of the pattern-rule chain.
distill: $(MODEL_CONFIGS)
	@echo "$(GREEN)All models distilled$(RESET)"

# Step 5: Evaluate each model
# Using config yaml as sentinel.
#
# GNU Make treats only the first '%' of a pattern as the wildcard, so the rule
# uses a single stem that covers "<tau>_lam<lam>" (for example "1.6_lam0.0").
# tau$* is therefore the full tau<tau>_lam<lam> tag, and the individual values
# are recovered with get_tau/get_lam after turning "_lam" back into "_".
# Every command is its own recipe line, so a failing gentraces.py run fails
# the target instead of being hidden behind the exit status of a later echo.
# NOTE(review): tau=0.6 is fixed for every evaluation run regardless of the
# pair being evaluated - presumably intentional, confirm.
$(EXP_DIR)/traces/eval_tau%.yaml: $(TRACE_GEN_SRC) $(EXP_DIR)/models/$(MODEL_NAME)_tau%.yaml
	@echo "$(YELLOW)⚙️  Evaluating model for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"
	mkdir -p $(EXP_DIR)/traces/eval_tau$* $(EXP_DIR)/metadata/eval/tau$*
	$(PY) gentraces.py \
		hydra.run.dir=$(EXP_DIR)/metadata/eval/tau$* \
		teacher=$(EXP_DIR)/models/$(MODEL_NAME)_tau$*/final \
		teacher_cfg=$(EXP_DIR)/models/$(MODEL_NAME)_tau$*.yaml \
		use_wandb=true \
		exp_dir=$(EXP_DIR) \
		seed=$(SEED) \
		data_split=$(DATASET)_test \
		tau=0.6 \
		trace_name=eval_tau$*
	@echo "$(GREEN)✅ Evaluation completed for tau=$(call get_tau,$(subst _lam,_,$*)), lam=$(call get_lam,$(subst _lam,_,$*))$(RESET)"

# Phony target for just the evaluation step
evaluate: $(EVAL_CONFIGS)
	@echo "$(GREEN)All evaluations completed$(RESET)"

# Clean target
# Removes the traces, models and metadata directories under $(EXP_DIR) plus
# the gradient file. The first recipe line aborts when EXP_DIR is empty,
# because the rm commands would otherwise act on /traces /models /metadata.
.PHONY: clean
clean:
	$(if $(strip $(EXP_DIR)),,$(error EXP_DIR is empty - refusing to run rm -rf on root-level paths))
	rm -rf $(EXP_DIR)/traces $(EXP_DIR)/models $(EXP_DIR)/metadata
	rm -f $(GRAD_FILE)
	@echo "$(GREEN)All experiment data cleaned$(RESET)"

# Show configuration
# Prints the effective value of every tunable, then one line per tau/lambda
# pair with the two fields split on the underscore.
.PHONY: config
config:
	@printf '%s\n' \
		"Current configuration:" \
		"• Experiment: $(EXP_DIR)" \
		"• Model name: $(MODEL_NAME)" \
		"• Seed: $(SEED)" \
		"• Dataset: $(DATASET)" \
		"• Slice: $(SLICE)" \
		"• Parameter pairs:"
	@for pair in $(TAU_LAMS); do \
		tau_val=$$(printf '%s\n' "$$pair" | cut -d_ -f1); \
		lam_val=$$(printf '%s\n' "$$pair" | cut -d_ -f2); \
		printf '  - tau=%s, lam=%s\n' "$$tau_val" "$$lam_val"; \
	done

# Help
# Prints the usage summary; each argument to printf is one output line.
.PHONY: help
help:
	@printf '%s\n' \
		"ML Pipeline Makefile" \
		"" \
		"Usage:" \
		"  make                    - Run full pipeline" \
		"  make holdout            - Generate only holdout traces" \
		"  make grads              - Generate holdout and compute gradients" \
		"  make train_traces       - Generate all training traces" \
		"  make distill            - Run through model distillation" \
		"  make evaluate           - Run full pipeline including evaluation" \
		"  make -j 4               - Run with 4 parallel jobs" \
		"" \
		"Configuration:" \
		"  make EXP_DIR=custom_dir SEED=123     - Override variables" \
		"  make MODEL_NAME=llama-7b             - Change model name" \
		"  make TAU_LAMS=\"0.6_0.0 0.6_1e-4\"    - Run with specific tau/lambda pairs" \
		"  make SLICE='[:1024]'                 - Change slice size for training data" \
		"  make config                          - Show current configuration" \
		"" \
		"Cleaning:" \
		"  make clean              - Remove all generated files"
