from __future__ import annotations

import argparse
import json
import random
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List

# Default profile that `_base_input` copies before applying per-scenario overrides.
BASE_SAMPLE: Dict[str, Any] = {
	"sex": "Female",
	"age": 29,
	"age_unit": "years",
	"weight": 62,
	"weight_unit": "kg",
	"height": 165,
	"height_unit": "cm",
	"activity_level": "Low active",
	"is_pregnant": False,
	"lactation_status": "none",
}

# Cuisine names that `_apply_prompt_fields` samples into each row's `food_type`.
CUISINES = [
	"indian",
	"chinese",
	"mexican",
	"thai",
	"japanese",
	"mediterranean",
	"italian",
	"korean",
	"american",
	"middle eastern",
]

# Seed for the `random.Random` instance created in `build_datasets`.
FIXED_SEED = 42
# Choices that `_apply_prompt_fields` samples into each row's `meal_plan_days`.
MEAL_PLAN_DAYS_OPTIONS = (3, 5, 7)
# Activity labels sampled by `_generate_random_valid_sample`.
ACTIVITY_LEVEL_OPTIONS = ("Inactive", "Low active", "Active", "Very active")
# Defaults for the `--dri-target` and `--fuzz-valid-target` command-line options.
PRACTICAL_DRI_TARGET = 140
PRACTICAL_FUZZ_VALID_TARGET = 200
# Sample keys that `_to_dri_input` forwards to the DRI calculator; every other
# key on a sample (prompt fields, suite metadata) is left out of that payload.
DRI_INPUT_KEYS = (
	"sex",
	"age",
	"weight",
	"height",
	"activity_level",
	"age_unit",
	"weight_unit",
	"height_unit",
	"height_inches",
	"is_pregnant",
	"gestation_weeks",
	"prepregnancy_weight",
	"prepregnancy_weight_unit",
	"lactation_status",
)


def _to_float(value: Any) -> float:
	return float(value)


def _age_years_and_months(age: Any, age_unit: str) -> tuple[int, int]:
	"""Split an age into whole years and leftover months.

	The age is read as months when ``age_unit`` (trimmed, case-insensitive)
	equals ``"months"`` and as years otherwise. It is rounded to the nearest
	whole month and floored at zero before being split.
	"""
	numeric_age = _to_float(age)
	if age_unit.strip().lower() == "months":
		exact_months = numeric_age
	else:
		exact_months = numeric_age * 12.0
	total_months = max(0, int(round(exact_months)))
	whole_years, leftover_months = divmod(total_months, 12)
	return whole_years, leftover_months


def _person_label(sex: str, age: Any, age_unit: str) -> str:
	"""Pick the noun used to describe the person in the profile sentence.

	Under 2 whole years the label is a "baby" variant, under 18 a child
	variant, otherwise an adult variant. The sex (trimmed, case-insensitive)
	selects the gendered word; anything other than female/male falls back to
	the neutral word for that age band.
	"""
	sex_key = sex.strip().lower()
	whole_years = _age_years_and_months(age, age_unit)[0]

	if whole_years < 2:
		gendered, neutral = {"female": "baby girl", "male": "baby boy"}, "baby"
	elif whole_years < 18:
		gendered, neutral = {"female": "girl", "male": "boy"}, "child"
	else:
		gendered, neutral = {"female": "woman", "male": "man"}, "person"

	return gendered.get(sex_key, neutral)


def _format_height_phrase(sample: Dict[str, Any]) -> str:
	height = sample.get("height")
	height_unit = str(sample.get("height_unit", "cm"))
	if height_unit == "ft_in":
		height_inches = sample.get("height_inches")
		if height_inches is None:
			return f"{height} ft"
		return f"{height} ft {height_inches} in"
	return f"{height} {height_unit}"


def _has_mixed_age(age: Any, age_unit: str) -> bool:
	"""Return True when the age has leftover months beyond its whole years."""
	leftover_months = _age_years_and_months(age, age_unit)[1]
	return leftover_months > 0


def _format_age_phrase(age: Any, age_unit: str) -> str:
	"""Render an age as the phrase used in the profile sentence.

	Ages under 24 months are expressed purely in months, whole-year ages as
	``"<n>-year-old"``, and everything else as years plus months.
	"""

	def _word(count: int, singular: str, plural: str) -> str:
		# Singular form only for exactly one unit.
		return singular if count == 1 else plural

	whole_years, extra_months = _age_years_and_months(age, age_unit)
	total_months = whole_years * 12 + extra_months

	if total_months < 24:
		month_word = _word(total_months, "month", "months")
		return f"{total_months}-{month_word}-old"

	if not extra_months:
		return f"{whole_years}-year-old"

	year_word = _word(whole_years, "year", "years")
	month_word = _word(extra_months, "month", "months")
	return f"{whole_years} {year_word} and {extra_months} {month_word} old"


def build_profile_sentence(sample: Dict[str, Any]) -> str:
	"""Compose the first-person prompt text for one sample.

	The text introduces the person (age and label), then states weight,
	height, activity level and cuisine, adds pregnancy and breastfeeding
	sentences when the sample calls for them, and ends with the meal-plan
	request. Missing keys fall back to the defaults used below.
	"""
	sex = str(sample.get("sex", ""))
	age = sample.get("age")
	age_unit = str(sample.get("age_unit", "years"))
	weight = sample.get("weight")
	weight_unit = str(sample.get("weight_unit", "kg"))
	height_phrase = _format_height_phrase(sample)
	activity_level = str(sample.get("activity_level", "Unknown"))
	food_type = str(sample.get("food_type", "unspecified"))
	plan_days = int(sample.get("meal_plan_days", 3))
	pregnant = bool(sample.get("is_pregnant", False))
	lactation_status = str(sample.get("lactation_status", "none"))
	age_phrase = _format_age_phrase(age, age_unit)
	label = _person_label(sex, age, age_unit)

	# Ages with leftover months read better after the noun than before it.
	intro = (
		f"I'm a {label} who is {age_phrase}."
		if _has_mixed_age(age, age_unit)
		else f"I'm a {age_phrase} {label}."
	)

	parts = [
		intro,
		f"I weigh {weight} {weight_unit} and I'm {height_phrase} tall.",
		f"My activity level is {activity_level}.",
		f"I want to eat {food_type} food.",
	]

	if pregnant:
		parts.append("I'm pregnant.")
		weeks = sample.get("gestation_weeks")
		if weeks is not None:
			parts.append(f"I'm at {weeks} weeks of gestation.")
		prior_weight = sample.get("prepregnancy_weight")
		if prior_weight is not None:
			prior_unit = str(sample.get("prepregnancy_weight_unit", "kg"))
			parts.append(f"My pre-pregnancy weight was {prior_weight} {prior_unit}.")

	if lactation_status != "none":
		parts.append(f"I'm breastfeeding ({lactation_status}).")

	parts.append(
		f"Tell me {plan_days} day meal plan which meets my calorie and nutrient targets."
	)
	return " ".join(parts)


def _round1(value: float) -> float:
	return round(value, 1)


def _ensure_code_root_on_path() -> None:
	"""Put the directory three levels above this file first on ``sys.path``.

	Nothing changes when that directory is already listed.
	"""
	root_entry = str(Path(__file__).resolve().parents[2])
	if root_entry in sys.path:
		return
	sys.path.insert(0, root_entry)


def _load_calculate_health_canada_dri() -> Any:
	"""Import and return ``calculate_health_canada_dri`` from the ``utils`` package.

	The import is deferred to call time so the code root can be added to
	``sys.path`` first.
	"""
	_ensure_code_root_on_path()
	from utils.calculate_health_canada_dri import (
		calculate_health_canada_dri as dri_calculator,
	)

	return dri_calculator


def _kg_to_lb(value_kg: float) -> float:
	return value_kg / 0.45359


def _lb_to_kg(value_lb: float) -> float:
	return value_lb * 0.45359


def _cm_to_ft_inches(value_cm: float) -> tuple[int, float]:
	total_inches = value_cm / 2.54
	feet = int(total_inches // 12)
	inches = round(total_inches - (feet * 12), 1)
	if inches >= 12.0:
		feet += 1
		inches = 0.0
	return feet, inches


def _ft_inches_to_cm(feet: float, inches: float) -> float:
	return (feet + (inches / 12.0)) * 30.48


def _convert_weight_unit(sample: Dict[str, Any], target_unit: str) -> Dict[str, Any]:
	"""Return a copy of ``sample`` with its weights expressed in ``target_unit``.

	``weight`` is always converted (a missing weight is treated as 0.0 and a
	missing unit as kg). ``prepregnancy_weight`` is converted only when it is
	present and not ``None``; a ``None`` value is left untouched, matching how
	``_to_dri_input`` and ``build_profile_sentence`` treat ``None`` as "not
	provided". Any ``target_unit`` other than ``"kg"`` produces pounds.
	Converted values are rounded to one decimal place.

	Args:
		sample: Profile dictionary; it is not mutated.
		target_unit: ``"kg"`` for kilograms, anything else for pounds.

	Returns:
		A shallow copy of ``sample`` with the converted weight fields.
	"""
	output_unit = "kg" if target_unit == "kg" else "lb"

	def _rescale(value: Any, unit: Any) -> float:
		# Go through kilograms so every source/target pairing shares one path.
		amount = _to_float(value)
		kilograms = amount if str(unit).lower() == "kg" else _lb_to_kg(amount)
		return _round1(kilograms if output_unit == "kg" else _kg_to_lb(kilograms))

	converted = dict(sample)
	converted["weight"] = _rescale(
		converted.get("weight", 0.0),
		converted.get("weight_unit", "kg"),
	)
	converted["weight_unit"] = output_unit

	# A key that is present but None means "no pre-pregnancy weight"; converting
	# it would raise inside float(), so only real values are rescaled.
	if converted.get("prepregnancy_weight") is not None:
		converted["prepregnancy_weight"] = _rescale(
			converted["prepregnancy_weight"],
			converted.get("prepregnancy_weight_unit", "kg"),
		)
		converted["prepregnancy_weight_unit"] = output_unit

	return converted


def _convert_height_unit(sample: Dict[str, Any], target_unit: str) -> Dict[str, Any]:
	"""Return a copy of ``sample`` with its height expressed in ``target_unit``.

	A ``ft_in`` source height is read from ``height`` (feet) plus
	``height_inches``; missing or ``None`` inches count as zero, which matches
	``_format_height_phrase`` accepting a feet-only height. Any other source
	unit is read as centimetres. Any ``target_unit`` other than ``"cm"``
	produces feet and inches.

	Args:
		sample: Profile dictionary; it is not mutated.
		target_unit: ``"cm"`` for centimetres, anything else for ``ft_in``.

	Returns:
		A shallow copy of ``sample`` with the converted height fields.
		``height_inches`` is removed for a centimetre result.
	"""
	converted = dict(sample)
	source_unit = str(converted.get("height_unit", "cm")).lower()

	if source_unit == "ft_in":
		feet = _to_float(converted.get("height", 0.0))
		raw_inches = converted.get("height_inches")
		# A present-but-None inches value previously crashed in float(None).
		inches = 0.0 if raw_inches is None else _to_float(raw_inches)
		height_cm = _ft_inches_to_cm(feet, inches)
	else:
		height_cm = _to_float(converted.get("height", 0.0))

	if target_unit == "cm":
		converted["height"] = _round1(height_cm)
		converted["height_unit"] = "cm"
		# Inches have no meaning for a centimetre height.
		converted.pop("height_inches", None)
	else:
		feet, inches = _cm_to_ft_inches(height_cm)
		converted["height"] = feet
		converted["height_unit"] = "ft_in"
		converted["height_inches"] = inches

	return converted


def _convert_age_unit(sample: Dict[str, Any], target_unit: str) -> Dict[str, Any]:
	"""Return a copy of ``sample`` with its age expressed in ``target_unit``.

	The source age is read as months when ``age_unit`` is ``"months"`` (case
	insensitive) and as years otherwise. A ``"years"`` target is rounded to
	three decimals; any other target yields months rounded to two decimals.
	"""
	result = dict(sample)
	current_unit = str(result.get("age_unit", "years")).lower()
	raw_age = _to_float(result.get("age", 0.0))
	years = raw_age / 12.0 if current_unit == "months" else raw_age

	if target_unit == "years":
		new_age, new_unit = round(years, 3), "years"
	else:
		new_age, new_unit = round(years * 12.0, 2), "months"

	result["age"] = new_age
	result["age_unit"] = new_unit
	return result


def _normalize_age_to_years(sample: Dict[str, Any]) -> Dict[str, Any]:
	"""Return a copy of ``sample`` whose age is expressed in years."""
	in_years = _convert_age_unit(sample, "years")
	return in_years


def _random_age_years(rng: random.Random) -> float:
	boundary_points = [
		0.24,
		0.25,
		0.26,
		0.49,
		0.50,
		0.51,
		0.5833,
		1.0,
		2.99,
		3.0,
		4.0,
		9.0,
		14.0,
		19.0,
		31.0,
		51.0,
		71.0,
	]
	if rng.random() < 0.40:
		return rng.choice(boundary_points)
	return round(rng.uniform(0.1, 85.0), 3)


def _height_range_for_age(age_years: float) -> tuple[float, float]:
	if age_years < 1:
		return (50.0, 78.0)
	if age_years < 3:
		return (75.0, 100.0)
	if age_years < 9:
		return (100.0, 140.0)
	if age_years < 19:
		return (135.0, 190.0)
	return (145.0, 200.0)


def _weight_range_for_age(age_years: float) -> tuple[float, float]:
	if age_years < 1:
		return (3.5, 10.0)
	if age_years < 3:
		return (9.0, 16.0)
	if age_years < 9:
		return (16.0, 35.0)
	if age_years < 19:
		return (30.0, 85.0)
	return (45.0, 120.0)


def _base_input(**overrides: Any) -> Dict[str, Any]:
	"""Build a sample from ``BASE_SAMPLE`` with ``overrides`` applied last.

	Units, pregnancy flag and lactation status are reset to their metric /
	"none" defaults before the overrides are applied.
	"""
	return {
		**BASE_SAMPLE,
		"age_unit": "years",
		"weight_unit": "kg",
		"height_unit": "cm",
		"is_pregnant": False,
		"lactation_status": "none",
		**overrides,
	}


def _generate_random_valid_sample(rng: random.Random) -> Dict[str, Any]:
	"""Draw one random profile intended to be accepted by the DRI calculator.

	A mode (base / pregnant / lactation) is chosen first. Pregnant and
	lactation profiles are forced to Female with the age clamped to 14-45
	years. Weight and height are drawn from the age-dependent ranges, and each
	of weight and height is independently switched to imperial units with
	probability 0.35. The order of ``rng`` draws determines the output for a
	given seed.
	"""
	(mode,) = rng.choices(
		population=["base", "pregnant", "lactation"],
		weights=[0.70, 0.16, 0.14],
		k=1,
	)
	age_years = _random_age_years(rng)
	sex = rng.choice(["Male", "Female"])

	if mode in ("pregnant", "lactation"):
		sex = "Female"
		age_years = round(min(45.0, max(14.0, age_years)), 3)

	weight_bounds = _weight_range_for_age(age_years)
	height_bounds = _height_range_for_age(age_years)

	# Draw order: weight, then height, then activity level.
	drawn_weight = _round1(rng.uniform(*weight_bounds))
	drawn_height = _round1(rng.uniform(*height_bounds))
	drawn_activity = rng.choice(ACTIVITY_LEVEL_OPTIONS)

	sample = _base_input(
		sex=sex,
		age=round(age_years, 3),
		age_unit="years",
		weight=drawn_weight,
		weight_unit="kg",
		height=drawn_height,
		height_unit="cm",
		activity_level=drawn_activity,
		is_pregnant=False,
		lactation_status="none",
	)

	if mode == "pregnant":
		gestation = round(rng.uniform(6.0, 39.0), 1)
		weight_gain = rng.uniform(0.0, 12.0)
		sample.update(
			is_pregnant=True,
			lactation_status="none",
			gestation_weeks=gestation,
			prepregnancy_weight=_round1(max(35.0, sample["weight"] - weight_gain)),
			prepregnancy_weight_unit="kg",
		)
	elif mode == "lactation":
		sample.update(
			is_pregnant=False,
			lactation_status=rng.choice(["0-6 months postpartum", "7-12 months postpartum"]),
		)

	if rng.random() < 0.35:
		sample = _convert_weight_unit(sample, "lb")
	if rng.random() < 0.35:
		sample = _convert_height_unit(sample, "ft_in")

	return sample


def _apply_prompt_fields(sample: Dict[str, Any], rng: random.Random) -> None:
	"""Set random ``food_type`` and ``meal_plan_days`` on ``sample`` in place."""
	prompt_choices = (
		("food_type", CUISINES),
		("meal_plan_days", MEAL_PLAN_DAYS_OPTIONS),
	)
	for field, options in prompt_choices:
		sample[field] = rng.choice(options)


def _to_dri_input(sample: Dict[str, Any]) -> Dict[str, Any]:
	"""Extract the calculator keyword arguments from ``sample``.

	Only keys listed in ``DRI_INPUT_KEYS`` are kept, in that order, and keys
	that are missing or set to ``None`` are left out.
	"""
	return {
		key: sample[key]
		for key in DRI_INPUT_KEYS
		if key in sample and sample[key] is not None
	}


def _validate_with_dri(
	calculate_health_canada_dri: Any,
	sample: Dict[str, Any],
	expected_valid: bool,
	scenario_id: str,
) -> None:
	"""Run the calculator on ``sample`` and check the outcome matches expectation.

	A result counts as an error when it is a string starting with ``"Error:"``.

	Raises:
		ValueError: If a sample expected to be valid produced an error, or a
			sample expected to be invalid did not.
	"""
	outcome = calculate_health_canada_dri(**_to_dri_input(sample))
	is_error = isinstance(outcome, str) and outcome.startswith("Error:")

	if expected_valid:
		if is_error:
			raise ValueError(f"Scenario '{scenario_id}' should be valid, but DRI calculator returned: {outcome}")
		return

	if not is_error:
		raise ValueError(
			f"Scenario '{scenario_id}' should be invalid, but DRI calculator returned a non-error response"
		)


def _suite_row(sample: Dict[str, Any], suite: str, scenario_id: str) -> Dict[str, Any]:
	row = dict(sample)
	row["suite"] = suite
	row["scenario_id"] = scenario_id
	return row


def _input_fingerprint(sample: Dict[str, Any]) -> str:
	"""Return a key-sorted JSON string of the sample's calculator inputs.

	Used to recognise samples whose calculator inputs are identical.
	"""
	calculator_inputs = _to_dri_input(sample)
	return json.dumps(calculator_inputs, sort_keys=True)


def _prepare_suite_row_sample(sample: Dict[str, Any], rng: random.Random) -> Dict[str, Any]:
	"""Copy ``sample``, add the random prompt fields, and attach its sentence."""
	prepared = {**sample}
	_apply_prompt_fields(prepared, rng)
	sentence = build_profile_sentence(prepared)
	prepared["profile_sentence"] = sentence
	return prepared


def _try_add_validated_suite_row(
	rows: List[Dict[str, Any]],
	seen_fingerprints: set[str],
	sample: Dict[str, Any],
	*,
	rng: random.Random,
	suite: str,
	scenario_id: str,
	fingerprint: str,
	validate_sample: Callable[[Dict[str, Any], str], None],
) -> bool:
	"""Validate ``sample`` and append it to ``rows`` unless it is a duplicate.

	Returns ``False`` without touching ``rng`` when ``fingerprint`` was already
	seen. Otherwise the sample is prepared, passed to ``validate_sample``
	(whose exceptions propagate), appended as a suite row, and its fingerprint
	is recorded; the function then returns ``True``.
	"""
	is_new = fingerprint not in seen_fingerprints
	if is_new:
		prepared = _prepare_suite_row_sample(sample, rng)
		validate_sample(prepared, scenario_id)
		rows.append(_suite_row(prepared, suite=suite, scenario_id=scenario_id))
		seen_fingerprints.add(fingerprint)
	return is_new


def _prepregnancy_weight_for_bmi(height_cm: float, target_bmi: float) -> float:
	"""Return the weight in kg (one decimal) giving ``target_bmi`` at ``height_cm``."""
	height_m = height_cm / 100.0
	return _round1(target_bmi * (height_m ** 2))


def _scenario(scenario_id: str, sample: Dict[str, Any]) -> Dict[str, Any]:
	return {
		"scenario_id": scenario_id,
		"sample": sample,
	}


def _build_seed_valid_scenarios() -> List[Dict[str, Any]]:
	"""Return the hand-written seed scenarios used by ``_build_dri_suite``.

	Each entry is a ``_scenario(scenario_id, sample)`` dictionary whose sample
	is built with ``_base_input``. The list covers infants (ages given in
	months), toddlers, children, teens and adults of both sexes, pregnancy and
	lactation profiles, and two profiles given in imperial units.

	NOTE(review): the ``uw``/``nw``/``ow``/``ob`` tags in the pregnancy ids
	presumably name pre-pregnancy BMI categories (the BMI passed to
	``_prepregnancy_weight_for_bmi`` rises across them) - confirm against the
	calculator's thresholds.
	"""
	return [
		# Infants: ages are expressed in months.
		_scenario("infant_male_0_2_months", _base_input(sex="Male", age=2.9, age_unit="months", weight=5.8, height=59.0)),
		_scenario("infant_female_0_2_months", _base_input(sex="Female", age=1.8, age_unit="months", weight=4.8, height=55.0)),
		_scenario("infant_male_3_5_months", _base_input(sex="Male", age=3.0, age_unit="months", weight=6.5, height=62.0)),
		_scenario("infant_female_3_5_months", _base_input(sex="Female", age=5.7, age_unit="months", weight=6.4, height=65.0)),
		_scenario("infant_male_6_11_months", _base_input(sex="Male", age=6.0, age_unit="months", weight=7.8, height=68.0)),
		_scenario("infant_female_6_11_months", _base_input(sex="Female", age=11.0, age_unit="months", weight=8.6, height=73.0)),
		# Toddlers and children: ages in years from here on.
		_scenario("toddler_male_1_2_years", _base_input(sex="Male", age=2.0, weight=12.8, height=89.0)),
		_scenario("toddler_female_1_2_years", _base_input(sex="Female", age=2.5, weight=12.3, height=87.0)),
		_scenario("child_male_3_inactive", _base_input(sex="Male", age=3.0, weight=14.4, height=95.0, activity_level="Inactive")),
		_scenario("child_female_8_active", _base_input(sex="Female", age=8.0, weight=25.0, height=128.0, activity_level="Active")),
		_scenario("child_male_9_low_active", _base_input(sex="Male", age=9.0, weight=31.0, height=136.0, activity_level="Low active")),
		_scenario("child_female_13_very_active", _base_input(sex="Female", age=13.0, weight=45.0, height=154.0, activity_level="Very active")),
		# Teens: one scenario per activity level for each sex.
		_scenario("teen_male_14_inactive", _base_input(sex="Male", age=14.0, weight=54.0, height=164.0, activity_level="Inactive")),
		_scenario("teen_male_16_low_active", _base_input(sex="Male", age=16.0, weight=62.0, height=173.0, activity_level="Low active")),
		_scenario("teen_male_17_active", _base_input(sex="Male", age=17.0, weight=67.0, height=176.0, activity_level="Active")),
		_scenario("teen_male_18_very_active", _base_input(sex="Male", age=18.0, weight=71.0, height=179.0, activity_level="Very active")),
		_scenario("teen_female_14_inactive", _base_input(sex="Female", age=14.0, weight=49.0, height=158.0, activity_level="Inactive")),
		_scenario("teen_female_16_low_active", _base_input(sex="Female", age=16.0, weight=54.0, height=162.0, activity_level="Low active")),
		_scenario("teen_female_17_active", _base_input(sex="Female", age=17.0, weight=57.0, height=164.0, activity_level="Active")),
		_scenario("teen_female_18_very_active", _base_input(sex="Female", age=18.0, weight=60.0, height=166.0, activity_level="Very active")),
		# Adults: one scenario per activity level for each sex.
		_scenario("adult_male_19_inactive", _base_input(sex="Male", age=19.0, weight=70.0, height=176.0, activity_level="Inactive")),
		_scenario("adult_male_31_low_active", _base_input(sex="Male", age=31.0, weight=76.0, height=178.0, activity_level="Low active")),
		_scenario("adult_male_51_active", _base_input(sex="Male", age=51.0, weight=78.0, height=175.0, activity_level="Active")),
		_scenario("adult_male_71_very_active", _base_input(sex="Male", age=71.0, weight=74.0, height=172.0, activity_level="Very active")),
		_scenario("adult_female_19_inactive", _base_input(sex="Female", age=19.0, weight=58.0, height=163.0, activity_level="Inactive")),
		_scenario("adult_female_31_low_active", _base_input(sex="Female", age=31.0, weight=63.0, height=165.0, activity_level="Low active")),
		_scenario("adult_female_51_active", _base_input(sex="Female", age=51.0, weight=66.0, height=162.0, activity_level="Active")),
		_scenario("adult_female_71_very_active", _base_input(sex="Female", age=71.0, weight=61.0, height=159.0, activity_level="Very active")),
		# Pregnancy: gestation week and pre-pregnancy weight are supplied.
		_scenario(
			"pregnant_first_half_week12",
			_base_input(
				sex="Female",
				age=28.0,
				weight=66.0,
				height=165.0,
				activity_level="Low active",
				is_pregnant=True,
				gestation_weeks=12,
				prepregnancy_weight=61.0,
				prepregnancy_weight_unit="kg",
			),
		),
		_scenario(
			"pregnant_second_trimester_uw_inactive",
			_base_input(
				sex="Female",
				age=27.0,
				weight=56.0,
				height=164.0,
				activity_level="Inactive",
				is_pregnant=True,
				gestation_weeks=20,
				prepregnancy_weight=_prepregnancy_weight_for_bmi(164.0, 17.4),
				prepregnancy_weight_unit="kg",
			),
		),
		_scenario(
			"pregnant_second_trimester_nw_low_active",
			_base_input(
				sex="Female",
				age=30.0,
				weight=67.0,
				height=166.0,
				activity_level="Low active",
				is_pregnant=True,
				gestation_weeks=24,
				prepregnancy_weight=_prepregnancy_weight_for_bmi(166.0, 22.1),
				prepregnancy_weight_unit="kg",
			),
		),
		_scenario(
			"pregnant_third_trimester_ow_active",
			_base_input(
				sex="Female",
				age=33.0,
				weight=79.0,
				height=168.0,
				activity_level="Active",
				is_pregnant=True,
				gestation_weeks=32,
				prepregnancy_weight=_prepregnancy_weight_for_bmi(168.0, 27.6),
				prepregnancy_weight_unit="kg",
			),
		),
		_scenario(
			"pregnant_third_trimester_ob_very_active",
			_base_input(
				sex="Female",
				age=35.0,
				weight=91.0,
				height=167.0,
				activity_level="Very active",
				is_pregnant=True,
				gestation_weeks=36,
				prepregnancy_weight=_prepregnancy_weight_for_bmi(167.0, 32.5),
				prepregnancy_weight_unit="kg",
			),
		),
		_scenario(
			"pregnant_boundary_week13",
			_base_input(
				sex="Female",
				age=29.0,
				weight=70.0,
				height=165.0,
				activity_level="Low active",
				is_pregnant=True,
				gestation_weeks=13,
				prepregnancy_weight=_prepregnancy_weight_for_bmi(165.0, 23.0),
				prepregnancy_weight_unit="kg",
			),
		),
		# Lactation: both postpartum windows, for adults and adolescents.
		_scenario(
			"lactation_adult_0_6_low_active",
			_base_input(
				sex="Female",
				age=30.0,
				weight=66.0,
				height=165.0,
				activity_level="Low active",
				lactation_status="0-6 months postpartum",
			),
		),
		_scenario(
			"lactation_adult_7_12_active",
			_base_input(
				sex="Female",
				age=33.0,
				weight=64.0,
				height=164.0,
				activity_level="Active",
				lactation_status="7-12 months postpartum",
			),
		),
		_scenario(
			"lactation_adolescent_0_6_inactive",
			_base_input(
				sex="Female",
				age=17.0,
				weight=55.0,
				height=162.0,
				activity_level="Inactive",
				lactation_status="0-6 months postpartum",
			),
		),
		_scenario(
			"lactation_adolescent_7_12_very_active",
			_base_input(
				sex="Female",
				age=18.0,
				weight=57.0,
				height=163.0,
				activity_level="Very active",
				lactation_status="7-12 months postpartum",
			),
		),
		# Imperial-unit inputs: weight in lb, and height in feet plus inches.
		_scenario(
			"unit_weight_lb",
			_base_input(
				sex="Male",
				age=35.0,
				weight=176.4,
				weight_unit="lb",
				height=177.0,
				height_unit="cm",
				activity_level="Low active",
			),
		),
		_scenario(
			"unit_height_ft_in",
			_base_input(
				sex="Female",
				age=34.0,
				weight=140.0,
				weight_unit="lb",
				height=5,
				height_unit="ft_in",
				height_inches=6,
				activity_level="Active",
			),
		),
	]


def _build_dri_suite(
	calculate_health_canada_dri: Any,
	rng: random.Random,
	target_count: int,
) -> List[Dict[str, Any]]:
	"""Build the ``dri`` suite: seed scenarios first, random extras to fill up.

	Every seed scenario is normalised to years and emitted in up to three unit
	variants (native, weight in lb, height in ft/in); variants whose calculator
	inputs duplicate an earlier row are skipped. Seed scenarios are curated, so
	a calculator rejection there raises immediately. If the seeds do not reach
	``target_count``, random samples are added within a bounded number of
	attempts; a random sample the calculator rejects is skipped, in line with
	``_build_fuzz_valid_suite``, instead of aborting the whole build.

	Args:
		calculate_health_canada_dri: Calculator callable used for validation.
		rng: Random source for prompt fields and the random extras.
		target_count: Number of rows to return.

	Returns:
		The first ``target_count`` validated rows.

	Raises:
		ValueError: If a seed scenario is rejected by the calculator.
		RuntimeError: If fewer than ``target_count`` rows could be built.
	"""
	rows: List[Dict[str, Any]] = []
	seen_fingerprints: set[str] = set()

	def validate_sample(row: Dict[str, Any], scenario_id: str) -> None:
		_validate_with_dri(
			calculate_health_canada_dri=calculate_health_canada_dri,
			sample=row,
			expected_valid=True,
			scenario_id=scenario_id,
		)

	for scenario in _build_seed_valid_scenarios():
		base_id = str(scenario["scenario_id"])
		base_sample = _normalize_age_to_years(dict(scenario["sample"]))

		variants = [
			("native", base_sample),
			("weight_lb", _convert_weight_unit(base_sample, "lb")),
			("height_ft_in", _convert_height_unit(base_sample, "ft_in")),
		]

		for variant_name, variant in variants:
			# No try/except here: a rejected seed scenario is a real error.
			_try_add_validated_suite_row(
				rows,
				seen_fingerprints,
				variant,
				rng=rng,
				suite="dri",
				scenario_id=f"dri/{base_id}/{variant_name}",
				fingerprint=_input_fingerprint(variant),
				validate_sample=validate_sample,
			)

	attempts = 0
	max_attempts = max(target_count * 20, 2000)
	while len(rows) < target_count and attempts < max_attempts:
		attempts += 1
		sample = _generate_random_valid_sample(rng)
		scenario_id = f"dri/extra_{len(rows) + 1:05d}"

		try:
			_try_add_validated_suite_row(
				rows,
				seen_fingerprints,
				sample,
				rng=rng,
				suite="dri",
				scenario_id=scenario_id,
				fingerprint=_input_fingerprint(sample),
				validate_sample=validate_sample,
			)
		except ValueError:
			# The random generator is only best-effort valid; spend another
			# attempt rather than failing the whole suite on one reject.
			continue

	if len(rows) < target_count:
		raise RuntimeError(
			f"Unable to build requested DRI suite size: requested={target_count}, built={len(rows)}"
		)

	# The seeds alone can exceed a small target, so trim to the requested size.
	return rows[:target_count]


def _build_fuzz_valid_suite(
	calculate_health_canada_dri: Any,
	rng: random.Random,
	target_count: int,
) -> List[Dict[str, Any]]:
	"""Build the ``fuzz_valid`` suite from random samples only.

	Random samples are drawn until ``target_count`` unique rows pass
	validation or the attempt budget is exhausted. Samples that are rejected
	with ``ValueError`` or duplicate an earlier row are skipped.

	Raises:
		RuntimeError: If fewer than ``target_count`` rows could be built.
	"""

	def validate_sample(row: Dict[str, Any], scenario_id: str) -> None:
		_validate_with_dri(
			calculate_health_canada_dri=calculate_health_canada_dri,
			sample=row,
			expected_valid=True,
			scenario_id=scenario_id,
		)

	collected: List[Dict[str, Any]] = []
	seen_fingerprints: set[str] = set()
	attempt_budget = max(target_count * 20, 50000)

	for _ in range(attempt_budget):
		if len(collected) >= target_count:
			break

		candidate = _generate_random_valid_sample(rng)
		candidate_fingerprint = _input_fingerprint(candidate)
		candidate_id = f"fuzz_valid/{len(collected) + 1:05d}"

		try:
			_try_add_validated_suite_row(
				collected,
				seen_fingerprints,
				candidate,
				rng=rng,
				suite="fuzz_valid",
				scenario_id=candidate_id,
				fingerprint=candidate_fingerprint,
				validate_sample=validate_sample,
			)
		except ValueError:
			# Rejected by the calculator; move on to the next draw.
			pass

	if len(collected) < target_count:
		raise RuntimeError(
			f"Unable to build requested fuzz-valid suite size: requested={target_count}, built={len(collected)}"
		)

	return collected


def build_datasets(dri_target: int, fuzz_valid_target: int) -> Dict[str, List[Dict[str, Any]]]:
	"""Build all rows with a fixed seed and return them under the ``"dri"`` key.

	The DRI suite is built first and the fuzz-valid suite second, sharing one
	``random.Random`` seeded with ``FIXED_SEED``. A suite whose target is not
	positive is skipped.
	"""
	rng = random.Random(FIXED_SEED)
	calculator = _load_calculate_health_canada_dri()

	suite_plan = (
		(dri_target, _build_dri_suite),
		(fuzz_valid_target, _build_fuzz_valid_suite),
	)
	combined_rows: List[Dict[str, Any]] = []
	for requested, build_suite in suite_plan:
		if requested > 0:
			combined_rows.extend(
				build_suite(
					calculate_health_canada_dri=calculator,
					rng=rng,
					target_count=requested,
				)
			)

	return {"dri": combined_rows}


def write_outputs(output_dir: Path, datasets: Dict[str, List[Dict[str, Any]]]) -> None:
	"""Write each dataset to ``<output_dir>/<name>.jsonl`` and print a summary.

	The directory is created if needed. Rows are written one JSON object per
	line in UTF-8 without ASCII escaping.
	"""
	output_dir.mkdir(parents=True, exist_ok=True)

	written = 0
	for name, dataset_rows in datasets.items():
		target_path = output_dir / f"{name}.jsonl"

		with target_path.open("w", encoding="utf-8") as handle:
			handle.writelines(
				json.dumps(entry, ensure_ascii=False) + "\n" for entry in dataset_rows
			)

		row_count = len(dataset_rows)
		written += row_count
		print(f"Wrote {row_count} samples for {name}")
		print(f"JSONL: {target_path}")

	print(f"Total samples written: {written}")


def main() -> None:
	"""Parse the command line, build the datasets, and write them to disk.

	Raises:
		ValueError: If either target count is negative.
	"""
	parser = argparse.ArgumentParser(
		description="Generate practical evaluation samples for DRI suites."
	)

	count_options = (
		(
			"--dri-target",
			PRACTICAL_DRI_TARGET,
			"Number of DRI branch+boundary valid cases (recommended 120-180).",
		),
		(
			"--fuzz-valid-target",
			PRACTICAL_FUZZ_VALID_TARGET,
			"Number of seeded random valid fuzz cases (recommended 150-300).",
		),
	)
	for flag, default_count, help_text in count_options:
		parser.add_argument(flag, type=int, default=default_count, help=help_text)

	parser.add_argument(
		"--output-dir",
		type=Path,
		default=Path(__file__).resolve().parent,
		help="Directory where suite-specific JSONL files are written.",
	)
	args = parser.parse_args()

	if min(args.dri_target, args.fuzz_valid_target) < 0:
		raise ValueError("dri-target and fuzz-valid-target must be >= 0")

	built = build_datasets(
		dri_target=args.dri_target,
		fuzz_valid_target=args.fuzz_valid_target,
	)
	write_outputs(args.output_dir, built)


# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
	main()
