:: Windows commands to set up and run the pipeline (per-model/per-layer embeddings)

:: 1) Create a virtual environment (optional but recommended)
python -m venv .venv

:: 2) Activate it
::    - "call" makes control return to the caller when these lines are run from a .bat file;
::      it behaves the same as the bare command when typed at an interactive prompt.
call .\.venv\Scripts\activate

:: 3) Install dependencies
::    - pip is invoked through the interpreter: on Windows a bare "pip install --upgrade pip"
::      can fail because the running pip.exe cannot replace itself.
::    - "python -m pip" also guarantees the packages land in the interpreter that is active.
python -m pip install --upgrade pip
python -m pip install -r requirements.txt

:: -----------------------------
:: Development / Quick Smoke Test
:: -----------------------------

:: Extract a deterministic small subset on CPU (append-safe; overwrite to rebuild)
::    - --golden_subset selects the small deterministic subset; the full-dataset run further down omits it
::    - --overwrite rebuilds existing outputs; omit it to resume and skip rows already present
python -m src.cli extract --data_root data --out_dir artifacts --device cpu --golden_subset --overwrite --batch_size 16

:: Run analyses over the per-model embeddings directory
::    - Input is artifacts/embeddings; results are written under artifacts
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts

:: 2D projection (t-SNE) across all models/layers
::    - No --model filter is given, so nothing is excluded
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method tsne --out_dir artifacts/viz

:: 2D projection (UMAP) for a specific model (example: ResNet-50)
::    - --model restricts the projection to the named model
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method umap --model resnet50 --out_dir artifacts/viz

:: Visualize feature maps for paired images (CPU default). Example: ResNet-50 layer3
::    - See the layer name tips further down for valid --layer values per architecture
::    - NOTE(review): --channels and --max_pairs presumably cap how many channels and image pairs are drawn; confirm with the CLI help
python -m src.cli vizmaps --data_root data --model resnet50 --layer layer3 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: -----------------------------
:: Production / Full Dataset Run
:: -----------------------------

:: 4) Extract embeddings for ALL supported models (step numbering continues from the setup steps 1-3)
::    - Use CUDA if GPU is available, otherwise set --device cpu
::    - Per-model/per-layer CSVs will be written under artifacts/embeddings/{model}/{layer}.csv
::    - No --golden_subset here, unlike the smoke test, so the whole dataset is processed
::    - --overwrite rebuilds existing CSVs; omit it to resume an interrupted run
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32

:: 5) Run statistical analyses on the entire embeddings directory
::    - Same command as in the smoke test; it now sees the full-dataset embeddings
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts

:: 6) 2D projections at scale
::    - All models/layers (t-SNE)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method tsne --out_dir artifacts/viz
::    - Per model (UMAP) example: EfficientNet-B0
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method umap --model efficientnet_b0 --out_dir artifacts/viz

:: Notes:
:: - The extractor is resume/append aware. Omit --overwrite to skip rows that are already present in each per-model/per-layer CSV.
:: - For CPU-only machines, replace --device cuda with --device cpu.
:: - You can subset models during extraction with: --models resnet50 mobilenet_v2 efficientnet_b0

:: Summary Analysis:
::    - Reads from the artifacts directory and writes its output under artifacts/summary

python -m src.analysis.summarize --artifacts_dir artifacts --out_dir artifacts/summary

:: Optional: restrict to certain models:
python -m src.analysis.summarize --artifacts_dir artifacts --out_dir artifacts/summary --models resnet50 efficientnet_b0
:: -------------------------------------
:: Per-Model Family Commands (Individual)
:: -------------------------------------

:: RESNET FAMILY (resnet18/34/50/101/152)
:: Extract only ResNets
::    - Mind the flag spelling: extract takes --models, while viz2d and vizmaps take --model
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32 --models resnet18 resnet34 resnet50 resnet101 resnet152
:: Analyze (runs over whatever embeddings are present; can be the subset above)
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts
:: Viz2D (ResNets only, t-SNE)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method tsne --model resnet18 resnet34 resnet50 resnet101 resnet152 --out_dir artifacts/viz
:: Feature maps example (ResNet-50, layer3)
python -m src.cli vizmaps --data_root data --model resnet50 --layer layer3 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: CONVNEXT FAMILY (convnext_tiny/small/base/large)
:: Extract only ConvNeXts
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32 --models convnext_tiny convnext_small convnext_base convnext_large
:: Analyze (runs over whatever embeddings are present under artifacts/embeddings)
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts
:: Viz2D (ConvNeXts only, UMAP)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method umap --model convnext_tiny convnext_small convnext_base convnext_large --out_dir artifacts/viz
:: Feature maps example (ConvNeXt-Tiny, stage2)
python -m src.cli vizmaps --data_root data --model convnext_tiny --layer stage2 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: MOBILENET FAMILY (mobilenet_v2, mobilenet_v3_small/large)
:: Extract only MobileNets
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32 --models mobilenet_v2 mobilenet_v3_small mobilenet_v3_large
:: Analyze (runs over whatever embeddings are present under artifacts/embeddings)
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts
:: Viz2D (MobileNets only, t-SNE)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method tsne --model mobilenet_v2 mobilenet_v3_small mobilenet_v3_large --out_dir artifacts/viz
:: Feature maps example (MobileNetV2, features.10)
python -m src.cli vizmaps --data_root data --model mobilenet_v2 --layer features.10 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: DENSENET FAMILY (densenet121/169/201)
:: Extract only DenseNets
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32 --models densenet121 densenet169 densenet201
:: Analyze (runs over whatever embeddings are present under artifacts/embeddings)
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts
:: Viz2D (DenseNets only, UMAP)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method umap --model densenet121 densenet169 densenet201 --out_dir artifacts/viz
:: Feature maps example (DenseNet121, features.denseblock3)
python -m src.cli vizmaps --data_root data --model densenet121 --layer features.denseblock3 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: EFFICIENTNET FAMILY (efficientnet_b0 ... b7)
:: Extract only EfficientNets
python -m src.cli extract --data_root data --out_dir artifacts --device cuda --overwrite --batch_size 32 --models efficientnet_b0 efficientnet_b1 efficientnet_b2 efficientnet_b3 efficientnet_b4 efficientnet_b5 efficientnet_b6 efficientnet_b7
:: Analyze (runs over whatever embeddings are present under artifacts/embeddings)
python -m src.cli analyze --embeddings_path artifacts/embeddings --out_dir artifacts
:: Viz2D (EfficientNets only, t-SNE)
python -m src.cli viz2d --embeddings_path artifacts/embeddings --method tsne --model efficientnet_b0 efficientnet_b1 efficientnet_b2 efficientnet_b3 efficientnet_b4 efficientnet_b5 efficientnet_b6 efficientnet_b7 --out_dir artifacts/viz
:: Feature maps example (EfficientNet-B0, features.4)
python -m src.cli vizmaps --data_root data --model efficientnet_b0 --layer features.4 --out_dir artifacts/viz/maps --channels 6 --max_pairs 10

:: Layer name tips:
:: - ResNet: conv1, layer1, layer2, layer3, layer4, avgpool
:: - ConvNeXt: stage0, stage1, stage2, stage3
:: - MobileNetV2: features.0/3/6/10/13/17 (indices vary by architecture)
:: - MobileNetV3: features.0/3/6/9/12
:: - DenseNet: features.conv0, features.denseblock1..4, features.transition1..3, features.norm5
:: - EfficientNet: features.0..7, avgpool

:: -------------------------------------
:: Transformers (ViT / Swin) — Separate CLI
:: -------------------------------------

:: Quick smoke (subset, CPU)
::    - Same extract / analyze / viz2d sequence as the CNN pipeline, run through src.cli_transformers
::    - Everything is written under artifacts_transformers, separate from the CNN outputs in artifacts
python -m src.cli_transformers extract --data_root data --out_dir artifacts_transformers --device cpu --golden_subset --overwrite --batch_size 8 --models vit_b_16 swin_t
python -m src.cli_transformers analyze --embeddings_path artifacts_transformers/embeddings --out_dir artifacts_transformers
python -m src.cli_transformers viz2d --embeddings_path artifacts_transformers/embeddings --method tsne --out_dir artifacts_transformers/viz

:: Full extraction (all supported ViT/Swin models). Use cuda if available.
::    - For CPU-only machines, replace --device cuda with --device cpu
::    - The second command re-runs the analyses over the full embeddings
python -m src.cli_transformers extract --data_root data --out_dir artifacts_transformers --device cuda --overwrite --batch_size 16 --models vit_b_16 vit_l_16 swin_t swin_s swin_b
python -m src.cli_transformers analyze --embeddings_path artifacts_transformers/embeddings --out_dir artifacts_transformers

:: 2D projections filtered by family
::    - First command: ViT models with UMAP; second command: Swin models with t-SNE
python -m src.cli_transformers viz2d --embeddings_path artifacts_transformers/embeddings --method umap --model vit_b_16 vit_l_16 --out_dir artifacts_transformers/viz
python -m src.cli_transformers viz2d --embeddings_path artifacts_transformers/embeddings --method tsne --model swin_t swin_s swin_b --out_dir artifacts_transformers/viz

:: Feature maps / token grids examples
:: - ViT (token heatmap from blocks): layer examples: block11, pre_head_ln
python -m src.cli_transformers vizmaps --data_root data --model vit_b_16 --layer block11 --out_dir artifacts_transformers/viz/maps --channels 6 --max_pairs 10
:: - Swin (stage maps): layer examples: stage0, stage1, stage2, stage3, norm
python -m src.cli_transformers vizmaps --data_root data --model swin_t --layer stage2 --out_dir artifacts_transformers/viz/maps --channels 6 --max_pairs 10

:: Summary Analysis (Transformers):
::    - Reads from artifacts_transformers and writes its output under artifacts_transformers/summary

python -m src.analysis.summarize_transformers --artifacts_dir artifacts_transformers --out_dir artifacts_transformers/summary

:: Optional: restrict to certain transformer models:
python -m src.analysis.summarize_transformers --artifacts_dir artifacts_transformers --out_dir artifacts_transformers/summary --models vit_b_16 swin_t

:: -------------------------------------
:: DINO / DINOv2 (HuggingFace) — Separate CLI
:: -------------------------------------

:: Install dependency (once):
::    - Run this inside the activated virtual environment so the package lands next to the other requirements
pip install transformers

:: Quick smoke (subset, CPU)
::    - Same extract / analyze / viz2d sequence as the other pipelines, run through src.cli_dino
::    - Everything is written under artifacts_dino
python -m src.cli_dino extract --data_root data --out_dir artifacts_dino --device cpu --golden_subset --overwrite --batch_size 8 --models dino_vits16 dinov2_small
python -m src.cli_dino analyze --embeddings_path artifacts_dino/embeddings --out_dir artifacts_dino
python -m src.cli_dino viz2d --embeddings_path artifacts_dino/embeddings --method tsne --out_dir artifacts_dino/viz

:: Full extraction (all supported DINO models). Use cuda if available.
::    - For CPU-only machines, replace --device cuda with --device cpu
python -m src.cli_dino extract --data_root data --out_dir artifacts_dino --device cuda --overwrite --batch_size 16 --models dino_vits16 dino_vitb16 dinov2_small dinov2_base dinov2_large
python -m src.cli_dino analyze --embeddings_path artifacts_dino/embeddings --out_dir artifacts_dino

:: 2D projections filtered by family
::    - First command: DINO models with UMAP; second command: DINOv2 models with t-SNE
python -m src.cli_dino viz2d --embeddings_path artifacts_dino/embeddings --method umap --model dino_vits16 dino_vitb16 --out_dir artifacts_dino/viz
python -m src.cli_dino viz2d --embeddings_path artifacts_dino/embeddings --method tsne --model dinov2_small dinov2_base dinov2_large --out_dir artifacts_dino/viz

:: Token grid visualization (choose a block layer, e.g., block11 or last_hidden_state)
python -m src.cli_dino vizmaps --data_root data --model dino_vitb16 --layer block11 --out_dir artifacts_dino/viz/maps --channels 6 --max_pairs 10

:: Summary Analysis (DINO):
::    - Reads from artifacts_dino and writes its output under artifacts_dino/summary
python -m src.analysis.summarize_dino --artifacts_dir artifacts_dino --out_dir artifacts_dino/summary
:: Optional model filter:
python -m src.analysis.summarize_dino --artifacts_dir artifacts_dino --out_dir artifacts_dino/summary --models dino_vitb16 dinov2_base

:: --- DINOv3 additions ---
:: DINOv3 checkpoints are gated, so authenticate BEFORE running the extraction below.
:: Either run 'huggingface-cli login' once, or export an access token for this session:
::    - Replace YOUR_TOKEN with your own Hugging Face token; never commit a real token.
::    - Skip the set line entirely if you already logged in with huggingface-cli.
::    - The placeholder deliberately has no angle brackets: cmd.exe parses them as redirection
::      and rejects the command.
::    - NOTE(review): assumes src.cli_dino downloads through the Hugging Face libraries, which read HF_TOKEN; confirm.
set "HF_TOKEN=YOUR_TOKEN"

:: Extract DINOv3 variants (requires gated access; see the authentication note above)
python -m src.cli_dino extract --data_root data --out_dir artifacts_dino --device cuda --overwrite --batch_size 16 --models dinov3_vitb16_pretrain dinov3_vit7b16_pretrain
:: Analyze
python -m src.cli_dino analyze --embeddings_path artifacts_dino/embeddings --out_dir artifacts_dino
:: 2D projections (DINOv3 only)
python -m src.cli_dino viz2d --embeddings_path artifacts_dino/embeddings --method umap --model dinov3_vitb16_pretrain dinov3_vit7b16_pretrain --out_dir artifacts_dino/viz
:: Token grid visualization (choose a block layer, e.g., block11 or last_hidden_state)
python -m src.cli_dino vizmaps --data_root data --model dinov3_vitb16_pretrain --layer block11 --out_dir artifacts_dino/viz/maps --channels 6 --max_pairs 10
:: Summary (restricted to DINOv3)
python -m src.analysis.summarize_dino --artifacts_dir artifacts_dino --out_dir artifacts_dino/summary --models dinov3_vitb16_pretrain dinov3_vit7b16_pretrain