#!/usr/bin/env python3
"""
Manual test commands for evaluate_device_generic.py
Copy and paste these commands to test the functionality step by step.
"""

import os
# Title banner: the guide's name framed above and below by an 80-character rule.
_banner_rule = "=" * 80
print(
    _banner_rule,
    "MANUAL TESTING GUIDE for evaluate_device_generic.py",
    _banner_rule,
    sep="\n",
)

# Step 1: heading (preceded by a blank line) followed by the `cd` command
# the user is told to run first.
print(
    "\n1. SETUP - Change to the correct directory:\n"
    "cd /Volumes/BW_X5/lora_adv/TextAttack-A2T"
)

# Step 2: print the evaluation command that passes --accuracy, --device cpu
# and --save-log for the BERT checkpoint below.
_basic_checkpoint = (
    "/Volumes/BW_X5/lora_adv/TextAttack-A2T/data/models/full/"
    "bert-base-uncased_imdb_epochs_5_seed_42/"
    "model_bert-base-uncased_ds_imdb_train_epoch_5_run_20250722_104605_seed_42/"
    "final_model"
)
_basic_command_parts = [
    "python evaluate_device_generic.py",
    "--dataset imdb",
    "--checkpoint-paths " + _basic_checkpoint,
    "--epoch 4",
    "--accuracy",
    "--device cpu",
    "--save-log",
]
print("\n2. BASIC TEST - Test accuracy evaluation with existing BERT model:")
# Each argument goes on its own line: a trailing shell line-continuation
# backslash, then a four-space indent before the next argument.
print(" \\\n    ".join(_basic_command_parts))

# Step 3: print the evaluation command that passes --interpretability and
# --device cpu for the same checkpoint path.
print("\n3. INTERPRETABILITY TEST - Test AOPC metric:")
# Raw literal so the shell line-continuation backslashes are emitted verbatim.
print(r"""python evaluate_device_generic.py \
    --dataset imdb \
    --checkpoint-paths /Volumes/BW_X5/lora_adv/TextAttack-A2T/data/models/full/bert-base-uncased_imdb_epochs_5_seed_42/model_bert-base-uncased_ds_imdb_train_epoch_5_run_20250722_104605_seed_42/final_model \
    --epoch 4 \
    --interpretability \
    --device cpu""")

# Step 4: print a reminder to lower NUM_SAMPLES_FOR_EVALUATION, then the
# evaluation command that passes --robustness with the textfooler attack.
_robustness_flags = [
    "--dataset imdb",
    "--checkpoint-paths /Volumes/BW_X5/lora_adv/TextAttack-A2T/data/models/full/bert-base-uncased_imdb_epochs_5_seed_42/model_bert-base-uncased_ds_imdb_train_epoch_5_run_20250722_104605_seed_42/final_model",
    "--epoch 4",
    "--robustness",
    "--attacks textfooler",
    "--device cpu",
]
print("\n4. SMALL ROBUSTNESS TEST - Test with minimal examples:")
print("# First, edit NUM_SAMPLES_FOR_EVALUATION in evaluate_device_generic.py to 5 for quick testing")
print("python evaluate_device_generic.py \\")
# Every flag except the last carries a trailing line-continuation backslash.
for _flag in _robustness_flags[:-1]:
    print("    " + _flag + " \\")
print("    " + _robustness_flags[-1])

# Step 5: print a `python -c "..."` snippet for the user to paste into a
# shell. The snippet is only printed here; it is never executed by this file.
print("\n5. TOKENIZER COMPATIBILITY TEST - Test with different model types:")
print("# This tests if the tokenizer modifications work correctly")
# Triple single quotes let the literal hold both the shell's double quotes
# and the snippet's own single-quoted strings without escaping.
_tokenizer_check_command = '''python -c "
import sys
sys.path.append('.')
sys.path.append('../nlp_training')
from evaluate_device_generic import get_tokenizer_for_model
import transformers

# Test BERT tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased')
print(f'BERT - Vocab: {len(tokenizer)}, UNK: {tokenizer.unk_token}, PAD: {tokenizer.pad_token}, MASK: {tokenizer.mask_token}')

# Test GPT-2 tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained('gpt2')
print(f'GPT-2 original - Vocab: {len(tokenizer)}, UNK: {tokenizer.unk_token}, PAD: {tokenizer.pad_token}, MASK: {tokenizer.mask_token}')

# Add missing tokens
tokens_to_add = {}
if tokenizer.pad_token_id is None: tokens_to_add['pad_token'] = '[PAD]'
if tokenizer.mask_token_id is None: tokens_to_add['mask_token'] = '[MASK]'
if tokens_to_add:
    tokenizer.add_special_tokens(tokens_to_add)
    print(f'GPT-2 modified - Vocab: {len(tokenizer)}, UNK: {tokenizer.unk_token}, PAD: {tokenizer.pad_token}, MASK: {tokenizer.mask_token}')
"'''
print(_tokenizer_check_command)

# Step 6: for each test, print a heading followed by its bulleted list of
# things the user should see. Headings after the first start with a newline
# so a blank line separates the groups.
_expected_outputs = [
    (
        "✓ Accuracy test should show:",
        [
            "Model loading messages",
            "'Added X special tokens for A2T compatibility' (if needed)",
            "'Resizing model embeddings from X to Y' (if needed)",
            "Accuracy results for IMDB dataset",
            "Log file creation",
        ],
    ),
    (
        "\n✓ Interpretability test should show:",
        [
            "AOPC score calculation",
            "Progress bars for LIME explanations",
            "Final AOPC score",
        ],
    ),
    (
        "\n✓ Robustness test should show:",
        [
            "TextAttack setup",
            "Attack progress",
            "Attack success rate, queries, perturbation percentage",
        ],
    ),
]
print("\n6. EXPECTED OUTPUTS:")
print("=" * 50)
for _heading, _bullets in _expected_outputs:
    print(_heading)
    for _bullet in _bullets:
        print("  - " + _bullet)

# Step 7: troubleshooting entries, each a symptom line followed by bulleted
# suggestions; entries are separated by one blank line.
_troubleshooting = (
    (
        "If you see 'ImportError' for GenericSequenceClassifier:",
        (
            "Check that ../nlp_training/seq_classifier.py exists",
            "Ensure the import path is correct",
        ),
    ),
    (
        "If you see 'No module named textattack':",
        ("pip install textattack",),
    ),
    (
        "If GPU memory issues:",
        (
            "Use --device cpu",
            "Reduce NUM_SAMPLES_FOR_EVALUATION in the script",
        ),
    ),
    (
        "If models fail to load:",
        (
            "Check that the model path exists",
            "Verify the model has config.json and model files",
        ),
    ),
)
print("\n7. TROUBLESHOOTING:")
print("=" * 50)
print(
    "\n\n".join(
        "\n".join([symptom] + ["  - " + remedy for remedy in remedies])
        for symptom, remedies in _troubleshooting
    )
)

# Step 8: print instructions for building and saving a
# GenericSequenceClassifier via a `python -c "..."` snippet, then the
# evaluation command that points at the saved ./test_generic_model directory.
# Everything here is printed text; nothing is executed by this file.
print(
    "\n8. CREATING A GENERIC SEQUENCE CLASSIFIER MODEL FOR TESTING:",
    "=" * 50,
    "If you want to test with an actual GenericSequenceClassifier:",
    "cd /Volumes/BW_X5/lora_adv",
    sep="\n",
)
# Triple single quotes keep the shell's double quotes and the snippet's
# single-quoted strings unescaped.
print('''python -c "
from nlp_training.seq_classifier import GenericSequenceClassifier
import torch

# Create a simple test model
model = GenericSequenceClassifier(
    model_name='distilbert-base-uncased',
    num_labels=2,
    pooling_strategy='mean'
)

# Save it
model.save_pretrained('./test_generic_model')
print('Test generic model saved to ./test_generic_model')
"''')

print("\nThen test with:")
# Arguments are joined with a shell line-continuation backslash, a newline,
# and a four-space indent.
print(
    " \\\n    ".join(
        [
            "python TextAttack-A2T/evaluate_device_generic.py",
            "--dataset imdb",
            "--checkpoint-paths ./test_generic_model",
            "--epoch 4",
            "--accuracy",
            "--device cpu",
        ]
    )
)

# Closing banner: a blank line, then the "where to start" hint framed by two
# 80-character rules.
_closing_rule = "=" * 80
print(
    "\n{rule}\n{hint}\n{rule}".format(
        rule=_closing_rule,
        hint="Start with test #2 (Basic Test) to verify everything works!",
    )
)
