import os
import sys
import argparse
from transformers import AutoTokenizer

# Tokenizer directory of every supported model, relative to the data root.
_MODEL_SUBDIRS = {
    "qwen7b": "quantize_model/packed/qwen7b-distill",
    "qwen38": "quantize_model/packed/qwen3-8b",
}

# Tokenizers already loaded by count_tokens, keyed by the path they came from,
# so repeated calls (e.g. one per line in interactive mode) do not reload them.
_TOKENIZER_CACHE = {}


def count_tokens(text, model_name="qwen7b", data_root=None):
    """Tokenize *text* with the tokenizer of *model_name* and count the tokens.

    Args:
        text: The text to tokenize.
        model_name: Key of the model whose tokenizer is used; one of
            "qwen7b" or "qwen38".
        data_root: Directory containing the model folders. When None, the
            DATA_ROOT environment variable is used, falling back to
            "/data1/efficient-reasoning".

    Returns:
        A ``(token_count, tokens)`` tuple, where ``tokens`` is whatever
        ``tokenizer.encode(text)`` returned and ``token_count`` is its length.
        Returns ``(None, None)`` when the model name is unknown or when
        loading the tokenizer or encoding raised; in both cases a message
        describing the problem is printed.
    """
    if data_root is None:
        data_root = os.environ.get("DATA_ROOT", "/data1/efficient-reasoning")

    subdir = _MODEL_SUBDIRS.get(model_name)
    if subdir is None:
        # Say why nothing was counted instead of failing silently.
        supported = ", ".join(_MODEL_SUBDIRS)
        print(f"Unknown model '{model_name}'; supported models: {supported}")
        return None, None

    model_path = f"{data_root}/{subdir}"

    # Best-effort by design: any loading/encoding failure is reported and
    # turned into the (None, None) result that callers check for.
    try:
        tokenizer = _TOKENIZER_CACHE.get(model_path)
        if tokenizer is None:
            tokenizer = AutoTokenizer.from_pretrained(model_path)
            _TOKENIZER_CACHE[model_path] = tokenizer
        tokens = tokenizer.encode(text)
    except Exception as e:
        print(f"tokenizer error: {e}")
        return None, None

    return len(tokens), tokens

def interactive_mode(data_root=None):
    """Run a prompt loop that prints the token count of each line entered.

    Asks once for the model name (a blank answer selects "qwen7b"), then
    repeatedly reads a line of text and prints its token count together with
    the first ten tokens. Blank lines are skipped. The session ends on Ctrl+C
    or on end of input (Ctrl+D, or stdin being exhausted when piped).

    Args:
        data_root: Optional data root directory forwarded to count_tokens;
            None leaves the choice of directory to count_tokens.
    """
    print("=== Token count tool ===")
    print("Enter text, press Ctrl+C to exit")
    print()

    try:
        model_name = input("Enter model name (qwen7b/qwen38): ").strip()
    except (KeyboardInterrupt, EOFError):
        # Quitting at the very first prompt is a normal exit, not a crash.
        print("\n\nExit program")
        return
    if not model_name:
        model_name = "qwen7b"

    while True:
        try:
            text = input("\nEnter text: ")
            if not text.strip():
                continue

            token_count, tokens = count_tokens(text, model_name, data_root)
            if token_count is not None:
                print(f"Token count: {token_count}")
                print(f"Token list: {tokens[:10]}{'...' if len(tokens) > 10 else ''}")

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every
            # further input() call raises it again, so treating it as an
            # ordinary error would spin forever.
            print("\n\nExit program")
            break
        except Exception as e:
            # Keep the session alive after an unexpected per-line failure.
            print(f"Error: {e}")


def main():
    """Parse the command line and run the tool in the requested mode.

    With --interactive the prompt loop is started (honouring --data_root).
    Otherwise the value of --text is tokenized once and the result printed;
    --show_tokens additionally prints the full token list. When neither
    option is given, a hint and the usage help are printed.

    Raises:
        SystemExit: With status 1 when the given text could not be tokenized.
    """
    parser = argparse.ArgumentParser(description="Token count tool")
    parser.add_argument("--text", type=str, help="Text to count")
    parser.add_argument("--model", type=str, default="qwen7b", help="Model name")
    parser.add_argument("--data_root", type=str, help="Data root directory")
    parser.add_argument("--interactive", action="store_true", help="Interactive mode")
    parser.add_argument("--show_tokens", action="store_true", help="Show token list")

    args = parser.parse_args()

    if args.interactive:
        interactive_mode(args.data_root)
        return

    # Compare against None so that an explicit empty string (--text "") is
    # still tokenized rather than being mistaken for a missing argument.
    if args.text is None:
        print("Please provide text or use --interactive mode")
        parser.print_help()
        return

    token_count, tokens = count_tokens(args.text, args.model, args.data_root)
    if token_count is None:
        # Let calling scripts detect the failure through the exit status.
        sys.exit(1)

    print(f"Text: {args.text}")
    print(f"Model: {args.model}")
    print(f"Token count: {token_count}")
    if args.show_tokens:
        print(f"Token list: {tokens}")

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
