use anyhow::Result;
use clap::Parser;
use tokenizers::Tokenizer;

// Command-line arguments for the token decoder.
//
// (Regular comment on purpose: a `///` doc comment here would compete with the
// explicit `about` / `long_about` settings below.)
#[derive(Parser, Debug)]
#[command(author, version, about = "Decode token IDs back into text using a HuggingFace tokenizer", long_about = None)]
struct Args {
    /// Tokenizer model name or path to tokenizer.json
    #[arg(short = 'm', long, default_value = "Qwen/Qwen2.5-Coder-32B-Instruct")]
    model: String,

    /// Token IDs to decode.
    ///
    /// Accepts comma-separated or whitespace-separated lists, e.g.:
    ///   --token-ids "1,2,3"  or  --token-ids "1 2 3"
    #[arg(long, required = true)]
    token_ids: String,

    /// Skip special tokens during decoding (pass `--skip-special-tokens false` to keep them)
    // A `bool` field defaults to `ArgAction::SetTrue`; combined with a default of
    // `true` that made the flag a no-op and the value impossible to turn off.
    // `ArgAction::Set` lets the user supply an explicit `true`/`false`, while
    // `num_args = 0..=1` + `default_missing_value` keep the bare
    // `--skip-special-tokens` form working exactly as before.
    #[arg(
        long,
        default_value_t = true,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        default_missing_value = "true"
    )]
    skip_special_tokens: bool,
}

/// Loads a tokenizer either from a local path or from the local HuggingFace hub cache.
///
/// Resolution order:
/// 1. `model` is an existing directory containing `tokenizer.json` -> that file is loaded.
/// 2. `model` is any other existing path -> it is loaded directly as a tokenizer file.
/// 3. Otherwise `model` is treated as a hub repo id (`org/name`) and looked up under
///    `<hub cache>/models--org--name`, first via the revision recorded in `refs/main`,
///    then by scanning every directory under `snapshots/`.
///
/// The hub cache directory is `$HF_HUB_CACHE` if set, else `$HF_HOME/hub`, else
/// `$HOME/.cache/huggingface/hub`, else the relative `.cache/huggingface/hub`.
/// Empty environment values are treated as unset.
///
/// Nothing is downloaded; progress messages are written to stderr.
///
/// # Errors
/// Returns an error if a located tokenizer file cannot be loaded, or if no
/// `tokenizer.json` can be found for `model`.
fn load_tokenizer(model: &str) -> Result<Tokenizer> {
    use std::path::{Path, PathBuf};

    let local = Path::new(model);
    if local.is_dir() {
        // A model directory was given rather than the JSON file itself. If it has no
        // tokenizer.json we fall through to the cache lookup instead of handing a
        // directory to `Tokenizer::from_file`.
        let candidate = local.join("tokenizer.json");
        if candidate.is_file() {
            eprintln!(
                "[RUST] Loading tokenizer from file: {}",
                candidate.display()
            );
            return Tokenizer::from_file(&candidate)
                .map_err(|e| anyhow::anyhow!("Failed to load tokenizer from file: {}", e));
        }
    } else if local.exists() {
        // `exists()` (not `is_file()`) so pipes / process substitution keep working.
        eprintln!("[RUST] Loading tokenizer from file: {}", model);
        return Tokenizer::from_file(local)
            .map_err(|e| anyhow::anyhow!("Failed to load tokenizer from file: {}", e));
    }

    // Reads an environment variable as a path, ignoring unset or empty values.
    let env_path = |key: &str| {
        std::env::var_os(key)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    };

    let hub_cache = env_path("HF_HUB_CACHE").unwrap_or_else(|| {
        env_path("HF_HOME")
            .or_else(|| env_path("HOME").map(|home| home.join(".cache").join("huggingface")))
            .unwrap_or_else(|| PathBuf::from(".cache/huggingface"))
            .join("hub")
    });

    // Hub cache directories encode the repo id with "--" in place of "/".
    let model_root = hub_cache.join(format!("models--{}", model.replace('/', "--")));
    let snapshots_dir = model_root.join("snapshots");

    let load_cached = |path: &Path| -> Result<Tokenizer> {
        eprintln!("[RUST] Using tokenizer from: {}", path.display());
        Tokenizer::from_file(path).map_err(|e| anyhow::anyhow!("Failed to load tokenizer: {}", e))
    };

    // Preferred: the snapshot that refs/main points at.
    if let Ok(rev) = std::fs::read_to_string(model_root.join("refs").join("main")) {
        let rev = rev.trim();
        // An empty ref would otherwise resolve to `snapshots/tokenizer.json`.
        if !rev.is_empty() {
            let tokenizer_path = snapshots_dir.join(rev).join("tokenizer.json");
            if tokenizer_path.is_file() {
                return load_cached(&tokenizer_path);
            }
        }
    }

    // Fallback: take the first snapshot (in directory-listing order) that has a tokenizer.json.
    if let Ok(entries) = std::fs::read_dir(&snapshots_dir) {
        for entry in entries.flatten() {
            let tokenizer_path = entry.path().join("tokenizer.json");
            if tokenizer_path.is_file() {
                return load_cached(&tokenizer_path);
            }
        }
    }

    anyhow::bail!(
        "Tokenizer '{}' not found in HuggingFace cache. Please download it first.",
        model
    )
}

/// Parses a list of token IDs separated by commas and/or whitespace.
///
/// Empty fragments (e.g. from `"1, 2"` or trailing separators) are ignored.
///
/// # Errors
/// Returns an error naming the offending fragment if it is not an integer in
/// the `u32` range. Negative values are rejected rather than being wrapped
/// into large unsigned IDs.
fn parse_token_ids(raw: &str) -> Result<Vec<u32>> {
    raw.split(|c: char| c == ',' || c.is_whitespace())
        .filter(|piece| !piece.is_empty())
        .map(|piece| {
            piece.parse::<u32>().map_err(|e| {
                anyhow::anyhow!(
                    "invalid token id '{}': {} (expected an integer in 0..={})",
                    piece,
                    e,
                    u32::MAX
                )
            })
        })
        .collect()
}

/// Entry point: parses the CLI arguments, loads the tokenizer, decodes the
/// given token IDs and writes the resulting text to stdout without a trailing
/// newline.
///
/// # Errors
/// Fails if the tokenizer cannot be loaded, a token ID is malformed, or
/// decoding fails.
fn main() -> Result<()> {
    let args = Args::parse();
    // Set before the tokenizer is loaded so the setting is visible to it.
    std::env::set_var("TOKENIZERS_PARALLELISM", "false");

    let tok = load_tokenizer(&args.model)?;

    let ids = parse_token_ids(&args.token_ids)?;

    let decoded = tok
        .decode(&ids, args.skip_special_tokens)
        .map_err(|e| anyhow::anyhow!("decode failed: {}", e))?;

    // `print!` (not `println!`) so the output is exactly the decoded text.
    print!("{}", decoded);
    Ok(())
}
