from datetime import datetime
import json
import textwrap

def debug_gemma_call(llm, prompt: str, query: str, **gen_kwargs):
    """
    Call `llm.get_response()` and print diagnostics for empty responses.

    The full prompt, the merged generation kwargs, the raw return value
    and the extracted text are all printed to stdout so that a
    `response == ""` result can be traced to its cause.

    Parameters
    ----------
    llm        : your LLM wrapper (must expose .get_response)
    prompt     : the main prompt prefix you normally supply
    query      : the user query you intend to append
    gen_kwargs : any generation kwargs forwarded to the backend
                 (max_new_tokens, stop, temperature, etc.); these
                 override the defaults defined in this function

    Returns
    -------
    The extracted text: the first element when the wrapper returns a
    list, `choices[0]["text"]` when it returns a dict with "choices",
    or the text decoded from `sequences[0]` when the dict only carries
    token IDs.  An empty list / empty "choices" yields "".

    Raises
    ------
    ValueError : the wrapper returned something that is neither a list
                 nor a dict
    KeyError   : the wrapper returned a dict without "choices" and no
                 text could be decoded from "sequences"
    """
    # 1. Build the full prompt; prompt and query are passed through
    #    verbatim (no .strip()), joined by a single space.
    full_prompt = (
        "Provide only one answer and NOTHING else.\n"
        + prompt
        + " "
        + query
    )

    # 2. Timestamp-tagged header so logs stay readable.  The format string
    #    is pure ASCII: strftime's handling of non-ASCII format characters
    #    is platform-dependent.
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print("=" * 80)
    print(f"[{stamp}] Gemma‑7B‑It debug run")
    print("=" * 80)

    # 3. Show the prompt (indented so line breaks are obvious)
    print("\nPROMPT ↓")
    print(textwrap.indent(full_prompt, prefix="    "))
    print()

    # 4. Show the generation parameters that will be used
    default_gen_kwargs = dict(max_new_tokens=64,
                              temperature=0.7,
                              top_k=40,
                              top_p=0.95,
                              stop=[])
    merged_kwargs = {**default_gen_kwargs, **gen_kwargs}
    print("GEN‑KWARGS ↓")
    # default=repr: this is display-only, so a non-JSON-able kwarg (set,
    # callable, ...) is shown via repr() instead of aborting the run
    # before the model is even called.
    print(json.dumps(merged_kwargs, indent=4, default=repr))
    print()

    # 5. Actually call the model
    raw = llm.get_response(
        prompt=full_prompt,
        n=1,
        raw=True,               # ask the wrapper for the untouched payload
        **merged_kwargs
    )

    print("RAW RESPONSE OBJECT ↓")
    # Pretty-print if it is JSON-able, else fall back to str().
    # ValueError covers circular references, TypeError unsupported types.
    try:
        print(json.dumps(raw, indent=2)[:1000])
    except (TypeError, ValueError):
        print(str(raw)[:1000])
    print()

    # 6. Optional: decode token IDs if they are available.  Only dict
    #    payloads can carry a "sequences" key; on a list the `in` test
    #    would be an element-membership check instead.
    decoded = None
    if isinstance(raw, dict) and "sequences" in raw:
        ids = raw["sequences"][0]
        print("GENERATED TOKEN IDs:", ids)
        try:
            from transformers import AutoTokenizer
            tok = AutoTokenizer.from_pretrained("google/gemma-7b-it")
        except (ImportError, OSError) as exc:
            # Decoding is a diagnostic extra; a missing package or an
            # unavailable tokenizer must not abort the debug run.
            print(f"(skipping decode, tokenizer unavailable: {exc!r})")
        else:
            decoded = tok.decode(ids, skip_special_tokens=True)
            print("DECODED TEXT FROM IDs:", repr(decoded))
        print()

    # 7. Extract the text safely from the supported payload shapes
    if isinstance(raw, list):
        # list of generations; an empty list is the empty response
        text = raw[0] if raw else ""
    elif isinstance(raw, dict):
        if "choices" not in raw and decoded is not None:
            # dict that only carries token IDs: use the decoded text
            text = decoded
        else:
            # dict with a "choices" list; KeyError if the key is absent
            choices = raw["choices"]
            text = choices[0]["text"] if choices else ""
    else:
        raise ValueError(f"Unexpected return type: {type(raw).__name__}")

    # 8. Final user-visible result
    print("FINAL TEXT AFTER WRAPPER ↓")
    print(repr(text))
    print("=" * 80)

    return text
