import requests
import time

def llama_response(prompt, temperature=0.0, stop_sequences=None, check_time=True, *, timeout=None):
    """Send *prompt* to the local completions endpoint and return the generated text.

    The request is a JSON POST to ``http://localhost:8000/v1/completions``
    for the ``meta-llama/Llama-3.2-3B`` model with ``max_tokens`` fixed at 1024.
    This is a best-effort helper: failures are reported on stdout and
    signalled by a ``None`` return value instead of an exception.

    Args:
        prompt: Text sent as the ``"prompt"`` field of the request.
        temperature: Value sent as the ``"temperature"`` field.
        stop_sequences: Value sent as the ``"stop"`` field. ``None`` (the
            default) sends ``['']``, which is what this function has always
            sent when the argument is omitted.
        check_time: When true, print the elapsed time of the call between
            two ``=`` rulers.
        timeout: Keyword-only. Passed straight to ``requests.post``; ``None``
            (the default) waits indefinitely, matching the previous behaviour.

    Returns:
        The ``"text"`` of the first entry in ``"choices"`` (``""`` if that
        entry has no ``"text"`` key) on a 200 response; the whole decoded
        JSON body if the 200 response carries no non-empty ``"choices"``;
        ``None`` for any other status code or if an exception occurred.
    """
    # A None sentinel avoids sharing one mutable default list across calls
    # while still sending exactly the same payload as before.
    if stop_sequences is None:
        stop_sequences = ['']

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # (or go negative) if the system clock is adjusted during the request.
    start_time = time.perf_counter()

    url = "http://localhost:8000/v1/completions"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": "meta-llama/Llama-3.2-3B",
        "prompt": prompt,
        "max_tokens": 1024,
        "temperature": temperature,
        "stop": stop_sequences,
    }

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)

        if response.status_code == 200:
            result = response.json()
            if "choices" in result and len(result["choices"]) > 0:
                output = result["choices"][0].get("text", "")
            else:
                # No completion present: hand back the decoded body so the
                # caller can inspect what the server actually returned.
                output = result
        else:
            print("Request failed: Status code", response.status_code)
            print("Response message:", response.text)
            output = None
    except Exception as e:
        # Deliberately broad: connection errors, timeouts, invalid JSON and
        # unexpected response shapes are all reported and mapped to None so
        # callers never have to wrap this helper in their own try/except.
        print("Exception occurred:", str(e))
        output = None

    execution_time = time.perf_counter() - start_time

    if check_time:
        print("="*40)
        print(f"Execution Time: {execution_time:.2f} seconds")
        print("="*40)

    return output

if __name__ == "__main__":
    # Manual smoke test: request a completion for a short story opener and
    # echo it. Nothing is printed when the call yields a falsy value
    # (None on failure, or an empty completion).
    generated = llama_response("Once upon a time,")
    if generated:
        print("Generated text response:", generated, sep="\n")
