#!/usr/bin/env python3


class Llama:
    """Wrapper around a text-generation backend that logs every exchange.

    ``model`` is initialised to ``None``. Before :meth:`generate` is called it
    must be replaced with an object exposing ``generate(prompt, max_length)``
    that returns a mapping containing a ``"generation"`` entry.
    """

    def __init__(self, conf):
        """Store the configuration and derive the default generation length.

        Args:
            conf: Configuration object; must expose
                ``generation_params.max_length``.
        """
        self.llm_conf = conf
        self.generation_params = self.llm_conf.generation_params
        self.max_length = self.generation_params.max_length

        # No backend is attached here; generate() fails with AttributeError
        # until a model object is assigned to this attribute.
        self.model = None
        self.inputs_outputs_log_file_path = "inputs_outputs_log.txt"

    def generate(self, prompt, stop=None, max_length=None) -> str:
        """Generate text for ``prompt`` and append the exchange to the log file.

        The raw backend response is kept on ``self.response``.

        Args:
            prompt: Prompt forwarded to ``self.model.generate``.
            stop: Optional stop sequence, or an iterable of stop sequences.
                The generation is cut right before the earliest occurrence of
                any of them. ``None``, an empty string, or an empty iterable
                disables truncation.
            max_length: Length limit forwarded to the backend. Falls back to
                ``self.max_length`` only when ``None`` (so ``0`` is honoured).

        Returns:
            The generation, truncated at the stop sequence (if any) and with
            trailing whitespace removed.

        Raises:
            AttributeError: If ``self.model`` is still ``None``.
        """
        if max_length is None:
            max_length = self.max_length

        self.response = self.model.generate(prompt, max_length)
        generation = self._truncate_at_stop(self.response["generation"], stop)

        # Trailing whitespace is dropped after truncation so that whitespace
        # sitting just before the stop sequence is removed as well.
        generation = generation.rstrip()

        # Explicit UTF-8 keeps the log independent of the platform locale and
        # avoids UnicodeEncodeError on non-ASCII prompts/generations.
        with open(
            self.inputs_outputs_log_file_path, "a", encoding="utf-8"
        ) as log_file:
            log_file.write(
                f"Prompt: {prompt}\n-"
                f"Generation: {generation}\n\n-"
                + "-" * 50
                + "\n\n"
            )

        return generation

    @staticmethod
    def _truncate_at_stop(text, stop):
        """Return ``text`` cut before the earliest stop sequence found in it."""
        if not stop:
            return text

        # Empty sequences are skipped: they would match at index 0 and
        # str.split rejects them outright.
        if isinstance(stop, str):
            stop_sequences = (stop,)
        else:
            stop_sequences = tuple(seq for seq in stop if seq)

        hits = [pos for pos in map(text.find, stop_sequences) if pos != -1]
        return text[: min(hits)] if hits else text
