class Model:
    """Interface skeleton for a model backend.

    Nothing is implemented at this level: constructing the class directly
    and awaiting ``run`` both raise ``NotImplementedError``, while ``stop``
    does nothing. NOTE(review): presumably meant to be subclassed by the
    concrete models -- confirm against the implementations.
    """

    def __init__(self, model_dir, tokenizer, max_draft_length):
        """Reject direct construction.

        The three arguments are accepted but not used here.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError()

    async def run(self, prompt_ids, sampling_params, request_id, turn_id):
        """Generate output tokens for a prompt (not implemented here).

        Args:
            prompt_ids: list of tokens.
            sampling_params: not used at this level.
            request_id: not used at this level.
            turn_id: not used at this level.

        Returns:
            Intended result is a list of lists of tokens, where
            ``len(output)`` is the beam width. What each inner list holds
            is not settled -- possibly the tokens produced per step;
            TODO confirm with a concrete implementation.

        Raises:
            NotImplementedError: always, once the coroutine is driven.
        """
        raise NotImplementedError()

    def stop(self):
        """Do nothing and return ``None``."""
        return None