def opt(root_dir, model_name='1.3b'):
    """Load a ``facebook/opt-<model_name>`` causal LM together with its tokenizer.

    Both objects are fetched with ``from_pretrained`` and cached under
    ``<root_dir>/checkpoints/opt``.

    Args:
        root_dir: Base directory that contains (or will contain) the
            ``checkpoints/opt`` cache directory. A trailing path separator
            is optional.
        model_name: Size suffix of the OPT checkpoint, inserted into the
            repository id as ``facebook/opt-<model_name>``.

    Returns:
        A ``(model, tokenizer, None)`` tuple. The third element is always
        ``None``: this loader supplies no sequence preprocessing callable.
    """
    import os

    from transformers import AutoTokenizer, OPTForCausalLM

    # os.path.join inserts the separator when root_dir lacks a trailing one;
    # plain string concatenation silently produced e.g. "/datacheckpoints/opt/".
    cache_dir = os.path.join(root_dir, 'checkpoints', 'opt')
    repo_id = f"facebook/opt-{model_name}"

    tokenizer = AutoTokenizer.from_pretrained(repo_id, cache_dir=cache_dir)
    model = OPTForCausalLM.from_pretrained(repo_id, cache_dir=cache_dir)
    return model, tokenizer, None


def instructprotein(root_dir, model_name='instructprotein'):
    """Load an InstructProtein checkpoint, its tokenizer, and a sequence formatter.

    The model is read from ``<root_dir>/checkpoints/instructprotein/<model_name>``
    and the tokenizer from ``<root_dir>/checkpoints/instructprotein/tokenizer``.
    The protein delimiter tags and one ``Ƥ``-prefixed token per residue letter
    are then registered on the tokenizer via ``add_tokens``.

    Args:
        root_dir: Base directory that contains ``checkpoints/instructprotein``.
            A trailing path separator is optional.
        model_name: Name of the checkpoint sub-directory to load the model from.

    Returns:
        A ``(model, tokenizer, preprocess)`` tuple, where ``preprocess`` maps
        an iterable of residue strings to a list of strings of the form
        ``"<protein>ƤAƤC...</protein>"``.
    """
    import os

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Build both paths from one base with os.path.join so they resolve
    # consistently whether or not root_dir ends with a separator (previously
    # the model path required a trailing separator and the tokenizer path
    # always inserted its own).
    base_dir = os.path.join(root_dir, 'checkpoints', 'instructprotein')
    model = AutoModelForCausalLM.from_pretrained(os.path.join(base_dir, model_name))
    tokenizer = AutoTokenizer.from_pretrained(os.path.join(base_dir, 'tokenizer'))

    # Delimiter tags first, then one prefixed token per residue letter.
    # The order is kept identical to the original hand-written list because
    # add_tokens assigns ids in the order the tokens are supplied.
    residue_letters = "ACDEFGHIKLMNPQRSTVWY"
    additional_tokens = ["<protein>", "</protein>"] + [f"Ƥ{aa}" for aa in residue_letters]
    # NOTE(review): the model's embedding matrix is not resized after adding
    # tokens — presumably the checkpoint already accounts for them; confirm.
    tokenizer.add_tokens(additional_tokens)

    def preprocess(sequences):
        """Wrap each sequence in protein tags, prefixing every residue with ``Ƥ``."""
        # Prefixing per residue (instead of "Ƥ" + "Ƥ".join(...)) gives the same
        # output for non-empty sequences and avoids a dangling "Ƥ" for an
        # empty sequence.
        return [
            "<protein>" + "".join(f"Ƥ{residue}" for residue in seq) + "</protein>"
            for seq in sequences
        ]

    return model, tokenizer, preprocess
