# Reward models.
# See RewardBench for alternative models: https://huggingface.co/spaces/allenai/reward-bench

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

class RewardModel:
    """Score a (question, answer) pair with a pretrained sequence classifier.

    The model and its tokenizer are loaded once at construction time via
    ``from_pretrained`` and reused for every call to :meth:`reward`.
    """

    # Checkpoint used when the caller does not pick one explicitly.
    DEFAULT_MODEL_NAME = "OpenAssistant/reward-model-deberta-v3-large-v2"

    def __init__(self, reward_name: str = DEFAULT_MODEL_NAME):
        """Load the classifier and tokenizer identified by *reward_name*.

        Args:
            reward_name: Model identifier or path passed to ``from_pretrained``
                for both the model and the tokenizer. Defaults to
                ``DEFAULT_MODEL_NAME``.
        """
        self.reward_model = AutoModelForSequenceClassification.from_pretrained(reward_name)
        self.tokenizer = AutoTokenizer.from_pretrained(reward_name)

    def reward(self, question: str, answer: str) -> "torch.Tensor":
        """Return the reward score for *answer* as a response to *question*.

        Args:
            question: The prompt text, encoded as the first segment.
            answer: The response text, encoded as the second segment.

        Returns:
            The first logit of the first (only) sequence in the batch, as a
            detached CPU tensor.
        """
        inputs = self.tokenizer(question, answer, return_tensors='pt')
        # Keep the inputs on the same device as the model so scoring still
        # works after a caller has moved ``reward_model`` to an accelerator.
        inputs = inputs.to(self.reward_model.device)
        # Inference only: skip building an autograd graph for the forward pass.
        with torch.no_grad():
            logits = self.reward_model(**inputs).logits
        score = logits[0].detach().cpu()
        return score[0]
