import re

from .dataset import BaseDataset


class DescriptionDataset(BaseDataset):
    """
    Description-guided Dataset for *MolinstTest* with description used in PDFBench official experiments.
    """

    @classmethod
    def function(cls, instruction: str) -> str:
        """
        Extract the part of the instruction related to protein function.

        Everything before the first ``1.`` marker is dropped (the marker itself
        is kept), the trailing prompt ``The designed protein sequence is`` is
        removed, and surrounding whitespace is stripped. An instruction that
        contains no ``1.`` marker keeps its leading text unchanged.

        :param instruction: Instruction of DescriptionDataset
        :type instruction: str
        :return: Part of the instruction containing protein function in DescriptionDataset
        :rtype: str
        """
        # DOTALL lets the lazy prefix span line breaks, so the lead-in text is
        # also removed when the first "1." sits on a later line of the instruction.
        described = re.sub(r"^.*?(1\.)", r"\1", instruction, flags=re.DOTALL)
        # Trim trailing whitespace before matching the prompt tail; otherwise a
        # trailing newline (or an already-trimmed space) would defeat removesuffix.
        described = described.rstrip().removesuffix("The designed protein sequence is")
        return described.strip()
