from src.reliability_eval.common.enums import (
    TokenScoreTypes, SequenceAggregateTypes,
    SequenceScoreTypes, QuestionAggregateTypes
)
from src.reliability_eval.common.config import LLMEvaluationPipelineConfig

# Pipeline config whose leading token score type is CROSS_ENTROPY.
# NOTE(review): the constant is named "CONFIDENCE" while the score requested
# is CROSS_ENTROPY -- confirm this pairing is intentional.
CONFIDENCE_EVALUATION_PIPELINE = LLMEvaluationPipelineConfig(
    token_score_types=[
        # Score specific to this pipeline.
        TokenScoreTypes.CROSS_ENTROPY,
        # Additional per-token outputs requested alongside the score.
        TokenScoreTypes.TOKEN_INFO_TUPLES,
        TokenScoreTypes.RELEVANT_DECODED_TEXT,
        TokenScoreTypes.FULL_DECODED_TEXT,
        TokenScoreTypes.EFFECTIVE_LENGTHS,
        TokenScoreTypes.PADDED_SEQUENCES,
    ],
    sequence_aggregate_types=[SequenceAggregateTypes.MEAN],
    sequence_score_types=[SequenceScoreTypes.ID, SequenceScoreTypes.IS_CORRECT],
    question_aggregate_types=[
        QuestionAggregateTypes.MEAN,
        QuestionAggregateTypes.ACCURACY,
    ],
)

# Pipeline config whose leading token score type is NLL.
NLL_EVALUATION_PIPELINE = LLMEvaluationPipelineConfig(
    token_score_types=[
        # Score specific to this pipeline.
        TokenScoreTypes.NLL,
        # Additional per-token outputs requested alongside the score.
        TokenScoreTypes.TOKEN_INFO_TUPLES,
        TokenScoreTypes.RELEVANT_DECODED_TEXT,
        TokenScoreTypes.FULL_DECODED_TEXT,
        TokenScoreTypes.EFFECTIVE_LENGTHS,
        TokenScoreTypes.PADDED_SEQUENCES,
    ],
    sequence_aggregate_types=[SequenceAggregateTypes.MEAN],
    sequence_score_types=[SequenceScoreTypes.ID, SequenceScoreTypes.IS_CORRECT],
    question_aggregate_types=[
        QuestionAggregateTypes.MEAN,
        QuestionAggregateTypes.ACCURACY,
    ],
)

# Pipeline config whose leading token score type is ENTROPY.
ENTROPY_EVALUATION_PIPELINE = LLMEvaluationPipelineConfig(
    token_score_types=[
        # Score specific to this pipeline.
        TokenScoreTypes.ENTROPY,
        # Additional per-token outputs requested alongside the score.
        TokenScoreTypes.TOKEN_INFO_TUPLES,
        TokenScoreTypes.RELEVANT_DECODED_TEXT,
        TokenScoreTypes.FULL_DECODED_TEXT,
        TokenScoreTypes.EFFECTIVE_LENGTHS,
        TokenScoreTypes.PADDED_SEQUENCES,
    ],
    sequence_aggregate_types=[SequenceAggregateTypes.MEAN],
    sequence_score_types=[SequenceScoreTypes.ID, SequenceScoreTypes.IS_CORRECT],
    question_aggregate_types=[
        QuestionAggregateTypes.MEAN,
        QuestionAggregateTypes.ACCURACY,
    ],
)


# Pipeline config whose leading token score type is TOP_K.
TOP_K_EVALUATION_PIPELINE = LLMEvaluationPipelineConfig(
    token_score_types=[
        # Score specific to this pipeline.
        TokenScoreTypes.TOP_K,
        # Additional per-token outputs requested alongside the score.
        TokenScoreTypes.TOKEN_INFO_TUPLES,
        TokenScoreTypes.RELEVANT_DECODED_TEXT,
        TokenScoreTypes.FULL_DECODED_TEXT,
        TokenScoreTypes.EFFECTIVE_LENGTHS,
        TokenScoreTypes.PADDED_SEQUENCES,
    ],
    sequence_aggregate_types=[SequenceAggregateTypes.MEAN],
    sequence_score_types=[SequenceScoreTypes.ID, SequenceScoreTypes.IS_CORRECT],
    question_aggregate_types=[
        QuestionAggregateTypes.MEAN,
        QuestionAggregateTypes.ACCURACY,
    ],
)


# Pipeline config whose leading token score type is SEMANTIC_ENTROPY.
SEMANTIC_ENTROPY_EVALUATION_PIPELINE = LLMEvaluationPipelineConfig(
    token_score_types=[
        # Score specific to this pipeline.
        TokenScoreTypes.SEMANTIC_ENTROPY,
        # Additional per-token outputs requested alongside the score.
        TokenScoreTypes.TOKEN_INFO_TUPLES,
        TokenScoreTypes.RELEVANT_DECODED_TEXT,
        TokenScoreTypes.FULL_DECODED_TEXT,
        TokenScoreTypes.EFFECTIVE_LENGTHS,
        TokenScoreTypes.PADDED_SEQUENCES,
    ],
    sequence_aggregate_types=[SequenceAggregateTypes.MEAN],
    sequence_score_types=[SequenceScoreTypes.ID, SequenceScoreTypes.IS_CORRECT],
    question_aggregate_types=[
        QuestionAggregateTypes.MEAN,
        QuestionAggregateTypes.ACCURACY,
    ],
)