from nltk import word_tokenize
from nltk.corpus import stopwords
from collections import Counter

def program_judge_39(query: str, response: str) -> dict:
    """
    Judge a response by how often it repeats its non-trivial words.

    The response is tokenized, lowercased and reduced to purely alphabetic
    tokens. English stopwords are then removed; if that would leave nothing,
    the unfiltered words are used instead. Every occurrence of a word beyond
    its first counts as redundant, and the score is
    ``10 * (1 - redundant / total)`` rounded to two decimals, so a response
    with no repeated words scores 10.0.

    Args:
        query: Not used in the computation.
        response: The text to evaluate.

    Returns:
        A dict with keys ``'score'`` (float), ``'reasoning'`` (str) and
        ``'criteria'`` (str). A response with no alphabetic words scores 0.0.
        Any exception raised during evaluation is caught and reported as a
        0.0 score with ``'criteria'`` set to ``"Error occurred"``.
    """
    criteria = "Response Verbosity/Redundancy"

    def no_words_result() -> dict:
        # Fresh dict per call so callers can mutate the result safely.
        return {
            'score': 0.0,
            'reasoning': "Response is empty or contains no valid words.",
            'criteria': criteria
        }

    # Blank text can never produce a word, so answer without touching the
    # tokenizer (which needs its data files to be installed).
    if isinstance(response, str) and not response.strip():
        return no_words_result()

    try:
        # Tokenize and normalize; punctuation and numeric tokens are dropped.
        words = [w.lower() for w in word_tokenize(response) if w.isalpha()]

        if not words:
            return no_words_result()

        # Load the stopword list ONCE and keep it as a set. Calling
        # stopwords.words() inside the comprehension condition would reload
        # the list and scan it linearly for every single word.
        english_stopwords = set(stopwords.words('english'))
        filtered_words = [w for w in words if w not in english_stopwords]

        if not filtered_words:  # all words were stopwords
            filtered_words = words

        # Count frequencies
        word_frequencies = Counter(filtered_words)

        # Total extra occurrences beyond the first use of each word.
        redundant_count = sum(
            count - 1 for count in word_frequencies.values() if count > 1
        )

        # filtered_words is non-empty here, so the division is safe.
        redundancy_ratio = redundant_count / len(filtered_words)

        # Higher redundancy = lower score; clamp defensively at zero.
        score = max(0.0, 10 * (1 - redundancy_ratio))

        reasoning = (
            f"Response has {redundant_count} redundant words out of "
            f"{len(filtered_words)} meaningful words "
            f"(redundancy ratio: {redundancy_ratio:.2f})."
        )

        return {
            'score': round(score, 2),
            'reasoning': reasoning,
            'criteria': criteria
        }

    except Exception as e:
        # Deliberate best-effort boundary: a judge must always return a
        # verdict dict, so failures (e.g. missing NLTK data) become a zero
        # score with the error text instead of propagating.
        return {
            'score': 0.0,
            'reasoning': f"Error in evaluation: {str(e)}",
            'criteria': "Error occurred"
        }
