def get_prompt(function_name: str, criterion: str) -> str:
    """Build the prompt asking an LLM to write a response-judging function.

    Args:
        function_name: Name the generated judge function must have; it is
            interpolated into the requirements and the example skeleton.
        criterion: Judging criterion, inserted in bold into the prompt.

    Returns:
        The fully formatted prompt text.
    """
    # Literal braces in the example code are doubled ({{ }}) so the f-string
    # emits them verbatim instead of treating them as replacement fields.
    # The example skeleton must itself be valid Python, since the model is
    # likely to copy it.
    used_prompt = f"""Create a Python function that serves as a judge to evaluate LLM-generated responses.
    
    Requirements:
    1. Function name must be: {function_name}
    2. Function signature: {function_name}(query: str, response: str) -> dict
    3. Return a dictionary with keys: 'score' (float 0-10), 'reasoning' (str), 'criteria' (str)
    4. Score scale: 0-10 where 10 is highest quality, 0 is lowest quality
    5. Higher scores indicate better/higher quality responses
    6. Implement a unique judging strategy
    7. Include comprehensive error handling
    8. Add a docstring with a one-sentence description of the judging logic
    
    Criterion to use: **{criterion}**
    The function can use third-party specialized model or other python library.
    
    Example structure:
    ```python
    def {function_name}(query: str, response: str) -> dict:
        \"\"\"Judges responses based on [specific criterion].\"\"\"
        try:
            # Your judging logic here
            score = 0.0  # 0-10 scale where 10 = highest quality, 0 = lowest quality
            reasoning = "Detailed explanation of why this score was given"
            criteria = "What specific aspect this judge evaluates"
            
            return {{
                'score': score,
                'reasoning': reasoning,
                'criteria': criteria
            }}
        except Exception as e:
            return {{
                'score': 0.0,
                'reasoning': f"Error in evaluation: {{str(e)}}",
                'criteria': "Error occurred"
            }}
    ```

    Do not use google search to crawl.
    
    Generate a complete, unique judge function that assigns higher scores to higher quality responses:"""

    return used_prompt
