import json
import os
import numpy as np

from openai import OpenAI


# Configure the OpenAI client.
# The API key is read from the OPENAI_API_KEY environment variable so that no
# secret is stored in source control.
# SECURITY: a literal key was previously committed at this spot; treat it as
# leaked and revoke/rotate it with the provider.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def evaluate(question, answer, *, model="gpt-4"):
    """Ask a chat model to score a market-research answer on five dimensions.

    Builds a rubric prompt (Relevance, Insightfulness, Actionability,
    Completeness, Clarity; 1-5 each, plus a total score and a comment), sends
    it through the module-level ``client`` with ``temperature=0``, and returns
    the model's reply without parsing it.

    Args:
        question: Research theme, interpolated verbatim into the prompt.
        answer: Research output to be judged, interpolated verbatim into the
            prompt.
        model: Chat-completions model name. Defaults to ``"gpt-4"``, the value
            that used to be hard-coded.

    Returns:
        The message content of the first completion choice. The prompt asks
        for a JSON object, but the reply is returned as-is and not validated.
    """
    # Build the evaluation prompt.
    # NOTE(review): the theme/output are interpolated between the
    # "Output format example:" heading and the JSON sample, and the rubric
    # asks for per-dimension comments the sample does not show. The text is
    # intentionally left untouched so scores stay comparable with earlier
    # runs; consider restructuring it in a dedicated change.
    prompt = f"""
You are a professional market research evaluation expert. Please analyze and score the assistant’s research output of research theme based on the following dimensions:
Relevance: Does the output closely address the research topic and core question?
Insightfulness: Does it dig into consumers’ real needs, psychology, and scenarios, beyond surface-level descriptions?
Actionability: Are the findings translated into measurable, practical, and implementable conclusions or recommendations?
Completeness: Does the output cover key perspectives (e.g., product usage scenarios, brand purchase scenarios, psychological turning points)?
Clarity: Is the language clear, easy to understand, and logically structured?
For each dimension, provide:
A short comment
A score from 1 to 5 (5 = best)
A one-sentence overall evaluation of the research result
Output format example:

Research Theme: {question}
Assistant's Research Output: {answer}

{{
  "Relevance": 4,
  "Insightfulness": 5, 
  "Actionability": 3,
  "Completeness":  4,
  "Clarity": 5, 
  "Score":21, 
  "Comment": "The results are good and can support the initial decision, but further implementation plans are needed."
}}
"""
    # Call the chat model; temperature=0 keeps the scoring as repeatable as
    # the API allows.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "你是一个专业的评估助手，专门评估市场调研的结果。"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # Only the first choice is used; its text is handed back unparsed.
    return response.choices[0].message.content




def evaluate_market_with_gpt4(market_dir="output/market",
                              eval_dir="output/evaluation/market/"):
    """Score every market-research JSON file in ``market_dir`` via ``evaluate``.

    For each ``*.json`` file the ``question``, ``answer`` and ``id`` keys are
    read (``id`` falls back to the file name, the other two to ``""``), the
    answer is scored with ``evaluate(question, answer)``, and the combined
    record is written to ``<eval_dir>/market_eval_<id>.json``. No ground-truth
    field is consulted; only the answer itself is judged.

    A failure while handling one file is printed and that file is skipped, so
    a single bad input does not abort the whole batch.

    Args:
        market_dir: Directory containing the input JSON files.
        eval_dir: Directory the evaluation records are written to. It is
            created on demand the first time a record is saved.

    Returns:
        A list of the result dicts (keys ``id``, ``filename``, ``question``,
        ``answer``, ``evaluation``) for the files that were processed
        successfully, in file-name order.

    Raises:
        OSError: If ``market_dir`` cannot be listed (e.g. it does not exist).
    """
    results = []

    # os.listdir returns entries in arbitrary order; sort so the processing
    # order and the returned list are reproducible between runs.
    for filename in sorted(os.listdir(market_dir)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(market_dir, filename)

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Pull out the fields that go into the evaluation.
            answer = data.get('answer', '')
            question_id = data.get('id', filename)
            question = data.get('question', '')

            result = {
                'id': question_id,
                'filename': filename,
                'question': question,
                'answer': answer,
                'evaluation': evaluate(question, answer),
            }

            # Persist the record; the directory is created lazily so nothing
            # is written when there is no file to evaluate.
            os.makedirs(eval_dir, exist_ok=True)
            eval_path = os.path.join(eval_dir, f"market_eval_{question_id}.json")
            with open(eval_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and move on
            # to the next file instead of aborting the batch.
            print(f"处理文件 {filename} 时出错: {e}")
        else:
            print(f"文件: {filename}")
            print(f"问题: {question}")
            print(f"评估结果已保存到: {eval_path}")
            print("-" * 50)
            results.append(result)

    return results

    
if __name__ == "__main__":
    # Run the batch evaluation only when executed as a script, so that
    # importing this module (e.g. to reuse evaluate()) does not trigger API
    # calls or file writes.
    evaluate_market_with_gpt4()
