import json
import os
import numpy as np

from openai import OpenAI


# Configure the OpenAI client.
# The API key is read from the OPENAI_API_KEY environment variable instead of
# being hard-coded, so the secret never ends up in source control.
# NOTE(review): the key that was previously committed on this line must be
# treated as leaked and revoked.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def evaluate(question, answer, model="gpt-4"):
    """Ask a chat model to score an industry analysis and return its raw reply.

    Args:
        question: The industry / research question the analysis addresses.
        answer: The assistant's industry analysis text to be scored.
        model: Name of the chat-completion model to use. Defaults to
            ``"gpt-4"``.

    Returns:
        The message content of the first completion choice, returned as-is.
        The prompt asks for a JSON object, but the reply is neither parsed
        nor validated here.
    """
    # Build the evaluation prompt. The material to be scored comes first and
    # the "Output format example" label sits directly above the JSON sample,
    # so the model cannot mistake the real input for part of the example.
    prompt = f"""
You are a professional industry research evaluator. Please analyze and score the quality of the assistant’s industry analysis output based on the following dimensions:
Relevance: Does the analysis stay focused on the given industry and research question?
Depth of Analysis: Does it go beyond surface-level description to include trends, drivers, risks, and opportunities?
Use of Evidence: Does the analysis reference data, examples, or case studies to support its claims?
Actionability: Does it provide insights that can inform business or strategic decisions?
Clarity & Structure: Is the analysis well-structured, logically coherent, and easy to understand?
Completeness: Does the analysis cover all key perspectives (market size, competition, user behavior, risks, opportunities)?
For each dimension, provide:
A score from 1 to 5 (5 = best)
A one-sentence overall evaluation of the research result

Industry: {question}
Assistant's Industry Analysis: {answer}

Output format example:
{{
  "Relevance": 4,
  "Depth of Analysis": 5, 
  "Use of Evidence": 3,
  "Actionability":  4,
  "Clarity & Structure": 5, 
  "Completeness": 4,
  "Score": 25, 
  "Comment": "A strong analysis with actionable insights, though it could be improved with more supporting evidence."
}}
"""
    # Send the prompt to the chat model; temperature=0 is passed to the API.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "你是一个专业的文本评估助手，专门评估行业分析的结果"},
            {"role": "user", "content": prompt},
        ],
        temperature=0,
    )
    # Return the text of the first choice unchanged.
    return response.choices[0].message.content




def evaluate_industry_with_gpt4(industry_dir="output/industry"):
    """Score every industry-analysis JSON file in a directory.

    For each ``*.json`` file in ``industry_dir`` the function reads the
    ``question``, ``answer`` and ``id`` fields, passes the question and
    answer to ``evaluate``, and writes the outcome to
    ``output/evaluation/industry/industry_eval_<id>.json``.

    A failure while handling one file (unreadable file, malformed JSON,
    API error, ...) is printed and the remaining files are still processed.

    Args:
        industry_dir: Directory containing the input JSON files.

    Returns:
        A list with one dict per successfully evaluated file, holding the
        keys ``id``, ``filename``, ``question``, ``answer`` and
        ``evaluation``. Empty when nothing was evaluated.

    Raises:
        OSError: If ``industry_dir`` cannot be listed (for example, it does
            not exist).
    """
    eval_dir = 'output/evaluation/industry/'
    results = []

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(industry_dir)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(industry_dir, filename)

        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # Pull out the fields to evaluate. When the file has no 'id', the
            # file name (including its .json extension) is used instead.
            answer = data.get('answer', '')
            question_id = data.get('id', filename)
            question = data.get('question', '')

            gpt_response = evaluate(question, answer)

            result = {
                'id': question_id,
                'filename': filename,
                'question': question,
                'answer': answer,
                'evaluation': gpt_response,
            }

            # Persist the evaluation under output/evaluation/industry/.
            # The directory is created only once there is something to write.
            os.makedirs(eval_dir, exist_ok=True)
            eval_filename = f"industry_eval_{question_id}.json"
            eval_path = os.path.join(eval_dir, eval_filename)

            with open(eval_path, 'w', encoding='utf-8') as f:
                json.dump(result, f, ensure_ascii=False, indent=2)

            print(f"文件: {filename}")
            print(f"问题: {question}")
            print(f"评估结果已保存到: {eval_path}")
            print("-" * 50)

            results.append(result)

        except Exception as e:
            # Best-effort batch run: report the failure and move on.
            print(f"处理文件 {filename} 时出错: {e}")

    return results

    
if __name__ == "__main__":
    # Run the batch evaluation only when this file is executed as a script,
    # so that importing the module does not trigger API calls or file writes.
    evaluate_industry_with_gpt4()
