import openai
import base64
from PIL import Image
import io
import json

# Custom API base URL and API key used by the `openai` client.
# NOTE(review): the key is hard-coded in source; consider loading it from the
# environment or a secrets store so real credentials are never committed.
openai.api_base = "https://xxx/v1"
openai.api_key = "sk-xxxxx"

# Prompt template sent alongside each image. The single "{}" placeholder is
# filled with the image's generation prompt via str.format(). The model is
# asked to answer with one "Category:score" line per criterion.
# NOTE(review): the heading "Style Matching degree" below differs from the
# "Style Matching" key used when the scores are aggregated later in this file.
base_prompt='''
This is the image generated by stable diffusion, and its prompt is "{}".
Please give the specific fractions (one decimal place) respectively as required below:
Scoring requirements:
Overall Quality:
Is the picture clear and complete, without obvious blurriness, noise or errors?
Is the color natural and harmonious, and does it conform to the theme and scene?

Detail Richness:
Are there rich details in the subject and background of the picture?
Are the details realistic and in line with the logic of reality (if the theme is a real scene)?

Theme Consistency
Does the picture accurately reflect the given theme or description?
Is there any content that deviates from the topic or does not meet expectations?

Creativity & Uniqueness:
Does the picture demonstrate a unique creativity or perspective?
Are there any novel elements or composition methods?

Style Matching degree
Does the picture conform to the specified style (such as realistic, cartoon, oil painting, etc.)?
Is it consistent with the target style?

Emotional Expression
Can pictures convey certain emotions or atmospheres?
Does it resonate with the audience?

Technical Performance:
Does the picture demonstrate good generation techniques, such as light and shadow processing, perspective relationships, etc.?
Are there any obvious generation errors or flaws?

Scoring criteria:
10 points: Perfect, almost impeccable, exceeding expectations.
8-9 points: Excellent, with a few minor flaws, but overall outstanding.
6-7 points: Good, in line with expectations, but with obvious room for improvement.
4-5 points: Average, with many issues that need improvement.
2-3 points: Poor, not in line with expectations, and requires significant adjustments.
1 point: Extremely poor, almost unacceptable.

The final format is:

xxx: Score
xxx: Score
...
Only output the category and the corresponding score. Do not output any other content. One per line
For example
Overall Quality:8.0
Detail Richness:7.6
Theme Consistency:9.3
...
'''

# Name of the model to query.
model_name = "qwen-vl-max-2025-04-02"

# Directory that holds output.json and the image files it refers to.
folder_path = "out-lora"

# Load the manifest: a list of records, each read later via its "image_path"
# and "prompt" keys. The encoding is pinned to UTF-8 (the JSON interchange
# encoding) so that decoding does not depend on the platform's locale default.
with open(f"{folder_path}/output.json", 'r', encoding="utf-8") as f:
    outputs = json.load(f)

# Containers for the run: one entry per scored image, plus a running total
# and a sample count for every scoring category.
results = []

_score_categories = (
    "Overall Quality",
    "Detail Richness",
    "Theme Consistency",
    "Creativity & Uniqueness",
    "Style Matching",
    "Emotional Expression",
    "Technical Performance",
)

# Two independent dicts sharing the same keys, all starting at zero.
category_sums = dict.fromkeys(_score_categories, 0)
category_counts = dict.fromkeys(_score_categories, 0)

import time

def _parse_scores(response_text, known_categories):
    """Extract ``Category: score`` pairs from a model reply.

    Lines without a colon, or whose value is not a number, are ignored so
    that stray text in the reply does not invalidate the real scores.
    A category that merely starts with a known name (for example
    "Style Matching degree" for "Style Matching") is mapped onto that name.

    Args:
        response_text: Raw text returned by the model.
        known_categories: Iterable of canonical category names.

    Returns:
        dict mapping category name to its float score; empty if nothing
        could be parsed.
    """
    canonical = list(known_categories)
    parsed = {}
    for line in response_text.splitlines():
        label, sep, value = line.partition(":")
        if not sep:
            continue
        try:
            score = float(value.strip())
        except ValueError:
            continue
        label = label.strip()
        lowered = label.casefold()
        for known in canonical:
            if lowered.startswith(known.casefold()):
                label = known
                break
        parsed[label] = score
    return parsed


max_attempts = 3  # maximum number of API attempts per image

for item in outputs:
    image_path = f"{folder_path}/{item['image_path']}"
    prompt = item["prompt"]

    # Read the image and base64-encode it for the data URL below. An
    # unreadable file is reported and skipped so that one bad record does not
    # discard the results already collected.
    try:
        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    except OSError as e:
        print(f"Skipping prompt '{prompt}': cannot read image '{image_path}': {e}")
        continue

    # Build the chat message: the scoring instructions plus the image.
    # NOTE(review): the MIME type is always declared as image/jpeg regardless
    # of the actual file type - confirm the endpoint tolerates this.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": base_prompt.format(prompt)
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    }
                }
            ]
        }
    ]

    for attempt in range(1, max_attempts + 1):
        try:
            # Call the API.
            # NOTE(review): in the legacy (pre-1.0) openai SDK the HTTP
            # request timeout is presumably `request_timeout`; confirm that
            # `timeout=20` has the intended effect.
            response = openai.ChatCompletion.create(
                model=model_name,
                temperature=0.2,  # low temperature
                top_p=0.7,        # low top-p
                frequency_penalty=0.5,
                presence_penalty=0.5,
                n=1,
                stream=False,
                messages=messages,
                timeout=20
            )

            # Parse the whole reply BEFORE touching the running totals, so a
            # failure here cannot leave partial sums that a retry would then
            # count a second time.
            response_content = response.choices[0].message.content
            scores = _parse_scores(response_content, category_sums)
            if not scores:
                raise ValueError("no parsable scores in model response")
        except Exception as e:
            print(f"Attempt {attempt} failed for prompt '{prompt}': {e}")
            if attempt < max_attempts:
                time.sleep(2)  # brief pause before retrying
            continue

        # Only categories that are tracked contribute to the averages;
        # anything else stays visible in the per-image record.
        for category, score in scores.items():
            if category in category_sums:
                category_sums[category] += score
                category_counts[category] += 1

        # Record the result for this image.
        results.append({
            "prompt": prompt,
            "response": response_content,
            "scores": scores
        })
        print(prompt, ":\n", scores, '\n')
        break
    else:
        print(f"Failed to process prompt '{prompt}' after {max_attempts} attempts.")

# Compute the mean score per category. A category that never received a
# score gets None instead of raising ZeroDivisionError, so the collected
# results are still written out when some or all requests failed.
category_averages = {
    category: (
        category_sums[category] / category_counts[category]
        if category_counts.get(category)
        else None
    )
    for category in category_sums
}

# Print the averages.
print("\nAverage Scores:")
for category, average in category_averages.items():
    if average is None:
        print(f"{category}: n/a")
    else:
        print(f"{category}: {average:.2f}")

# Save the per-image results and the averages as JSON (None becomes null).
output_data = {
    "results": results,
    "category_averages": category_averages
}

with open(f"{folder_path}/gpt-out.json", "w", encoding="utf-8") as json_file:
    json.dump(output_data, json_file, indent=4)