import requests
# from load_dataset import load_doi
import json
import os
from datetime import datetime

# Workflow API credentials and endpoint. Both values can be overridden through
# environment variables so that a real key does not have to live in the source
# tree; the literals are kept only as backward-compatible defaults.
# NOTE(review): the default key below is a secret committed to source control —
# rotate it and supply DIFY_API_KEY from the environment instead.
API_KEY = os.environ.get('DIFY_API_KEY', 'app-BDMvpOVNv9nvceXTuwGI1FqD')
# Adjust host, port and path to match the actual deployment.
API_URL = os.environ.get('DIFY_API_URL', 'http://localhost/v1/workflows/run')

# Headers sent with every workflow request: bearer-token auth, JSON body.
headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
}


def load_doi(file_path: str = "templates/dataset/arxiv.json", max_count: int = 100) -> list:
    """Read arXiv identifiers from a JSON Lines metadata file.

    Each non-blank line of the file is parsed as one JSON object and the
    value of its ``"id"`` key is collected. Despite the function name, the
    collected values are whatever is stored under ``"id"`` (used by the
    caller as arXiv ids), not DOIs.

    Args:
        file_path: Path of the JSON Lines file to read. Defaults to the
            location the script has always used.
        max_count: Maximum number of identifiers to return. Values of zero
            or less yield an empty list.

    Returns:
        A list with at most ``max_count`` identifiers, in file order.
        Blank lines and records without an ``"id"`` are skipped and do not
        count towards the limit.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    doi_list = []
    if max_count <= 0:
        return doi_list

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF),
                # which json.loads would otherwise reject.
                continue
            arxiv_id = json.loads(line).get("id")
            if arxiv_id is None:
                # A record without an id cannot be turned into a paper URL.
                continue
            doi_list.append(arxiv_id)
            if len(doi_list) >= max_count:
                break
    return doi_list

doi_list = load_doi()
# ====== Main input: choose exactly one of `file` or `url` ======

# Option 1: upload a document or image (remote-URL transfer)
# inputs = {
#     "file": [
#         {
#             "transfer_method": "remote_url",
#             "url": "https://cloud.dify.ai/logo/logo-site.png",  # may be an image or a document URL
#             "type": "image"  # or "document"
#         }
#     ],
#     # "instruction": "Please analyze the structure and main content of this document"  # optional, add as needed
# }

# Directory that receives one JSON log file per successful workflow run.
output_dir = "output/KnowGraph"
# (connect, read) timeout in seconds, so one stalled request cannot hang the
# whole batch. The read timeout is deliberately generous because the request
# uses blocking mode; NOTE(review): tune it to the workflow's real runtime.
request_timeout = (10, 600)

for doi in doi_list:
    paper_url = f"https://arxiv.org/pdf/{doi}"
    # Option 2: web page URL
    inputs = {
        "url": paper_url,
        # "instruction": "Please extract the main points of this web page"  # optional
    }

    # ====== Keep only the inputs you need; comment out the rest ======

    payload = {
        "inputs": inputs,
        "response_mode": "blocking",  # blocking mode is recommended
        "user": "abc-123"
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=request_timeout
        )
    except requests.RequestException as e:
        # A transport failure for one paper should not abort the remaining ones.
        print("运行失败，请求异常：", e)
        continue

    if response.status_code != 200:
        print("运行失败，状态码：", response.status_code)
        print(response.text)
        continue

    print("运行成功，原始返回内容如下：")
    try:
        data = response.json()
    except ValueError:
        # Only a genuine JSON decoding problem is reported as invalid JSON;
        # requests' JSON decode errors are ValueError subclasses.
        print("返回内容不是合法的JSON，原始内容如下：")
        print(response.text)
        continue

    # Record which paper produced this response (only possible for objects).
    if isinstance(data, dict):
        data["paper_url"] = paper_url

    # Persist the response. Microseconds are part of the timestamp so that two
    # responses arriving within the same second do not overwrite each other.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    log_file = os.path.join(output_dir, f"knowgraph_log_{timestamp}.json")
    try:
        os.makedirs(output_dir, exist_ok=True)
        with open(log_file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
    except OSError as e:
        # Report disk problems as what they are instead of as "invalid JSON".
        print(f"日志保存失败: {log_file} ({e})")
    else:
        print(f"数据已保存到: {log_file}")

    # Print only the final output when the response exposes one.
    # NOTE(review): the workflow API may nest `outputs` under a `data` key —
    # confirm whether this top-level check is the intended one.
    if isinstance(data, dict) and 'outputs' in data:
        print("最终输出：")
        print(data['outputs'])
    else:
        print(data)
