import requests
# from load_dataset import load_doi
import json
import os
from datetime import datetime
from datasets import load_dataset
import pandas as pd

# Credentials and endpoint for the workflow API. Both values can be overridden
# through environment variables so they do not have to be edited into the
# source; the literals below remain the defaults when the variables are unset.
# NOTE(review): an API key should not live in version control - consider
# rotating this one and removing the fallback literal.
API_KEY = os.environ.get('TRANSLATION_API_KEY', 'app-QDhLpLnGqI85IHecKdZLoFFf')
# Adjust host, port and path to match the actual deployment.
API_URL = os.environ.get('TRANSLATION_API_URL', 'http://localhost/v1/workflows/run')

# HTTP headers sent with every request: bearer-token auth and a JSON body.
headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
}

def load_text_file(file_path='templates/dataset/translation/en-zh/test-00000-of-00001.parquet'):
    """Load English/Chinese sentence pairs from a parquet file.

    The file is expected to contain a ``translation`` column whose cells are
    mappings with ``"en"`` and ``"zh"`` keys.

    Args:
        file_path: Path of the parquet file to read. Defaults to the en-zh
            test split used by this script.

    Returns:
        A list of ``{'source': <en text>, 'target': <zh text>}`` dicts, in
        file order. An empty list is returned (and a message printed) when
        the file does not exist.
    """
    if not os.path.exists(file_path):
        print(f"文件 {file_path} 不存在")
        return []

    df = pd.read_parquet(file_path)

    # Iterate the column directly instead of building a row Series per index.
    # NOTE: an earlier variant loaded the Hugging Face dataset
    # "ngia/translation-en-fr" (columns english_src / french_tgt) instead.
    return [
        {'source': pair["en"], 'target': pair["zh"]}
        for pair in df["translation"]
    ]



def _write_translation_log(index, source_text, target_text, translated_text, raw_response,
                           output_dir='output/translation/en-zh'):
    """Write one log file holding the source, reference, model output and raw response."""
    os.makedirs(output_dir, exist_ok=True)
    log_filename = f'{output_dir}/translation_log_{index}.txt'
    with open(log_filename, 'w', encoding='utf-8') as f:
        f.write(f"序号: {index}\n")
        f.write(f"源文本: {source_text}\n")
        f.write(f"目标译文: {target_text}\n")
        f.write(f"模型译文: {translated_text}\n")
        f.write("\n完整响应:\n")
        f.write(raw_response)


def run_translation(translation_dataset, start=51, end=200, timeout=300):
    """Send a slice of the dataset to the workflow API and log each result.

    For every item in ``translation_dataset[start:end]`` the English source
    text is posted to ``API_URL`` in blocking mode. On an HTTP 200 response
    the raw body is printed and a log file
    ``output/translation/en-zh/translation_log_<index>.txt`` is written.

    Args:
        translation_dataset: Sequence of dicts with ``"source"`` and
            ``"target"`` keys.
        start: Index of the first item to process (inclusive).
        end: Index at which to stop (exclusive). Clamped to the dataset
            length so a short or empty dataset no longer raises IndexError.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Failed requests and non-200 responses are reported on stdout
        and skipped so the remaining items are still processed.
    """
    stop = min(end, len(translation_dataset))
    for i in range(max(start, 0), stop):
        item = translation_dataset[i]
        source_text = item["source"]
        target_text = item["target"]

        # Workflow inputs; extend this dict if the application defines more.
        payload = {
            "inputs": {
                "target_lang": "Chinese",
                "source_text": source_text,
                "source_lang": "English",
            },
            "response_mode": "blocking",  # or "streaming"
            "user": "abc-123",            # optional user ID
        }

        try:
            response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException as exc:
            # A single network failure should not abort the whole batch.
            print(f"第 {i} 条请求失败: {exc}")
            continue

        if response.status_code != 200:
            print(f"第 {i} 条运行失败，状态码: {response.status_code}")
            print(response.text)
            continue

        print("运行成功，原始返回内容如下：")
        print(response.text)  # print the raw body first

        try:
            data = json.loads(response.text)
            translated_text = data["data"]["outputs"]["output"]
        except (ValueError, KeyError, TypeError) as exc:
            # Body is not JSON or lacks the expected output field; keep the
            # raw response in the log so the item can still be inspected.
            print(f"第 {i} 条响应解析失败: {exc}")
            translated_text = None

        _write_translation_log(i, source_text, target_text, translated_text, response.text)


if __name__ == "__main__":
    # Script entry point: load the sentence pairs, then run them through the API.
    run_translation(load_text_file())




