# translate.py
import os
import sys
import re
from config import API_SECRET_KEY, BASE_URL, LOCAL_MODEL_PATH, DEVICE, OLLAMA_MODEL
from utils import get_gpt_pipeline, extract_content  
import time
import concurrent.futures
from functools import partial
from typing import List, Optional
from openai import OpenAI  

# Module-level OpenAI SDK client, constructed once at import time.
# The API key and base URL are read from the project's config module;
# translate_single_segment uses this client for its "api" model type.
openai_client = OpenAI(
    api_key=API_SECRET_KEY,
    base_url=BASE_URL
)


def preprocess_and_segment_text(
    classical_text: str,
    max_segment_length: int = 2000,
    lookback: int = 100,
) -> List[str]:
    """Strip decorative marks and split the text into bounded segments.

    The characters ``◎■※【】`` are removed first. The cleaned text is then
    cut into consecutive pieces of at most ``max_segment_length`` characters.
    Each cut is placed just after the last punctuation mark found in the
    final ``lookback`` characters of the window; when no punctuation is found
    there, the text is cut hard at ``max_segment_length``.

    Args:
        classical_text: Raw source text.
        max_segment_length: Upper bound on the length of every segment.
            Must be positive.
        lookback: How many characters before the limit are searched for a
            punctuation mark to break on.

    Returns:
        The non-empty segments in order; joining them reproduces the cleaned
        text. Empty (or fully stripped) input yields an empty list.

    Raises:
        ValueError: If ``max_segment_length`` is not positive.
    """
    if max_segment_length <= 0:
        raise ValueError("max_segment_length must be a positive integer")

    cleaned_text = re.sub(r"[◎■※【】]", "", classical_text)
    break_chars = "。！？」》；，"

    # Clamp the search window so it never reaches before the segment start;
    # otherwise negative indices would wrap around to the end of the string.
    window = max(0, min(lookback, max_segment_length))

    segments: List[str] = []
    total = len(cleaned_text)
    start = 0

    while total - start > max_segment_length:
        # Exclusive end of the longest segment allowed from `start`.
        limit = start + max_segment_length
        split_pos = limit  # hard cut when no punctuation is found

        # Scan backwards from the last character that still fits, so that a
        # segment including its closing punctuation never exceeds the limit.
        for pos in range(limit - 1, limit - 1 - window, -1):
            if cleaned_text[pos] in break_chars:
                split_pos = pos + 1
                break

        segments.append(cleaned_text[start:split_pos])
        start = split_pos

    # Only keep the remainder when there is one, so callers never receive an
    # empty segment to translate.
    if start < total:
        segments.append(cleaned_text[start:])
    return segments

def create_translation_prompt(segment_text: str) -> str:
    """Build the few-shot translation prompt for one classical Chinese segment.

    The prompt (written in Chinese) asks the model to translate the segment,
    quoted inside 「」, into English, lists the translation guidelines, shows
    six worked examples, and instructs the model to answer on a line that
    starts with ``#english:``.

    Args:
        segment_text: The classical Chinese text to embed in the prompt.

    Returns:
        The complete prompt string.
    """
    # Every few-shot example uses the same "文言文:" / "#english:" label pair
    # so the model sees a consistent input/output pattern.
    return f"""
    你是一位精通中国古典文学和英文的翻译专家。请执行以下翻译任务：

    1. 首先，将以下文言文准确翻译成现代汉语（大白话），要求**意译为主，兼顾语义与语气**, 然后，将上述现代汉语翻译成**流畅自然、符合英语表达习惯的英文句子**。
    「{segment_text}」

    2. 在翻译过程中，请注意以下事项：
    - **意译优先**：以传达原文含义为核心，避免机械直译；
    - 保留原文的所有含义、语气、细节与情感色彩；
    - 如有**典故或历史背景**，应根据其内涵进行意译，并在括号中提供简短解释；
    - 对于文化特定的概念，在括号中提供简短解释；
    - 不要添加额外的评论、分析或解释，只给出针对原文翻译好的内容；
    - 确保翻译结果符合学术标准，语言准确且地道；
    - 避免在英文翻译中夹杂中文词汇。

    【Few-Shot 示例】：

    示例一（哲理类 / 论语风格）：
    文言文: 子曰：“温故而知新，可以为师矣。”
    #english: The Master said, "Those who reflect on the past and gain new understanding are fit to teach others."

    示例二（骈文体 / 辞章优美）：
    文言文: 山光悦鸟，水色空蒙。游目骋怀，足以极视听之娱。
    #english: The mountain light delights the birds; the waters shimmer with a soft haze. Wandering with open eyes and an unburdened heart, one may fully enjoy the pleasures of sight and sound.

    示例三（史传体 / 史记风格）：
    文言文: 项羽力能扛鼎，才气过人，然终败于垓下。
    #english: Xiang Yu possessed the strength to lift a cauldron and the talent to outshine all others, yet he was ultimately defeated at Gaixia.

    示例四（寓言体 / 先秦诸子风格）：
    文言文: 守株待兔，冀复得兔，兔不可复得，而身为宋国笑。
    #english: He waited by the tree stump, hoping another rabbit would come running — but none ever did, and he became the laughingstock of the State of Song.

    示例五（用典 / 借古喻今）：
    文言文: 愿效老生之献策，如姜尚之垂纶。
    #english: I wish to offer my counsel like an old scholar, just as Jiang Shang (a legendary statesman who gained recognition only in old age) cast his line in still waters, waiting for fate to call him into service.

    示例六（抒情议论结合 / 唐宋散文风格）：
    文言文: 不以物喜，不以己悲。居庙堂之高则忧其民，处江湖之远则忧其君。
    #english: He does not rejoice over external things, nor grieve over personal misfortunes. When in high office, he worries for the people; when far from court, he worries for his ruler.

    请严格按照以下格式输出你的翻译的英文结果：

    #english: [你的英文翻译结果]
    """

def extract_english_result(translation_response: str) -> Optional[str]:
    """Pull the English translation out of a raw model response.

    Args:
        translation_response: Text returned by the model; may be empty or None.

    Returns:
        The whitespace-stripped text that follows the first ``#english:``
        marker (up to the next such marker, if any), or ``None`` when the
        response is empty or contains no marker.
    """
    marker = "#english:"

    # A single guard covers both "nothing returned" and "marker missing".
    if not translation_response or marker not in translation_response:
        return None

    # Index 1 is the text between the first marker and the next one (or the
    # end of the response); anything after a second marker is not needed.
    pieces = translation_response.split(marker, 2)
    return pieces[1].strip()
def translate_single_segment(
    segment_text: str,
    model_type: str = "api",
    model_name: str = "deepseek-chat",
    max_tokens: int = 2000
) -> Optional[str]:
    """Translate one segment and return the extracted English text.

    Args:
        segment_text: The classical Chinese segment to translate.
        model_type: ``"api"`` sends the prompt through the module-level
            ``openai_client``; any other value is passed on to
            ``get_gpt_pipeline``.
        model_name: Model identifier given to whichever backend is used.
        max_tokens: Forwarded to ``get_gpt_pipeline`` only; the API request
            is sent without a token limit.

    Returns:
        The result of ``extract_english_result`` on the model output, or
        ``None`` if any exception is raised while calling the model or
        parsing its answer (the error is printed).
    """
    request_text = create_translation_prompt(segment_text)

    try:
        if model_type != "api":
            # Non-API backends go through the project's pipeline helper.
            from utils import get_gpt_pipeline
            raw_output = get_gpt_pipeline(
                text=request_text,
                model_id=model_name,
                max_tokens=max_tokens,
                model_type=model_type
            )
        else:
            chat_messages = [
                {"role": "system", "content": "You are a professional translator"},
                {"role": "user", "content": request_text}
            ]
            completion = openai_client.chat.completions.create(
                model=model_name,
                messages=chat_messages,
            )
            raw_output = completion.choices[0].message.content

        return extract_english_result(raw_output)

    except Exception as e:
        # Best-effort: report the failure and let the caller substitute a
        # placeholder for this segment.
        print(f"Errors in classical Chinese translation: {e}")
        return None


def serial_translate_segments(
    segments: List[str],
    model_type: str = "api",
    model_name: str = "deepseek-chat",
    max_tokens: int = 2000
) -> str:
    """Translate the segments one after another and join the results.

    Progress is printed for every segment. A segment whose translation comes
    back as ``None`` or raises an exception is replaced by a
    ``[Translation of paragraph N failed]`` placeholder.

    Args:
        segments: Text segments in reading order.
        model_type: Backend selector passed to ``translate_single_segment``.
        model_name: Model identifier passed to ``translate_single_segment``.
        max_tokens: Token limit passed to ``translate_single_segment``.

    Returns:
        All translations (or placeholders) joined with single spaces; an
        empty string when ``segments`` is empty.
    """
    translated = []

    for number, text in enumerate(segments, start=1):
        print(f"Translating segment {number}/{len(segments)} ...")
        try:
            translation = translate_single_segment(
                text,
                model_type,
                model_name,
                max_tokens
            )
            if translation is None:
                print(f"Warning: Translation of paragraph {number} failed, using placeholder")
                translation = f"[Translation of paragraph {number} failed]"
        except Exception as e:
            print(f"Error: Exception occurred while translating segment {number}: {str(e)}")
            translation = f"[Translation of paragraph {number} failed]"

        # Exactly one entry per segment keeps the output aligned with input.
        translated.append(translation)

    return " ".join(translated)


def classical_chinese_to_english(
    classical_text: str,
    model_type: str = "api",
    model_name: str = "deepseek-chat",
    max_tokens: int = 2000
) -> Optional[str]:
    """Translate a classical Chinese text into English.

    The text is cleaned and segmented with ``preprocess_and_segment_text``
    (using its default segment length) and the segments are translated in
    order by ``serial_translate_segments``.

    Args:
        classical_text: The full source text.
        model_type: Backend selector forwarded to the segment translator.
        model_name: Model identifier forwarded to the segment translator.
        max_tokens: Token limit forwarded to the segment translator.

    Returns:
        The joined translation string produced by
        ``serial_translate_segments``.
    """
    text_pieces = preprocess_and_segment_text(classical_text)
    english_text = serial_translate_segments(
        text_pieces,
        model_type=model_type,
        model_name=model_name,
        max_tokens=max_tokens,
    )
    return english_text


