# Import the SDK and the client module
# from label_studio_sdk.client import LabelStudio
import json
import copy
import sys, os
import shutil
from loguru import logger
import random

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gpt_api.unigpt import GPT
from tqdm import tqdm
from gpt_tools.deepseek_tools import DeepseekChat

# import label_studio_tools.process_data
# from label_studio_tools.process_data import process_origin_tasks_jsons as process_origin_tasks_jsons
from common_prompts import translate as translate
from common_prompts import LABEL_STUDIO_URL as LABEL_STUDIO_URL
from common_prompts import API_KEY as API_KEY


# Template for one output task JSON. It is deep-copied per dataset entry
# before being filled in, so this module-level object itself stays empty.
#
# Each element appended to "image_captions" is a dict with the keys:
#   "caption", "image", "image_name", "caption_translation"
struct_data = dict(
    data=dict(
        file_name="",
        image_captions=[],
    ),
)

if __name__ == "__main__":
    # Build one task JSON per entry of ``data_info.json`` and copy the
    # referenced images next to it.
    #
    # For every entry:
    #   1. read the caption text file and translate it with ``translate``;
    #   2. record the primary image together with caption + translation;
    #   3. record a random subset of the additional images (without captions);
    #   4. copy the primary image and *all* additional images to ``output_path``;
    #   5. write the task JSON to ``output_json_dir/<title>.json``.
    # Entries whose JSON already exists are skipped; entries that raise are
    # logged and collected in ``failed_filenames``, which is printed at the end.
    qwen_chat = GPT(model="qwen3_32b", vendor="", stream=False, temperature=0.2)

    file_path = "data/chartdata/english/statista"
    output_path = "data/chartQA/label_studio_formal/data_to_annotate/statista"
    # Not referenced below: image destinations are derived from each entry's
    # own relative path under ``output_path``.
    output_images_path = os.path.join(output_path, "images")
    output_json_dir = os.path.join(output_path, "jsons")

    label_studio_image_prefix = "/data/local-files/?d=statista/"

    data_info_json_path = os.path.join(file_path, "data_info.json")

    # Additional images are picked by position: between 1 and
    # ``max_extra_images`` positions are drawn from ``candidate_positions``.
    # Images at positions outside that range are copied but never listed.
    candidate_positions = range(12)
    max_extra_images = 6

    # Load the index up front so the file handle is not held open (and not
    # shadowed by the output handle) while the entries are processed.
    with open(data_info_json_path, "r", encoding="utf-8") as info_file:
        data_info_jsons = json.load(info_file)

    # Without this the first JSON write fails on a fresh output tree.
    os.makedirs(output_json_dir, exist_ok=True)

    failed_filenames = []
    for data_info in data_info_jsons:
        title = data_info["data_info"]["title"]
        ls_json = copy.deepcopy(struct_data)
        ls_json["data"]["file_name"] = title
        # NOTE(review): the title is used verbatim as a file name; a title
        # containing a path separator would point into a sub-directory.
        output_json_path = os.path.join(output_json_dir, title + ".json")

        if os.path.exists(output_json_path):
            logger.info(f"file exist: {output_json_path}")
            continue

        try:
            data_path = data_info["data_path"]
            prime_image = data_path["prime_image"]

            # The caption is the text file with every line stripped and the
            # lines concatenated without a separator.
            with open(os.path.join(file_path, data_path["text"]), "r", encoding="utf-8") as txt:
                caption = "".join(line.strip() for line in txt)

            ls_json["data"]["image_captions"].append(
                {
                    "caption": caption,
                    "image": label_studio_image_prefix + prime_image,
                    "image_name": os.path.basename(prime_image),
                    "caption_translation": translate(qwen_chat, caption),
                }
            )

            prime_destination = os.path.join(output_path, prime_image)
            # shutil.copy does not create missing parent directories.
            os.makedirs(os.path.dirname(prime_destination), exist_ok=True)
            shutil.copy(os.path.join(file_path, prime_image), prime_destination)

            extra_count = random.randint(1, max_extra_images)
            chosen_positions = set(random.sample(candidate_positions, extra_count))
            for position, image_name in enumerate(data_path["image"]):
                if position in chosen_positions:
                    ls_json["data"]["image_captions"].append(
                        {
                            "caption": "",
                            "image": label_studio_image_prefix + image_name,
                            "image_name": os.path.basename(image_name),
                            "caption_translation": "",
                        }
                    )
                # Every additional image is copied, whether or not it was
                # chosen for the task JSON.
                image_destination = os.path.join(output_path, image_name)
                os.makedirs(os.path.dirname(image_destination), exist_ok=True)
                shutil.copy(os.path.join(file_path, image_name), image_destination)

            # Written last, so an existing JSON implies the entry completed
            # and the skip check above is safe on a re-run.
            with open(output_json_path, "w", encoding="utf-8") as out_file:
                json.dump(ls_json, out_file, ensure_ascii=False, indent=4)
            logger.info(f"saved file: {output_json_path}")
        except Exception:
            # Best-effort batch: keep going, but record why the entry failed
            # instead of silently dropping the error.
            logger.exception(f"failed to process: {title}")
            failed_filenames.append(title)

    print(failed_filenames)
