import glob
import json

from tqdm import tqdm

# Convert a JSON-lines metrics file into a JSONL caption manifest.
#
# Each non-blank line of ``metadata_path`` is parsed as a JSON object that
# must carry the keys "stat_data", "prompt" and "image_path".  For every
# valid record one manifest entry is written to ``json_file_path``; the
# prompt is used for both the "long_caption" and the "text" field.

json_data = []

# NOTE(review): the next three names are not used anywhere in this script;
# they are kept in case other code reads them -- confirm and remove.
short_image_descriptions = []
long_image_descriptions = []
backslash_n = "\n"

img_paths = []
prompts = []
# Number of blank lines encountered in the metrics file.
skipped_lines = 0

metadata_path = "./metrics.json"
with open(metadata_path, mode="r", encoding="utf-8") as json_file:
    for i, line in enumerate(json_file):
        stripped = line.strip()
        if not stripped:
            print(
                f"Skipping line {i} as strip did not work. "
                f"Line: {line}, strip: {stripped}"
            )
            skipped_lines += 1
            continue

        # Reset per line so the error report below never shows a stale
        # record from an earlier iteration (or an unbound name).
        record = None
        try:
            record = json.loads(stripped)
            # Records without "stat_data" are rejected, as before; dropping
            # the key also keeps the error report below free of that field.
            del record["stat_data"]
            # Read both fields before appending anything, so a record that
            # is missing one of them cannot leave the two lists with
            # different lengths and mispair every later image/prompt.
            prompt = record["prompt"]
            image_path = record["image_path"]
        except (ValueError, KeyError, TypeError) as e:
            # ValueError covers json.JSONDecodeError; TypeError covers lines
            # whose top-level JSON value is not an object.
            print(record if record is not None else stripped)
            print(e)
            continue

        prompts.append(prompt)
        img_paths.append(image_path)

# ``total`` lets tqdm show a percentage/ETA; zip() alone has no length.
for image, prompt in tqdm(zip(img_paths, prompts), total=len(img_paths)):
    json_data.append({
        "image_path": image,
        "h_div_w": 1.0,
        "long_caption": prompt,
        "long_caption_type": "caption-InternVL2.0",
        "text": prompt,
        "short_caption_type": "short caption-InternVL2.0",
    })

json_file_path = "./1.000_0001000.jsonl"
# Explicit encoding so the output does not depend on the platform default.
with open(json_file_path, "w", encoding="utf-8") as f:
    for entry in json_data:
        f.write(json.dumps(entry) + "\n")
