import json

input_file = "tacos_train.json"
output_file = "formatted_tacos_train.json"

# Fallbacks used when a video entry lacks the corresponding field.
DEFAULT_FPS = 30
DEFAULT_NUM_FRAMES = 1


def _is_number(value):
    """Return True for real int/float values (bools are rejected)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def build_samples(video_id, content):
    """Build the grounding samples for a single video entry.

    Args:
        video_id: Key of the entry in the source JSON; written unchanged to
            the "video" field (no file extension is appended).
        content: Mapping that may contain "fps", "num_frames", "sentences"
            and "timestamps". "timestamps" is expected to hold one
            ``[start_frame, end_frame]`` pair per sentence.

    Returns:
        A list of sample dicts, one per sentence that has a well-formed
        timestamp pair. The list is empty when ``fps`` or ``num_frames`` is
        not a positive number, because no duration can be derived then.
    """
    fps = content.get("fps", DEFAULT_FPS)
    num_frames = content.get("num_frames", DEFAULT_NUM_FRAMES)

    # A zero, negative, null or non-numeric value would otherwise end in a
    # ZeroDivisionError/TypeError and abort the whole conversion.
    if not (_is_number(fps) and _is_number(num_frames)):
        return []
    if not (fps > 0 and num_frames > 0):
        return []

    duration = round(num_frames / fps, 4)
    video_name = f"{video_id}"  # used as-is; no suffix is added

    sentences = content.get("sentences") or []
    timestamps = content.get("timestamps") or []

    samples = []
    # zip() stops at the shorter list, so sentences without a timestamp are
    # ignored instead of raising IndexError.
    for sentence, ts in zip(sentences, timestamps):
        if not isinstance(sentence, str):
            continue  # cannot be cleaned into a caption
        if not isinstance(ts, (list, tuple)) or len(ts) != 2:
            continue  # skip malformed timestamp
        start_frame, end_frame = ts
        if not (_is_number(start_frame) and _is_number(end_frame)):
            continue  # skip non-numeric frame indices

        # Normalize to 0~1. (frame / fps) / (num_frames / fps) reduces to
        # frame / num_frames; using the frame ratio directly avoids dividing
        # two values that were already rounded to 4 decimals.
        normalized_solution = [
            round(start_frame / num_frames, 4),
            round(end_frame / num_frames, 4),
        ]

        # Drop surrounding double quotes/spaces, then any other whitespace.
        cleaned_sentence = sentence.strip('" ').strip()

        samples.append(
            {
                "dataset": "tacos",
                "text_type": "caption",
                "normal_caption": cleaned_sentence,
                "problem": f"Give the query: '{cleaned_sentence}', when does the described content occur in the video?",
                "video": video_name,
                "solution": normalized_solution,
                "video_length": duration,
            }
        )

    return samples


def convert_annotations(data):
    """Flatten a ``{video_id: content}`` mapping into a list of samples."""
    return [
        sample
        for video_id, content in data.items()
        for sample in build_samples(video_id, content)
    ]


def main():
    """Read ``input_file``, convert it and write the result to ``output_file``."""
    # Read the original JSON; JSON text is UTF-8, so do not rely on the
    # platform's default locale encoding.
    with open(input_file, 'r', encoding="utf-8") as f:
        data = json.load(f)

    output_data = convert_annotations(data)

    # Write the output JSON.
    with open(output_file, 'w', encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)

    # Report dropped sentences instead of discarding them silently.
    total_sentences = sum(
        len(content.get("sentences") or []) for content in data.values()
    )
    skipped = total_sentences - len(output_data)
    if skipped:
        print(f"Skipped {skipped} sentences with missing or invalid metadata")

    print(f"✅ Converted {len(output_data)} samples and saved to {output_file}")


if __name__ == "__main__":
    main()
