import json

# Load the unshuffled JSONL metadata: one JSON document per line.
# Alternative input previously used here (codeseqlen9 variant):
#   /home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen9_unshuffled.jsonl
# The encoding is stated explicitly because JSON text is UTF-8 and the default
# of open() depends on the machine's locale.
with open("/home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen1_10percent_unshuffled.jsonl", 'r', encoding="utf-8") as f:
    # Skip whitespace-only lines (e.g. a trailing empty line at EOF); json.loads
    # would otherwise raise JSONDecodeError on them.
    metadata = [json.loads(line) for line in f if line.strip()]

# Build final_elem from metadata, preserving order. A record whose first two
# 'image' entries are equal is kept, and the record immediately after it is
# dropped without being inspected.
# NOTE(review): presumably the equal pair marks a trajectory boundary -- confirm
# against whatever produces the dataset.
final_elem = []
skip_next = False
for record in metadata:
    frames = record['image']
    if not skip_next:
        # Decide whether the following record must be dropped, then keep this one.
        skip_next = frames[0] == frames[1]
        final_elem.append(record)
    else:
        # This record directly follows a flagged one: drop it and reset the flag.
        skip_next = False


print("final elem length is", len(final_elem))
# Dump the filtered records, in their original order, as JSONL (one JSON
# document per line).
# Alternative output previously used here (codeseqlen9 variant):
#   /home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen9_unshuffled_filtered.jsonl
with open("/home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen1_10percent_unshuffled_filtered.jsonl", 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in final_elem)


# Shuffle the filtered records in place and dump them as a second JSONL file.
# No seed is set in this script, so the resulting order differs between runs.
import random
random.shuffle(final_elem)
# Alternative output previously used here (codeseqlen9 variant):
#   /home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen9_shuffled_filtered.jsonl
with open("/home/t-sye/World-Model/data/bridge_window3_codebook8_codeseqlen1_10percent_shuffled_filtered.jsonl", 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in final_elem)

    