import os

import pandas as pd
import numpy as np
# The relative order of the imports below is kept as-is: names bound after the
# star import take precedence over anything it exports under the same name.
from optim_utils import *
from tqdm import tqdm

# Build a ground-truth subset: walk the parquet rows in order, keep the first
# MAX_SAMPLES rows whose image can be downloaded, save each image as
# OUTPUT_DIR/gt_images/<index>/0.png, and hand the kept rows to
# write_jsonlines.
MAX_SAMPLES = 2000
OUTPUT_DIR = "outputs/sdv1_bb_edge_groundtruth"

df = pd.read_parquet("sdv1_bb_edge_groundtruth.parquet", engine="pyarrow")
df_lines = df.to_dict("records")

df_lines_filtered = []
for x in tqdm(df_lines):
    # Stop as soon as enough rows with a usable image have been collected.
    if len(df_lines_filtered) >= MAX_SAMPLES:
        break

    # Best-effort download: any failure (including a missing "url" key) just
    # skips the row. `except Exception` rather than a bare `except` so that
    # Ctrl-C / SystemExit still abort the run.
    try:
        img = download_image(x["url"])
    except Exception:
        continue

    # NOTE(review): download_image is defined elsewhere; guard in case it
    # signals a failed download by returning None instead of raising.
    if img is None:
        continue

    index = x["index"]
    path = f"{OUTPUT_DIR}/gt_images/{index}"
    os.makedirs(path, exist_ok=True)
    # Exactly one image is downloaded per row, so it is always stored as 0.png.
    img.save(f"{path}/0.png")

    df_lines_filtered.append(x)

# Make sure the output directory exists even when no image was downloaded;
# otherwise it is only created as a side effect of saving an image above.
os.makedirs(OUTPUT_DIR, exist_ok=True)
write_jsonlines(df_lines_filtered, f"{OUTPUT_DIR}/sdv1_bb_edge_groundtruth.jsonl")
