import pandas as pd

# Directory holding both the raw scenarios file and the filtered output.
# Defined once so the input and output paths cannot drift apart.
OUTPUT_DIR = "generation_outputs/programmatic_scenarios_gpt-4o_1.0_2024-09-15-15:00:08_3"

scenarios_name = f"{OUTPUT_DIR}/og_scenarios.tsv"
filtered_name = f"{OUTPUT_DIR}/scenarios_filtered.tsv"

# Columns read from the input and written, in this order, to the output.
OUTPUT_COLUMNS = ["ID", "Persona", "Environment", "Similarity"]

# Inclusive upper bounds a row must satisfy to be kept.
MAX_PERSONA_WORDS = 10
MAX_ENVIRONMENT_WORDS = 15
MAX_SIMILARITY = 58  # NOTE(review): scale/units of "Similarity" are not visible here


def filter_scenarios(
    df: pd.DataFrame,
    max_persona_words: int = MAX_PERSONA_WORDS,
    max_environment_words: int = MAX_ENVIRONMENT_WORDS,
    max_similarity: float = MAX_SIMILARITY,
) -> pd.DataFrame:
    """Return the rows of *df* that pass the length and similarity limits.

    A row is kept when all of the following hold:

    * ``Persona`` and ``Environment`` are both strings (missing values,
      which pandas reads as NaN, are therefore dropped);
    * ``Persona`` has at most ``max_persona_words`` whitespace-separated words;
    * ``Environment`` has at most ``max_environment_words`` such words;
    * ``Similarity`` is less than or equal to ``max_similarity`` (a NaN
      similarity never satisfies this comparison, so such rows are dropped).

    Args:
        df: Frame containing at least the columns in ``OUTPUT_COLUMNS``.
        max_persona_words: Inclusive word limit for ``Persona``.
        max_environment_words: Inclusive word limit for ``Environment``.
        max_similarity: Inclusive upper bound for ``Similarity``.

    Returns:
        A new frame with only ``OUTPUT_COLUMNS``, in the original row order,
        re-indexed from 0.

    Raises:
        KeyError: If any of the required columns is absent from *df*.
    """
    # Iterate the three columns in lockstep instead of indexing row by row;
    # this avoids building a full row Series for every single field access.
    keep = [
        isinstance(persona, str)
        and isinstance(env, str)
        and len(persona.split()) <= max_persona_words
        and len(env.split()) <= max_environment_words
        and bool(sim <= max_similarity)
        for persona, env, sim in zip(
            df["Persona"], df["Environment"], df["Similarity"]
        )
    ]
    # An explicitly boolean, index-aligned mask also works for an empty frame.
    mask = pd.Series(keep, index=df.index, dtype=bool)
    return df.loc[mask, OUTPUT_COLUMNS].reset_index(drop=True)


def main() -> None:
    """Read the raw scenarios TSV, write the filtered TSV, print the kept count."""
    df = pd.read_csv(scenarios_name, sep="\t")
    pred_df = filter_scenarios(df)
    pred_df.to_csv(filtered_name, sep="\t", index=False)
    print(len(pred_df))


if __name__ == "__main__":
    main()