
import os
import pandas as pd

# Batch result files to process, and the label used in the output filenames
# for each of them. The two lists are paired by position.
# NOTE(review): the second input is spelled "sparaphrase" while its label is
# "prompting" -- confirm the filename is intentional and not a typo.
FILES = [
    "batch_prompts_paraphrase.jsonl.result",
    "batch_prompts_sparaphrase.jsonl.result",
]
NAMES = [
    "paraphrase",
    "prompting",
]

# Each result file is split by row position: the first AMAZON_ROWS rows go to
# the "amazon" output, the next BLOGS_ROWS rows to "blogs", and every
# remaining row to "reddit".
AMAZON_ROWS = 12000
BLOGS_ROWS = 7000

os.makedirs("./final", exist_ok=True)

for filename, name in zip(FILES, NAMES):
    df = pd.read_json(filename, lines=True)

    # Keep only the generated text of the first choice of each response.
    # Presumably the records follow the OpenAI batch-output layout
    # (response -> body -> choices[0] -> message -> content) -- TODO confirm.
    generations = (
        df["response"]
        .apply(lambda resp: resp["body"]["choices"][0]["message"]["content"])
        .to_frame("generation")
    )

    blogs_end = AMAZON_ROWS + BLOGS_ROWS
    splits = {
        "amazon": generations.iloc[:AMAZON_ROWS],
        "blogs": generations.iloc[AMAZON_ROWS:blogs_end],
        "reddit": generations.iloc[blogs_end:],
    }

    # One JSON-lines file per (split, name) pair, one {"generation": ...}
    # record per line.
    for split, frame in splits.items():
        frame.to_json(
            f"./final/{split}_{name}.jsonl", lines=True, orient="records"
        )