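# Generate the list of scripts/run_pretrain.py commands for mass pretraining (one command per config file and seed); written to scripts/generated/commands_linear_probe.sh unless --output_path is given.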

import os
import argparse
from pathlib import Path

from utils.io import get_full_config, write_list_to_txtfile
from utils.io import find_all_directory_children_w_filter

def get_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser exposing two optional string
        arguments, ``-td/--top_backbone_dir`` and ``-o/--output_path``,
        both defaulting to ``None``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-td",
        "--top_backbone_dir",
        help="top-level directory whose children are filtered for .yaml config files",
        type=str,
        default=None,
    )
    parser.add_argument(
        "-o",
        "--output_path",
        help="path of the file the generated commands are written to",
        type=str,
        default=None,
    )
    return parser



if __name__ == "__main__":
    # Build one run_pretrain command per (config file, seed) pair whose
    # scores.pkl does not exist yet, then write the list to the output file.
    parser = get_parser()
    args = parser.parse_args()

    top = args.top_backbone_dir
    output_path = args.output_path

    if output_path is None:
        output_path = "scripts/generated/commands_linear_probe.sh"

    config_paths = find_all_directory_children_w_filter(top, ".yaml")

    commands = []
    for c in config_paths:
        # The config does not depend on the seed, so load it once per file
        # instead of once per (file, seed) pair.
        config = get_full_config(c)

        for s in [0, 1, 2, 3, 4]:
            if config["save_dir"] is None:
                # Dataset name = first "/"-separated component of the data
                # path that is not the literal "data".
                data_name = [
                    i for i in config["data_args"]["path"].split("/") if i != "data"
                ][0]
                sd = os.path.join(
                    "experiments", data_name, config["model_type"], f"seed_{s}"
                )
            else:
                # Honor the save_dir from the config. Without this branch
                # `sd` was either undefined (NameError) or a stale value left
                # over from a previous iteration.
                # NOTE(review): every seed then shares this directory —
                # confirm whether a per-seed subdirectory is wanted.
                sd = config["save_dir"]

            if os.path.exists(Path(sd) / "scores.pkl"):  # skip if scores already exist
                continue

            commands.append(f"python scripts/run_pretrain.py -c {c} -s {s} -sd {sd}")

    # Create the directory that will actually hold the output file, so a
    # custom --output_path outside scripts/generated also works.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    write_list_to_txtfile(commands, output_path)
