import argparse
from pathlib import Path
from tqdm import tqdm


def build_index(path: Path) -> None:
    """Write a byte-offset index for the JSON Lines dataset in *path*.

    Reads ``<path>/data.jsonl`` in binary mode and writes ``<path>/index``,
    a text file with one decimal integer per line: the byte offset at which
    each line of the dataset starts, followed by one final entry holding the
    total number of bytes read (i.e. the end offset of the last line).

    Args:
        path: Directory containing ``data.jsonl``. A plain string is
            accepted as well and is converted to a ``Path``.

    Raises:
        FileNotFoundError: If ``<path>/data.jsonl`` does not exist.
    """
    path = Path(path)
    data_path = path / "data.jsonl"
    # Raise explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, which would silently drop this check and message.
    if not data_path.exists():
        raise FileNotFoundError(f"Jsonline dataset '{data_path}' not found.")

    offset = 0
    with open(data_path, "rb") as fin, open(path / "index", "w") as fout:
        # Binary mode makes len(line) the exact number of bytes on disk,
        # line terminator included, so offsets are valid seek positions.
        for line in tqdm(fin):
            fout.write(f"{offset}\n")
            offset += len(line)
        # Final entry: end offset of the last line (total bytes read).
        fout.write(f"{offset}\n")


if __name__ == "__main__":
    # Script entry point: read the dataset directory from --path/-p and
    # build the index file inside it.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--path",
        "-p",
        required=True,
        help="Data path.",
    )
    options = cli.parse_args()
    dataset_dir = Path(options.path)
    build_index(dataset_dir)