import subprocess
import sys
import multiprocessing as mp
import torch
import os


def f(q, i):
    """Run shell commands taken from *q* until a ``None`` marker arrives.

    Before consuming anything, ``CUDA_VISIBLE_DEVICES`` is set to ``str(i)``
    in this process's environment; the commands are launched afterwards
    through the shell, one at a time.

    Args:
        q: Queue-like object whose ``get()`` returns the next command string,
            or ``None`` to signal that this worker should stop.
        i: Value written (as a string) to ``CUDA_VISIBLE_DEVICES``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(i)
    while True:
        command = q.get()
        if command is None:
            # Stop marker: leave anything still queued for other consumers.
            break
        # The exit status is not inspected; a failing command does not stop
        # the loop.
        subprocess.run(command, shell=True)


# Number of worker processes started per GPU index (env var REP, default 1).
REP = int(os.getenv("REP", "1"))
# Number of GPU indices to fan out over (env var PAR). The CUDA device count
# is only queried when PAR is not set explicitly.
PAR = (
    int(os.environ["PAR"])
    if "PAR" in os.environ
    else torch.cuda.device_count()
)


def read_commands(lines):
    """Yield the non-blank entries of *lines*, stripped of outer whitespace.

    Args:
        lines: Iterable of strings (for example an open text stream).

    Yields:
        Each entry after ``str.strip()``, skipping those that become empty.
    """
    for line in lines:
        command = line.strip()
        if command:
            yield command


if __name__ == "__main__":
    if PAR < 1 or REP < 1:
        # With no workers nothing would ever consume the queue, so every
        # command read from stdin would be silently dropped.
        sys.exit(
            f"no workers to start (PAR={PAR}, REP={REP}); "
            "set PAR and REP to positive integers"
        )

    q = mp.Queue()
    # REP workers for each of the PAR indices; each worker receives its index.
    p = [
        mp.Process(target=f, args=(q, gpu))
        for gpu in range(PAR)
        for _ in range(REP)
    ]
    for proc in p:
        proc.start()

    try:
        # Commands are queued as they are read, so workers can start on them
        # before stdin reaches EOF.
        for command in read_commands(sys.stdin):
            q.put(command)
    finally:
        # One stop marker per worker, sent even if reading stdin fails, so
        # workers waiting on the queue are always told to finish.
        for _ in p:
            q.put(None)
    for proc in p:
        proc.join()
