import random
import json
import ray
import mmap
import time
import sys
import traceback
from ray.util.queue import Queue
sys.path.append(".")
from utils.misc import execute
from utils.ray_tools import ProgressBar
from tqdm import tqdm
import pathlib
def split_list(_list, n):
    """Cut ``_list`` into ``n`` consecutive slices of equal nominal length.

    The slice length is the ceiling of ``len(_list) / n``, so trailing slices
    may be shorter than the others or empty. Exactly ``n`` slices are
    returned, in order.
    """
    size = (len(_list) - 1) // n + 1
    pieces = []
    begin = 0
    for _ in range(n):
        end = begin + size
        pieces.append(_list[begin:end])
        begin = end
    return pieces

# CPUs reserved for each Ray worker task; the same value is passed to
# jackhmmer as --cpu.
N_CPU_PER_THREAD = 1
# Number of worker tasks launched to drain the job queue.
n_thread=128
# Passed as the final positional argument of the jackhmmer command line.
# NOTE(review): despite the name, this presumably has to point at the target
# sequence database itself — confirm before running.
DATABASE_DIR="/path/to/dir"
# File that is opened and parsed for job names (FASTA records).
task_fasta="/path/to/dir"
# Root of the per-job query files: <seq_dir>/<first 4 chars of job>/<job>.fasta
seq_dir="/path/to/dir"
# Directory that receives one <job>.sto output file per job.
output_dir="/path/to/dir"

def is_complete(job):
    """Report whether the output file for ``job`` exists and is non-empty.

    Args:
        job: The job name as a string (the form used for queued jobs in this
            script), or a mapping carrying the name under the ``"name"`` key.

    Returns:
        True if ``<output_dir>/<name>.sto`` is a regular file larger than
        0 bytes, otherwise False.
    """
    # Jobs in this script are plain name strings; indexing them with "name"
    # would raise TypeError, so accept both representations.
    name = job if isinstance(job, str) else job["name"]
    output_file_Path = pathlib.Path(output_dir) / (name + ".sto")
    # An empty file cannot hold an alignment, so it is not counted as done.
    return output_file_Path.is_file() and output_file_Path.stat().st_size > 0

@ray.remote(num_cpus=N_CPU_PER_THREAD)
def process_jobs(id,jobs_queue,actor):
    """Ray task that drains ``jobs_queue``, running each job it takes.

    Args:
        id: Worker index; only used in the start-up log line.
        jobs_queue: Queue shared by all workers. Every item taken from it is
            passed to ``execute_one_job``.
        actor: Progress actor. ``actor.update.remote(1)`` is called once per
            job taken from the queue, whether the job succeeded or failed.

    Returns:
        1, once the queue has been found empty.
    """
    # Imported locally because the file only imports ``Queue`` from this module.
    from ray.util.queue import Empty

    print("start process",id)
    while True:
        try:
            # Non-blocking get. Checking ``empty()`` and then calling a
            # blocking ``get()`` lets another worker take the last item in
            # between, which would leave this worker waiting forever.
            job = jobs_queue.get(block=False)
        except Empty:
            break
        try:
            execute_one_job(job)
        except Exception:
            # One failing job must not stop the worker; log it and move on.
            print(f"failed: {job}")
            traceback.print_exc()
        try:
            actor.update.remote(1)
        except Exception:
            # Progress reporting is best-effort and must not abort the worker.
            print(f"progress update failed after job: {job}")
    return 1


def execute_one_job(job):
    """Run jackhmmer for one job and save its alignment output.

    The query is read from ``<seq_dir>/<first 4 chars of job>/<job>.fasta``
    and the alignment (``-A``) is written to ``<output_dir>/<job>.sto``.
    jackhmmer's regular output is sent to ``/dev/null``.

    Args:
        job: Job name as a string.

    Returns:
        Always 1; the result of ``execute`` is not inspected.
    """
    query_fasta = pathlib.Path(seq_dir) / job[:4] / (job + ".fasta")
    alignment_out = pathlib.Path(output_dir) / (job + ".sto")
    # Assemble the command from its parts; joined by single spaces.
    command = " ".join(
        [
            "jackhmmer",
            f"--cpu {N_CPU_PER_THREAD}",
            f"-A {alignment_out}",
            "-o /dev/null",
            "-E 0.001",
            "-N 3",
            f"{query_fasta}",
            f"{DATABASE_DIR}",
        ]
    )
    execute(command)
    return 1



# Build the job list: one job per FASTA record, named by the header line
# without its leading ">".
all_jobs=[]
with open(task_fasta,"r") as f:
    for line in f:
        line=line.strip()
        # Only header lines name a job. Scanning for ">" instead of stepping
        # two lines at a time tolerates wrapped sequences, blank lines and an
        # odd line count.
        if line.startswith(">"):
            all_jobs.append(line[1:])
print("all jobs:",len(all_jobs))
# No filtering of already-finished jobs is applied: every job is queued.
uncompleted_jobs=all_jobs

job_queue = Queue()
for job in tqdm(uncompleted_jobs):
    job_queue.put(job)
print("job queue size:",job_queue.qsize())
# Workers send one progress update per job taken from the queue, so the
# progress total is the number of queued jobs.
pb = ProgressBar(len(uncompleted_jobs))
actor=pb.actor

# Launch the workers; they all pull from the same queue until it is empty.
job_id_list=[process_jobs.remote(i,job_queue,actor) for i in range(n_thread)]
pb.print_until_done()
result=ray.get(job_id_list)
print("Done!")