import subprocess
import os
import shutil
import tqdm

# Sample rate constant (presumably audio samples per second, in Hz — TODO confirm).
# NOTE(review): nothing in the visible part of this file references it.
SAMPLE_RATE = 44100


def fetch_clip(video_id, start_time, label_list, train_or_test, num_):
    """Download a YouTube video and file a 10-second clip of it under a label.

    The clip covers ``start_time`` to ``start_time + 10`` seconds and is stored
    as ``<train_or_test>/<first label>/v<video_id>_<start>_<end>_out.mkv``.
    The full-length download is deleted once the clip has been handled.

    Args:
        video_id: YouTube video id, inserted into the watch URL.
        start_time: Clip start in seconds; converted with ``int()``.
        label_list: One or more labels joined by ``+``.
        train_or_test: Top-level output directory name.
        num_: Accepted for call compatibility; not used.

    Returns:
        None. Returns early when the clip already exists under any of its
        labels, when ``<outname>.mp4`` exists, when the download produced no
        file, or when ffmpeg failed to cut the clip.

    Raises:
        ValueError: If ``start_time`` cannot be converted to an integer.
    """
    import time

    start_time = int(start_time)
    end_time = start_time + 10
    outname = f"v{video_id}_{start_time}_{end_time}"
    source_path = f"./{outname}.mkv"
    clip_name = f"{outname}_out.mkv"

    labels = label_list.split('+')

    # Skip work when the clip is already filed under any of its labels.
    if any(os.path.isfile(f"{train_or_test}/{label}/{clip_name}") for label in labels):
        return

    # NOTE(review): the download below is merged to .mkv, so this .mp4 check
    # only fires for files produced by something else — confirm it is wanted.
    if os.path.isfile(f"{outname}.mp4"):
        print("Already have it.")
        return

    subprocess.run(
        ["yt-dlp", "-f", "bestvideo[height<=480]+bestaudio/best[height<=480]",
         f"https://youtube.com/watch?v={video_id}", "--output", f"{outname}.%(ext)s", "--merge-output-format", "mkv"]
    )

    if not os.path.isfile(source_path):
        # Download failed (removed/private video, network error, ...): pause
        # briefly before the caller moves on to the next row.
        time.sleep(1)
        return

    try:
        # -nostdin/-y: never prompt on the terminal shared by the worker
        # threads, even if a stale clip from an interrupted run is present.
        result = subprocess.run(
            ["ffmpeg", "-nostdin", "-y", "-loglevel", "quiet", "-i", source_path,
             "-ss", str(start_time), "-to", str(end_time), f"./{clip_name}"]
        )
        if result.returncode != 0 or not os.path.isfile(clip_name):
            print(f"ffmpeg failed for {outname}; skipping.")
            if os.path.isfile(clip_name):
                os.remove(clip_name)  # Drop a partial clip
            return

        # The clip is stored under the first label only.
        dirr = f"{train_or_test}/{labels[0]}"
        if os.path.isdir(dirr):
            print("Directory exists")
        else:
            # exist_ok guards against another thread creating the same
            # directory between the check above and this call.
            os.makedirs(dirr, exist_ok=True)
            print(dirr)

        shutil.move(clip_name, dirr)
    finally:
        # Always delete the full-length download, even when cutting failed.
        if os.path.isfile(source_path):
            os.remove(source_path)


import threading


# Define the function to be executed by each thread
def process_file(filename):
    """Run fetch_clip for every data row of one chunk file.

    Lines starting with ``#`` and blank lines are skipped. For every other
    line, surrounding whitespace is stripped, double quotes become ``+`` and
    spaces become ``_``; the line is then split on commas and the fields are
    passed positionally to ``fetch_clip``.

    A row with the wrong number of fields or a non-integer start time is
    reported and skipped so that the remaining rows are still processed.

    Args:
        filename: Path of the chunk file to read.
    """
    with open(filename, "r") as file:
        for line in tqdm.tqdm(file):
            if line.startswith("#"):
                continue

            row = line.strip().replace('"', '+').replace(' ', '_')
            if not row:
                # A blank line would otherwise call fetch_clip('') and fail.
                continue

            try:
                fetch_clip(*row.split(','))  # One positional argument per field
            except (TypeError, ValueError) as err:
                # Malformed row: report it instead of killing the thread.
                print(f"Skipping row {row!r}: {err}")


original_filename = "vggsound.csv"

# Number of chunk files, and therefore of worker threads (one per chunk).
NUM_CHUNKS = 56

# Count the rows up front so they can be divided between the chunks; the
# context manager makes sure the handle is closed again.
with open(original_filename, "r") as infile:
    total_lines = sum(1 for _ in infile)

# The first `extra_lines` chunks receive one additional row, so the rows left
# over after integer division are processed instead of silently dropped.
lines_per_chunk, extra_lines = divmod(total_lines, NUM_CHUNKS)

os.makedirs("split_files", exist_ok=True)

chunk_filenames = [f"split_files/chunk_{i}.csv" for i in range(NUM_CHUNKS)]

# Single pass over the source file: each chunk takes the next run of rows.
with open(original_filename, "r") as infile:
    for i, chunk_filename in enumerate(tqdm.tqdm(chunk_filenames)):
        chunk_size = lines_per_chunk + (1 if i < extra_lines else 0)
        with open(chunk_filename, "w") as outfile:
            for _ in range(chunk_size):
                outfile.write(infile.readline())

threads = []
# Create and start one thread per chunk so the chunks are processed in parallel
for chunk_filename in chunk_filenames:
    thread = threading.Thread(target=process_file, args=(chunk_filename,))
    threads.append(thread)
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()
