import os
import json
from collections import Counter
import json

def get_files_recursive(directory):
    """Collect the names of all non-JPG files found under a directory tree.

    Walks ``directory`` recursively with ``os.walk`` and gathers the base
    name (not the full path) of every file whose name does not contain the
    substring ``'.jpg'``.

    Args:
        directory: Root directory to walk.

    Returns:
        A list of file base names in ``os.walk`` traversal order. The same
        name can appear more than once when several subdirectories contain
        identically named files. An empty list is returned when nothing
        matches or when ``directory`` cannot be listed (``os.walk`` ignores
        listing errors by default).
    """
    related_files = []
    for _, _, files in os.walk(directory):
        # ``os.walk`` already reports directories separately from files, so
        # no extra directory test is needed here. Testing the bare name with
        # ``os.path.isdir`` would resolve it against the current working
        # directory and could wrongly drop a file that happens to share its
        # name with a directory there.
        related_files.extend(name for name in files if '.jpg' not in name)
    return related_files

def clean_source(source):
    """Map a '/'-separated source path to a short dataset label.

    The part of ``source`` before its last ``'/'`` is inspected. If it
    contains one of the known names (checked in the order ``'youtube'``,
    ``'youcook2'``, ``'NextQA'``) that name is returned; otherwise the
    directory part itself is returned (an empty string when ``source`` has
    no ``'/'``).
    """
    # Everything before the final '/' (empty when there is no '/').
    parent = source.rpartition('/')[0]
    for label in ('youtube', 'youcook2', 'NextQA'):
        if label in parent:
            return label
    return parent
    
def load_jsonl(file_path):
    """Read a JSON Lines file and return its decoded records as a list.

    The file is opened as UTF-8 text. Lines that are empty or contain only
    whitespace are skipped; every other line is decoded with ``json.loads``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        # Blank lines carry no record, so they are filtered out before parsing.
        return [json.loads(raw) for raw in handle if raw.strip()]

def counter_to_comma_string(counter):
    """Render a Counter as text with one ``key<TAB>value`` pair per line.

    Despite the function's name, the pairs are separated by newlines and
    each key is separated from its value by a tab. Pairs appear in the
    order produced by ``counter.items()``; an empty counter yields ``""``.
    """
    rows = []
    for key, value in counter.items():
        rows.append(f"{key}\t{value}")
    return "\n".join(rows)
