import os

from datasets import load_dataset, concatenate_datasets

def get_subdirectories(directory):
    """Return the paths of all directories nested anywhere below *directory*.

    The tree is traversed with ``os.walk``. Every returned path is the walked
    parent joined with the child directory name, so each result starts with
    *directory* exactly as it was passed in. *directory* itself is not part
    of the result.
    """
    return [
        os.path.join(parent, child)
        for parent, child_names, _ in os.walk(directory)
        for child in child_names
    ]

def load_text_files_from_directory(directory_path):
    """Load every entry of *directory_path* as a text dataset and merge them.

    Each name returned by ``os.listdir`` is passed to
    ``load_dataset("text", data_files=...)`` and its ``'train'`` split is kept.
    Entries that fail to load are reported on stdout and skipped, so one bad
    entry does not abort the whole run.

    Args:
        directory_path: Directory whose direct entries are loaded.

    Returns:
        The result of ``concatenate_datasets`` over the successfully loaded
        splits, in ``os.listdir`` order (which Python does not guarantee).

    NOTE(review): when nothing loads, ``concatenate_datasets`` is called with
    an empty list -- confirm how the installed ``datasets`` version handles it.
    """
    loaded = []
    for file_name in os.listdir(directory_path):
        file_path = os.path.join(directory_path, file_name)
        try:
            split = load_dataset("text", data_files=file_path)['train']
        except Exception:
            # Deliberately best-effort, but ``Exception`` rather than a bare
            # ``except`` so Ctrl-C / SystemExit still stop the program.
            print(f"Error loading {file_path}")
        else:
            loaded.append(split)
    return concatenate_datasets(loaded)

def count_invisible_characters(string):
    """Return how many U+200B (ZERO WIDTH SPACE) characters *string* contains.

    Despite the general name, U+200B is the only character that is counted;
    other zero-width or non-printing code points are ignored.
    """
    zero_width_space = '\u200B'
    return sum(1 for symbol in string if symbol == zero_width_space)