"""Download the CAMEL-AI subject archives and push each one to the Hugging Face Hub."""

import json
import os
import subprocess
import zipfile
from functools import partial
from multiprocessing import Pool

from datasets import Dataset


def download_to_directory(directory):
    """Download and unpack the CAMEL-AI subject archives into ``directory``.

    Each archive is extracted into ``<directory>/<subject>``, where
    ``<subject>`` is the archive's base name (for example ``biology``).
    Subjects whose folder already exists are skipped.

    Args:
        directory: Parent folder that receives one subfolder per subject.

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero status.
        FileNotFoundError: If the ``wget`` executable is not available.
        zipfile.BadZipFile: If a downloaded file is not a valid zip archive.
    """
    urls = [
        "https://huggingface.co/datasets/camel-ai/biology/resolve/main/biology.zip?download=true",
        "https://huggingface.co/datasets/camel-ai/chemistry/resolve/main/chemistry.zip?download=true",
        "https://huggingface.co/datasets/camel-ai/math/resolve/main/math.zip?download=true",
        "https://huggingface.co/datasets/camel-ai/physics/resolve/main/physics.zip?download=true",
    ]

    for url in urls:
        # ".../biology.zip?download=true" -> "biology.zip"
        zip_filename = url.split("/")[-1].split("?")[0]
        subject_folder = os.path.join(directory, zip_filename.split(".")[0])

        # Check if the folder already exists
        if os.path.exists(subject_folder):
            print(
                f"Folder {subject_folder} already exists. Skipping download and extraction."
            )
            continue

        try:
            # Download the zip file into the current working directory.
            # check=True surfaces a failed download here instead of letting
            # zipfile choke on a missing or truncated archive.
            subprocess.run(["wget", "-P", ".", url, "-O", zip_filename], check=True)

            # Create the subfolder only once the download has succeeded, so a
            # failed download does not leave an empty folder behind that would
            # make the next run skip this subject.
            os.makedirs(subject_folder, exist_ok=True)

            # Unzip the file into the subfolder
            with zipfile.ZipFile(zip_filename, "r") as zip_ref:
                zip_ref.extractall(subject_folder)
        finally:
            # Remove the zip file, including a partial one left by a failed wget.
            if os.path.exists(zip_filename):
                os.remove(zip_filename)


def load_json(file_path):
    """Return the parsed contents of the JSON file at ``file_path``."""
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def process_file(file_name, directory):
    """Load the JSON file named ``file_name`` located inside ``directory``.

    ``file_name`` comes first, presumably so ``directory`` can be bound with
    ``functools.partial`` and the result mapped over a list of file names.
    """
    file_path = os.path.join(directory, file_name)
    return load_json(file_path)


def load_jsons_sequential(directory, subject):
    """Build a ``Dataset`` from every ``.json`` file in ``<directory>/<subject>``.

    Files are read one at a time in sorted name order so the resulting row
    order is reproducible (``os.listdir`` makes no ordering guarantee).

    Args:
        directory: Parent folder containing the per-subject subfolders.
        subject: Name of the subject subfolder to read.

    Returns:
        A ``Dataset`` with one row per JSON file. Assumes each file holds a
        single JSON object, as ``Dataset.from_list`` expects a list of dicts.
    """
    subject_dir = os.path.join(directory, subject)
    json_files = sorted(f for f in os.listdir(subject_dir) if f.endswith(".json"))
    data = [process_file(file_name, subject_dir) for file_name in json_files]

    # Create a Dataset object
    return Dataset.from_list(data)


def main():
    """Download all subjects, report their size and features, and push them to the Hub."""
    directory = os.path.join(os.path.expanduser("~"), "Downloads")
    subjects = ["biology", "chemistry", "math", "physics"]

    download_to_directory(directory)

    for subject in subjects:
        dataset = load_jsons_sequential(directory, subject)
        print(f"{subject.capitalize()} dataset size: {len(dataset)}")
        print(f"{subject.capitalize()} dataset features: {dataset.features}")
        # NOTE(review): the repo namespace before "-dev" looks truncated;
        # confirm the intended Hub organisation before running.
        dataset.push_to_hub(f"-dev/camel-ai-{subject}")
        print()  # Add a blank line between subjects for readability


if __name__ == "__main__":
    main()