"""Collect GitHub issues that have solutions and publish them as a Hugging Face dataset.

The script walks a fixed list of repositories, gathers issues through the
helpers in ``swebench_get_repo_copy``, writes them to a local JSON file and
then pushes them to the Hugging Face Hub.
"""

import os
import sys
import asyncio
import json
import time
from pathlib import Path

from datasets import Dataset
from huggingface_hub import HfApi, login

# Add the current directory to the path to ensure imports work correctly
current_dir = os.path.dirname(os.path.abspath(__file__))
if current_dir not in sys.path:
    sys.path.append(current_dir)

# NOTE(review): `Ratemiter` is kept exactly as it appears in this file; confirm
# it matches the class name actually exported by swebench_get_repo_copy.
from swebench_get_repo_copy import (
    get_github_token,
    Ratemiter,
    process_repository,
    check_github_api_status,
)

# Local JSON copy of the collected issues, written before any upload attempt.
LOCAL_DATASET_FILE = "github_ises_dataset.json"


async def collect_ises_from_repos(repos, max_ises_per_repo=5):
    """Collect issues from a list of repositories.

    Args:
        repos: Sized iterable of ``(owner, repo_name, topic)`` tuples.
        max_ises_per_repo: Per-repository limit forwarded to
            ``process_repository``.

    Returns:
        A flat list with the issues returned by ``process_repository`` for
        every repository, or an empty list when ``check_github_api_status``
        reports that the API cannot be used.

    Errors raised by ``process_repository`` are deliberately not caught so
    that failures stay visible.
    """
    print(f"Collecting issues from {len(repos)} repositories...")

    github_token = get_github_token()

    # Request headers shared by every GitHub API call.
    headers = {"Accept": "application/vnd.github.v3+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"
        print("Using GitHub authentication token")
    else:
        print(
            "WARNING: No GitHub token found. API rate limits will be severely restricted."
        )

    rate_miter = Ratemiter(calls_per_second=0.5)

    # Bail out early when the API status check fails.
    if not check_github_api_status(headers):
        print("GitHub API rate limits may be too low to complete the task.")
        return []

    all_ises = []
    for owner, repo_name, topic in repos:
        print(f"\nProcessing repository: {owner}/{repo_name}")

        # No try/except here on purpose: errors should surface to the caller.
        ises = await process_repository(
            (owner, repo_name), topic, max_ises_per_repo, headers, rate_miter
        )
        print(f"Collected {len(ises)} issues with solutions from {owner}/{repo_name}")

        if not ises:
            print("No issues found with solutions")
            continue

        all_ises.extend(ises)

        # Show the first issue as a quick sanity check of the collected data.
        sample_ise = ises[0]
        print(f"Sample issue: {sample_ise['title']}")
        solution_body = sample_ise["solution_body"]
        if len(solution_body) > 100:
            solution_snippet = solution_body[:100] + "..."
        else:
            solution_snippet = solution_body
        print(f"Solution snippet: {solution_snippet}")

    return all_ises


def upload_to_huggingface(ises, repo_id, output_file=LOCAL_DATASET_FILE):
    """Save the collected issues locally, then upload them to Hugging Face.

    Args:
        ises: List of issue records (JSON-serialisable dicts).
        repo_id: Target dataset repository on the Hugging Face Hub.
        output_file: Path of the local JSON copy written before uploading.

    Returns:
        ``True`` when the upload succeeded, ``False`` when converting or
        pushing the dataset raised an exception (the local file is still
        available in that case).
    """
    print(f"\nUploading {len(ises)} issues to Hugging Face dataset: {repo_id}")

    # Save locally first so the data survives a failed conversion or upload.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(ises, f, indent=2)
    print(f"Saved dataset to {output_file}")

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        print("No Hugging Face token found in environment (HF_TOKEN).")
        print("Will try to upload without authentication.")

    try:
        # Converting inside the try block means a malformed record is
        # reported through the same path as an upload failure.
        dataset = Dataset.from_list(ises)

        if hf_token:
            login(token=hf_token)

        dataset.push_to_hub(repo_id=repo_id, private=False, token=hf_token)
    except Exception as e:
        # Top-level boundary: report the problem and point at the local copy.
        print(f"Error uploading to Hugging Face: {e}")
        print(f"Dataset is still available locally at {output_file}")
        return False

    print(f"Successfully uploaded dataset to {repo_id}")
    print(f"View at: https://huggingface.co/datasets/{repo_id}")
    return True


async def main():
    """Collect issues from the configured repositories and upload them."""
    # List of repositories to collect issues from
    # Format: (owner, repo_name, topic)
    repos = [
        ("psf", "requests", "http"),
    ]

    ises = await collect_ises_from_repos(repos, max_ises_per_repo=100)

    if ises:
        print(f"\nCollected a total of {len(ises)} issues with solutions")
        # NOTE(review): this repo id is kept as found in the file, but the
        # namespace looks truncated (it starts with "-"); confirm the
        # intended "<namespace>/<dataset>" value before running.
        upload_to_huggingface(ises, "-dev/ggithubsolutions")
    else:
        print("No issues collected.")


if __name__ == "__main__":
    print("=== GitHub Issue Collection and Upload ===")

    # asyncio.run creates the event loop, runs main() and closes the loop.
    asyncio.run(main())

    print("\nProcess completed!")