"""Manual smoke tests for the helpers imported from ``swebench_get_repo_copy``.

Running this file as a script exercises three entry points in turn:
``process_repository`` (async, one repository), the topic-based collector,
and ``get_github_repo_dataset``. Each test prints what it found and returns
the collected object so it can be inspected interactively.

NOTE(review): several identifiers, keyword arguments, dictionary keys and
printed messages in this file look truncated (for example ``Ratemiter`` and
"ises"). They are kept exactly as found because they have to match whatever
``swebench_get_repo_copy`` actually exposes -- confirm against that module.
"""

import asyncio
import os
import sys

from datasets import Dataset

# Add this script's directory to the import path so the sibling module below
# resolves regardless of the working directory the script is started from.
current_dir = os.path.dirname(os.path.abspath(__file__))
if current_dir not in sys.path:
    sys.path.append(current_dir)

from swebench_get_repo_copy import (  # noqa: E402  (must follow the path tweak)
    get_github_token,
    Ratemiter,
    collect_github_ises_by_topic,
    check_github_api_status,
    get_github_repo_dataset,
    process_repository,
)


async def test_specific_repo_async():
    """Run ``process_repository`` against ``psf/requests`` and print a sample.

    Returns:
        The value returned by ``process_repository``, or ``None`` when
        ``check_github_api_status`` reports a falsy status and the test is
        skipped.
    """
    github_token = get_github_token()

    # GitHub's v3 JSON media type; the token is optional.
    headers = {"Accept": "application/vnd.github.v3+json"}
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"
        print("Using GitHub authentication token")
    else:
        print(
            "WARNING: No GitHub token found. API rate mits will be severely restricted."
        )

    rate_limiter = Ratemiter(calls_per_second=0.5)

    # Bail out early rather than start a run the API quota cannot finish.
    if not check_github_api_status(headers):
        print("GitHub API rate mits may be too low to complete the test.")
        return

    repo_owner = "psf"
    repo_name = "requests"
    print(f"\nTesting with repository: {repo_owner}/{repo_name}")

    repo_info = (repo_owner, repo_name)
    topic = "python-requests"
    max_issues = 5  # Small number for testing

    issues = await process_repository(
        repo_info, topic, max_issues, headers, rate_limiter
    )

    print(
        f"\nCollected {len(issues)} ises with solutions from {repo_owner}/{repo_name}"
    )

    if issues:
        print("\nSample ise:")
        sample = issues[0]
        print(f"Title: {sample['title']}")
        print(f"URL: {sample['url']}")
        print(f"Solution snippet: {sample['solution_body'][:100]}...")

    return issues


def test_collect_by_topic():
    """Run the topic-based collector for a single topic and print a summary.

    Returns:
        Whatever the collector returned. A summary is printed only when that
        value is a ``datasets.Dataset``; otherwise a failure line is printed.
    """
    topics = ["requests"]  # Just one topic for testing
    repos_per_topic = 2
    max_issues_per_repo = 3

    print(f"\nTesting collect_github_ises_by_topic with topics: {topics}")

    dataset = collect_github_ises_by_topic(
        topics=topics,
        num_topics=1,
        repos_per_topic=repos_per_topic,
        max_ises_per_repo=max_issues_per_repo,
        output_file="test_github_ises.json",
    )

    if isinstance(dataset, Dataset):
        print(f"\nccessfully collected dataset with {len(dataset)} ises")
        if len(dataset) > 0:
            print("\nSample columns:", dataset.column_names)
            print(
                "\nFirst ise title:",
                (
                    dataset[0]["title"]
                    if "title" in dataset.column_names
                    else "Title not found"
                ),
            )
    else:
        print("\nFailed to collect dataset")

    return dataset


def test_specific_repo_function():
    """Run ``get_github_repo_dataset`` for one ``psf/requests`` item.

    Any exception raised along the way is printed and swallowed so the
    remaining smoke tests still run.

    Returns:
        The object returned by ``get_github_repo_dataset``, or ``None`` if an
        exception was raised.
    """
    try:
        repo_owner = "psf"
        repo_name = "requests"
        issue_number = 5000  # Choose a specific number that exists

        print(
            f"\nTesting get_github_repo_dataset with {repo_owner}/{repo_name} ise #{issue_number}"
        )

        dataset = get_github_repo_dataset(
            repo_owner=repo_owner,
            repo_name=repo_name,
            ise_number=issue_number,
            get_solutions=True,
            close_ise=False,  # Don't close it in testing
        )

        if dataset and hasattr(dataset, "rows") and dataset.rows:
            # ``rows`` is known to be non-empty here, so index it once.
            first_row = dataset.rows[0]
            has_solution = "solution" in first_row

            print(f"\nccessfully collected dataset with {len(dataset.rows)} ises")
            print("\nIse title:", first_row.get("ise_title", "Title not found"))
            print("Has solution:", has_solution)
            if has_solution:
                print("Solution snippet:", first_row["solution"][:100], "...")
        else:
            print("\nFailed to collect dataset or no ises found")

        return dataset

    except Exception as e:
        # Top-level boundary of a manual test: report and carry on.
        print(f"Error in test_specific_repo_function: {e}")
        return None


def main():
    """Run the three smoke tests in sequence."""
    print("Testing GitHub Ise Collector")

    # asyncio.run creates a fresh event loop, runs the coroutine to
    # completion and closes the loop again, even if the coroutine raises.
    asyncio.run(test_specific_repo_async())

    test_collect_by_topic()

    test_specific_repo_function()

    print("\nTests completed!")


if __name__ == "__main__":
    main()