
def get(name: str, base_path: str = "data"):
    """Load a text dataset by name and return its documents and labels.

    Supported values of ``name``:

    * ``"20newsgroups"`` -- fetched through scikit-learn's
      ``fetch_20newsgroups`` (``subset='all'``, with headers, footers and
      quotes removed). Labels are ``newsgroups.target`` exactly as
      scikit-learn returns it (it is not converted to a list).
    * ``"reuters"`` -- the NLTK ``reuters`` corpus (``nltk.download`` is
      invoked on every call). The label of a document is the first category
      NLTK lists for it; any further categories are ignored.
    * ``"yelp"`` -- ``{base_path}/yelp_academic_dataset_review.json`` read as
      JSON lines. Documents come from the ``text`` column, labels from the
      ``business_id`` column.
    * ``"amazon"`` -- ``{base_path}/amazon_reviews.csv``. Rows without a
      ``REVIEW_TEXT`` are dropped; documents come from ``REVIEW_TEXT`` and
      labels from ``PRODUCT_CATEGORY`` of the remaining rows.
    * ``"ag_news"`` -- ``{base_path}/ag_news_train.csv`` read without a
      header row, columns taken as class index, title, description.
      Documents are the descriptions, labels the class indices.

    Args:
        name: Identifier of the dataset to load (see the list above).
        base_path: Directory (or prefix) holding the file-based datasets.
            It is joined to the file name with a ``/``.

    Returns:
        A ``(documents, labels)`` pair of equally ordered sequences.

    Raises:
        ValueError: If ``name`` is not one of the supported datasets, or if
            the loaded dataset contains no documents or no labels.
    """
    documents, labels = None, None

    if name == "20newsgroups":
        from sklearn.datasets import fetch_20newsgroups
        newsgroups = fetch_20newsgroups(
            subset='all',
            categories=None,
            remove=('headers', 'footers', 'quotes'),
        )
        documents, labels = newsgroups.data, newsgroups.target

    elif name == "reuters":
        import nltk
        from nltk.corpus import reuters
        nltk.download('reuters')
        # Query the id list once so documents and labels share one ordering.
        file_ids = reuters.fileids()
        documents = [reuters.raw(file_id) for file_id in file_ids]
        # Only the first listed category is kept as the label.
        labels = [reuters.categories(file_id)[0] for file_id in file_ids]

    elif name == "yelp":
        import pandas as pd
        data = pd.read_json(f"{base_path}/yelp_academic_dataset_review.json", lines=True)
        documents = data['text'].tolist()
        labels = data['business_id'].tolist()

    elif name == "amazon":
        import pandas as pd
        data = pd.read_csv(f"{base_path}/amazon_reviews.csv")
        # Drop whole rows with a missing review so that documents[i] and
        # labels[i] always refer to the same row. Dropping NaNs from the text
        # column alone would leave the label list longer and shifted.
        data = data.dropna(subset=['REVIEW_TEXT'])
        documents = data['REVIEW_TEXT'].tolist()
        labels = data['PRODUCT_CATEGORY'].tolist()

    elif name == "ag_news":
        import pandas as pd
        # header=None: every line of the file is treated as a data row.
        # NOTE(review): if the CSV ships with a header line it will show up
        # as the first document/label -- confirm against the actual file.
        data = pd.read_csv(
            f"{base_path}/ag_news_train.csv",
            header=None,
            names=['Class Index', 'Title', 'Description'],
        )
        documents = data['Description'].tolist()
        labels = data['Class Index'].tolist()

    else:
        raise ValueError(f"Dataset {name} not recognized.")

    # len() rather than truthiness: labels may be a NumPy array, whose
    # truth value is ambiguous.
    if documents is None or labels is None or len(documents) == 0 or len(labels) == 0:
        raise ValueError(f"Dataset {name} is empty or not loaded correctly.")

    return documents, labels