import os
import sys
import requests
from tqdm import tqdm
import gzip
import shutil

def download_file(url, output_path, timeout=60):
    """Stream *url* to *output_path* and gunzip it if the path ends in ``.gz``.

    The download is written in 8 KiB chunks with a tqdm progress bar. When
    *output_path* ends in ``.gz`` the archive is additionally decompressed
    next to it (same path with the ``.gz`` suffix stripped); the compressed
    file is kept.

    Args:
        url: Address of the file to fetch.
        output_path: Destination file path. Missing parent directories are
            created.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot hang the script forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the output files cannot be written.
    """
    # os.makedirs('') raises, so only create a directory when the path has one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    print(f"Downloading {url}...")

    block_size = 8192

    # Use the response as a context manager so the connection is released
    # even if writing to disk fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail fast on HTTP errors instead of saving an error page as ".gz".
        response.raise_for_status()

        total_size = int(response.headers.get('content-length', 0))

        with open(output_path, 'wb') as f, tqdm(
            # None (rather than 0) tells tqdm the total size is unknown.
            total=total_size or None, unit='B', unit_scale=True
        ) as pbar:
            for chunk in response.iter_content(chunk_size=block_size):
                if chunk:  # skip empty chunks
                    f.write(chunk)
                    pbar.update(len(chunk))

    if output_path.endswith('.gz'):
        print(f"Extracting {output_path}...")
        extracted_path = output_path[:-3]  # drop the ".gz" suffix

        with gzip.open(output_path, 'rb') as f_in, \
                open(extracted_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

def main(argv=None):
    """Download the metadata and review files for one dataset.

    The dataset name is substituted into the metadata and review URLs; both
    files are saved under ``/home/yqiao47/dataset/<dataset_name>/`` via
    ``download_file``.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. The first element is the dataset name; any
            further elements are ignored.

    Raises:
        SystemExit: With a usage message if no dataset name is supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args or not args[0]:
        # Exit with a readable usage message instead of a bare IndexError.
        prog = os.path.basename(sys.argv[0]) if sys.argv and sys.argv[0] else "script"
        sys.exit(f"Usage: {prog} <dataset_name>")
    dataset_name = args[0]

    meta_url = f"https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/meta_categories/meta_{dataset_name}.jsonl.gz"
    review_url = f"https://datarepo.eng.ucsd.edu/mcauley_group/data/amazon_2023/raw/review_categories/{dataset_name}.jsonl.gz"

    output_folder = f"/home/yqiao47/dataset/{dataset_name}"

    # Metadata first, then reviews; each file keeps the name it has in its URL.
    for url in (meta_url, review_url):
        filename = os.path.basename(url)
        download_file(url, os.path.join(output_folder, filename))

    print(f"Download complete for {dataset_name}")

# Run the download only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == "__main__":
    main()