import os
from pathlib import Path
from tqdm.auto import tqdm
import shutil

def _classify_sample_file(file):
    """Return the target bucket name for a sample file, or None to discard it.

    The rules are purely name-based:
    - a ``.png`` whose stem has exactly four ``_``-separated parts -> 'images'
    - any other ``.png`` whose name contains ``_sem_`` -> 'segmentation'
    - a ``.json`` file -> 'labels'
    - everything else -> None (the caller deletes it)
    """
    if file.suffix == '.png':
        # The four-part check deliberately comes first, so a four-part stem is
        # treated as a main image even if the name also contains "_sem_".
        if len(file.stem.split("_")) == 4:
            return 'images'
        if "_sem_" in file.name:
            return 'segmentation'
        return None
    if file.suffix == '.json':
        return 'labels'
    return None


def clean_and_restructure_clevrtex(root_dir) -> None:
    """Flatten a clevrtexv2_full directory into images/, segmentation/ and labels/.

    The expected layout is ``root/<group>/<sample>/<files>``. For every file
    found directly inside a sample directory:
    - main images are moved to ``root/images``
    - semantic maps are moved to ``root/segmentation``
    - JSON descriptions are moved to ``root/labels``
    - all other files are deleted

    Sample directories (and their parent group directories) are removed once
    they are empty. Nested directories inside a sample directory are left
    untouched, which also keeps that sample directory in place.

    WARNING: this operation is destructive and modifies ``root_dir`` in place.

    Args:
        root_dir: Path (str or path-like) to the dataset root directory.

    Raises:
        ValueError: If ``root_dir`` does not exist or is not a directory.
    """
    root = Path(root_dir)
    # is_dir() (rather than exists()) also rejects a regular file, which would
    # otherwise fail later with a confusing error from mkdir().
    if not root.is_dir():
        raise ValueError(f"Directory {root_dir} does not exist!")

    # Create new directory structure
    new_dirs = {name: root / name for name in ('images', 'segmentation', 'labels')}
    for dir_path in new_dirs.values():
        dir_path.mkdir(exist_ok=True)

    # Find all subdirectories containing samples (two levels below root),
    # ignoring the output directories created above.
    sample_dirs = []
    for num_dir in root.iterdir():
        if num_dir.is_dir() and num_dir.name not in new_dirs:
            sample_dirs.extend(d for d in num_dir.iterdir() if d.is_dir())

    # Counts every file taken out of a sample directory (moved or deleted).
    total_removed = 0
    # Files left in place because the target name was already taken.
    total_skipped = 0
    total_moved = dict.fromkeys(new_dirs, 0)

    print(f"Found {len(sample_dirs)} sample directories")

    for sample_dir in tqdm(sample_dirs, desc="Processing directories"):
        # Snapshot the listing first: entries are removed while we loop.
        for file in list(sample_dir.iterdir()):
            if file.is_dir():
                continue

            bucket = _classify_sample_file(file)
            if bucket is None:
                # Not part of the new structure: delete it.
                file.unlink()
            else:
                target = new_dirs[bucket] / file.name
                if target.exists():
                    # Never overwrite an already-collected file; keep the
                    # source where it is so no data is lost.
                    total_skipped += 1
                    continue
                # A real move: a cheap rename on the same filesystem, with a
                # metadata-preserving copy + delete fallback otherwise.
                shutil.move(str(file), str(target))
                total_moved[bucket] += 1
            total_removed += 1

        # Remove empty directory
        if sample_dir.exists() and not any(sample_dir.iterdir()):
            sample_dir.rmdir()

        # Also try to remove parent directory if empty
        parent_dir = sample_dir.parent
        if parent_dir.exists() and not any(parent_dir.iterdir()):
            parent_dir.rmdir()

    print("Cleanup and restructuring complete!")
    print(f"Total files removed: {total_removed}")
    print("Files moved to new structure:")
    for dir_name, count in total_moved.items():
        print(f"  {dir_name}: {count} files")
    if total_skipped:
        print(f"Files skipped because the target already exists: {total_skipped}")

if __name__ == "__main__":
    # Command-line entry point: read the dataset root from --dataset_root and
    # run the in-place cleanup on it.
    import argparse

    cli = argparse.ArgumentParser(
        description="Clean and restructure ClevrTex dataset directory"
    )
    cli.add_argument(
        "--dataset_root",
        required=True,
        type=str,
        help="Path to clevrtexv2_full directory",
    )

    dataset_root = cli.parse_args().dataset_root
    clean_and_restructure_clevrtex(dataset_root)
