#!/usr/bin/env python3
"""
Script to create summer and winter subsets from the ForestPersons dataset
"""

import os
from pathlib import Path

def create_season_splits(original_data_dir: str, output_data_dir: str):
    """
    Create separate train/val/test splits for summer and winter seasons.

    For each of ``train.txt``, ``val.txt`` and ``test.txt`` found in
    ``original_data_dir``, the non-blank lines are read (surrounding
    whitespace stripped) and divided by substring match: lines containing
    ``_summer_`` go to ``{split}_summer.txt``, otherwise lines containing
    ``_winter_`` go to ``{split}_winter.txt``. Lines matching neither are
    only counted as "Other" in the printed summary and are written nowhere.

    Args:
        original_data_dir: Directory holding the ``{split}.txt`` list files.
        output_data_dir: Directory receiving the seasonal list files. It is
            created (including missing parent directories) if necessary.

    A missing split file is reported with a warning and skipped; no output
    files are produced for that split.
    """
    data_dir = Path(original_data_dir)
    output_dir = Path(output_data_dir)

    # parents=True so that a nested, not-yet-existing output location works
    # instead of raising FileNotFoundError.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Process each split (train, val, test)
    for split in ('train', 'val', 'test'):
        split_file = data_dir / f'{split}.txt'

        if not split_file.exists():
            print(f"Warning: {split_file} not found, skipping...")
            continue

        # Read all image paths, ignoring blank lines. The encoding is pinned
        # so the result does not depend on the machine's locale.
        with open(split_file, 'r', encoding='utf-8') as f:
            all_paths = [line.strip() for line in f if line.strip()]

        # Bucket the paths by season; summer takes precedence when a path
        # happens to contain both markers.
        season_paths = {'summer': [], 'winter': []}
        for path in all_paths:
            if '_summer_' in path:
                season_paths['summer'].append(path)
            elif '_winter_' in path:
                season_paths['winter'].append(path)

        # Write one list file per season (an empty file if nothing matched)
        for season, paths in season_paths.items():
            subset_file = output_dir / f'{split}_{season}.txt'
            with open(subset_file, 'w', encoding='utf-8') as f:
                f.writelines(f'{path}\n' for path in paths)

        summer_count = len(season_paths['summer'])
        winter_count = len(season_paths['winter'])

        print(f"{split.upper()} split:")
        print(f"  Total images: {len(all_paths)}")
        print(f"  Summer images: {summer_count}")
        print(f"  Winter images: {winter_count}")
        print(f"  Other images: {len(all_paths) - summer_count - winter_count}")
        print()

def create_yaml_configs(data_path: str):
    """
    Create YAML configuration files for summer and winter subsets.

    Writes ``forestpersons_summer.yaml`` and ``forestpersons_winter.yaml``
    into ``data_path``. Each file records ``data_path`` verbatim under
    ``path``, points ``train``/``val``/``test`` at the matching
    ``{split}_{season}.txt`` list file, and declares a single class named
    ``person``.

    Args:
        data_path: Directory that receives the YAML files and is also the
            value written to the ``path`` key. It is created (including
            missing parent directories) if it does not exist yet.
    """
    config_dir = Path(data_path)
    # Make the function usable on its own, not only after the split files
    # (and thereby the directory) have already been created.
    config_dir.mkdir(parents=True, exist_ok=True)

    written_names = []
    # Both configs share one template so they cannot drift apart.
    for season in ('summer', 'winter'):
        yaml_text = (
            f"# ForestPersons {season.capitalize()} Dataset Configuration\n"
            "\n"
            f"path: {data_path}\n"
            "\n"
            f"train: train_{season}.txt\n"
            f"val: val_{season}.txt\n"
            f"test: test_{season}.txt\n"
            "\n"
            "# number of classes\n"
            "nc: 1\n"
            "\n"
            "# class names\n"
            "names: [ 'person' ]\n"
        )

        yaml_name = f'forestpersons_{season}.yaml'
        with open(config_dir / yaml_name, 'w', encoding='utf-8') as f:
            f.write(yaml_text)
        written_names.append(yaml_name)

    print("Created YAML configuration files:")
    for yaml_name in written_names:
        print(f"  - {data_path}/{yaml_name}")

if __name__ == "__main__":
    # Configuration: the seasonal list files and YAML configs are written
    # next to the original split files (input and output directory coincide).
    original_data_dir = "/mnt/home/annonymous/iclr2026/yolov11_forestpersons/data"
    output_data_dir = "/mnt/home/annonymous/iclr2026/yolov11_forestpersons/data"

    # Create season splits
    create_season_splits(original_data_dir, output_data_dir)

    # Create YAML configs (once; they reference the list files written above)
    create_yaml_configs(output_data_dir)

    print("\n✅ Seasonal dataset splits and configurations created successfully!")
    print("Next steps:")
    print("  1. Train summer subset: python train_summer.py")
    print("  2. Train winter subset: python train_winter.py")