#!/usr/bin/env python3
"""
Script to create the standing-pose subset (per-split image lists and a YAML
dataset config) from the ForestPersons dataset.
"""

import json
import os
from pathlib import Path

def create_standing_splits(original_data_dir: str, output_data_dir: str) -> None:
    """
    Create separate train/val/test image lists for the standing pose.

    For each split, ``<original_data_dir>/<split>.json`` is loaded, every image
    that has at least one annotation whose ``attributes['pose']`` equals
    ``'standing'`` is selected, and one ``data/images/<file_name>`` line per
    selected image is written to ``<output_data_dir>/<split>_standing.txt``.
    A split whose annotation file is missing is skipped with a warning, and a
    short summary is printed for every processed split.

    Args:
        original_data_dir: Directory holding ``train.json``, ``val.json`` and
            ``test.json``.
        output_data_dir: Directory receiving the ``*_standing.txt`` files. It
            is created, including missing parents, when it does not exist.

    Raises:
        KeyError: If an annotation file lacks ``'annotations'``/``'images'``,
            a standing annotation lacks ``'image_id'``, or an image entry
            lacks ``'id'``/``'file_name'``.
    """
    data_dir = Path(original_data_dir)
    output_dir = Path(output_data_dir)

    # parents=True so a nested, not-yet-existing output location also works
    output_dir.mkdir(parents=True, exist_ok=True)

    # Process each split (train, val, test)
    for split in ('train', 'val', 'test'):
        ann_file = data_dir / f'{split}.json'

        if not ann_file.exists():
            print(f"Warning: {ann_file} not found, skipping {split}...")
            continue

        # Explicit UTF-8 so the result does not depend on the locale encoding
        with open(ann_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Collect the IDs of images with at least one 'standing' annotation.
        # The isinstance guard tolerates annotations whose 'attributes' entry
        # is missing or null instead of crashing on them.
        standing_image_ids = set()
        for annotation in data['annotations']:
            attributes = annotation.get('attributes')
            if isinstance(attributes, dict) and attributes.get('pose') == 'standing':
                standing_image_ids.add(annotation['image_id'])

        # Keep the order of data['images'] so the output is deterministic
        standing_paths = [
            f"data/images/{image['file_name']}"
            for image in data['images']
            if image['id'] in standing_image_ids
        ]

        # Write standing subset, one image path per line
        standing_file = output_dir / f'{split}_standing.txt'
        with open(standing_file, 'w', encoding='utf-8') as f:
            f.writelines(f'{path}\n' for path in standing_paths)

        print(f"{split.upper()} split:")
        print(f"  Standing images: {len(standing_paths)}")
        print()

def create_yaml_configs(data_path: str) -> None:
    """
    Create the YAML configuration file for the standing subset.

    Writes ``forestpersons_standing.yaml`` into ``data_path``. The file sets
    ``path`` to ``data_path``, points ``train``/``val``/``test`` at the
    ``*_standing.txt`` list files, and declares a single class ``person``.

    Args:
        data_path: Directory the YAML file is written to; also emitted
            verbatim as the ``path`` entry. It is created, including missing
            parents, when it does not exist.
    """
    # NOTE(review): data_path is interpolated unquoted; a path containing
    # YAML-special characters (e.g. ': ' or ' #') would need quoting.
    standing_yaml = f"""# ForestPersons Standing Dataset Configuration

path: {data_path}

train: train_standing.txt
val: val_standing.txt
test: test_standing.txt

# number of classes
nc: 1

# class names
names: [ 'person' ]
"""

    yaml_path = Path(data_path) / 'forestpersons_standing.yaml'

    # Ensure the target directory exists so the function also works when it
    # is called before anything else has created it
    yaml_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 so non-ASCII paths are written independently of locale
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write(standing_yaml)

    print("Created YAML configuration file:")
    print(f"  - {yaml_path}")

if __name__ == "__main__":
    # Annotations are read from, and all outputs written to, the same
    # data directory
    original_data_dir = output_data_dir = (
        "/mnt/home/annonymous/iclr2026/yolov11_forestpersons/data"
    )

    # Step 1: per-split lists of images containing a standing person
    create_standing_splits(original_data_dir, output_data_dir)

    # Step 2: dataset YAML pointing at those lists
    create_yaml_configs(output_data_dir)

    # Final status and follow-up hint
    print("\n✅ Standing dataset splits and configurations created successfully!")
    print(
        "Next steps:",
        "  1. Train standing subset: python train_standing.py",
        sep="\n",
    )