#!/usr/bin/env python3
"""
航空目标推理脚本 - Bg + Crop + Solar + Low-Original + Structured (Texture Guidance)
直接读取数据集进行推理
"""

import os
import sys
import yaml
import torch
import random
import numpy as np
from pathlib import Path
from PIL import Image
import torch.nn.functional as F
import torchvision.transforms as T

# Put the project root (reached by four `.parent` hops from this file) at the
# front of the import path so the `omini` imports below can be resolved.
project_root = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(project_root))

from omini.train_flux.train_aircraft_bg_crop_solar import AircraftSolarDataset
from omini.train_flux.train_aircraft_bg_crop_solar_low_original import OminiSolarLowOriginalModel
from omini.pipeline.flux_omini_solar import Condition, generate, seed_everything
import omini.pipeline.flux_omini_solar as flux_omini_solar

def load_config(config_path: str):
    """Load a YAML configuration file.

    Args:
        config_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document (typically a
        dict for a mapping document).
    """
    # Read as UTF-8 explicitly: relying on the platform's locale encoding makes
    # configs containing non-ASCII text fail (or decode wrongly) on some hosts.
    with open(config_path, 'r', encoding='utf-8') as f:
        return yaml.safe_load(f)

def frequency_split_latents(latents, kernel_size=5):
    """Decompose latents into low-frequency and high-frequency components.

    The token sequence is laid out on a square grid, blurred with a box
    (average) filter to obtain the low band, and the residual is returned as
    the high band, so ``low + high`` reproduces ``latents``.

    Args:
        latents: Tensor of shape ``[B, L, C]`` where ``L`` is a perfect square.
        kernel_size: Side length of the averaging window. Must be a positive
            odd integer so the blurred map keeps the input's spatial size.

    Returns:
        Tuple ``(low, high)``, each of shape ``[B, L, C]``.

    Raises:
        ValueError: If ``kernel_size`` is not a positive odd integer or ``L``
            is not a perfect square.
    """
    if kernel_size < 1 or kernel_size % 2 == 0:
        # An even window with padding k // 2 yields an (H + 1) x (W + 1) map,
        # which cannot be subtracted from the input.
        raise ValueError(
            f"kernel_size must be a positive odd integer, got {kernel_size}"
        )

    B, L, C = latents.shape
    H = round(L ** 0.5)
    if H * H != L:
        raise ValueError(
            f"latents must hold a square grid of tokens, got sequence length {L}"
        )
    W = H

    # reshape (not view) so non-contiguous latents are accepted as well.
    x = latents.transpose(1, 2).reshape(B, C, H, W)
    pad = kernel_size // 2
    # count_include_pad=False keeps border averages unbiased by zero padding.
    low = F.avg_pool2d(
        x, kernel_size=kernel_size, stride=1, padding=pad, count_include_pad=False
    )
    high = x - low

    low_flat = low.flatten(2).transpose(1, 2)
    high_flat = high.flatten(2).transpose(1, 2)
    return low_flat, high_flat

class LowFreqCondition(Condition):
    """Condition whose encoded latents keep only the low-frequency band."""

    def encode(self, pipe, empty=False):
        """Encode via the parent class, then drop the high-frequency residual.

        Returns:
            Tuple of the low-frequency latents and the second value returned
            by the parent ``encode`` (passed through untouched).
        """
        encoded_latents, encoded_ids = super().encode(pipe, empty)
        # Only the blurred component is kept; the residual is discarded.
        low_band = frequency_split_latents(encoded_latents)[0]
        return low_band, encoded_ids

# --- Texture Consistency Energy Guidance Logic ---

def gram_matrix(input_tensor, mask_area=None):
    """Compute the normalised Gram matrix of a feature map.

    The Gram matrix holds the inner products between channels taken over all
    spatial positions, i.e. the channel-to-channel correlations used here as a
    texture descriptor.

    Args:
        input_tensor: Feature map of shape ``[B, C, H, W]``. It is cast to
            float32 before the product for numerical precision.
        mask_area: Optional number of active spatial positions. When given and
            positive, the matrix is divided by ``C * mask_area``; otherwise by
            ``C * H * W``.

    Returns:
        float32 tensor of shape ``[B, C, C]``.
    """
    feats = input_tensor.float()
    B, C, H, W = feats.shape
    # flatten() falls back to a copy when the spatial dims cannot be merged as
    # a view (e.g. after a transpose), where .view() would raise.
    feats = feats.flatten(2)  # [B, C, H*W]
    G = torch.bmm(feats, feats.transpose(1, 2))

    if mask_area is not None and mask_area > 0:
        normaliser = C * mask_area
    else:
        normaliser = C * H * W
    return G.div(normaliser)

def get_surround_mask(box_mask, dilation_pixels=5):
    """Return the ring surrounding the masked region (dilated mask - mask).

    The mask is dilated with a stride-1 max-pool whose window spans
    ``dilation_pixels`` on each side, then the original mask is subtracted.
    """
    window = dilation_pixels * 2 + 1
    grown = F.max_pool2d(
        box_mask,
        kernel_size=window,
        stride=1,
        padding=dilation_pixels,
    )
    return grown - box_mask

def apply_texture_energy_guidance(latents, box_mask, steps=3, scale=100.0, step_size=0.1):
    """Nudge latents so texture statistics inside the box match its surroundings.

    The latents are laid out on a square grid and, for ``steps`` iterations,
    moved along the negative, unit-normalised gradient of the MSE between the
    Gram matrix of the masked region and the Gram matrix of a ring around it.

    Args:
        latents: Tensor of shape ``[B, L, C]`` where ``L`` is a perfect square.
        box_mask: 4-D mask (broadcastable to ``[B, C, H, W]``); resized with
            nearest-neighbour interpolation when its spatial size differs from
            the latent grid.
        steps: Number of gradient steps.
        scale: Unused. Kept so existing call sites keep working; because the
            gradient is normalised, the update size depends on ``step_size``
            only.
        step_size: Length of each normalised gradient step.

    Returns:
        New detached tensor of shape ``[B, L, C]`` in the dtype of ``latents``.
        The latents are returned unchanged when the mask or its surrounding
        ring is empty, since there is then no reference texture to match.

    Raises:
        ValueError: If ``L`` is not a perfect square.
    """
    B, L, C = latents.shape
    H = round(L ** 0.5)
    if H * H != L:
        raise ValueError(
            f"latents must hold a square grid of tokens, got sequence length {L}"
        )
    W = H

    # Optimise in float32 and cast back to the caller's dtype at the end.
    original_dtype = latents.dtype
    x_in = latents.transpose(1, 2).reshape(B, C, H, W).detach().clone().float()

    # Bring the mask onto the latent grid, device and dtype.
    box_mask = box_mask.to(device=x_in.device, dtype=x_in.dtype)
    if tuple(box_mask.shape[-2:]) != (H, W):
        box_mask = F.interpolate(box_mask, size=(H, W), mode='nearest')
    surround_mask = get_surround_mask(box_mask, dilation_pixels=5)

    # The masks do not change between steps, so their areas are computed once
    # (each .item() forces a device sync).
    area_obj = box_mask.sum().item()
    area_env = surround_mask.sum().item()
    if area_obj <= 0 or area_env <= 0:
        # Without both regions the energy would only pull features toward zero.
        return latents.detach().clone()

    # enable_grad lets this run even when the caller is inside torch.no_grad().
    with torch.enable_grad():
        for _ in range(steps):
            x_in.requires_grad_(True)

            G_obj = gram_matrix(x_in * box_mask, mask_area=area_obj)
            G_env = gram_matrix(x_in * surround_mask, mask_area=area_env)
            energy = F.mse_loss(G_obj, G_env)

            (grad,) = torch.autograd.grad(energy, x_in)

            # Normalised step: only the gradient direction is used.
            with torch.no_grad():
                direction = grad / (grad.norm() + 1e-8)
                x_in = x_in - step_size * direction

    return x_in.flatten(2).transpose(1, 2).detach().to(original_dtype)

# --- End Texture Guidance Logic ---

def _resolve_mask_image(sample, target_size):
    """Return the sample's mask as a single-channel PIL image, or a centred fallback box."""
    mask_img = None
    if "target_mask" in sample:
        candidate = sample["target_mask"]
        if isinstance(candidate, Image.Image):
            mask_img = candidate
        elif isinstance(candidate, torch.Tensor):
            if candidate.dim() == 3:
                candidate = candidate.squeeze(0)
            mask_img = T.ToPILImage()(candidate)
    elif "mask" in sample:
        candidate = sample["mask"]
        if isinstance(candidate, str):
            try:
                mask_img = Image.open(candidate).convert("L")
            except (OSError, ValueError) as err:
                # Best effort: report the problem and fall through to the dummy mask.
                print(f"  ⚠️  Warning: could not read mask '{candidate}': {err}")
        elif isinstance(candidate, Image.Image):
            mask_img = candidate

    if mask_img is None:
        # Fallback heuristic: a box covering the central 50% of the image.
        mask_img = Image.new("L", target_size, 0)
        w, h = target_size
        mask_img.paste(255, (w // 4, h // 4, w * 3 // 4, h * 3 // 4))
        print("  ⚠️  Warning: Mask not found in sample, using dummy center mask.")

    # Force a single channel so the tensor conversion always sees an [H, W]
    # array and the mask can be written out as JPEG.
    return mask_img.convert("L")


def _mask_image_to_tensor(mask_img, target_size, device):
    """Convert a single-channel PIL mask to a [1, 1, H, W] float tensor in [0, 1] on ``device``."""
    mask_np = np.array(mask_img.resize(target_size, Image.NEAREST))
    mask_tensor = torch.from_numpy(mask_np).float() / 255.0
    return mask_tensor.unsqueeze(0).unsqueeze(0).to(device)


def inference_on_training_samples(
    model,
    dataset,
    num_samples: int = 10,
    output_dir: str = "inference_results_solar_low_original_structured",
    seed: int = 42
):
    """Generate an image for the first ``num_samples`` dataset items and save the results.

    For every sample the subject and background images, the prompt and a mask
    are read from the dataset, the solar scale/shift parameters are computed
    from the background latents and the mask, and ``generate`` is run with a
    callback that applies texture energy guidance during the first 30% of the
    denoising steps. The generated image is written to ``output_dir``; the
    subject, background, mask and original images go to
    ``output_dir/conditions``.

    Args:
        model: Model exposing ``flux_pipe``, ``solar_encoder``,
            ``solar_projectors``, ``model_config``, ``training_config`` and
            ``dtype``.
        dataset: Indexable dataset with a ``return_pil_image`` attribute. The
            attribute is switched on for the duration of the call and restored
            afterwards, even if an error escapes.
        num_samples: Upper bound on the number of samples processed (clamped to
            ``len(dataset)``).
        output_dir: Directory receiving the outputs; created if missing.
        seed: Base seed; sample ``idx`` uses ``seed + idx``.

    A failure during generation or saving of one sample is reported with its
    traceback and the loop moves on to the next sample.
    """
    import traceback

    os.makedirs(output_dir, exist_ok=True)
    condition_dir = os.path.join(output_dir, "conditions")

    # The config may hold a list; PIL sizes and the unpacking below want a tuple.
    target_size = tuple(model.training_config["dataset"]["target_size"])

    # Adapter names
    adapter_low = "subject_low"
    adapter_original = "subject_original"
    adapter_bg = "background"

    # One source of truth for the step count shared by generate() and the
    # guidance callback; guidance is limited to the first 30% of the steps.
    num_inference_steps = 28
    guided_steps = int(num_inference_steps * 0.3)

    # Clamp before printing so the banner reports what is actually processed.
    num_samples = min(num_samples, len(dataset))

    print(f"\n{'='*70}")
    print(f"Inference on {num_samples} training samples (Solar + Low-Original + Structured)")
    print(f"{'='*70}")
    print(f"Output directory: {output_dir}")
    print(f"Adapters: {adapter_low}, {adapter_original}, {adapter_bg}")

    # Feed the solar encoder on whatever device its weights live on, falling
    # back to CUDA when it has no parameters to inspect.
    encoder_param = next(model.solar_encoder.parameters(), None)
    solar_device = encoder_param.device if encoder_param is not None else torch.device("cuda")

    # Enable PIL return temporarily; restored in the finally clause below.
    original_return_pil = dataset.return_pil_image
    dataset.return_pil_image = True

    try:
        with torch.no_grad():
            for idx in range(num_samples):
                print(f"\n[{idx+1}/{num_samples}] Processing sample {idx}...")

                sample = dataset[idx]

                subject_img = sample["condition_0"]
                if isinstance(subject_img, torch.Tensor):
                    subject_img = T.ToPILImage()(subject_img)

                background_img = sample["condition_1"]
                if isinstance(background_img, torch.Tensor):
                    background_img = T.ToPILImage()(background_img)
                prompt = sample["description"]

                # Both the solar encoder and the texture guidance need a mask.
                mask_img = _resolve_mask_image(sample, target_size)

                # Conditions
                cond_low = LowFreqCondition(subject_img, adapter_low, [-16, -32])
                cond_original = Condition(subject_img, adapter_original, [-16, -32])
                cond_bg = Condition(background_img, adapter_bg, [16, -32])

                # Solar parameters: background latents on a square grid + mask.
                bg_latents, _ = flux_omini_solar.encode_images(model.flux_pipe, background_img)
                B, L, C = bg_latents.shape
                H_latent = int(L ** 0.5)
                W_latent = H_latent
                bg_spatial = bg_latents.transpose(1, 2).view(B, C, H_latent, W_latent)
                bg_spatial = bg_spatial.to(torch.float32).to(solar_device)

                mask_tensor = _mask_image_to_tensor(mask_img, target_size, solar_device)

                context_vector = model.solar_encoder(bg_spatial, mask_tensor)

                # Each projector output is split into a (scale, shift) pair.
                solar_params_list = []
                for proj in model.solar_projectors:
                    params = proj(context_vector)
                    scale, shift = params.chunk(2, dim=1)
                    scale = scale.unsqueeze(1).to(model.dtype)
                    shift = shift.unsqueeze(1).to(model.dtype)
                    solar_params_list.append((scale, shift))

                # Texture guidance callback (runs synchronously inside
                # generate(), so capturing this iteration's mask is safe).
                def texture_guidance_callback(pipe, step_index, timestep, callback_kwargs):
                    if step_index < guided_steps:
                        latents = callback_kwargs["latents"]
                        latents_new = apply_texture_energy_guidance(
                            latents,
                            mask_tensor,
                            steps=3,
                            scale=100.0  # Will be normalized inside
                        )
                        # Write in place so the pipeline's tensor is updated.
                        callback_kwargs["latents"][:] = latents_new
                    return callback_kwargs

                # Generate
                generator = torch.Generator(device=model.flux_pipe.device)
                generator.manual_seed(seed + idx)

                print("  Generating image...")
                try:
                    res = generate(
                        model.flux_pipe,
                        prompt=prompt,
                        conditions=[cond_low, cond_original, cond_bg],
                        height=target_size[1],
                        width=target_size[0],
                        num_inference_steps=num_inference_steps,
                        guidance_scale=3.5,
                        generator=generator,
                        model_config=model.model_config,
                        kv_cache=model.model_config.get("independent_condition", False),
                        solar_params_list=solar_params_list,
                        callback_on_step_end=texture_guidance_callback,
                        callback_on_step_end_tensor_inputs=["latents"],
                    )

                    output_path = os.path.join(output_dir, f"sample_{idx}_generated.jpg")
                    res.images[0].save(output_path)
                    print(f"  ✓ Saved to {output_path}")

                    # Save conditions
                    os.makedirs(condition_dir, exist_ok=True)
                    subject_img.save(os.path.join(condition_dir, f"sample_{idx}_subject.jpg"))
                    background_img.save(os.path.join(condition_dir, f"sample_{idx}_background.jpg"))
                    mask_img.save(os.path.join(condition_dir, f"sample_{idx}_mask.jpg"))
                    original_img = sample["image"]
                    if isinstance(original_img, torch.Tensor):
                        original_img = T.ToPILImage()(original_img)
                    original_img.save(os.path.join(condition_dir, f"sample_{idx}_original.jpg"))

                except Exception as e:
                    # Per-sample boundary: report and keep going with the rest.
                    print(f"  ❌ Generation failed: {e}")
                    traceback.print_exc()
                    continue
    finally:
        dataset.return_pil_image = original_return_pil

    print(f"\n{'='*70}")
    print(f"✓ Inference completed! Results saved to {output_dir}")

def main():
    """Seed all RNGs, build the dataset and model, load weights and run inference.

    Environment variables:
        OMINI_CONFIG: Path of the YAML config
            (default ``./train/config/aircraft_bg_crop_inf.yaml``).
        OMINI_CHECKPOINT: Checkpoint directory holding ``solar_components.pt``
            and one ``<adapter>.safetensors`` file per adapter
            (default ``runs_bg_crop/20260106-174326/ckpt/14000``).
    """
    seed = 42
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    seed_everything(seed)

    config_path = os.environ.get("OMINI_CONFIG", "./train/config/aircraft_bg_crop_inf.yaml")
    print(f"Loading config from {config_path}")
    config = load_config(config_path)
    training_config = config["train"]
    dataset_config = training_config["dataset"]

    print("Loading dataset...")
    # All dropout / augmentation probabilities are zero so every sample is
    # returned with its full set of conditions.
    dataset = AircraftSolarDataset(
        dataset_root=dataset_config["dataset_root"],
        condition_size=tuple(dataset_config["condition_size"]),
        target_size=tuple(dataset_config["target_size"]),
        drop_text_prob=0.0,
        drop_subject_prob=0.0,
        drop_position_prob=0.0,
        drop_background_prob=0.0,
        augmentation_prob=0.0,
        return_pil_image=True
    )

    # Checkpoint: overridable through the environment, like the config path.
    checkpoint_path = os.environ.get(
        "OMINI_CHECKPOINT", "runs_bg_crop/20260106-174326/ckpt/14000"
    )
    print(f"Loading model from {checkpoint_path}...")

    # A list keeps the adapter order deterministic across runs.
    adapter_names = ["subject_low", "subject_original", "background"]

    model = OminiSolarLowOriginalModel(
        flux_pipe_id=config["flux_path"],
        lora_path=None,
        lora_config=None,
        device="cuda",
        dtype=torch.bfloat16 if config["dtype"] == "bfloat16" else torch.float32,
        model_config=config.get("model", {}),
        adapter_names=[None, None, *adapter_names],
        gradient_checkpointing=False,
    )
    model = model.to("cuda")
    model.adapter_set = set(adapter_names)
    model.training_config = training_config

    # Load weights
    solar_path = os.path.join(checkpoint_path, "solar_components.pt")
    if os.path.exists(solar_path):
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from trusted sources (consider weights_only=True where supported).
        state = torch.load(solar_path, map_location=model.flux_pipe.device)
        model.solar_encoder.load_state_dict(state["encoder"])
        model.solar_projectors.load_state_dict(state["projectors"])
    else:
        print(
            f"  ⚠️  Warning: {solar_path} not found; "
            "solar encoder/projectors keep their initial weights."
        )

    for adapter_name in adapter_names:
        model.flux_pipe.load_lora_weights(
            checkpoint_path,
            weight_name=f"{adapter_name}.safetensors",
            adapter_name=adapter_name,
        )

    model.transformer.set_adapters(adapter_names)
    model.eval()

    inference_on_training_samples(model, dataset, num_samples=len(dataset), seed=seed)

# Run the inference pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()