import numpy as np
import torch
import torchvision
from PIL import Image
from torchvision import transforms
from statsmodels.stats.diagnostic import acorr_ljungbox

# Image file that main() loads and crops; a relative path, so it is resolved
# against the current working directory.
img_path = "resources/images/target_images/1/0.jpg"


# load the image
def load_image(img_path: str) -> Image.Image:
    """Open the image file at *img_path* with PIL and return the image object."""
    return Image.open(img_path)

# Parameters of the crop experiment
N_CROPS = 100_000  # How many random crops main() draws
OUTPUT_SIZE = 224  # Size passed to both the initial Resize and RandomResizedCrop
# ``scale`` argument handed to RandomResizedCrop (taken from the user's
# original transform context)
CROP_SCALE_RANGE = (0.03, 1.0)

def _collect_crop_means(pil_img: "Image.Image") -> np.ndarray:
    """Draw N_CROPS random resized crops of *pil_img* and return their mean pixel values."""
    # Resize is deterministic and draws no random numbers, so apply it once
    # instead of repeating identical work on the same image for every crop.
    resized_img = transforms.Resize(OUTPUT_SIZE)(pil_img)
    crop_transform = transforms.Compose([
        transforms.RandomResizedCrop(OUTPUT_SIZE, scale=CROP_SCALE_RANGE),
        transforms.ToTensor(),
    ])

    crop_means = []
    for i in range(N_CROPS):
        cropped_tensor = crop_transform(resized_img)
        if i == 0:
            # Print shape of the first crop to confirm dimensions
            print(f"Shape of the first cropped tensor: {cropped_tensor.shape}")
            if list(cropped_tensor.shape[1:]) != [OUTPUT_SIZE, OUTPUT_SIZE]:
                print(f"Warning: Crop dimensions {list(cropped_tensor.shape[1:])} "
                      f"do not match expected [{OUTPUT_SIZE}, {OUTPUT_SIZE}]")
        crop_means.append(torch.mean(cropped_tensor).item())
    return np.array(crop_means)


def _report_distribution(crop_means_np: np.ndarray) -> None:
    """Print overall mean/std of the crop means and compare the two sequence halves."""
    n_samples = crop_means_np.size

    # 1. Overall distribution of crop means (for the "identically distributed" aspect)
    mean_of_crop_means = np.mean(crop_means_np)
    std_of_crop_means = np.std(crop_means_np)
    print(f"Overall mean of crop means: {mean_of_crop_means:.6f}")
    print(f"Overall standard deviation of crop means: {std_of_crop_means:.6f}")

    # 2. Stability check: compare the mean of the first half with the second half
    if n_samples >= 10:  # Only if enough data for a somewhat meaningful comparison
        mid_point = n_samples // 2
        mean_first_half = np.mean(crop_means_np[:mid_point])
        mean_second_half = np.mean(crop_means_np[mid_point:])
        print(f"Mean of first {mid_point} crop means: {mean_first_half:.6f}")
        print(f"Mean of last {n_samples - mid_point} crop means: {mean_second_half:.6f}")
        # Tolerance: 5% of the second-half mean plus 5% of the overall std.
        if not np.isclose(mean_first_half, mean_second_half, rtol=0.05, atol=0.05 * std_of_crop_means):
            print("  Note: Means of the first and second halves of the sequence show some difference.")


def _report_lag1_autocorrelation(crop_means_np: np.ndarray) -> None:
    """Print the lag-1 autocorrelation of the crop means and whether it is significant."""
    n_samples = crop_means_np.size
    autocorr_lag1 = np.corrcoef(crop_means_np[:-1], crop_means_np[1:])[0, 1]
    print(f"Lag-1 autocorrelation of crop means: {autocorr_lag1:.6f}")

    # A NaN coefficient compares False against any threshold; without this
    # guard it would be reported as a sequential dependency.
    if np.isnan(autocorr_lag1):
        print("  Note: Lag-1 autocorrelation is undefined (too few crops or zero variance "
              "in the crop means); independence cannot be judged from it.")
        return

    # Significance threshold for autocorrelation (approx. 95% CI for H0: rho=0)
    significance_threshold = 1.96 / np.sqrt(n_samples - 1)
    if abs(autocorr_lag1) < significance_threshold:
        print(f"  Lag-1 autocorrelation is within +/- {significance_threshold:.4f}, "
              "suggesting crop means are sequentially independent at lag 1.")
    else:
        print(f"  Warning: Lag-1 autocorrelation is outside +/- {significance_threshold:.4f}, "
              "which might indicate some sequential dependency in crop means at lag 1.")


def _candidate_ljung_box_lags(n_samples: int) -> list:
    """Return the fixed set of Ljung-Box lags considered for a sample of size *n_samples*."""
    if n_samples >= 100:
        return [5, 10, 15, 20, 25, 30, 50, 100]
    if n_samples >= 40:
        return [5, 10]
    if n_samples >= 20:
        return [5]
    return [max(1, n_samples // 5)]  # At least 1 lag if possible


def _report_ljung_box(crop_means_np: np.ndarray) -> None:
    """Run the Ljung-Box test on the crop means and print per-lag and overall verdicts."""
    n_samples = crop_means_np.size

    # The Ljung-Box test checks the null hypothesis that autocorrelations up to
    # each tested lag are zero; small p-values suggest significant autocorrelation.
    print("\n--- Ljung-Box Test for Independence ---")
    print("Note: This test requires the 'statsmodels' library.")
    print("Null Hypothesis (H0): The data are independently distributed (no serial autocorrelation).")
    print("Alternative Hypothesis (Ha): The data are not independently distributed.")

    candidate_lags = _candidate_ljung_box_lags(n_samples)
    if n_samples < 10:
        print("Warning: N_CROPS is very small for a meaningful Ljung-Box test.")

    # Keep only lags smaller than the sample size. Rejecting the whole list
    # because of its largest entry would skip the test at exactly 100 samples,
    # where every lag except 100 is still valid.
    usable_lags = [lag for lag in candidate_lags if lag < n_samples]
    if not usable_lags:
        print(f"  Skipping Ljung-Box test as N_CROPS ({n_samples}) is not large enough for the chosen lags {candidate_lags}.")
        return

    try:
        lb_results = acorr_ljungbox(crop_means_np, lags=usable_lags, return_df=True)
        tested_lags = list(lb_results.index)
        p_values = np.asarray(lb_results['lb_pvalue'], dtype=float)
    except ImportError:
        print("  Error: 'statsmodels' library not found. Please install it to run the Ljung-Box test (e.g., pip install statsmodels).")
        return
    except Exception as e:
        # Best-effort: a failing test is reported, not allowed to abort the script.
        print(f"  Error running Ljung-Box test: {e}")
        print("  Please ensure N_CROPS is sufficiently large and greater than the number of lags.")
        return

    # NaN p-values fail both `> 0.05` and `<= 0.05`, which would otherwise
    # yield contradictory per-lag and overall conclusions.
    if np.isnan(p_values).any():
        print("  Note: Ljung-Box p-values are undefined (NaN), e.g. because the crop means "
              "have zero variance; no conclusion about independence can be drawn.")
        return

    print("Ljung-Box test results (p-values):")
    for lag, p_val in zip(tested_lags, p_values):
        if p_val > 0.05:
            verdict = 'Suggests NO significant autocorrelation'
        else:
            verdict = 'Suggests significant autocorrelation'
        print(f"  Lag {lag}: p-value = {p_val:.6f} ({verdict})")

    if (p_values <= 0.05).any():
        print("  Conclusion: At least one lag shows significant autocorrelation (p <= 0.05).")
        print("  This suggests the sequence of crop means may NOT be independent.")
    else:
        print("  Conclusion: No significant autocorrelation detected up to the tested lags (all p > 0.05).")
        print("  This supports the hypothesis that the sequence of crop means is independent.")


def main():
    """
    Tests I.I.D. properties of RandomResizedCrop on an image.

    Loads the image at the module-level ``img_path``, generates ``N_CROPS``
    random resized crops, and records the mean pixel value of each. The
    sequence of means is then analyzed for a consistent distribution
    (overall mean/std dev, comparison of sequence halves) and for
    independence (lag-1 autocorrelation and the Ljung-Box test).
    All results are printed to stdout; nothing is returned.
    """
    # 'img_path' is a module-level global defined at the top of the script.
    pil_img = load_image(img_path)

    print(f"Generating {N_CROPS} crops from '{img_path}'...")
    crop_means_np = _collect_crop_means(pil_img)

    print(f"\n--- Analysis of {N_CROPS} Crop Means ---")
    _report_distribution(crop_means_np)

    # 3. Independence checks need at least two observations.
    if N_CROPS > 1:
        _report_lag1_autocorrelation(crop_means_np)
        _report_ljung_box(crop_means_np)
    else:
        print("Cannot calculate autocorrelation with fewer than 2 crops.")

    print("\n--- Interpretation Guidance ---")
    print("The above statistics provide insights into whether the sequence of crop means behaves as I.I.D.:")
    print("- 'Identically Distributed': Check if the overall mean/std dev are stable, and if means of sequence halves are similar.")
    print("- 'Independent': A low lag-1 autocorrelation and non-significant Ljung-Box test results (p-values > 0.05) support this.")
    print("  Significant autocorrelation (especially from Ljung-Box) would challenge independence.")
    print("For rigorous I.I.D. verification, more advanced statistical tests and analysis of higher-dimensional crop features would be necessary.")

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()