{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "H52sie-33LLq"
   },
   "source": [
    "# Final Entropy-Based Complexity Measures"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "3iyDk7kd3SVn"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from PIL import Image\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "from skimage import exposure\n",
    "from skimage.feature import graycomatrix\n",
    "from skimage.measure import shannon_entropy\n",
    "import json\n",
    "\n",
    "# Names of the dataset folders to analyse; the main loop reads images from\n",
    "# BASE_PATH/<name>/preprocessed for each entry.\n",
    "DATASETS = [\"ISIC-2018\", \"chest_xray\", \"PolypsSet\"]\n",
    "# Root directory that contains the dataset folders.\n",
    "# NOTE(review): this looks like a placeholder - set it to the real location before running.\n",
    "BASE_PATH = 'path/to/base'\n",
    "\n",
    "def delentropy(image):\n",
    "    \"\"\"Compute the delentropy of a 2-D grayscale image.\n",
    "\n",
    "    The gradient (fx, fy) is estimated with central differences on the interior\n",
    "    pixels, the joint 2-D histogram of (fx, fy) is normalised into the\n",
    "    deldensity, and the result is -0.5 * sum(p * log2(p)) over non-empty bins.\n",
    "\n",
    "    Args:\n",
    "        image: 2-D array-like with at least 3 rows and 3 columns. Non-float\n",
    "            input (e.g. uint8) is widened to int64 before differencing.\n",
    "\n",
    "    Returns:\n",
    "        The delentropy in bits as a float.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `image` is not 2-D or has fewer than 3 rows or columns.\n",
    "    \"\"\"\n",
    "    image = np.asarray(image)\n",
    "    if image.ndim != 2 or min(image.shape) < 3:\n",
    "        raise ValueError(f'delentropy needs a 2-D image of at least 3x3 pixels, got shape {image.shape}')\n",
    "\n",
    "    is_float = np.issubdtype(image.dtype, np.floating)\n",
    "    if not is_float:\n",
    "        # Unsigned integers wrap around on subtraction (0 - 200 == 56 in uint8),\n",
    "        # which silently merges distinct gradients; use a signed, wide type.\n",
    "        image = image.astype(np.int64)\n",
    "\n",
    "    # Central differences, see paper eq. (4). A 2x2 forward/backward difference\n",
    "    # kernel introduces an asymmetry (the deldensity follows a diagonal); the\n",
    "    # central difference gives a symmetric deldensity for random noise.\n",
    "    fx = (image[:, 2:] - image[:, :-2])[1:-1, :]\n",
    "    fy = (image[2:, :] - image[:-2, :])[:, 1:-1]\n",
    "\n",
    "    diffRange = max(np.abs(fx).max(), np.abs(fy).max())\n",
    "    # Plain Python numbers avoid fixed-width overflow in 2 * diffRange + 1 below.\n",
    "    diffRange = float(diffRange) if is_float else int(diffRange)\n",
    "    # Snap near-full-scale ranges to the 8-bit / 16-bit maximum (presumably so\n",
    "    # that such images share one common binning).\n",
    "    if 200 <= diffRange <= 255:\n",
    "        diffRange = 255\n",
    "    if 60000 <= diffRange <= 65535:\n",
    "        diffRange = 65535\n",
    "\n",
    "    # see paper eq. (17)\n",
    "    if is_float:\n",
    "        nBins = 1024\n",
    "        dbin = 0\n",
    "    else:\n",
    "        # The bin edges must be integers apart, that is why the number of bins\n",
    "        # and the range depend on each other.\n",
    "        nBins = min(1024, 2 * diffRange + 1)\n",
    "        # Centre the bins on the integers; otherwise every value lies on a bin\n",
    "        # edge, which leads to asymmetric artifacts.\n",
    "        dbin = 0.5\n",
    "    r = diffRange + dbin\n",
    "    delDensity, xedges, yedges = np.histogram2d(\n",
    "        fx.ravel(), fy.ravel(), bins=nBins, range=[[-r, r], [-r, r]])\n",
    "    if not is_float and nBins == 2 * diffRange + 1:\n",
    "        assert xedges[1] - xedges[0] == 1.0\n",
    "        assert yedges[1] - yedges[0] == 1.0\n",
    "\n",
    "    # Normalise the counts to probabilities, see paper eq. (17).\n",
    "    delDensity = delDensity / delDensity.sum()\n",
    "    # p * log(p) tends to 0 as p -> 0, so empty bins are simply left out.\n",
    "    p = delDensity[delDensity > 0]\n",
    "    # The 0.5 factor is discussed in the paper chapter\n",
    "    # \"4.3 Papoulis generalized sampling halves the delentropy\"; see eq. (16).\n",
    "    return float(-0.5 * np.sum(p * np.log2(p)))\n",
    "\n",
    "def load_image(file_path):\n",
    "    \"\"\"Open the image at `file_path` and return it as a NumPy array in PIL mode 'L' (grayscale).\"\"\"\n",
    "    with Image.open(file_path) as source:\n",
    "        grayscale = source.convert('L')\n",
    "        return np.array(grayscale)\n",
    "\n",
    "def _glcm_entropy(glcm):\n",
    "    \"\"\"Mean Shannon entropy (bits) of the normalised co-occurrence matrices in `glcm`.\n",
    "\n",
    "    `glcm` is the 4-D array produced by graycomatrix(..., normed=True), indexed\n",
    "    [i, j, distance, angle]. The entropy -sum(p * log2(p)) is taken over (i, j)\n",
    "    for every distance/angle pair and the pairs are then averaged.\n",
    "    \"\"\"\n",
    "    p = np.asarray(glcm, dtype=float)\n",
    "    plogp = np.zeros_like(p)\n",
    "    occupied = p > 0  # p * log(p) -> 0 for p -> 0, so empty cells contribute nothing\n",
    "    plogp[occupied] = p[occupied] * np.log2(p[occupied])\n",
    "    return float(np.mean(-plogp.sum(axis=(0, 1))))\n",
    "\n",
    "def analyze_dataset(dataset, dataset_path):\n",
    "    \"\"\"Compute three entropy measures for every image file in `dataset_path`.\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name, used only in the progress messages.\n",
    "        dataset_path: directory whose .png/.jpg/.jpeg files (any letter case) are analysed.\n",
    "\n",
    "    Returns:\n",
    "        Dict with the keys 'shannon', 'glcm' and 'delentropy', each mapping to a\n",
    "        list with one value per image, in sorted filename order.\n",
    "    \"\"\"\n",
    "    entropies = {'shannon': [], 'glcm': [], 'delentropy': []}\n",
    "    # Sorted so that the per-image order is reproducible; os.listdir order is arbitrary.\n",
    "    image_files = sorted(\n",
    "        f for f in os.listdir(dataset_path)\n",
    "        if f.lower().endswith(('.png', '.jpg', '.jpeg'))\n",
    "        and os.path.isfile(os.path.join(dataset_path, f))\n",
    "    )\n",
    "    num_files = len(image_files)\n",
    "\n",
    "    for img_num, filename in enumerate(image_files, start=1):\n",
    "        image = load_image(os.path.join(dataset_path, filename))\n",
    "\n",
    "        entropies['shannon'].append(shannon_entropy(exposure.rescale_intensity(image, out_range=(0, 255))))\n",
    "        glcm = graycomatrix(image, distances=[1, 2, 3], angles=[0, np.pi/4, np.pi/2, 3*np.pi/4], symmetric=True, normed=True)\n",
    "        # shannon_entropy(glcm) would measure the histogram of the matrix *values*,\n",
    "        # not the entropy of the co-occurrence probabilities themselves.\n",
    "        entropies['glcm'].append(_glcm_entropy(glcm))\n",
    "        entropies['delentropy'].append(delentropy(image))\n",
    "\n",
    "        print(f'{dataset}: Processed {filename} | File: {img_num}/{num_files}')\n",
    "\n",
    "    return entropies\n",
    "\n",
    "def calculate_stats(data):\n",
    "    \"\"\"Summarise `data` as a dict holding its 'mean', 'median' and 'std' (NumPy defaults).\"\"\"\n",
    "    reducers = (('mean', np.mean), ('median', np.median), ('std', np.std))\n",
    "    return {label: reduce_fn(data) for label, reduce_fn in reducers}\n",
    "\n",
    "def plot_histogram(data, title, filename):\n",
    "    \"\"\"Save a 20-bin histogram of `data` to `filename`.\n",
    "\n",
    "    Args:\n",
    "        data: sequence of entropy values to bin.\n",
    "        title: text shown above the plot.\n",
    "        filename: output path handed to savefig.\n",
    "    \"\"\"\n",
    "    # One full-width axes: a (1, 2, 1) subplot would leave the right half of the image blank.\n",
    "    fig, ax = plt.subplots(figsize=(12, 6))\n",
    "    try:\n",
    "        ax.hist(data, bins=20, color='skyblue', edgecolor='black')\n",
    "        ax.set_title(title)\n",
    "        ax.set_xlabel('Entropy Value')\n",
    "        ax.set_ylabel('Frequency')\n",
    "        fig.savefig(filename)\n",
    "    finally:\n",
    "        # Close even if saving fails so figures do not pile up in memory.\n",
    "        plt.close(fig)\n",
    "\n",
    "\n",
    "# Main execution: analyse every dataset, plot one histogram per entropy type,\n",
    "# then print and save the summary statistics. All paths derive from BASE_PATH.\n",
    "results = {}\n",
    "\n",
    "for dataset in DATASETS:\n",
    "    dataset_path = os.path.join(BASE_PATH, dataset, 'preprocessed')\n",
    "    entropies = analyze_dataset(dataset, dataset_path)\n",
    "\n",
    "    # Without any images the statistics would be NaN, which is not valid JSON.\n",
    "    if not any(entropies.values()):\n",
    "        print(f'{dataset}: no images found in {dataset_path}, skipping')\n",
    "        continue\n",
    "\n",
    "    results[dataset] = {\n",
    "        entropy_type: calculate_stats(values)\n",
    "        for entropy_type, values in entropies.items()\n",
    "    }\n",
    "\n",
    "    # Plot histograms\n",
    "    for entropy_type, values in entropies.items():\n",
    "        plot_histogram(values, f'{dataset} - {entropy_type} Histogram',\n",
    "                       os.path.join(BASE_PATH, dataset, f'{entropy_type}_histogram.png'))\n",
    "\n",
    "# Print results\n",
    "for dataset, dataset_results in results.items():\n",
    "    print(f\"\\nResults for {dataset}:\")\n",
    "    for entropy_type, stats in dataset_results.items():\n",
    "        print(f\"  {entropy_type}:\")\n",
    "        for stat_name, value in stats.items():\n",
    "            print(f\"    {stat_name}: {value:.4f}\")\n",
    "\n",
    "# Save results to file\n",
    "results_path = os.path.join(BASE_PATH, 'entropy_results.json')\n",
    "with open(results_path, 'w') as f:\n",
    "    json.dump(results, f, indent=2)\n",
    "\n",
    "print(f\"\\nResults saved to {results_path}\")"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 0
}
