{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run for image moments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import re\n",
    "import torch\n",
    "\n",
    "def get_moments(df):\n",
    "    \"\"\"\n",
    "    Calculate moments for each column in a dataframe.\n",
    "    \n",
    "    Returns a dictionary with column names as keys and a list of moments as values.\n",
    "    \"\"\"\n",
    "    moments = {}\n",
    "    for col in df.columns:\n",
    "        moments[col] = [\n",
    "            df[col].mean(),\n",
    "            df[col].var(),\n",
    "            df[col].skew(),\n",
    "            df[col].kurt()\n",
    "        ]\n",
    "    return moments\n",
    "\n",
    "\n",
    "def process_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a file and calculate its moments, considering the first column as index.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(file_path, delimiter='\\t', index_col=0)\n",
    "    return get_moments(df)\n",
    "\n",
    "def process_porosity_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a porosity file and extract the single value.\n",
    "    \"\"\"\n",
    "    with open(file_path, 'r') as f:\n",
    "        f.readline()  # Read and discard the header\n",
    "        return f.readline().strip().split('\\t')[1]\n",
    "\n",
    "def save_to_file(image_name, binder_moments, damage_moments, porosity_value, directory):\n",
    "    \"\"\"\n",
    "    Save the moments to a new text file.\n",
    "    \"\"\"\n",
    "    moments_names = ['Mean', 'Variance', 'Skewness', 'Kurtosis']\n",
    "    \n",
    "    # Modifying the output path to use the tensor_dir instead\n",
    "    tensor_dir = '/path/to/dir/Moments'\n",
    "    output_path = os.path.join(tensor_dir, f\"{image_name}_moments.txt\")\n",
    "    \n",
    "    with open(output_path, 'w') as f:\n",
    "        f.write(\"Binder Moments:\\n\")\n",
    "        for col, moments in binder_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(\"\\nDamage Moments:\\n\")\n",
    "        for col, moments in damage_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(f\"\\nPorosity Value:\\n{porosity_value}\")\n",
    "\n",
    "def process_directory(directory):\n",
    "    results = {}\n",
    "    \n",
    "    # Helper function to process files within a directory or sub-directory\n",
    "    def process_files_in_directory(dir_path):\n",
    "        for file in os.listdir(dir_path):\n",
    "            file_path = os.path.join(dir_path, file)\n",
    "            image_name = file.rsplit('_', 1)[0]\n",
    "\n",
    "            if image_name not in results:\n",
    "                results[image_name] = {}\n",
    "\n",
    "            if file.endswith(\"_binder.txt\"):\n",
    "                results[image_name]['binder'] = process_file(file_path)\n",
    "            elif file.endswith(\"_damage.txt\"):\n",
    "                results[image_name]['damage'] = process_file(file_path)\n",
    "            elif file.endswith(\"_porosity.txt\"):\n",
    "                results[image_name]['porosity'] = process_porosity_file(file_path)\n",
    "\n",
    "            # Save to file if all required data is available\n",
    "            if 'binder' in results[image_name] and 'damage' in results[image_name] and 'porosity' in results[image_name]:\n",
    "                save_to_file(image_name, results[image_name]['binder'], results[image_name]['damage'], results[image_name]['porosity'], dir_path)\n",
    "                del results[image_name]  # clear memory\n",
    "    \n",
    "    # Process the main directory\n",
    "    process_files_in_directory(directory)\n",
    "\n",
    "    # Process the sub-directories\n",
    "    for subdir in os.listdir(directory):\n",
    "        subdir_path = os.path.join(directory, subdir)\n",
    "        if os.path.isdir(subdir_path):\n",
    "            process_files_in_directory(subdir_path)\n",
    "\n",
    "def extract_values(file_path):\n",
    "    with open(file_path, 'r') as f:\n",
    "        text = f.read()\n",
    "\n",
    "    keywords = [\"Mean\", \"Variance\", \"Skewness\", \"Kurtosis\", \"Porosity Value\"]\n",
    "    values = [float(val) for keyword in keywords for val in re.findall(rf\"{keyword}:\\s+([-+]?\\d*\\.\\d+|\\d+)\", text)]\n",
    "\n",
    "    return torch.tensor(values)\n",
    "\n",
    "# Running the code\n",
    "directory = '/path/to/dir/Morphometry/Morphometry/Morphometry'\n",
    "process_directory(directory)\n",
    "\n",
    "# Updated directory for extracting moments values\n",
    "#tensor_dir = '/path/to/dir/Moments'\n",
    "#test_file_path = os.path.join(tensor_dir, '0D4image_moments.txt')\n",
    "#tensor_values = extract_values(test_file_path)\n",
    "#print(tensor_values)"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "tensor([ 1.5119e+00,  8.3403e-01,  2.3530e+00,  4.7281e+01,  1.1632e+00,\n",
    "         7.2931e-01,  2.3336e+00,  4.7595e+01,  1.7985e+01,  7.9706e-01,\n",
    "         1.1841e+00,  4.3296e+02,  2.2734e+01,  1.3167e+00,  1.0503e+00,\n",
    "         5.1425e+02,  6.7259e+00,  4.1695e+00,  1.2779e+00,  5.2506e-02,\n",
    "         9.3561e+00,  8.7581e+00,  1.6150e+00,  3.5103e-01,  5.2393e+01,\n",
    "         2.1615e+01,  1.3127e+00, -3.9807e-01,  9.0103e+01,  8.2042e+01,\n",
    "         2.9374e+00, -6.7441e-01,  2.3400e-01])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and Val"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import re\n",
    "import torch\n",
    "\n",
    "def get_moments(df):\n",
    "    \"\"\"\n",
    "    Calculate moments for each column in a dataframe.\n",
    "    \n",
    "    Returns a dictionary with column names as keys and a list of moments as values.\n",
    "    \"\"\"\n",
    "    moments = {}\n",
    "    for col in df.columns:\n",
    "        moments[col] = [\n",
    "            df[col].mean(),\n",
    "            df[col].var(),\n",
    "            df[col].skew(),\n",
    "            df[col].kurt()\n",
    "        ]\n",
    "    return moments\n",
    "\n",
    "\n",
    "def process_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a file and calculate its moments, considering the first column as index.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(file_path, delimiter='\\t', index_col=0)\n",
    "    return get_moments(df)\n",
    "\n",
    "def process_porosity_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a porosity file and extract the single value.\n",
    "    \"\"\"\n",
    "    with open(file_path, 'r') as f:\n",
    "        f.readline()  # Read and discard the header\n",
    "        return f.readline().strip().split('\\t')[1]\n",
    "\n",
    "def save_to_file(image_name, binder_moments, damage_moments, porosity_value):\n",
    "    \"\"\"\n",
    "    Save the moments to a new text file.\n",
    "    \"\"\"\n",
    "    moments_names = ['Mean', 'Variance', 'Skewness', 'Kurtosis']\n",
    "    \n",
    "    # Modifying the output path to use the tensor_dir instead\n",
    "    tensor_dir = '/path/to/dir/Moments/Train'\n",
    "    output_path = os.path.join(tensor_dir, f\"{image_name}_moments.txt\")\n",
    "    \n",
    "    with open(output_path, 'w') as f:\n",
    "        f.write(\"Binder Moments:\\n\")\n",
    "        for col, moments in binder_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(\"\\nDamage Moments:\\n\")\n",
    "        for col, moments in damage_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(f\"\\nPorosity Value:\\n{porosity_value}\")\n",
    "\n",
    "def process_directory(binder_dir, damage_dir, porosity_dir):\n",
    "    results = {}\n",
    "\n",
    "    # Helper function to map file paths\n",
    "    def map_file_paths(dir_path, file_suffix):\n",
    "        file_map = {}\n",
    "        for file in os.listdir(dir_path):\n",
    "            if file.endswith(file_suffix):\n",
    "                image_name = file.rsplit('_', 1)[0]\n",
    "                file_map[image_name] = os.path.join(dir_path, file)\n",
    "        return file_map\n",
    "\n",
    "    # Map the file paths\n",
    "    binder_files = map_file_paths(binder_dir, \"_binder.txt\")\n",
    "    damage_files = map_file_paths(damage_dir, \"_damage.txt\")\n",
    "    porosity_files = map_file_paths(porosity_dir, \"_porosity.txt\")\n",
    "\n",
    "    # Process each set of files\n",
    "    for image_name in binder_files:\n",
    "        if image_name in damage_files and image_name in porosity_files:\n",
    "            binder_moments = process_file(binder_files[image_name])\n",
    "            damage_moments = process_file(damage_files[image_name])\n",
    "            porosity_value = process_porosity_file(porosity_files[image_name])\n",
    "            \n",
    "            save_to_file(image_name, binder_moments, damage_moments, porosity_value)\n",
    "            # Note: some_directory_to_save_files needs to be defined or passed to the function\n",
    "\n",
    "\n",
    "def extract_values(file_path):\n",
    "    with open(file_path, 'r') as f:\n",
    "        text = f.read()\n",
    "\n",
    "    keywords = [\"Mean\", \"Variance\", \"Skewness\", \"Kurtosis\", \"Porosity Value\"]\n",
    "    values = [float(val) for keyword in keywords for val in re.findall(rf\"{keyword}:\\s+([-+]?\\d*\\.\\d+|\\d+)\", text)]\n",
    "\n",
    "    return torch.tensor(values)\n",
    "\n",
    "# Running the code\n",
    "binder_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry/Binder_Stats'\n",
    "damage_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry/Damage_Stats'\n",
    "porosity_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry'\n",
    "process_directory(binder_directory, damage_directory, porosity_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Set the directory you want to start from\n",
    "rootDir = '/path/to/dir/Moments/Train' # Use '.' for current directory or set your own path\n",
    "\n",
    "# Loop through all the directories and files in the directory\n",
    "for dirName, subdirList, fileList in os.walk(rootDir):\n",
    "    for fname in fileList:\n",
    "        # Check if the file is a .txt file\n",
    "        if fname.endswith('.txt'):\n",
    "            path = os.path.join(dirName, fname)\n",
    "            with open(path, 'r') as file:\n",
    "                # Count the number of rows in the file\n",
    "                rows = file.readlines()\n",
    "                if len(rows) == 1:\n",
    "                    print(f\"Only 1 row found in: {path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Val"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import re\n",
    "import torch\n",
    "\n",
    "def get_moments(df):\n",
    "    \"\"\"\n",
    "    Calculate moments for each column in a dataframe.\n",
    "    \n",
    "    Returns a dictionary with column names as keys and a list of moments as values.\n",
    "    \"\"\"\n",
    "    moments = {}\n",
    "    for col in df.columns:\n",
    "        moments[col] = [\n",
    "            df[col].mean(),\n",
    "            df[col].var(),\n",
    "            df[col].skew(),\n",
    "            df[col].kurt()\n",
    "        ]\n",
    "    return moments\n",
    "\n",
    "\n",
    "def process_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a file and calculate its moments, considering the first column as index.\n",
    "    \"\"\"\n",
    "    df = pd.read_csv(file_path, delimiter='\\t', index_col=0)\n",
    "    return get_moments(df)\n",
    "\n",
    "def process_porosity_file(file_path):\n",
    "    \"\"\"\n",
    "    Read a porosity file and extract the single value.\n",
    "    \"\"\"\n",
    "    with open(file_path, 'r') as f:\n",
    "        f.readline()  # Read and discard the header\n",
    "        return f.readline().strip().split('\\t')[1]\n",
    "\n",
    "def save_to_file(image_name, binder_moments, damage_moments, porosity_value):\n",
    "    \"\"\"\n",
    "    Save the moments to a new text file.\n",
    "    \"\"\"\n",
    "    moments_names = ['Mean', 'Variance', 'Skewness', 'Kurtosis']\n",
    "    \n",
    "    # Modifying the output path to use the tensor_dir instead\n",
    "    tensor_dir = '/path/to/dir/Moments/Val'\n",
    "    output_path = os.path.join(tensor_dir, f\"{image_name}_moments.txt\")\n",
    "    \n",
    "    with open(output_path, 'w') as f:\n",
    "        f.write(\"Binder Moments:\\n\")\n",
    "        for col, moments in binder_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(\"\\nDamage Moments:\\n\")\n",
    "        for col, moments in damage_moments.items():\n",
    "            descriptive_moments = ', '.join([f\"{moments_names[i]}: {moment}\" for i, moment in enumerate(moments)])\n",
    "            f.write(f\"{col}: {descriptive_moments}\\n\")\n",
    "\n",
    "        f.write(f\"\\nPorosity Value:\\n{porosity_value}\")\n",
    "\n",
    "def process_directory(binder_dir, damage_dir, porosity_dir):\n",
    "    results = {}\n",
    "\n",
    "    # Helper function to map file paths\n",
    "    def map_file_paths(dir_path, file_suffix):\n",
    "        file_map = {}\n",
    "        for file in os.listdir(dir_path):\n",
    "            if file.endswith(file_suffix):\n",
    "                image_name = file.rsplit('_', 1)[0]\n",
    "                file_map[image_name] = os.path.join(dir_path, file)\n",
    "        return file_map\n",
    "\n",
    "    # Map the file paths\n",
    "    binder_files = map_file_paths(binder_dir, \"_binder.txt\")\n",
    "    damage_files = map_file_paths(damage_dir, \"_damage.txt\")\n",
    "    porosity_files = map_file_paths(porosity_dir, \"_porosity.txt\")\n",
    "\n",
    "    # Process each set of files\n",
    "    for image_name in binder_files:\n",
    "        if image_name in damage_files and image_name in porosity_files:\n",
    "            binder_moments = process_file(binder_files[image_name])\n",
    "            damage_moments = process_file(damage_files[image_name])\n",
    "            porosity_value = process_porosity_file(porosity_files[image_name])\n",
    "            \n",
    "            save_to_file(image_name, binder_moments, damage_moments, porosity_value)\n",
    "            # Note: some_directory_to_save_files needs to be defined or passed to the function\n",
    "\n",
    "\n",
    "def extract_values(file_path):\n",
    "    with open(file_path, 'r') as f:\n",
    "        text = f.read()\n",
    "\n",
    "    keywords = [\"Mean\", \"Variance\", \"Skewness\", \"Kurtosis\", \"Porosity Value\"]\n",
    "    values = [float(val) for keyword in keywords for val in re.findall(rf\"{keyword}:\\s+([-+]?\\d*\\.\\d+|\\d+)\", text)]\n",
    "\n",
    "    return torch.tensor(values)\n",
    "\n",
    "# Running the code\n",
    "binder_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry/Binder_Stats'\n",
    "damage_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry/Damage_Stats'\n",
    "porosity_directory = '/path/to/dir/Morphometry/Morphometry/Morphometry/Porosity_Stats'\n",
    "process_directory(binder_directory, damage_directory, porosity_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Only 1 row found in: /path/to/dir/Moments/Val/100N5image_val_moments-Copy1.txt\n",
      "Only 1 row found in: /path/to/dir/Moments/Val/.ipynb_checkpoints/100N5image_val_moments-Copy1-checkpoint.txt\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Set the directory you want to start from\n",
    "rootDir = '/path/to/dir/Moments/Val' # Use '.' for current directory or set your own path\n",
    "\n",
    "# Loop through all the directories and files in the directory\n",
    "for dirName, subdirList, fileList in os.walk(rootDir):\n",
    "    for fname in fileList:\n",
    "        # Check if the file is a .txt file\n",
    "        if fname.endswith('.txt'):\n",
    "            path = os.path.join(dirName, fname)\n",
    "            with open(path, 'r') as file:\n",
    "                # Count the number of rows in the file\n",
    "                rows = file.readlines()\n",
    "                if len(rows) == 1:\n",
    "                    print(f\"Only 1 row found in: {path}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
