{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyrosetta\n",
    "from pyrosetta import pose_from_pdb\n",
    "import nglview as nv\n",
    "from ipywidgets import HBox\n",
    "from tqdm import tqdm\n",
    "import mdtraj as md\n",
    "from Bio.PDB import PDBParser\n",
    "import os\n",
    "from foldingdiff.datasets import CathCanonicalAnglesDataset\n",
    "import scipy.io\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize PyRosetta\n",
    "pyrosetta.init()\n",
    "\n",
    "# Load the PDB file\n",
    "pdb_filename = \"data/cath/dompdb/152lA00.pdb\"  # Change this to your actual file\n",
    "pose = pose_from_pdb(pdb_filename)\n",
    "\n",
    "# Residue index to modify (change as needed)\n",
    "residue_index = 10  # Change to the residue you want to modify\n",
    "\n",
    "# Get initial torsion angles\n",
    "initial_phi = pose.phi(residue_index)\n",
    "initial_psi = pose.psi(residue_index)\n",
    "\n",
    "print(f\"Before modification - Phi: {initial_phi:.2f}, Psi: {initial_psi:.2f}\")\n",
    "\n",
    "# Save the original structure\n",
    "before_pdb = \"before.pdb\"\n",
    "pose.dump_pdb(before_pdb)\n",
    "\n",
    "# Modify the torsion angle\n",
    "pose.set_phi(residue_index, initial_phi + 50)  # Increase phi by 20 degrees\n",
    "pose.set_psi(residue_index, initial_psi)  # Decrease psi by 15 degrees\n",
    "\n",
    "# Get modified torsion angles\n",
    "modified_phi = pose.phi(residue_index)\n",
    "modified_psi = pose.psi(residue_index)\n",
    "\n",
    "print(f\"After modification - Phi: {modified_phi:.2f}, Psi: {modified_psi:.2f}\")\n",
    "\n",
    "# Save the modified structure\n",
    "after_pdb = \"after.pdb\"\n",
    "pose.dump_pdb(after_pdb)\n",
    "\n",
    "# Create two separate NGLView widgets\n",
    "view_before = nv.show_structure_file(before_pdb)\n",
    "view_after = nv.show_structure_file(after_pdb)\n",
    "\n",
    "# Set titles\n",
    "view_before._set_size('400px', '400px')\n",
    "view_after._set_size('400px', '400px')\n",
    "\n",
    "# Display side by side\n",
    "HBox([view_before, view_after])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_pdb(pdb_file):\n",
    "    # Create a PDB parser object\n",
    "    parser = PDBParser(QUIET=True)\n",
    "\n",
    "    # Path to your PDB file (e.g., '12asA00.pdb')\n",
    "    structure = parser.get_structure(\"protein\", pdb_file)\n",
    "\n",
    "    # We'll store coordinates for each residue as a tuple: (N, CA, C)\n",
    "    backbone_coords = []\n",
    "\n",
    "    # Iterate over all residues in all chains\n",
    "    for model in structure:\n",
    "        for chain in model:\n",
    "            for residue in chain:\n",
    "                # Check that the residue has the backbone atoms we need.\n",
    "                if all(atom_name in residue for atom_name in ['N', 'CA', 'C']):\n",
    "                    # Extract coordinates\n",
    "                    N_coord = residue['N'].get_coord()\n",
    "                    CA_coord = residue['CA'].get_coord()\n",
    "                    C_coord = residue['C'].get_coord()\n",
    "                    backbone_coords.append((N_coord, CA_coord, C_coord))\n",
    "\n",
    "    # Now, backbone_coords is a list of tuples, each containing three numpy arrays of shape (3,).\n",
    "    # For a protein with N residues, you have N entries, corresponding to 3 x 3D coordinates.\n",
    "    for i, (N_coord, CA_coord, C_coord) in enumerate(backbone_coords, start=1):\n",
    "        print(f\"Residue {i}:\")\n",
    "        print(f\"  N:  {N_coord}\")\n",
    "        print(f\"  CA: {CA_coord}\")\n",
    "        print(f\"  C:  {C_coord}\")\n",
    "\n",
    "    return backbone_coords"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cath_folder = \"data/cath/dompdb/\"  # Change this to your actual file\n",
    "all_coords = []\n",
    "files = os.listdir(cath_folder)\n",
    "files = sorted(files, key=len)\n",
    "for f in tqdm(files[:10]):\n",
    "    if f:\n",
    "        print(f)\n",
    "        all_coords.append(parse_pdb(os.path.join(cath_folder, f)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = CathCanonicalAnglesDataset('data/cath/dompdb', use_cache=False, debug=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "G = []\n",
    "for i in range(9):\n",
    "    n = dataset[i]['lengths'].item()\n",
    "    coords = dataset[i]['coords'][:n]\n",
    "    if n%3 != 0:\n",
    "        pass\n",
    "    labels = np.tile([0,1,2], n//3)\n",
    "    edges = [[j, j+1, 0] for j in range(1, n)]\n",
    "    g = {\n",
    "        'nodelabels': np.array(labels, dtype=np.uint32)[:, None],\n",
    "        'nodepos': np.array(coords, dtype=np.float64),\n",
    "        'edges': np.array(edges, dtype=np.uint32)\n",
    "    }\n",
    "    G.append(g)\n",
    "scipy.io.savemat('data/cath/graphs.mat', {\"G\": G})"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
