{
	"cells": [
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {
				"id": "Nqn4WiylzrzV"
			},
			"outputs": [],
			"source": [
				"from openmm.app import *\n",
				"from openmm import *\n",
				"from openmm.unit import *\n",
				"import mdtraj as md\n",
				"from pylab import *\n",
				"from math import pi\n",
				"from sys import stdout\n",
				"from mdtraj.reporters import HDF5Reporter      "
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"import multiprocessing as mp\n",
				"from simtk.openmm.app import *\n",
				"from simtk.openmm import *\n",
				"from simtk.unit import *\n",
				"\n",
				"def run_simulation(simulation_index):\n",
				"    pdb = PDBFile(\"Data/AlanineDipeptide.pdb\")\n",
				"    forcefield = ForceField('amber99sb.xml', 'tip3p.xml')\n",
				"    system = forcefield.createSystem(pdb.topology, nonbondedCutoff=1*nanometer, constraints=HBonds, nonbondedMethod=NoCutoff)\n",
				"    integrator = LangevinIntegrator(320*kelvin, 1/picosecond, 0.002*picoseconds)\n",
				"    simulation = Simulation(pdb.topology, system, integrator)\n",
				"    simulation.context.setPositions(pdb.positions)\n",
				"    simulation.minimizeEnergy()\n",
				"    # Add reporters\n",
				"    hdf5_reporter = HDF5Reporter('Data/AD_'+str(simulation_index)+'.h5', 10)\n",
				"    simulation.reporters.append(hdf5_reporter)\n",
				"    N = 10\n",
				"    X = 625000\n",
				"    for i in range(N):\n",
				"        if i > 0:\n",
				"            simulation.loadCheckpoint('Checkpoints/checkpoint_'+str(simulation_index)+'.chk')\n",
				"        simulation.step(X)\n",
				"        simulation.saveCheckpoint('Checkpoints/checkpoint_'+str(simulation_index)+'.chk')\n",
				"\n",
				"num_simulations = 16 # number of simulations to run\n",
				"with mp.Pool(processes=num_simulations) as pool:\n",
				"    pool.map(run_simulation, range(num_simulations))"
			]
		},
		{
			"cell_type": "markdown",
			"metadata": {
				"id": "Qi6YFs2fHZK0"
			},
			"source": [
				"# Verify Data"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {
				"executionInfo": {
					"elapsed": 13,
					"status": "ok",
					"timestamp": 1673880131722,
					"user": {
						"displayName": "M Petersen",
						"userId": "13637530209006951077"
					},
					"user_tz": -60
				},
				"id": "a3ZQvL8_3KaQ"
			},
			"outputs": [],
			"source": [
				"import numpy as np\n",
				"import matplotlib.pyplot as plt\n",
				"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
				"from mdtraj.geometry import _geometry, distance\n",
				"from mdtraj import _rmsd\n",
				"import pickle\n",
				"\n",
				"%matplotlib inline"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {
				"executionInfo": {
					"elapsed": 13,
					"status": "ok",
					"timestamp": 1673880131724,
					"user": {
						"displayName": "M Petersen",
						"userId": "13637530209006951077"
					},
					"user_tz": -60
				},
				"id": "DxF4ba3CJUAS"
			},
			"outputs": [],
			"source": [
				"def _displacement(xyz, pairs):\n",
				"    \"Displacement vector between pairs of points in each frame\"\n",
				"    value = np.diff(xyz[:, pairs], axis=2)[:, :, 0]\n",
				"    assert value.shape == (xyz.shape[0], pairs.shape[0], 3), 'v.shape %s, xyz.shape %s, pairs.shape %s' % (str(value.shape), str(xyz.shape), str(pairs.shape))\n",
				"    return value\n",
				"\n",
				"def compute_dihedral(traj, indices, periodic, out=None):\n",
				"    \"\"\"Compute the dihedral angles of traj for the atom indices in indices.\n",
				"    Parameters\n",
				"    ----------\n",
				"    xyz : np.ndarray, shape=(num_frames, num_atoms, 3), dtype=float\n",
				"        The XYZ coordinates of a trajectory\n",
				"    indices : np.ndarray, shape=(num_dihedrals, 4), dtype=int\n",
				"        Atom indices to compute dihedrals.\n",
				"    periodic : bool, default=True\n",
				"        If `periodic` is True and the trajectory contains unitcell\n",
				"        information, we will treat dihedrals that cross periodic images\n",
				"        using the minimum image convention.\n",
				"    Returns\n",
				"    -------\n",
				"    dih : np.ndarray, shape=(num_dihedrals), dtype=float\n",
				"        dih[i,j] gives the dihedral angle at traj[i] correponding to indices[j].\n",
				"    \"\"\"\n",
				"    ix10 = indices[:, [0, 1]]\n",
				"    ix21 = indices[:, [1, 2]]\n",
				"    ix32 = indices[:, [2, 3]]\n",
				"\n",
				"    b1 = _displacement(traj, ix10)\n",
				"    b2 = _displacement(traj, ix21)\n",
				"    b3 = _displacement(traj, ix32)\n",
				"\n",
				"    c1 = np.cross(b2, b3)\n",
				"    c2 = np.cross(b1, b2)\n",
				"\n",
				"    p1 = (b1 * c1).sum(-1)\n",
				"    p1 *= (b2 * b2).sum(-1) ** 0.5\n",
				"    p2 = (c1 * c2).sum(-1)\n",
				"\n",
				"    return np.arctan2(p1, p2, out)\n",
				"\n",
				"def superpose(xyz, reference_xyz, frame=0, atom_indices=None,\n",
				"              ref_atom_indices=None, parallel=True):\n",
				"    \"\"\"Superpose each conformation in this trajectory upon a reference\n",
				"    Parameters\n",
				"    ----------\n",
				"    reference : md.Trajectory\n",
				"        Align self to a particular frame in `reference`\n",
				"    frame : int\n",
				"        The index of the conformation in `reference` to align to.\n",
				"    atom_indices : array_like, or None\n",
				"        The indices of the atoms to superpose. If not\n",
				"        supplied, all atoms will be used.\n",
				"    ref_atom_indices : array_like, or None\n",
				"        Use these atoms on the reference structure. If not supplied,\n",
				"        the same atom indices will be used for this trajectory and the\n",
				"        reference one.\n",
				"    parallel : bool\n",
				"        Use OpenMP to run the superposition in parallel over multiple cores\n",
				"    Returns\n",
				"    -------\n",
				"    self\n",
				"    \"\"\"\n",
				"\n",
				"    if atom_indices is None:\n",
				"        atom_indices = slice(None)\n",
				"\n",
				"    if ref_atom_indices is None:\n",
				"        ref_atom_indices = atom_indices\n",
				"\n",
				"    if not isinstance(ref_atom_indices, slice) and (\n",
				"        len(ref_atom_indices) != len(atom_indices)):\n",
				"        raise ValueError(\"Number of atoms must be consistent!\")\n",
				"\n",
				"    n_frames = xyz.shape[0]\n",
				"    self_align_xyz = np.asarray(xyz[:, atom_indices, :], order='c')\n",
				"    self_displace_xyz = np.asarray(xyz, order='c')\n",
				"    ref_align_xyz = np.array(reference_xyz[frame, ref_atom_indices, :],\n",
				"                              copy=True, order='c').reshape(1, -1, 3)\n",
				"\n",
				"    offset = np.mean(self_align_xyz, axis=1, dtype=np.float64).reshape(n_frames, 1, 3)\n",
				"    self_align_xyz -= offset\n",
				"    if self_align_xyz.ctypes.data != self_displace_xyz.ctypes.data:\n",
				"        # when atom_indices is None, these two arrays alias the same memory\n",
				"        # so we only need to do the centering once\n",
				"        self_displace_xyz -= offset\n",
				"\n",
				"    ref_offset = ref_align_xyz[0].astype('float64').mean(0)\n",
				"    ref_align_xyz[0] -= ref_offset\n",
				"\n",
				"    self_g = np.einsum('ijk,ijk->i', self_align_xyz, self_align_xyz)\n",
				"    ref_g = np.einsum('ijk,ijk->i', ref_align_xyz , ref_align_xyz)\n",
				"\n",
				"    _rmsd.superpose_atom_major(\n",
				"        ref_align_xyz.astype(np.float32()), self_align_xyz.astype(np.float32()), ref_g.astype(np.float32()), self_g.astype(np.float32()), self_displace_xyz.astype(np.float32()),\n",
				"        0, parallel=parallel)\n",
				"\n",
				"    self_displace_xyz += ref_offset\n",
				"    xyz = self_displace_xyz\n",
				"    return xyz\n",
				"\n",
				"def average_timesteps(force, n):\n",
				"    #averages n timesteps together\n",
				"    #truncates the last few timesteps if they don't fit evenly\n",
				"    force = force[:len(force) - len(force)%n]\n",
				"    force = force.reshape(-1, n, force.shape[1], force.shape[2], force.shape[3])\n",
				"    force = np.mean(force, axis=1)\n",
				"    return force"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"trajectories = []\n",
				"skip_list = [5, 7, 15]\n",
				"for i in range(0, 16):\n",
				"    if i not in skip_list:\n",
				"        traj = md.load_hdf5(\"Data/AD_\" + str(i) + \".h5\")\n",
				"        \n",
				"        if i == 0:\n",
				"            top = traj.topology\n",
				"            ref = traj[-100]\n",
				"        \n",
				"        traj.superpose(ref)\n",
				"        traj = traj.atom_slice(top.select(\"not element H\"))\n",
				"        trajectories.append(traj.xyz)\n",
				"trajectories = np.array(trajectories)"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"fig, axes = plt.subplots(trajectories.shape[2], 3, figsize=(15, 15))\n",
				"\n",
				"traj_flattened = trajectories.reshape(trajectories.shape[0], trajectories.shape[1], trajectories.shape[2]*trajectories.shape[3])\n",
				"\n",
				"for i in range(traj_flattened.shape[2]):\n",
				"    axes[i//3, i%3].hist(traj_flattened[:, :, i].flatten(), bins=100)\n",
				"    axes[i//3, i%3].set_title(\"Atom \" + str(i))"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"#given an ensemble of trajectories, of shape (n_trajectories, n_frames, n_atoms, 3), rescale the coordinates to be between -1 and 1\n",
				"max_min_array = np.array([trajectories.max(axis=(0, 1)), trajectories.min(axis=(0, 1))])\n",
				"trajectories = (trajectories - max_min_array[1])/(max_min_array[0] - max_min_array[1])\n",
				"trajectories = trajectories*2 - 1"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"#invert the scaling\n",
				"trajectories = trajectories*(max_min_array[0] - max_min_array[1])/2 + (max_min_array[0] + max_min_array[1])/2"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"#visualize the dihedral angles\n",
				"from pylab import *\n",
				"from math import pi\n",
				"\n",
				"plt.figure()\n",
				"plt.title('Dihedral Map: Alanine dipeptide')\n",
				"psi_indices_non_h, phi_indices_non_h = [3, 4, 6, 8], [1, 3, 4, 6]\n",
				"angles = compute_dihedral(trajectories.reshape((-1, trajectories.shape[2], trajectories.shape[3])), np.array([phi_indices_non_h, psi_indices_non_h]), True)\n",
				"plt.scatter(angles[:, 0], angles[:, 1])\n",
				"cbar = plt.colorbar()\n",
				"cbar.set_label('Time [ps]')\n",
				"plt.xlabel(r'$\\Phi$ Angle [radians]')\n",
				"plt.xlim(-pi, pi)\n",
				"plt.ylabel(r'$\\Psi$ Angle [radians]')\n",
				"plt.ylim(-pi, pi)"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"#save the trajectories and the max and min values\n",
				"np.save(\"Data/trajectories.npy\", trajectories)\n",
				"np.save(\"Data/max_min.npy\", max_min_array)"
			]
		},
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": []
		}
	],
	"metadata": {
		"kernelspec": {
			"display_name": "base",
			"language": "python",
			"name": "python3"
		},
		"language_info": {
			"codemirror_mode": {
				"name": "ipython",
				"version": 3
			},
			"file_extension": ".py",
			"mimetype": "text/x-python",
			"name": "python",
			"nbconvert_exporter": "python",
			"pygments_lexer": "ipython3",
			"version": "3.9.13"
		},
		"orig_nbformat": 4,
		"vscode": {
			"interpreter": {
				"hash": "639d8a7c3e620b1d142eea4deabde5aac9ed3b21a6e651e4622d69fbdac2ed0a"
			}
		}
	},
	"nbformat": 4,
	"nbformat_minor": 2
}
