{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gymnasium as gym\n",
    "import numpy as np\n",
    "from pde_control_gym.src import NSReward\n",
    "from tqdm import tqdm\n",
    "import gymnasium as gym\n",
    "import numpy as np\n",
    "import math\n",
    "import stable_baselines3\n",
    "import matplotlib.pyplot as plt\n",
    "import time \n",
    "from tqdm import tqdm\n",
    "from pde_control_gym.src import NSReward\n",
    "from stable_baselines3.common.callbacks import CheckpointCallback\n",
    "from stable_baselines3 import PPO\n",
    "from stable_baselines3 import SAC\n",
    "\n",
    "from pde_control_gym.src.environments2d.navier_stokes2D import central_difference, laplace\n",
    "import time \n",
    "from tqdm import tqdm\n",
    "import scipy"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set initial condition function to be zero\n",
    "def getInitialCondition(X):\n",
    "    u = np.zeros_like(X) \n",
    "    v = np.zeros_like(X) \n",
    "    p = np.zeros_like(X) \n",
    "    return u, v, p\n",
    "\n",
    "# Boundary conditions: one two-element list per wall of the domain.\n",
    "# NOTE(review): 'Dirchilet' is kept exactly as written because these strings are\n",
    "# handed to the environment; confirm the spelling it expects before changing it.\n",
    "boundary_condition = {\n",
    "    \"upper\": [\"Controllable\", \"Dirchilet\"], \n",
    "    \"lower\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"left\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"right\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "}\n",
    "\n",
    "# Timestep and spatial step for PDE Solver\n",
    "T = 0.201 # To perform 200 steps\n",
    "dt = 1e-3\n",
    "dx, dy = 0.05, 0.05\n",
    "X, Y = 1, 1\n",
    "\n",
    "# Open the target archive once and close it when done; indexing the archive\n",
    "# returns the stored array, which stays usable after the file is closed.\n",
    "with np.load('target.npz') as target_data:\n",
    "    u_target = target_data['u']\n",
    "    v_target = target_data['v']\n",
    "desire_states = np.stack([u_target, v_target], axis=-1) # (NT, Nx, Ny, 2)\n",
    "\n",
    "# Keyword arguments forwarded verbatim to gym.make below\n",
    "NS2DParameters = {\n",
    "        \"T\": T, \n",
    "        \"dt\": dt, \n",
    "        \"X\": X,\n",
    "        \"dx\": dx, \n",
    "        \"Y\": Y,\n",
    "        \"dy\":dy,\n",
    "        \"action_dim\": 1, \n",
    "        \"reward_class\": NSReward(0.1),\n",
    "        \"normalize\": False, \n",
    "        \"reset_init_condition_func\": getInitialCondition,\n",
    "        \"boundary_condition\": boundary_condition,\n",
    "        \"U_ref\": desire_states, \n",
    "        \"action_ref\": 2.0 * np.ones(1000), \n",
    "}\n",
    "\n",
    "# Make the NavierStokes PDE gym\n",
    "env = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParameters)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test PPO"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from stable_baselines3 import SAC, PPO\n",
    "\n",
    "# Roll the trained PPO policy out over seeded episodes, summing every step reward.\n",
    "N_experiments, T = 50, 200  # episodes to run, environment steps per episode\n",
    "model = PPO.load(\"models/PPO\")\n",
    "total_reward = 0\n",
    "for i_id in tqdm(range(N_experiments)):\n",
    "    # The episode index doubles as the reset seed\n",
    "    obs = env.reset(seed=i_id)[0]\n",
    "    for t in range(T):\n",
    "        action = model.predict(obs)[0]\n",
    "        obs, reward, done = env.step(action)[:3]\n",
    "        total_reward += reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accumulated PPO reward divided by the number of episodes, rounded to three decimals\n",
    "mean_ppo_reward = np.round(total_reward/N_experiments, 3)\n",
    "print(f\"Total reward for PPO: {mean_ppo_reward}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test SAC"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Roll the trained SAC policy out over seeded episodes, summing every step reward.\n",
    "N_experiments, T = 50, 200  # episodes to run, environment steps per episode\n",
    "model = SAC.load(\"models/SAC\")\n",
    "total_reward = 0\n",
    "for i_id in tqdm(range(N_experiments)):\n",
    "    # The episode index doubles as the reset seed\n",
    "    obs = env.reset(seed=i_id)[0]\n",
    "    for t in range(T):\n",
    "        action = model.predict(obs)[0]\n",
    "        obs, reward, done = env.step(action)[:3]\n",
    "        total_reward += reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Accumulated SAC reward divided by the number of episodes, rounded to three decimals\n",
    "mean_sac_reward = np.round(total_reward/N_experiments,3)\n",
    "print(f\"Total reward for SAC: {mean_sac_reward}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test Optimization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pde_control_gym.src.environments2d.navier_stokes2D import central_difference, laplace\n",
    "# Model-Based Optimization to optimize action \n",
    "def apply_boundary(a1, a2):\n",
    "    a1[:,[-1, 0]] = 0.\n",
    "    a1[[-1,0],:] = 0.\n",
    "    a2[:,[-1, 0]] = 0.\n",
    "    a2[[-1,0],:] = 0.\n",
    "    return a1, a2\n",
    "\n",
    "# Model-based action optimisation, repeated for N_experiments random rollouts.\n",
    "# Per experiment: (1) random-action rollout, (2) one backward sweep for Lam1/Lam2,\n",
    "# (3) actions derived from Lam1, (4) rollout of those actions; its reward is stored.\n",
    "# T must be the integer step count (200) assigned in the PPO/SAC test cells, not the\n",
    "# 0.201 horizon from the setup cell -- range(T) needs an int.\n",
    "N_experiments = 50\n",
    "rewards = []\n",
    "for i_id in range(N_experiments):\n",
    "    # Seed numpy's global RNG so the random actions below repeat per experiment\n",
    "    np.random.seed(i_id)\n",
    "    total_reward = 0.\n",
    "    U, V = [], []\n",
    "    # The environment itself is reset with the same seed in every experiment\n",
    "    env.reset(seed=0)\n",
    "    # Rollout with scalar actions drawn uniformly from [2, 4); keep env.u / env.v after each step\n",
    "    for t in range(T):\n",
    "        obs, reward, done, _ , _ = env.step(np.random.uniform(2,4)) \n",
    "        U.append(env.u)\n",
    "        V.append(env.v)\n",
    "        total_reward += reward\n",
    "    # Targets are reloaded with their first time slice dropped.\n",
    "    # NOTE(review): presumably this aligns target[k] with the state after step k+1 -- confirm.\n",
    "    u_target = np.load('target.npz')['u'][1:,:,:]\n",
    "    v_target = np.load('target.npz')['v'][1:,:,:]\n",
    "    u_ref = [2 for _ in range(T)]\n",
    "    # A single optimisation sweep (range(1))\n",
    "    for ite in range(1):\n",
    "        # Lam1/Lam2 start from zero fields and are stepped while the recorded trajectory\n",
    "        # is read from its end (U[-1-t]), i.e. backwards in time.\n",
    "        # NOTE(review): these look like adjoint (costate) fields -- confirm against the derivation.\n",
    "        Lam1, Lam2 = [], []\n",
    "        Lam1.append(np.zeros_like(U[0]))\n",
    "        Lam2.append(np.zeros_like(U[0]))\n",
    "        pressure = np.zeros_like(U[0])\n",
    "        for t in range(T-1):\n",
    "            lam1, lam2 = Lam1[-1], Lam2[-1]\n",
    "            dl1dx, dl1dy = central_difference(lam1,\"x\",dx), central_difference(lam1, \"y\", dy)\n",
    "            dl2dx, dl2dy = central_difference(lam2,\"x\", dx), central_difference(lam2, \"y\", dy) \n",
    "            laplace_l1, laplace_l2 = laplace(lam1, dx, dy), laplace(lam2, dx, dy)\n",
    "            # Right-hand sides: transport terms, 0.1 * Laplacian, and the tracking error (state - target)\n",
    "            dlam1dt = - 2 * dl1dx * U[-1-t] - dl1dy * V[-1-t] - dl2dx * V[-1-t] - 0.1 * laplace_l1 + (U[-1-t]-u_target[-1-t])\n",
    "            dlam2dt = - 2 * dl2dy * V[-1-t] - dl1dy * U[-1-t] - dl2dx * U[-1-t] - 0.1 * laplace_l2 + (V[-1-t]-v_target[-1-t])\n",
    "            # Explicit step of size dt, then zero the border values\n",
    "            lam1 = lam1 - dt * dlam1dt\n",
    "            lam2 = lam2 - dt * dlam2dt\n",
    "            lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "            # Pressure from the environment's solver, then subtract dt * its gradient.\n",
    "            # NOTE(review): presumably a projection step mirroring the forward solver -- confirm.\n",
    "            pressure = env.solve_pressure(lam1, lam2, pressure)\n",
    "            lam1 = lam1 - dt * central_difference(pressure, \"x\", dx)\n",
    "            lam2 = lam2 - dt * central_difference(pressure, \"y\", dy)\n",
    "            lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "            Lam1.append(lam1)\n",
    "            Lam2.append(lam2)\n",
    "        # Only Lam1 is flipped into forward-time order; Lam2 is not read again below\n",
    "        Lam1 = Lam1[::-1]\n",
    "        actions = []\n",
    "        for t in range(T):\n",
    "            # Despite its name, dl1dx2 holds the central difference of Lam1 along \"y\"\n",
    "            dl1dx2 = central_difference(Lam1[t], \"y\", dy)\n",
    "            # Action = reference minus the scaled sum of that derivative over row -2, columns 12..16\n",
    "            actions.append(u_ref[t] - 0.1/0.1 * sum(dl1dx2[-2, 12:17])*5*dx)\n",
    "        # Replay the derived actions from the same seeded reset and measure their reward\n",
    "        U, V = [], []\n",
    "        env.reset(seed=0)\n",
    "        total_reward = 0.\n",
    "        for t in tqdm(range(T)):\n",
    "            obs, reward, done, _ , _ = env.step(actions[t])\n",
    "            U.append(env.u)\n",
    "            V.append(env.v)\n",
    "            total_reward += reward\n",
    "        print(total_reward)\n",
    "    # One entry per experiment: the reward of the derived-action rollout\n",
    "    rewards.append(total_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean of the per-experiment rewards, rounded to three decimals\n",
    "mean_opt_reward = np.round(np.mean(rewards), 3)\n",
    "print(f\"Total reward for Optimization {mean_opt_reward}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def runSingleEpisodeRL(model, env, parameter):\n",
    "    terminate = False\n",
    "    truncate = False\n",
    "\n",
    "    # Holds the resulting states\n",
    "    uStorage = []\n",
    "\n",
    "    # Reset Environment\n",
    "    obs,__ = env.reset()\n",
    "    uStorage.append(obs)\n",
    "\n",
    "    i = 0\n",
    "    rew = 0\n",
    "    action_list = []\n",
    "    while not truncate and not terminate:\n",
    "        # use backstepping controller\n",
    "        action = model(obs, parameter)\n",
    "        # action_list.append(action)\n",
    "        obs, rewards, terminate, truncate, info = env.step(action)\n",
    "        # print(action, obs)\n",
    "        uStorage.append(obs)\n",
    "        rew += rewards \n",
    "    # print(action_list)\n",
    "    u = np.array(uStorage)\n",
    "    # action_np = np.array(action_list)\n",
    "    # print(action_np.shape)\n",
    "    return rew, u\n",
    "\n",
    "def RLController(obs, model):\n",
    "    action, _state = model.predict(obs)\n",
    "    return action\n",
    "\n",
    "# Load both trained policies once; later cells pass them to runSingleEpisodeRL\n",
    "# as the `parameter` argument, with RLController as the `model` callable.\n",
    "ppoModel = PPO.load(\"models/PPO\")\n",
    "sacModel = SAC.load(\"models/SAC\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getInitialCondition(X):\n",
    "    u = np.random.uniform(-0.1, 0.1) * np.ones_like(X) \n",
    "    v = 0 * np.ones_like(X) \n",
    "    p = 0 * np.ones_like(X) \n",
    "    return u, v, p\n",
    "\n",
    "# # Set up boundary conditions here\n",
    "# boundary_condition = {\n",
    "#     \"upper\": [\"Controllable\", \"Dirchilet\"], \n",
    "#     \"lower\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "#     \"left\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "#     \"right\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "# }\n",
    "\n",
    "# # Timestep and spatial step for PDE Solver\n",
    "# T = 0.2\n",
    "# dt = 1e-3\n",
    "# dx, dy = 0.05, 0.05\n",
    "# X, Y = 1, 1\n",
    "# u_target = np.load('target.npz')['u']\n",
    "# v_target = np.load('target.npz')['v']\n",
    "# desire_states = np.stack([u_target, v_target], axis=-1) # (NT, Nx, Ny, 2)\n",
    "# NS2DParameters = {\n",
    "#         \"T\": T, \n",
    "#         \"dt\": dt, \n",
    "#         \"X\": X,\n",
    "#         \"dx\": dx, \n",
    "#         \"Y\": Y,\n",
    "#         \"dy\":dy,\n",
    "#         \"action_dim\": 1, \n",
    "#         \"reward_class\": NSReward(0.1),\n",
    "#         \"normalize\": False, \n",
    "#         \"reset_init_condition_func\": getInitialCondition,\n",
    "#         \"boundary_condition\": boundary_condition,\n",
    "#         \"U_ref\": desire_states, \n",
    "#         \"action_ref\": 2.0 * np.ones(1000), \n",
    "# }\n",
    "\n",
    "# # Make the NavierStokes PDE gym\n",
    "# env = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParameters)\n",
    "\n",
    "\n",
    "# Set initial condition function to be zero\n",
    "# def getInitialCondition(X):\n",
    "#     u = np.zeros_like(X) \n",
    "#     v = np.zeros_like(X) \n",
    "#     p = np.zeros_like(X) \n",
    "#     return u, v, p\n",
    "\n",
    "# Boundary conditions: one two-element list per wall of the domain.\n",
    "# NOTE(review): 'Dirchilet' is kept exactly as written because these strings are\n",
    "# handed to the environment; confirm the spelling it expects before changing it.\n",
    "boundary_condition = {\n",
    "    \"upper\": [\"Controllable\", \"Dirchilet\"], \n",
    "    \"lower\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"left\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"right\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "}\n",
    "\n",
    "# Timestep and spatial step for PDE Solver\n",
    "T = 0.201 # To perform 200 steps\n",
    "dt = 1e-3\n",
    "dx, dy = 0.05, 0.05\n",
    "X, Y = 1, 1\n",
    "\n",
    "# Open the target archive once and close it when done; indexing the archive\n",
    "# returns the stored array, which stays usable after the file is closed.\n",
    "with np.load('target.npz') as target_data:\n",
    "    u_target = target_data['u']\n",
    "    v_target = target_data['v']\n",
    "desire_states = np.stack([u_target, v_target], axis=-1) # (NT, Nx, Ny, 2)\n",
    "\n",
    "# Keyword arguments forwarded verbatim to gym.make below; the initial-condition\n",
    "# hook is whichever getInitialCondition is currently bound in the kernel.\n",
    "NS2DParameters = {\n",
    "        \"T\": T, \n",
    "        \"dt\": dt, \n",
    "        \"X\": X,\n",
    "        \"dx\": dx, \n",
    "        \"Y\": Y,\n",
    "        \"dy\":dy,\n",
    "        \"action_dim\": 1, \n",
    "        \"reward_class\": NSReward(0.1),\n",
    "        \"normalize\": False, \n",
    "        \"reset_init_condition_func\": getInitialCondition,\n",
    "        \"boundary_condition\": boundary_condition,\n",
    "        \"U_ref\": desire_states, \n",
    "        \"action_ref\": 2.0 * np.ones(1000), \n",
    "}\n",
    "\n",
    "# Make the NavierStokes PDE gym\n",
    "env = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParameters)\n",
    "\n",
    "# Model-Based Optimization to optimize action \n",
    "def apply_boundary(a1, a2):\n",
    "    a1[:,[-1, 0]] = 0.\n",
    "    a1[[-1,0],:] = 0.\n",
    "    a2[:,[-1, 0]] = 0.\n",
    "    a2[[-1,0],:] = 0.\n",
    "    return a1, a2\n",
    "\n",
    "total_reward = 0.\n",
    "U, V = [], []\n",
    "T = 200  # environment steps per rollout (rebinds the 0.201 horizon used to build env)\n",
    "\n",
    "rewards = []\n",
    "times = []\n",
    "# Seed rollout: scalar actions drawn uniformly from [2, 4). The recorded env.u / env.v\n",
    "# fields feed the first backward sweep of the sampling loop below.\n",
    "for experiment_i in range(1):\n",
    "    # np.random.seed(experiment_i)\n",
    "    # env.reset(seed=400)\n",
    "    env.reset()\n",
    "    s = time.time()\n",
    "    for t in tqdm(range(T)):\n",
    "        obs, reward, done, _ , _ = env.step(np.random.uniform(2,4)) \n",
    "        U.append(env.u)\n",
    "        V.append(env.v)\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward random:\", total_reward)\n",
    "# Unlike the earlier optimisation cell, the targets are used here without dropping\n",
    "# their first time slice. NOTE(review): confirm which alignment is intended.\n",
    "u_target = np.load('target.npz')['u']\n",
    "v_target = np.load('target.npz')['v']\n",
    "u_ref = [2 for _ in range(T)]\n",
    "# Per-episode traces: xs_* hold index [:, -1, 1, 0], ys_* hold index [:, -2, 10, 0]\n",
    "xs_opt = []\n",
    "ys_opt = []\n",
    "xs_ppo = []\n",
    "ys_ppo = []\n",
    "xs_sac = []\n",
    "ys_sac = []\n",
    "for ite in tqdm(range(5000)):\n",
    "    env.reset()\n",
    "    # Backward sweep: Lam1/Lam2 start from zero fields and are stepped while the\n",
    "    # previous rollout (U, V) is read from its end (U[-1-t]).\n",
    "    # NOTE(review): these look like adjoint (costate) fields -- confirm against the derivation.\n",
    "    Lam1, Lam2 = [], []\n",
    "    Lam1.append(np.zeros_like(U[0]))\n",
    "    Lam2.append(np.zeros_like(U[0]))\n",
    "    pressure = np.zeros_like(U[0])\n",
    "    for t in (range(T-1)):\n",
    "        lam1, lam2 = Lam1[-1], Lam2[-1]\n",
    "        dl1dx, dl1dy = central_difference(lam1,\"x\",dx), central_difference(lam1, \"y\", dy)\n",
    "        dl2dx, dl2dy = central_difference(lam2,\"x\", dx), central_difference(lam2, \"y\", dy) \n",
    "        laplace_l1, laplace_l2 = laplace(lam1, dx, dy), laplace(lam2, dx, dy)\n",
    "        # Right-hand sides: transport terms, 0.1 * Laplacian, and the tracking error (state - target)\n",
    "        dlam1dt = - 2 * dl1dx * U[-1-t] - dl1dy * V[-1-t] - dl2dx * V[-1-t] - 0.1 * laplace_l1 + (U[-1-t]-u_target[-1-t])\n",
    "        dlam2dt = - 2 * dl2dy * V[-1-t] - dl1dy * U[-1-t] - dl2dx * U[-1-t] - 0.1 * laplace_l2 + (V[-1-t]-v_target[-1-t])\n",
    "        # Explicit step of size dt, then zero the border values\n",
    "        lam1 = lam1 - dt * dlam1dt\n",
    "        lam2 = lam2 - dt * dlam2dt\n",
    "        lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "        # Pressure from the environment's solver, then subtract dt * its gradient\n",
    "        pressure = env.solve_pressure(lam1, lam2, pressure)\n",
    "        lam1 = lam1 - dt * central_difference(pressure, \"x\", dx)\n",
    "        lam2 = lam2 - dt * central_difference(pressure, \"y\", dy)\n",
    "        lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "        Lam1.append(lam1)\n",
    "        Lam2.append(lam2)\n",
    "    # Only Lam1 is flipped into forward-time order; Lam2 is not read again below\n",
    "    Lam1 = Lam1[::-1]\n",
    "    actions = []\n",
    "    for t in (range(T)):\n",
    "        # Despite its name, dl1dx2 holds the central difference of Lam1 along \"y\"\n",
    "        dl1dx2 = central_difference(Lam1[t], \"y\", dy)\n",
    "        # NOTE(review): this sums the whole row -2 while keeping the 5*dx scaling; the\n",
    "        # earlier optimisation cell sums columns 12:17 only -- confirm which is intended.\n",
    "        actions.append(u_ref[t] - 0.1/0.1 * sum(dl1dx2[-2, :])*5*dx)\n",
    "    # Replay the derived actions on a fresh reset; this rollout also becomes the\n",
    "    # trajectory read by the next iteration's backward sweep.\n",
    "    U, V = [], []\n",
    "    # env.reset(seed=400)\n",
    "    env.reset()\n",
    "    total_reward = 0.\n",
    "    for t in (range(T)):\n",
    "        obs, reward, done, _ , _ = env.step(actions[t]) # actions === env.U[1:,-1,1,0]\n",
    "        U.append(env.u)\n",
    "        V.append(env.v)\n",
    "        total_reward += reward\n",
    "    # (A per-step debug print of env.U[100,-2,10,0] used to sit in the loop above and\n",
    "    # emitted 200 lines per iteration; it was removed.)\n",
    "    # NOTE(review): these are slices of env.U stored without copying; if the environment\n",
    "    # reuses that buffer across resets (not visible here), add .copy().\n",
    "    xs_opt.append(env.U[:,-1,1,0])\n",
    "    ys_opt.append(env.U[:,-2,10,0])\n",
    "    # One PPO episode and one SAC episode on the same env, each starting from its own reset\n",
    "    _ppo_r, _u_ppo = runSingleEpisodeRL(RLController, env, ppoModel)\n",
    "    xs_ppo.append(_u_ppo[:,-1,1,0])\n",
    "    ys_ppo.append(_u_ppo[:,-2,10,0])\n",
    "    \n",
    "    _sac_r, _u_sac = runSingleEpisodeRL(RLController, env, sacModel)\n",
    "    xs_sac.append(_u_sac[:,-1,1,0])\n",
    "    ys_sac.append(_u_sac[:,-2,10,0])\n",
    "    # print(total_reward,_ppo_r,_sac_r)\n",
    "\n",
    "    \n",
    "\n",
    "# Shapes of the stacked traces, in the order optimisation, PPO, SAC\n",
    "for _xs, _ys in ((xs_opt, ys_opt), (xs_ppo, ys_ppo), (xs_sac, ys_sac)):\n",
    "    print(np.stack(_xs).shape, np.stack(_ys).shape)\n",
    "# data_opt = {\"a\": np.stack(xs_opt), \"u\": np.stack(ys_opt)}\n",
    "# scipy.io.savemat(\"data_opt_ns__0init_2.mat\", data_opt)\n",
    "\n",
    "# data_ppo = {\"a\": np.stack(xs_ppo), \"u\": np.stack(ys_ppo)}\n",
    "# scipy.io.savemat(\"data_ppo_ns__0init_2.mat\", data_ppo)\n",
    "\n",
    "# data_sac = {\"a\": np.stack(xs_sac), \"u\": np.stack(ys_sac)}\n",
    "# scipy.io.savemat(\"data_sac_ns__0init_2.mat\", data_sac)\n",
    "    # print(\"Total Reward optimized:\", total_reward)\n",
    "    # plt.plot(actions)\n",
    "    # plt.show()\n",
    "    # np.savez('result/NS_optmization.npz', U=env.U[:,:,:,0], V=env.U[:,:,:,1], desired_U=np.array(u_target), desired_V=np.array(v_target), actions=actions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_opt is only assigned in commented-out code, so inspect the stacked\n",
    "# optimisation traces directly instead of reading the undefined dict.\n",
    "np.stack(ys_opt).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def getInitialCondition(X):\n",
    "    u = np.random.uniform(-0.1, 0.1) * np.ones_like(X) \n",
    "    v = 0 * np.ones_like(X) \n",
    "    p = 0 * np.ones_like(X) \n",
    "    return u, v, p\n",
    "\n",
    "# # Set up boundary conditions here\n",
    "# boundary_condition = {\n",
    "#     \"upper\": [\"Controllable\", \"Dirchilet\"], \n",
    "#     \"lower\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "#     \"left\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "#     \"right\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "# }\n",
    "\n",
    "# # Timestep and spatial step for PDE Solver\n",
    "# T = 0.2\n",
    "# dt = 1e-3\n",
    "# dx, dy = 0.05, 0.05\n",
    "# X, Y = 1, 1\n",
    "# u_target = np.load('target.npz')['u']\n",
    "# v_target = np.load('target.npz')['v']\n",
    "# desire_states = np.stack([u_target, v_target], axis=-1) # (NT, Nx, Ny, 2)\n",
    "# NS2DParameters = {\n",
    "#         \"T\": T, \n",
    "#         \"dt\": dt, \n",
    "#         \"X\": X,\n",
    "#         \"dx\": dx, \n",
    "#         \"Y\": Y,\n",
    "#         \"dy\":dy,\n",
    "#         \"action_dim\": 1, \n",
    "#         \"reward_class\": NSReward(0.1),\n",
    "#         \"normalize\": False, \n",
    "#         \"reset_init_condition_func\": getInitialCondition,\n",
    "#         \"boundary_condition\": boundary_condition,\n",
    "#         \"U_ref\": desire_states, \n",
    "#         \"action_ref\": 2.0 * np.ones(1000), \n",
    "# }\n",
    "\n",
    "# # Make the NavierStokes PDE gym\n",
    "# env = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParameters)\n",
    "\n",
    "\n",
    "# Set initial condition function to be zero\n",
    "# def getInitialCondition(X):\n",
    "#     u = np.zeros_like(X) \n",
    "#     v = np.zeros_like(X) \n",
    "#     p = np.zeros_like(X) \n",
    "#     return u, v, p\n",
    "\n",
    "# Boundary conditions: one two-element list per wall of the domain.\n",
    "# NOTE(review): 'Dirchilet' is kept exactly as written because these strings are\n",
    "# handed to the environment; confirm the spelling it expects before changing it.\n",
    "boundary_condition = {\n",
    "    \"upper\": [\"Controllable\", \"Dirchilet\"], \n",
    "    \"lower\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"left\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "    \"right\": [\"Dirchilet\", \"Dirchilet\"], \n",
    "}\n",
    "\n",
    "# Timestep and spatial step for PDE Solver\n",
    "T = 0.201 # To perform 200 steps\n",
    "dt = 1e-3\n",
    "dx, dy = 0.05, 0.05\n",
    "X, Y = 1, 1\n",
    "\n",
    "# Open the target archive once and close it when done; indexing the archive\n",
    "# returns the stored array, which stays usable after the file is closed.\n",
    "with np.load('target.npz') as target_data:\n",
    "    u_target = target_data['u']\n",
    "    v_target = target_data['v']\n",
    "desire_states = np.stack([u_target, v_target], axis=-1) # (NT, Nx, Ny, 2)\n",
    "\n",
    "# Keyword arguments forwarded verbatim to gym.make below; the initial-condition\n",
    "# hook is whichever getInitialCondition is currently bound in the kernel.\n",
    "NS2DParameters = {\n",
    "        \"T\": T, \n",
    "        \"dt\": dt, \n",
    "        \"X\": X,\n",
    "        \"dx\": dx, \n",
    "        \"Y\": Y,\n",
    "        \"dy\":dy,\n",
    "        \"action_dim\": 1, \n",
    "        \"reward_class\": NSReward(0.1),\n",
    "        \"normalize\": False, \n",
    "        \"reset_init_condition_func\": getInitialCondition,\n",
    "        \"boundary_condition\": boundary_condition,\n",
    "        \"U_ref\": desire_states, \n",
    "        \"action_ref\": 2.0 * np.ones(1000), \n",
    "}\n",
    "\n",
    "# Make the NavierStokes PDE gym\n",
    "env = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParameters)\n",
    "\n",
    "# Model-Based Optimization to optimize action \n",
    "def apply_boundary(a1, a2):\n",
    "    a1[:,[-1, 0]] = 0.\n",
    "    a1[[-1,0],:] = 0.\n",
    "    a2[:,[-1, 0]] = 0.\n",
    "    a2[[-1,0],:] = 0.\n",
    "    return a1, a2\n",
    "\n",
    "# RL-only sample generation: 100 PPO and 100 SAC episodes on the random-initial-condition env.\n",
    "total_reward = 0.\n",
    "U, V = [], []\n",
    "# Rebinds T from the 0.201 horizon used to build env to the integer step count\n",
    "T = 200\n",
    "\n",
    "rewards = []\n",
    "times = []\n",
    "# Random-action rollout (actions drawn uniformly from [2, 4)). Its U/V are only\n",
    "# read by the disabled optimisation code inside the string literal further down.\n",
    "for experiment_i in range(1):\n",
    "    # np.random.seed(experiment_i)\n",
    "    # env.reset(seed=400)\n",
    "    env.reset()\n",
    "    s = time.time()\n",
    "    for t in tqdm(range(T)):\n",
    "        obs, reward, done, _ , _ = env.step(np.random.uniform(2,4)) \n",
    "        U.append(env.u)\n",
    "        V.append(env.v)\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward random:\", total_reward)\n",
    "u_target = np.load('target.npz')['u']\n",
    "v_target = np.load('target.npz')['v']\n",
    "u_ref = [2 for _ in range(T)]\n",
    "# Per-episode traces: xs_* hold index [:, -1, 1, 0], ys_* hold index [:, -2, 10, 0]\n",
    "xs_opt = []\n",
    "ys_opt = []\n",
    "xs_ppo = []\n",
    "ys_ppo = []\n",
    "xs_sac = []\n",
    "ys_sac = []\n",
    "for ite in tqdm(range(100)):\n",
    "    env.reset()\n",
    "    # Everything between the triple quotes is a bare string literal: it is evaluated\n",
    "    # and discarded, so the optimisation sweep does not run and xs_opt / ys_opt stay\n",
    "    # empty in this cell.\n",
    "    '''\n",
    "    Lam1, Lam2 = [], []\n",
    "    Lam1.append(np.zeros_like(U[0]))\n",
    "    Lam2.append(np.zeros_like(U[0]))\n",
    "    pressure = np.zeros_like(U[0])\n",
    "    for t in (range(T-1)):\n",
    "        lam1, lam2 = Lam1[-1], Lam2[-1]\n",
    "        dl1dx, dl1dy = central_difference(lam1,\"x\",dx), central_difference(lam1, \"y\", dy)\n",
    "        dl2dx, dl2dy = central_difference(lam2,\"x\", dx), central_difference(lam2, \"y\", dy) \n",
    "        laplace_l1, laplace_l2 = laplace(lam1, dx, dy), laplace(lam2, dx, dy)\n",
    "        dlam1dt = - 2 * dl1dx * U[-1-t] - dl1dy * V[-1-t] - dl2dx * V[-1-t] - 0.1 * laplace_l1 + (U[-1-t]-u_target[-1-t])\n",
    "        dlam2dt = - 2 * dl2dy * V[-1-t] - dl1dy * U[-1-t] - dl2dx * U[-1-t] - 0.1 * laplace_l2 + (V[-1-t]-v_target[-1-t])\n",
    "        lam1 = lam1 - dt * dlam1dt\n",
    "        lam2 = lam2 - dt * dlam2dt\n",
    "        lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "        pressure = env.solve_pressure(lam1, lam2, pressure)\n",
    "        lam1 = lam1 - dt * central_difference(pressure, \"x\", dx)\n",
    "        lam2 = lam2 - dt * central_difference(pressure, \"y\", dy)\n",
    "        lam1, lam2 = apply_boundary(lam1, lam2)\n",
    "        Lam1.append(lam1)\n",
    "        Lam2.append(lam2)\n",
    "    Lam1 = Lam1[::-1]\n",
    "    actions = []\n",
    "    for t in (range(T)):\n",
    "        dl1dx2 = central_difference(Lam1[t], \"y\", dy)\n",
    "        actions.append(u_ref[t] - 0.1/0.1 * sum(dl1dx2[-2, :])*5*dx)\n",
    "    U, V = [], []\n",
    "    # env.reset(seed=400)\n",
    "    env.reset()\n",
    "    total_reward = 0.\n",
    "    for t in (range(T)):\n",
    "        obs, reward, done, _ , _ = env.step(actions[t]) # actions === env.U[1:,-1,1,0]\n",
    "        U.append(env.u)\n",
    "        V.append(env.v)\n",
    "        total_reward += reward\n",
    "    # plt.plot(env.U[1:,-1,1,0])\n",
    "    # # plt.plot(u_target[1:, -2,10])\n",
    "    # plt.plot(env.U[1:,-2,10,0])\n",
    "    # rewSACRandom, uSACRandom = runSingleEpisode(RLController, envRLRandom, sacModel)\n",
    "    xs_opt.append(env.U[:,-1,1,0])\n",
    "    ys_opt.append(env.U[:,-2,10,0])\n",
    "    '''\n",
    "    # if i % 1000 == 0: print(i)\n",
    "    # One PPO episode; runSingleEpisodeRL resets env itself and returns the stacked observations\n",
    "    _ppo_r, _u_ppo = runSingleEpisodeRL(RLController, env, ppoModel)\n",
    "    xs_ppo.append(_u_ppo[:,-1,1,0])\n",
    "    ys_ppo.append(_u_ppo[:,-2,10,0])\n",
    "    \n",
    "    # One SAC episode on the same env, again starting from its own reset\n",
    "    _sac_r, _u_sac = runSingleEpisodeRL(RLController, env, sacModel)\n",
    "    xs_sac.append(_u_sac[:,-1,1,0])\n",
    "    ys_sac.append(_u_sac[:,-2,10,0])\n",
    "    # print(total_reward,_ppo_r,_sac_r)\n",
    "\n",
    "    \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A bare `import scipy` does not guarantee that the io submodule is loaded,\n",
    "# so import it explicitly before calling scipy.io.savemat.\n",
    "import scipy.io\n",
    "\n",
    "# print(np.stack(xs_opt).shape,np.stack(ys_opt).shape)\n",
    "\n",
    "print(np.stack(xs_ppo).shape,np.stack(ys_ppo).shape)\n",
    "\n",
    "\n",
    "print(np.stack(xs_sac).shape,np.stack(ys_sac).shape)\n",
    "# The optimisation traces are not exported here: xs_opt / ys_opt are left empty by\n",
    "# the RL-only generation cell.\n",
    "# data_opt = {\"a\": np.stack(xs_opt), \"u\": np.stack(ys_opt)}\n",
    "# scipy.io.savemat(\"data_opt_ns_new3.mat\", data_opt)\n",
    "\n",
    "\n",
    "# One row per episode: \"a\" stacks the xs_* traces, \"u\" stacks the ys_* traces\n",
    "data_ppo = {\"a\": np.stack(xs_ppo), \"u\": np.stack(ys_ppo)}\n",
    "scipy.io.savemat(\"data_ppo_ns_test_0init.mat\", data_ppo)\n",
    "\n",
    "data_sac = {\"a\": np.stack(xs_sac), \"u\": np.stack(ys_sac)}\n",
    "scipy.io.savemat(\"data_sac_ns_test_0init.mat\", data_sac)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Number of PPO episodes to average over; raise it for a less noisy estimate\n",
    "n_eval_episodes = 1\n",
    "total_ppo_reward = 0\n",
    "for i in range(n_eval_episodes):\n",
    "    # _a keeps the stacked observations of the most recent episode for the plotting cell\n",
    "    rew, _a = runSingleEpisodeRL(RLController, env, ppoModel)\n",
    "    total_ppo_reward += rew\n",
    "# Divide by the episode count so the printed value really is an average\n",
    "print(\"PPO Reward Average:\", total_ppo_reward / n_eval_episodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "# # Define Controllers\n",
    "# def ModelController(obs, beta):\n",
    "#     kernel = solveKernelFunction(beta)\n",
    "#     return solveControl(kernel, obs)\n",
    "\n",
    "\n",
    "\n",
    "# def openLoopController(_, _a):\n",
    "#     return 0\n",
    "print(_a.shape)\n",
    "print(u_target.shape)\n",
    "\n",
    "# Time traces of channel 0 from the last PPO episode at grid indices (-1, 2) and (-2, 10).\n",
    "# The original author's note says the trace at (-1, 10) equals the one at (-1, 2).\n",
    "for _row, _col in ((-1, 2), (-2, 10)):\n",
    "    plt.plot(_a[:, _row, _col, 0])\n",
    "\n",
    "print(env.U[1:,:,:,0].shape)\n",
    "# Initial values at the two sampled locations used by the data-generation cells\n",
    "print(_a[0,-1,1,0])\n",
    "print((_a[0,-2,10,0]))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def runSingleEpisodeQP(model, env, parameter):\n",
    "    terminate = False\n",
    "    truncate = False\n",
    "\n",
    "    # Holds the resulting states\n",
    "    uStorage = []\n",
    "\n",
    "    # Reset Environment\n",
    "    obs,__ = env.reset()\n",
    "    uStorage.append(obs)\n",
    "\n",
    "    i = 0\n",
    "    rew = 0\n",
    "    while not truncate and not terminate:\n",
    "        # use backstepping controller\n",
    "        action = model(obs, parameter,i)\n",
    "        \n",
    "        obs, rewards, terminate, truncate, info = env.step(action)\n",
    "        # print(action, obs)\n",
    "        uStorage.append(obs)\n",
    "        rew += rewards \n",
    "        i += 1\n",
    "    u = np.array(uStorage)\n",
    "    return rew, u\n",
    "def QP_filter_Controller(obs, parameter,index):\n",
    "    # print(obs)\n",
    "    # print(parameter)\n",
    "    return parameter[index+1]\n",
    "\n",
    "def find_earliest_true(condition):\n",
    "    # Iterate over the first two dimensions (10 and 8) and check for each slice\n",
    "    earliest_indices = np.full(condition.shape[:2], 0)  # Initialize with -1 (indicating no valid index)\n",
    "\n",
    "    for i in range(condition.shape[0]):  # Iterate over first dimension\n",
    "        for j in range(condition.shape[1]):  # Iterate over second dimension\n",
    "            # For each slice (i, j), find the earliest index where the condition is True\n",
    "            # and all subsequent values are also True\n",
    "            for k in range(condition.shape[2]):\n",
    "                if not condition[i, j, condition.shape[2]-k-1]: \n",
    "                    # print(k)\n",
    "                    if k == 0:\n",
    "                        earliest_indices[i,j] = -1\n",
    "                    else:\n",
    "                        earliest_indices[i,j] = condition.shape[2]-k\n",
    "                    break\n",
    "            # valid_indices = np.where(np.cumprod(condition[i, j, :]) == 1)[0]\n",
    "            # if len(valid_indices) > 0:\n",
    "            #     earliest_indices[i, j] = valid_indices[0]  # Store the first valid index\n",
    "    return earliest_indices\n",
    "\n",
    "RL_1000 = np.load(\"../../../verify-pde-control/ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\")\n",
    "RL_reward_beforeQP = []\n",
    "RL_reward_afterQP = []\n",
    "uBcks_beforeQP_list = []\n",
    "uBcks_afterQP_list = []\n",
    "u_target = np.load('target.npz')['u']\n",
    "v_target = np.load('target.npz')['v']\n",
    "# uBcks_beforeQP,uBcks_afterQP = 0,0\n",
    "for i in range(RL_1000[\"safe_label\"].transpose().shape[0]):\n",
    "    # if i < 1:continue\n",
    "    # if RL_1000[\"Y_nominal\"][-1, i] < 0.5: continue\n",
    "    U_list = RL_1000[\"U_nominal\"][:, i]\n",
    "    Y_list = RL_1000[\"Y_nominal\"][:, i]\n",
    "    print(i)\n",
    "    # print(RL_1000[\"safe_label\"][:, i])\n",
    "    # def getInitialConditionFixed(nx):\n",
    "    #     return np.ones(nx) * U_list[0]\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X) \n",
    "        v = 0 * np.ones_like(X) \n",
    "        p = 0 * np.ones_like(X) \n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    reward_beforeQP, uBcks_beforeQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_list)\n",
    "    \n",
    "    uBcks_beforeQP_list.append(uBcks_beforeQP[:,-2,10,0])\n",
    "    RL_reward_beforeQP.append(reward_beforeQP)\n",
    "\n",
    "    U_safe_list = RL_1000[\"U_safe\"][:, i]\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X) \n",
    "        v = 0 * np.ones_like(X) \n",
    "        p = 0 * np.ones_like(X) \n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    reward_afterQP, uBcks_afterQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_safe_list)\n",
    "    # print(uBcks_afterQP.shape) \n",
    "    uBcks_afterQP_list.append(uBcks_afterQP[:,-2,10,0])\n",
    "    # print(Y_list-uBcks_beforeQP[:,-2,10,0])\n",
    "    RL_reward_afterQP.append(reward_afterQP)\n",
    "    print(reward_beforeQP,reward_afterQP)\n",
    "    # break\n",
    "    # if reward_beforeQP > -5.37: break\n",
    "    # if i > 10: break\n",
    "\n",
    "result = np.array([uBcks_beforeQP_list, uBcks_afterQP_list]) #(2,100,51, 100) # first 100 is num of samples, second 100 is num of 100 spatial steps\n",
    "print(result.shape)\n",
    "# u_target[:,-2,10]\n",
    "# condition = result[:, :,:]-u_target[:,-2,10] < 0 \n",
    "condition = (result[:, :,:]-u_target[:,-2,10] < 0.145) & (result[:, :,:]-u_target[:,-2,10] > -0.145)\n",
    "earliest_index = find_earliest_true(condition)\n",
    "valid_earliest_index_beforeQP = earliest_index[0,earliest_index[0,:]>=0]\n",
    "valid_earliest_index_afterQP = earliest_index[1,earliest_index[1,:]>=0]\n",
    "# result.shape[2] - earliest_index\n",
    "print(f\"beforeQP PF steps among {valid_earliest_index_beforeQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_beforeQP), np.std(result.shape[2] - valid_earliest_index_beforeQP))\n",
    "print(f\"afterQP PF steps among {valid_earliest_index_afterQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_afterQP), np.std(result.shape[2] - valid_earliest_index_afterQP))\n",
    "\n",
    "\n",
    "# output_boundary_endtime_diff = result[:, :,-1, 0] - 1\n",
    "# output_boundary_endtime_diff = np.where(output_boundary_endtime_diff < 0, 0, output_boundary_endtime_diff)\n",
    "# # print(\"< 1 distance: beforeQP and afterQP\")\n",
    "# # print(np.mean(output_boundary_endtime_diff, axis=1))\n",
    "# # print(np.std(output_boundary_endtime_diff, axis=1))\n",
    "# print(\"times less than 1: beforeQP and afterQP\",np.sum(output_boundary_endtime_diff == 0, axis=1))\n",
    "\n",
    "reward_result = np.array([RL_reward_beforeQP,RL_reward_afterQP])\n",
    "print(\"reward: beforeQP and afterQP\")\n",
    "print(np.mean(reward_result, axis=1))\n",
    "print(np.std(reward_result, axis=1))\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 66 PF trajectories 1.7878787878787878 0.7285342594043217\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38985462]\n",
    "# [0.01178686 0.02533289]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 1.9452054794520548 1.0053338702415768\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38591909]\n",
    "# [0.01178686 0.02424937]\n",
    "\n",
    "# < 0.14, >-0.14\n",
    "# ns_ppo_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20\n",
    "# beforeQP PF steps among 57 PF trajectories 1.4912280701754386 0.5657724735648105\n",
    "# afterQP PF steps among 43 PF trajectories 1.6744186046511629 0.7988634435961706\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38591909]\n",
    "# [0.01178686 0.02424937]\n",
    "\n",
    "# ns_ppo_all_train145_100_50_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 59 PF trajectories 2.3728813559322033 1.5171148474983218\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.41304788]\n",
    "# [0.01178686 0.06742862]\n",
    "\n",
    "# ns_ppo_all__NOfixed_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 2.0 0.9363291775690444\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38102047]\n",
    "# [0.01178686 0.01687528]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.bson\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 100 PF trajectories 28.67 6.497776542787541\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.55368201]\n",
    "# [0.01178686 0.1629205 ]\n",
    "\n",
    "# ns_ppo_all_NOfixed_train145_100_20_ns_145_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 1.8356164383561644 0.7766048100305953\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38336327]\n",
    "# [0.01178686 0.01836462]\n",
    "# \n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy running\n",
    "# used\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 86 PF trajectories 2.197674418604651 0.9743186938843825\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37318814]\n",
    "# [0.01178686 0.01385474]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy todo\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 85 PF trajectories 5.752941176470588 5.19027570921972\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37358549]\n",
    "# [0.01178686 0.0392465 ]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# use this\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 99 PF trajectories 31.96969696969697 6.285231279070403\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.72149912]\n",
    "# [0.01178686 0.16546378]\n",
    "\n",
    "# ns_ppo_all_train145_100_50_ns_12_1s_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 86 PF trajectories 2.255813953488372 0.9786802517442239\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37262777]\n",
    "# [0.01178686 0.0146308 ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Replay each trajectory's stored control sequences ('U_nominal', then 'U_safe') in the\n",
    "# Navier-Stokes environment and compare u at grid point [-2, 10] with the target there.\n",
    "RL_1000 = np.load(\"../../../verify-pde-control/ns_sac_all_train145_100_80_ns_145_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\")\n",
    "RL_reward_beforeQP = []\n",
    "RL_reward_afterQP = []\n",
    "uBcks_beforeQP_list = []\n",
    "uBcks_afterQP_list = []\n",
    "# Open the target archive once and release the file handle afterwards.\n",
    "with np.load('target.npz') as target_file:\n",
    "    u_target = target_file['u']\n",
    "    v_target = target_file['v']\n",
    "\n",
    "# The trajectory index is the last axis of the stored arrays.\n",
    "for i in range(RL_1000[\"safe_label\"].shape[-1]):\n",
    "    U_list = RL_1000[\"U_nominal\"][:, i]\n",
    "    Y_list = RL_1000[\"Y_nominal\"][:, i]  # not used in this cell\n",
    "    U_safe_list = RL_1000[\"U_safe\"][:, i]\n",
    "\n",
    "    # Both rollouts start from the same state: u = U_list[0] everywhere, v = p = 0.\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X)\n",
    "        v = 0 * np.ones_like(X)\n",
    "        p = 0 * np.ones_like(X)\n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "\n",
    "    # Rollout driven by 'U_nominal'; only u at the probe point is kept.\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    try:\n",
    "        reward_beforeQP, uBcks_beforeQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_list)\n",
    "    finally:\n",
    "        # Close the environment instead of leaving one open per trajectory.\n",
    "        envBcksFixed.close()\n",
    "    uBcks_beforeQP_list.append(uBcks_beforeQP[:,-2,10,0])\n",
    "    RL_reward_beforeQP.append(reward_beforeQP)\n",
    "\n",
    "    # Same rollout in a fresh environment, driven by 'U_safe'.\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    try:\n",
    "        reward_afterQP, uBcks_afterQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_safe_list)\n",
    "    finally:\n",
    "        envBcksFixed.close()\n",
    "    uBcks_afterQP_list.append(uBcks_afterQP[:,-2,10,0])\n",
    "    RL_reward_afterQP.append(reward_afterQP)\n",
    "\n",
    "# Axis 0: before/after QP, axis 1: trajectory, axis 2: time sample.\n",
    "result = np.array([uBcks_beforeQP_list, uBcks_afterQP_list])\n",
    "print(result.shape)\n",
    "# True where u is strictly within 0.145 of the target at the probe point.\n",
    "condition = np.abs(result - u_target[:,-2,10]) < 0.145\n",
    "earliest_index = find_earliest_true(condition)\n",
    "# Negative entries mark trajectories whose final sample is outside the band; leave them out.\n",
    "valid_earliest_index_beforeQP = earliest_index[0,earliest_index[0,:]>=0]\n",
    "valid_earliest_index_afterQP = earliest_index[1,earliest_index[1,:]>=0]\n",
    "# Number of trailing in-band samples = series length - start index of the trailing run.\n",
    "print(f\"beforeQP PF steps among {valid_earliest_index_beforeQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_beforeQP), np.std(result.shape[2] - valid_earliest_index_beforeQP))\n",
    "print(f\"afterQP PF steps among {valid_earliest_index_afterQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_afterQP), np.std(result.shape[2] - valid_earliest_index_afterQP))\n",
    "\n",
    "# Mean and standard deviation of the episode reward, before vs. after QP.\n",
    "reward_result = np.array([RL_reward_beforeQP,RL_reward_afterQP])\n",
    "print(\"reward: beforeQP and afterQP\")\n",
    "print(np.mean(reward_result, axis=1))\n",
    "print(np.std(reward_result, axis=1))\n",
    "\n",
    "# ns_sac_all_train145_100_20_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 86 PF trajectories 17.88372093023256 12.648576171962363\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.07479318]\n",
    "# [1.13579015 1.14247683]\n",
    "\n",
    "# ns_sac_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# (2, 100, 201)\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 82 PF trajectories 17.353658536585368 12.304442932672846\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.07111313]\n",
    "# [1.13579015 1.14130688]\n",
    "\n",
    "# ns_sac_all_train145_100_50_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# use this\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 85 PF trajectories 21.31764705882353 13.55443210753536\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.36229554]\n",
    "# [1.13579015 1.2484941 ]\n",
    "\n",
    "# ns_sac_all_train145_100_50_ns_145_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# used\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 79 PF trajectories 17.772151898734176 11.993615777569529\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.04696494]\n",
    "# [1.13579015 1.13748504]\n",
    "\n",
    "# ns_sac_all_train145_100_80_ns_145_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# (2, 100, 201)\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 79 PF trajectories 17.696202531645568 11.88377729488499\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.04417703]\n",
    "# [1.13579015 1.13556596]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Deviation of the stored series in `result` from `u_target` at grid point [-2, 10],\n",
    "# taken at the last time sample of every trajectory.\n",
    "result[:, :,-1]-u_target[-1,-2,10] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib as mpl\n",
    "mpl.rcParams.update(mpl.rcParamsDefault)  # start from matplotlib's default style\n",
    "# 201 samples = reset state + 200 steps. With the solver step dt = 1e-3 they span\n",
    "# t = 0 ... 0.2, so the axis must end at 0.2 (ending at 0.201 stretched it by 0.5%).\n",
    "x = np.linspace(0.0, 0.2, 201)\n",
    "y1 = uBcks_beforeQP_list[-1]\n",
    "y2 = uBcks_afterQP_list[-1]\n",
    "y3 = u_target[:,-2,10]\n",
    "# Component 0 at grid point [-1, 1] of the most recent rollouts.\n",
    "# NOTE(review): presumably the boundary control input -- confirm.\n",
    "u_afterQP = uBcks_afterQP[:,-1,1,0]\n",
    "u_beforeQP = uBcks_beforeQP[:,-1,1,0]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "# Markers on the two probe-point series; dashed lines for the target and the [-1, 1] series.\n",
    "ax.plot(x, y1, label='before QP', linestyle='-', marker='o')\n",
    "ax.plot(x, y2, label='after QP', linestyle='--', marker='x')\n",
    "ax.plot(x, y3, label='GT', linestyle='--')\n",
    "ax.plot(x, u_afterQP, label='u_afterQP', linestyle='--')\n",
    "ax.plot(x, u_beforeQP, label='u_beforeQP', linestyle='--')\n",
    "\n",
    "ax.set_title('Plot of Two Lines')\n",
    "ax.set_xlabel('x-axis')\n",
    "ax.set_ylabel('y-axis')\n",
    "ax.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib as mpl\n",
    "mpl.rcParams.update(mpl.rcParamsDefault)  # start from matplotlib's default style\n",
    "# 201 samples = reset state + 200 steps. With the solver step dt = 1e-3 they span\n",
    "# t = 0 ... 0.2, so the axis must end at 0.2 (ending at 0.201 stretched it by 0.5%).\n",
    "x = np.linspace(0.0, 0.2, 201)\n",
    "y3 = u_target[:,-2,-2]\n",
    "# Component 0 at grid point [-1, 1] of the most recent rollouts.\n",
    "# NOTE(review): presumably the boundary control input -- confirm.\n",
    "u_afterQP = uBcks_afterQP[:,-1,1,0]\n",
    "u_beforeQP = uBcks_beforeQP[:,-1,1,0]\n",
    "\n",
    "# Component 0 at grid point [-2, -2], the same location as the target series y3.\n",
    "y_afterQP = uBcks_afterQP[:,-2,-2,0]\n",
    "y_beforeQP = uBcks_beforeQP[:,-2,-2,0]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "# Markers on the two [-2, -2] series; dashed lines for the target and the [-1, 1] series.\n",
    "ax.plot(x, y_beforeQP, label='before QP', linestyle='-', marker='o')\n",
    "ax.plot(x, y_afterQP, label='after QP', linestyle='--', marker='x')\n",
    "ax.plot(x, y3, label='GT', linestyle='--')\n",
    "ax.plot(x, u_afterQP, label='u_afterQP', linestyle='--')\n",
    "ax.plot(x, u_beforeQP, label='u_beforeQP', linestyle='--')\n",
    "\n",
    "ax.set_title('Plot of Two Lines')\n",
    "ax.set_xlabel('x-axis')\n",
    "ax.set_ylabel('y-axis')\n",
    "ax.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "def runSingleEpisodeQP(model, env, parameter):\n",
    "    \"\"\"Roll out one episode and collect every observation.\n",
    "\n",
    "    Args:\n",
    "        model: callable `model(obs, parameter, step)` returning the next action.\n",
    "        env: environment exposing `reset()` and `step(action)`; `step` must return\n",
    "            (obs, reward, terminated, truncated, info).\n",
    "        parameter: passed through to `model` unchanged on every step.\n",
    "\n",
    "    Returns:\n",
    "        (total_reward, observations): the summed step rewards, and an array stacking\n",
    "        the reset observation followed by one observation per step.\n",
    "    \"\"\"\n",
    "    obs, _ = env.reset()\n",
    "    trajectory = [obs]\n",
    "    total_reward = 0\n",
    "    step = 0\n",
    "    done = False\n",
    "    while not done:\n",
    "        # The step counter lets the controller index into its stored sequence.\n",
    "        action = model(obs, parameter, step)\n",
    "        obs, reward, terminated, truncated, _ = env.step(action)\n",
    "        trajectory.append(obs)\n",
    "        total_reward += reward\n",
    "        step += 1\n",
    "        done = terminated or truncated\n",
    "    return total_reward, np.array(trajectory)\n",
    "def QP_filter_Controller(obs, parameter,index):\n",
    "    \"\"\"Open-loop controller that replays a stored control sequence.\n",
    "\n",
    "    `obs` is ignored. Step `index` returns `parameter[index + 1]`, so entry 0 of\n",
    "    `parameter` is never applied as an action.\n",
    "    \"\"\"\n",
    "    next_entry = index + 1\n",
    "    return parameter[next_entry]\n",
    "\n",
    "def find_earliest_true(condition):\n",
    "    \"\"\"Locate where the trailing run of True values starts along the last axis.\n",
    "\n",
    "    Args:\n",
    "        condition: 3-D array; the search runs along axis 2 for every (i, j) pair.\n",
    "\n",
    "    Returns:\n",
    "        Integer array of shape `condition.shape[:2]` holding, per (i, j):\n",
    "          * the index of the first element of the unbroken run of True values that\n",
    "            reaches the end of the axis (0 when every element is True);\n",
    "          * -1 when the final element is False, i.e. there is no such run.\n",
    "    \"\"\"\n",
    "    n_steps = condition.shape[2]\n",
    "    # Default 0 covers the case where nothing along the axis is False.\n",
    "    tail_start = np.full(condition.shape[:2], 0)\n",
    "    if n_steps == 0:\n",
    "        return tail_start\n",
    "\n",
    "    violated = ~np.asarray(condition, dtype=bool)\n",
    "    has_violation = violated.any(axis=2)\n",
    "    # Distance of the last False from the end of the axis (0 means the final element).\n",
    "    from_end = violated[:, :, ::-1].argmax(axis=2)\n",
    "\n",
    "    # The trailing run starts right after the last False ...\n",
    "    tail_start[has_violation] = (n_steps - from_end)[has_violation]\n",
    "    # ... unless that False is the final element, in which case there is no run.\n",
    "    tail_start[has_violation & (from_end == 0)] = -1\n",
    "    return tail_start\n",
    "\n",
    "# Replay each trajectory's stored control sequences ('U_nominal', then 'U_safe') in the\n",
    "# Navier-Stokes environment. The complete observation arrays are kept (not only a\n",
    "# probe-point series) so that individual rollouts can be plotted as velocity fields.\n",
    "RL_1000 = np.load(\"../../../verify-pde-control/ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\")\n",
    "RL_reward_beforeQP = []\n",
    "RL_reward_afterQP = []\n",
    "uBcks_beforeQP_list = []\n",
    "uBcks_afterQP_list = []\n",
    "# Open the target archive once and release the file handle afterwards.\n",
    "with np.load('target.npz') as target_file:\n",
    "    u_target = target_file['u']\n",
    "    v_target = target_file['v']\n",
    "\n",
    "# The trajectory index is the last axis of the stored arrays.\n",
    "for i in range(RL_1000[\"safe_label\"].shape[-1]):\n",
    "    U_list = RL_1000[\"U_nominal\"][:, i]\n",
    "    Y_list = RL_1000[\"Y_nominal\"][:, i]  # not used in this cell\n",
    "    U_safe_list = RL_1000[\"U_safe\"][:, i]\n",
    "    print(i)\n",
    "\n",
    "    # Both rollouts start from the same state: u = U_list[0] everywhere, v = p = 0.\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X)\n",
    "        v = 0 * np.ones_like(X)\n",
    "        p = 0 * np.ones_like(X)\n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "\n",
    "    # Rollout driven by 'U_nominal'.\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    try:\n",
    "        reward_beforeQP, uBcks_beforeQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_list)\n",
    "    finally:\n",
    "        # Close the environment instead of leaving one open per trajectory.\n",
    "        envBcksFixed.close()\n",
    "    uBcks_beforeQP_list.append(uBcks_beforeQP)\n",
    "    RL_reward_beforeQP.append(reward_beforeQP)\n",
    "\n",
    "    # Same rollout in a fresh environment, driven by 'U_safe'.\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    try:\n",
    "        reward_afterQP, uBcks_afterQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_safe_list)\n",
    "    finally:\n",
    "        envBcksFixed.close()\n",
    "    uBcks_afterQP_list.append(uBcks_afterQP)\n",
    "    RL_reward_afterQP.append(reward_afterQP)\n",
    "    print(reward_beforeQP,reward_afterQP)\n",
    "\n",
    "# Mean and standard deviation of the episode reward, before vs. after QP.\n",
    "reward_result = np.array([RL_reward_beforeQP,RL_reward_afterQP])\n",
    "print(\"reward: beforeQP and afterQP\")\n",
    "print(np.mean(reward_result, axis=1))\n",
    "print(np.std(reward_result, axis=1))\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 66 PF trajectories 1.7878787878787878 0.7285342594043217\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38985462]\n",
    "# [0.01178686 0.02533289]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 1.9452054794520548 1.0053338702415768\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38591909]\n",
    "# [0.01178686 0.02424937]\n",
    "\n",
    "# < 0.14, >-0.14\n",
    "# ns_ppo_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20\n",
    "# beforeQP PF steps among 57 PF trajectories 1.4912280701754386 0.5657724735648105\n",
    "# afterQP PF steps among 43 PF trajectories 1.6744186046511629 0.7988634435961706\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38591909]\n",
    "# [0.01178686 0.02424937]\n",
    "\n",
    "# ns_ppo_all_train145_100_50_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 59 PF trajectories 2.3728813559322033 1.5171148474983218\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.41304788]\n",
    "# [0.01178686 0.06742862]\n",
    "\n",
    "# ns_ppo_all__NOfixed_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 2.0 0.9363291775690444\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38102047]\n",
    "# [0.01178686 0.01687528]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.bson\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 100 PF trajectories 28.67 6.497776542787541\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.55368201]\n",
    "# [0.01178686 0.1629205 ]\n",
    "\n",
    "# ns_ppo_all_NOfixed_train145_100_20_ns_145_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 73 PF trajectories 1.8356164383561644 0.7766048100305953\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.38336327]\n",
    "# [0.01178686 0.01836462]\n",
    "# \n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy running\n",
    "# used\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 86 PF trajectories 2.197674418604651 0.9743186938843825\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37318814]\n",
    "# [0.01178686 0.01385474]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_145_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy todo\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 85 PF trajectories 5.752941176470588 5.19027570921972\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37358549]\n",
    "# [0.01178686 0.0392465 ]\n",
    "\n",
    "# ns_ppo_all_train145_100_20_ns_12_1s_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# use this\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 99 PF trajectories 31.96969696969697 6.285231279070403\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.72149912]\n",
    "# [0.01178686 0.16546378]\n",
    "\n",
    "# ns_ppo_all_train145_100_50_ns_12_1s_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 86 PF trajectories 1.9534883720930232 0.7910395977532534\n",
    "# afterQP PF steps among 86 PF trajectories 2.255813953488372 0.9786802517442239\n",
    "# reward: beforeQP and afterQP\n",
    "# [-5.37608562 -5.37262777]\n",
    "# [0.01178686 0.0146308 ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import cm\n",
    "\n",
    "def plot_comparative_velocity_fields(data1, data2, target_data, t_index, title,\n",
    "                                     save_path=\"ns_ppo.png\",\n",
    "                                     labels=('PPO without filtering', 'PPO with filtering', 'Target ground truth')):\n",
    "    \"\"\"Overlay the streamlines of two velocity fields and a target field at one time index.\n",
    "\n",
    "    Args:\n",
    "        data1, data2, target_data: arrays indexed as [time, row, column, component];\n",
    "            component 0 is drawn as the horizontal velocity, component 1 as the vertical one.\n",
    "        t_index: index along the time axis to plot.\n",
    "        title: axes title.\n",
    "        save_path: file the figure is written to. The default is the previously\n",
    "            hard-coded name, so existing calls behave as before.\n",
    "        labels: legend entries for data1 (red), data2 (green) and target_data (blue).\n",
    "\n",
    "    The semi-transparent background image shows the speed of `data2`.\n",
    "    The figure is saved to `save_path` and then shown; nothing is returned.\n",
    "    \"\"\"\n",
    "    # Grid on the unit square with one node per stored value.\n",
    "    X, Y = np.meshgrid(np.linspace(0, 1, data1.shape[2]), np.linspace(0, 1, data1.shape[1]))\n",
    "\n",
    "    # One time slice per data set, in legend order.\n",
    "    fields = [data[t_index] for data in (data1, data2, target_data)]\n",
    "    colors = ('red', 'green', 'blue')\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(6, 6))\n",
    "    # Seed the streamlines on one shared 10 x 10 lattice so the three fields are comparable.\n",
    "    seed_points = np.array([[x, y] for x in np.linspace(0, 1, 10) for y in np.linspace(0, 1, 10)])\n",
    "\n",
    "    for field, color in zip(fields, colors):\n",
    "        ax.streamplot(X, Y, field[:, :, 0], field[:, :, 1], color=color, linewidth=0.5, density=2,\n",
    "                      arrowstyle='->', arrowsize=1, start_points=seed_points,\n",
    "                      integration_direction='both', maxlength=0.4, minlength=0.1)\n",
    "\n",
    "    ax.set_xlim([0, 1])\n",
    "    ax.set_ylim([0, 1])\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel('x')\n",
    "    ax.set_ylabel('y')\n",
    "    # Single-point proxy lines provide the legend entries for the streamlines.\n",
    "    for color, label in zip(colors, labels):\n",
    "        ax.plot([0], [0], color=color, lw=0.5, label=label)\n",
    "    ax.legend(loc='lower right')\n",
    "\n",
    "    # Background: speed of the second data set.\n",
    "    U2 = fields[1][:, :, 0]\n",
    "    V2 = fields[1][:, :, 1]\n",
    "    speed = np.sqrt(U2**2 + V2**2)\n",
    "    img = ax.imshow(speed, extent=(0, 1, 0, 1), origin='lower', cmap='coolwarm', alpha=0.5)\n",
    "    fig.colorbar(img, ax=ax, label='Speed')\n",
    "    plt.savefig(save_path, dpi=400, bbox_inches='tight')\n",
    "    plt.show()\n",
    "\n",
    "# Plot trajectory 92 at its last stored time sample (t_index = -1).\n",
    "# The list entries must be full observation arrays indexed [time, row, column, component];\n",
    "# the target is assembled the same way by stacking u_target and v_target on a new last axis.\n",
    "plot_comparative_velocity_fields(uBcks_beforeQP_list[92], uBcks_afterQP_list[92], np.stack([u_target, v_target], axis=-1), -1, 'Comparative Velocity Fields at t=0.2')\n",
    "# NOTE(review): 92 appears to be the trajectory index picked for the PPO run -- confirm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "RL_1000 = np.load(\"../../../verify-pde-control/ns_sac_all_train145_100_50_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\")\n",
    "RL_reward_beforeQP = []\n",
    "RL_reward_afterQP = []\n",
    "uBcks_beforeQP_list = []\n",
    "uBcks_afterQP_list = []\n",
    "u_target = np.load('target.npz')['u']\n",
    "v_target = np.load('target.npz')['v']\n",
    "# uBcks_beforeQP,uBcks_afterQP = 0,0\n",
    "for i in range(RL_1000[\"safe_label\"].transpose().shape[0]):\n",
    "    # if i < 1:continue\n",
    "    # if RL_1000[\"Y_nominal\"][-1, i] < 0.5: continue\n",
    "    U_list = RL_1000[\"U_nominal\"][:, i]\n",
    "    Y_list = RL_1000[\"Y_nominal\"][:, i]\n",
    "    print(i)\n",
    "    # print(RL_1000[\"safe_label\"][:, i])\n",
    "    # def getInitialConditionFixed(nx):\n",
    "    #     return np.ones(nx) * U_list[0]\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X) \n",
    "        v = 0 * np.ones_like(X) \n",
    "        p = 0 * np.ones_like(X) \n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    reward_beforeQP, uBcks_beforeQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_list)\n",
    "    \n",
    "    uBcks_beforeQP_list.append(uBcks_beforeQP)\n",
    "    RL_reward_beforeQP.append(reward_beforeQP)\n",
    "\n",
    "    U_safe_list = RL_1000[\"U_safe\"][:, i]\n",
    "    def getInitialConditionFixed(X):\n",
    "        u = U_list[0] * np.ones_like(X) \n",
    "        v = 0 * np.ones_like(X) \n",
    "        p = 0 * np.ones_like(X) \n",
    "        return u, v, p\n",
    "    NS2DParametersFixed = NS2DParameters.copy()\n",
    "    NS2DParametersFixed[\"reset_init_condition_func\"] = getInitialConditionFixed\n",
    "    envBcksFixed = gym.make(\"PDEControlGym-NavierStokes2D\", **NS2DParametersFixed)\n",
    "    reward_afterQP, uBcks_afterQP = runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, U_safe_list)\n",
    "    # print(uBcks_afterQP.shape) \n",
    "    uBcks_afterQP_list.append(uBcks_afterQP)\n",
    "    # print(Y_list-uBcks_beforeQP[:,-2,10,0])\n",
    "    RL_reward_afterQP.append(reward_afterQP)\n",
    "    print(reward_beforeQP,reward_afterQP)\n",
    "    # break\n",
    "    # if reward_beforeQP > -5.37: break\n",
    "    # if i > 10: break\n",
    "\n",
    "# result = np.array([uBcks_beforeQP_list, uBcks_afterQP_list]) #(2,100,51, 100) # first 100 is num of samples, second 100 is num of 100 spatial steps\n",
    "# print(result.shape)\n",
    "# # u_target[:,-2,10]\n",
    "# # condition = result[:, :,:]-u_target[:,-2,10] < 0 \n",
    "# condition = (result[:, :,:]-u_target[:,-2,10] < 0.145) & (result[:, :,:]-u_target[:,-2,10] > -0.145)\n",
    "# earliest_index = find_earliest_true(condition)\n",
    "# valid_earliest_index_beforeQP = earliest_index[0,earliest_index[0,:]>=0]\n",
    "# valid_earliest_index_afterQP = earliest_index[1,earliest_index[1,:]>=0]\n",
    "# # result.shape[2] - earliest_index\n",
    "# print(f\"beforeQP PF steps among {valid_earliest_index_beforeQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_beforeQP), np.std(result.shape[2] - valid_earliest_index_beforeQP))\n",
    "# print(f\"afterQP PF steps among {valid_earliest_index_afterQP.shape[0]} PF trajectories\", np.mean(result.shape[2] - valid_earliest_index_afterQP), np.std(result.shape[2] - valid_earliest_index_afterQP))\n",
    "\n",
    "\n",
    "# output_boundary_endtime_diff = result[:, :,-1, 0] - 1\n",
    "# output_boundary_endtime_diff = np.where(output_boundary_endtime_diff < 0, 0, output_boundary_endtime_diff)\n",
    "# # print(\"< 1 distance: beforeQP and afterQP\")\n",
    "# # print(np.mean(output_boundary_endtime_diff, axis=1))\n",
    "# # print(np.std(output_boundary_endtime_diff, axis=1))\n",
    "# print(\"times less than 1: beforeQP and afterQP\",np.sum(output_boundary_endtime_diff == 0, axis=1))\n",
    "\n",
    "reward_result = np.array([RL_reward_beforeQP,RL_reward_afterQP])\n",
    "print(\"reward: beforeQP and afterQP\")\n",
    "print(np.mean(reward_result, axis=1))\n",
    "print(np.std(reward_result, axis=1))\n",
    "\n",
    "# ns_sac_all_train145_100_20_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 86 PF trajectories 17.88372093023256 12.648576171962363\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.07479318]\n",
    "# [1.13579015 1.14247683]\n",
    "\n",
    "# ns_sac_all_train145_100_20_ns_145_0.1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# (2, 100, 201)\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 82 PF trajectories 17.353658536585368 12.304442932672846\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.07111313]\n",
    "# [1.13579015 1.14130688]\n",
    "\n",
    "# ns_sac_all_train145_100_50_ns_145_1reg_1pf_time_CBFnoNOnotpf_pfall_addsafe__abs_20.npy\n",
    "# use this\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 85 PF trajectories 21.31764705882353 13.55443210753536\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.36229554]\n",
    "# [1.13579015 1.2484941 ]\n",
    "\n",
    "# ns_sac_all_train145_100_50_ns_145_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# used\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 79 PF trajectories 17.772151898734176 11.993615777569529\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.04696494]\n",
    "# [1.13579015 1.13748504]\n",
    "\n",
    "# ns_sac_all_train145_100_80_ns_145_1reg_1pf_time_CBFnoT_pfall_addsafe__abs_20.npy\n",
    "# (2, 100, 201)\n",
    "# beforeQP PF steps among 80 PF trajectories 17.5125 12.082004955718235\n",
    "# afterQP PF steps among 79 PF trajectories 17.696202531645568 11.88377729488499\n",
    "# reward: beforeQP and afterQP\n",
    "# [-18.0499873  -18.04417703]\n",
    "# [1.13579015 1.13556596]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib import cm\n",
    "\n",
    "def plot_comparative_velocity_fields(data1, data2, target_data, t_index, title):\n",
    "    # Create a mesh grid for the velocity fields\n",
    "    X, Y = np.meshgrid(np.linspace(0, 1, data1.shape[2]), np.linspace(0, 1, data1.shape[1]))\n",
    "    \n",
    "    # Extract the velocity components for each dataset\n",
    "    U1 = data1[t_index, :, :, 0]\n",
    "    V1 = data1[t_index, :, :, 1]\n",
    "    U2 = data2[t_index, :, :, 0]\n",
    "    V2 = data2[t_index, :, :, 1]\n",
    "    U_target = target_data[t_index, :, :, 0]\n",
    "    V_target = target_data[t_index, :, :, 1]\n",
    "    \n",
    "    # Calculate the speed for color mapping\n",
    "    speed = np.sqrt(U2**2 + V2**2)\n",
    "\n",
    "    # Create the plot\n",
    "    fig, ax = plt.subplots(figsize=(6, 6))\n",
    "    # Use the 'start_points' parameter to specify the seed points for streamlines\n",
    "    seed_points = np.array([[x, y] for x in np.linspace(0, 1, 10) for y in np.linspace(0, 1, 10)])\n",
    "\n",
    "    ax.streamplot(X, Y, U1, V1, color='red', linewidth=0.5, density=2, arrowstyle='->', arrowsize=1,\n",
    "                  start_points=seed_points, integration_direction='both')\n",
    "    ax.streamplot(X, Y, U2, V2, color='green', linewidth=0.5, density=2, arrowstyle='->', arrowsize=1,\n",
    "                  start_points=seed_points, integration_direction='both')\n",
    "    ax.streamplot(X, Y, U_target, V_target, color='blue', linewidth=0.5, density=2, arrowstyle='->', arrowsize=1,\n",
    "                  start_points=seed_points, integration_direction='both')\n",
    "\n",
    "    # Set plot limits and labels\n",
    "    ax.set_xlim([0, 1])\n",
    "    ax.set_ylim([0, 1])\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel('x')\n",
    "    ax.set_ylabel('y')\n",
    "    ax.plot([0], [0], color='red', lw=0.5, label='SAC without filtering'),\n",
    "    ax.plot([0], [0], color='green', lw=0.5, label='SAC with filtering'),\n",
    "    ax.plot([0], [0], color='blue', lw=0.5, label='Target ground truth')\n",
    "    ax.legend(loc='lower right')\n",
    "\n",
    "    # Display the underlying speed field\n",
    "    img = ax.imshow(speed, extent=(0, 1, 0, 1), origin='lower', cmap='coolwarm', alpha=0.5)\n",
    "    fig.colorbar(img, ax=ax, label='Speed')\n",
    "    plt.savefig(\"ns_sac.png\", dpi=400, bbox_inches='tight')\n",
    "    plt.show()\n",
    "\n",
    "# Example usage\n",
    "# Ensure that you have loaded your data and target velocities correctly\n",
    "plot_comparative_velocity_fields(uBcks_beforeQP_list[1], uBcks_afterQP_list[1], np.stack([u_target, v_target], axis=-1), -1, 'Comparative Velocity Fields at t=0.2')\n",
    "# 25 32 51 53 91 95"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pdecontrol",
   "language": "python",
   "name": "pdecontrol"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
