\documentclass[tikz,border=6pt]{standalone}
\usepackage{amsmath}
\usepackage{graphicx}
\usetikzlibrary{arrows.meta,positioning,calc,fit,backgrounds,shapes.geometric}
\usepackage{fontawesome5}
% ----------------- Colors (tweak as you like) -----------------
% Referenced by the figure below:
%   flowBlue - fill of the hand-drawn module polygons
%   imgGray  - fill of the `img` node style
\definecolor{flowBlue}{RGB}{106,132,150}
\definecolor{imgGray}{RGB}{210,210,210}
% Defined but not referenced anywhere in this file:
\definecolor{vaeBlue}{RGB}{190,220,228}
\definecolor{clfBlue}{RGB}{20,64,95}

% ----------------- Styles -----------------
\tikzset{
  % Connector: 1.2pt stroke ending in a Latex tip (tip comes from arrows.meta).
  arrow/.style={
    line width=1.2pt,
    -Latex,
  },
  % Text box: white, outlined, slightly rounded; multi-line text is centred.
  box/.style={
    draw,
    fill=white,
    line width=1.2pt,
    rounded corners=2pt,
    inner sep=3pt,
    minimum height=9mm,
    align=center,
  },
  % A `box` that is at least 28mm wide.
  smallbox/.style={
    box,
    minimum width=28mm,
  },
  % Image placeholder: gray fill, at least 16mm x 16mm.
  img/.style={
    fill=imgGray,
    line width=1.2pt,
    minimum width=16mm,
    minimum height=16mm,
  },
}

\begin{document}
\begin{tikzpicture}[node distance=12mm]

% ===================== Top row =====================
% Input image. `img` supplies the gray fill and minimum size; draw=none
% suppresses the border.
\node[img, draw=none] (img) {\includegraphics[width=1.8cm]{figures/input_image_72.png}};

% Empty helper nodes marking the left and right edges of the encoder shape.
\node[right=12mm of img] (lenc) {};
\node[right=18mm of lenc] (renc) {};

% Encoder body: a hand-drawn quadrilateral, 16mm tall at lenc.west and
% 8mm tall at renc.east.
\draw[draw=black,line width=1.2pt,fill=flowBlue]
  ($(lenc.west)+(0mm,-8mm)$) --
  ($(lenc.west)+(0mm, 8mm)$) --
  ($(renc.east)+(0mm, 4mm)$) --
  ($(renc.east)+(0mm,-4mm)$) -- cycle;

\node[right=15mm of img] (enc) {\color{white}Encoder};
% Snowflake icon, anchored to the encoder's left edge instead of an absolute
% canvas coordinate so that it follows the shape if the image width or the
% spacing changes. The offset approximates the previous absolute position
% (25mm,4.5mm) for the current layout; fine-tune by eye if needed.
\node[text=cyan!40!white] at ($(lenc.west)+(2.6mm,4.5mm)$) {\large\faSnowflake};
% Range written with \dots, matching the t=\tau+1,\dots,T loop label used
% elsewhere in this figure.
\node[smallbox, right=12mm of renc] (time) {Backward Euler:\\$t=T,\dots,\tau$};

% Empty helper node on the time -> flow-model arrow; the loop arrow coming
% back from the Forward Euler box ends at thelper.center.
\node[right=12mm of time] (thelper) {};

% Empty helper node below the time box; the v_theta arrow from the flow model
% is routed through it on its way to the clean-estimate box.
\node[below=6mm of time] (helper) {};

% Empty helper nodes marking the left edge, waist and right edge of the
% rectified-flow shape.
\node[right=12mm of thelper] (lflow) {};
\node[right=18mm of lflow] (cflow) {};
\node[right=18mm of cflow] (rflow) {};
% Flow-model body: a hand-drawn hexagon, 16mm tall at both ends and narrowing
% to 8mm at cflow.center.
\draw[draw=black,line width=1.2pt,fill=flowBlue]
  ($(lflow.west)+(0mm,-8mm)$) --
  ($(lflow.west)+(0mm, 8mm)$) --
  ($(cflow.center)+(0mm, 4mm)$) --
  ($(rflow.east)+(0mm, 8mm)$) --
  ($(rflow.east)+(0mm,-8mm)$) --
  ($(cflow.center)+(0mm,-4mm)$) -- cycle;

\node[right=31mm of time] (flow) {\color{white}Rectified flow model};
% Snowflake icon, anchored to the shape's left edge instead of an absolute
% canvas coordinate, which silently depended on the width of every node to
% its left. The offset approximates the previous absolute position
% (114.5mm,4.5mm) for the current layout; fine-tune by eye if needed.
\node[text=cyan!40!white] at ($(lflow.west)+(2.2mm,4.5mm)$) {\large\faSnowflake};
% ===================== Bottom row =====================
% Clean-latent estimate box, placed directly below the input image.
\node[smallbox, below=12mm of img] (clean)
  {Clean latent estimate:\\$\hat z \leftarrow z + v_\theta(z,t)\cdot(T-t) \cdot dt$};

% Empty helper node above the flow model's waist; the loop arrow from the
% Forward Euler box is routed through it.
\node[above=9mm of cflow] (fhelper) {};

% Empty helper nodes marking the left and right edges of the decoder shape.
\node[right=10mm of clean] (ldec) {};
\node[right=18mm of ldec] (rdec) {};

% Decoder body: a hand-drawn quadrilateral, 8mm tall at ldec.west and
% 16mm tall at rdec.east.
\path[fill=flowBlue, draw=black, line width=1.2pt]
  ([yshift=-4mm]ldec.west) --
  ([yshift= 4mm]ldec.west) --
  ([yshift= 8mm]rdec.east) --
  ([yshift=-8mm]rdec.east) -- cycle;

\node[right=2mm of ldec] (dec) {\color{white}Decoder};
% Snowflake icon, offset from the decoder's left helper node.
\node[text=cyan!40!white] at ([shift={(18.7mm,3.5mm)}]ldec.north) {\large\faSnowflake};
% Empty helper nodes marking the left and right edges of the predictor shape.
\node[right=10mm of rdec] (lclf) {};
\node[right=18mm of lclf] (rclf) {};

% Predictor body: a hand-drawn quadrilateral, 16mm tall at lclf.west and
% 8mm tall at rclf.east.
\draw[draw=black,line width=1.2pt,fill=flowBlue]
  ($(lclf.west)+(0mm,-8mm)$) --
  ($(lclf.west)+(0mm, 8mm)$) --
  ($(rclf.east)+(0mm, 4mm)$) --
  ($(rclf.east)+(0mm,-4mm)$) -- cycle;

\node[right=0mm of lclf] (clf) {\color{white}Predictor};
% Snowflake icon, anchored to the predictor's own helper node. It was
% previously positioned relative to the decoder's ldec with a 36.3mm offset,
% so any change to the decoder-predictor spacing moved the icon off the
% predictor. The offset approximates the previous position for the current
% layout; fine-tune by eye if needed.
\node[text=cyan!40!white] at ($(lclf.north)+(3.3mm,3.5mm)$) {\large\faSnowflake};

% Guidance update box.
\node[smallbox, right=10mm of rclf] (guide)
{Guidance:\\$z \leftarrow z - s\cdot \nabla_{\hat z}\mathcal{L}(\hat y,y)$};

% Time-step update box; the loop arrow back to the top row starts at its
% north anchor.
\node[smallbox, right=14mm of guide] (ztm1)
{Forward Euler:\\$t\leftarrow t+1$};

% ----- Straight connectors; each label sits below the middle of its segment -----
\draw[arrow] (img)  -- node[below] {$X$} (lenc);
\draw[arrow] (renc) -- node[below] {$z$} (time);
% One label with two halves separated by manual \; spacing.
\draw[arrow] (time) -- node[below] {$t=\tau \;\;\;\;\;(z, t)$} (lflow);
%\draw[arrow] (rflow) -- (clean) node[midway, above] {$v_\theta(z_t, t)$};
\draw[arrow] (clean) -- node[below] {$\hat z$} (ldec);
\draw[arrow] (rdec)  -- node[below] {$\hat X$} (lclf);
\draw[arrow] (rclf)  -- node[below] {$\hat y$} (guide);
\draw[arrow] (guide) -- node[below] {$z$} (ztm1);
%\draw[arrow] (ztm1) -- (lflow) node[midway, above] {$z_{t+1}$};

%\draw[arrow]
%(ztm1.east)
%to[out=45, in=90, looseness=1.25] (thelper.center) node[below, xshift=20mm, yshift=16mm] {$t=\tau+1,\dots,1$};

% ----- Loop from the Forward Euler box back to the top row -----
% Drawn as two arrows, so an arrow tip is rendered at fhelper.west as well as
% at thelper.center. The range label is attached to the end point and moved
% with explicit shifts.
\draw[arrow] (ztm1.north) |- (fhelper.west);
\draw[arrow] (fhelper.east) -| (thelper.center)
  node[above, xshift=20mm, yshift=11mm] {$t=\tau+1,\dots,T$};

% ----- Flow-model output routed to the clean-estimate box -----
% Starts 3mm below cflow.south and is again drawn as two arrows, so an arrow
% tip is rendered at helper.west as well as at clean.north.
\draw[arrow] ([yshift=-3mm]cflow.south) |- (helper.west)
  node[below, xshift=30mm] {$v_\theta(z,t)$};
\draw[arrow] (helper.east) -| (clean.north);


%\draw[arrow]
%(rflow)
%to[out=-30, in=0, looseness=1] (helper.west) node[below, %xshift=10mm, yshift=0mm] {$v_\theta(z_t,t)$};
%\draw[arrow]
%(helper.east)
%to[out=180, in=60, looseness=0.5] (clean) node[below, xshift=30mm, yshift=0mm] {};


%\coordinate (vr) at ($(vout.east)+(10mm,0)$);
%\coordinate (vd) at ($(vr)+(0,-30mm)$);
%\draw[arrow] (vout.east) -- (vr) -- (vd) -- (ztm1.east);

%\coordinate (bL) at ($(zhat0.west)+(-14mm,0)$);
%\coordinate (bU) at ($(bL)+(0,32mm)$);
%\coordinate (bR) at ($(flow.west)+(-12mm,0)$);
%\draw[arrow] (zhat0.west) -- (bL) -- (bU) -- (bR) -- (flow.west);

\end{tikzpicture}
\end{document}