
% Activation-function names, typeset in small caps.
% NOTE: \tanh is redefined here, so from this point on \tanh prints the
% small-caps name "Tanh" rather than the standard math operator.
\renewcommand*{\tanh}{\textsc{Tanh}}
\newcommand*{\relu}{\textsc{ReLU}}

\usepackage{xcolor}
% Colours keyed by activation function: dark green for Tanh, dark red for ReLU.
\definecolor{colTanh}{rgb}{0,0.5,0}
\definecolor{colRelu}{rgb}{0.5, 0, 0}

% Colours keyed by data split; same RGB values as the Tanh/ReLU pair above.
\definecolor{colTrain}{rgb}{0,0.5,0}
\definecolor{colTest}{rgb}{0.5, 0, 0}

% Six-step colour ramp col1..col6 running blue -> black -> red.
% NOTE(review): presumably one colour per curve/layer -- confirm against the
% plots that use col1..col6.
\colorlet{col1}{blue}
\colorlet{col2}{black!33!blue}
\colorlet{col3}{black!66!blue}
\colorlet{col4}{black}
\colorlet{col5}{red!50!black}
\colorlet{col6}{red}

\pgfplotsset{
% Pin the pgfplots compatibility level used by every style in this list.
compat=1.16,
% Two-stop colormap named `cmprefit`: light grey (200,200,200) at 0cm to
% black (0,0,0) at 1cm. It is selected by name in the `IB prefit` style.
% NOTE(review): per the pgfplots manual the `colormap` key also activates the
% map it defines; the `IB axis*` styles select `hot` explicitly -- confirm no
% plot depends on the implicit default.
/pgfplots/colormap={cmprefit}{
	[1cm]rgb255(0cm)=(200,200,200),rgb255(1cm)=(0,0,0)
},
% Scatter style that draws each data point as a small filled circle node
% named pt-<index>, coloured from the active colormap at position index/5.0.
% After each marker, the point is joined to its predecessor by a faint line,
% except when its index is a multiple of 6: points therefore form chains of
% six consecutive indices (presumably one point per layer -- confirm with the
% data files).
IB plot/.style={
    scatter,
    only marks,
    mark=none,
    scatter/position=absolute,
    scatter/@pre marker code/.code={%
        \node[circle,fill,inner sep=1.5pt,
              color of colormap=\pgfkeysvalueof{/data point/index}/5.0]
            (pt-\pgfkeysvalueof{/data point/index})
            at (\pgfkeysvalueof{/data point/x},\pgfkeysvalueof{/data point/y}) {};%
    },
    scatter/@post marker code/.code={%
        % Index 0 has no predecessor node, so nothing is drawn for it.
        \ifnum\pgfkeysvalueof{/data point/index}>0
            \pgfmathtruncatemacro{\IBchainpos}{mod(\pgfkeysvalueof{/data point/index},6)}%
            % Remainder 0 marks the first point of a new chain: no line back.
            \ifnum\IBchainpos>0
                \pgfmathtruncatemacro{\IBprevidx}{\pgfkeysvalueof{/data point/index}-1}%
                \draw[line width=1pt,
                      color of colormap=\pgfkeysvalueof{/data point/index}/5.0,
                      opacity=0.15]
                    (pt-\IBprevidx) -- (pt-\pgfkeysvalueof{/data point/index});%
            \fi
        \fi
    },
},
% Scatter style using the `cmprefit` colormap: each data point becomes a small
% filled circle node named pt-<index>, coloured at colormap position <index>
% (no scaling). No connecting lines are drawn: the post-marker hook is empty.
% Node names share the pt-<index> scheme of the other IB scatter styles.
IB prefit/.style={
    colormap name=cmprefit,
    scatter,
    only marks,
    mark=none,
    scatter/position=absolute,
    scatter/@pre marker code/.code={%
        \node[circle,fill,inner sep=1.5pt,
              color of colormap=\pgfkeysvalueof{/data point/index}]
            (pt-\pgfkeysvalueof{/data point/index})
            at (\pgfkeysvalueof{/data point/x},\pgfkeysvalueof{/data point/y}) {};%
    },
    % Disabled alternative kept for reference: only marks,fill=gray,color=gray
    scatter/@post marker code/.code={},
},
% Shared axis setup for the I(X,T_i) / I(T_i,Y) plots: square-ish axis at
% 0.7\linewidth, `hot` colormap with point meta clamped to [1, 8000]
% (presumably the epoch range -- confirm), major grids on both axes, and
% fixed axis limits.
IB axis/.style={
    height=0.7\linewidth,
    width=0.7\linewidth,
    colormap name=hot,
    point meta min=1,
    point meta max=8000,
    ymajorgrids=true,
    xmajorgrids=true,
    ymin=0, ymax=1.03,
    xmin=0, xmax=12.36,
    % Tick positions are pinned explicitly so each hard-coded label belongs to
    % a known tick. Previously the label lists carried a dummy first entry to
    % absorb an automatically generated tick outside the visible range, which
    % silently mislabels the axis whenever automatic placement changes.
    ytick={0,0.2,0.4,0.6,0.8,1.0},
    yticklabels={0.0,0.2,0.4,0.6,0.8,1.0},
    xtick={0,2,4,6,8,10,12},
    xticklabels={0,2,4,6,8,10,12},
    ylabel={$I(T_i,Y)$},
    xlabel={$I(X,T_i)$},
},
% Scatter style for data with 5 points per chain (5 layers): each data point
% is a small filled circle node named pt-<index>, coloured from the active
% colormap at position index/2.0. Every point whose index is not a multiple
% of 5 is joined to its predecessor by a semi-transparent line.
IB plot REAL5/.style={
    scatter,
    only marks,
    mark=none,
    scatter/position=absolute,
    scatter/@pre marker code/.code={%
        \node[circle,fill,inner sep=1.5pt,
              color of colormap=\pgfkeysvalueof{/data point/index}/2.0]
            (pt-\pgfkeysvalueof{/data point/index})
            at (\pgfkeysvalueof{/data point/x},\pgfkeysvalueof{/data point/y}) {};%
    },
    scatter/@post marker code/.code={%
        % Index 0 has no predecessor node, so nothing is drawn for it.
        \ifnum\pgfkeysvalueof{/data point/index}>0
            % 5 layers per chain; remainder 0 starts a new chain.
            \pgfmathtruncatemacro{\IBchainpos}{mod(\pgfkeysvalueof{/data point/index},5)}%
            \ifnum\IBchainpos>0
                \pgfmathtruncatemacro{\IBprevidx}{\pgfkeysvalueof{/data point/index}-1}%
                \draw[line width=1pt,
                      color of colormap=\pgfkeysvalueof{/data point/index}/2.0,
                      opacity=0.65]
                    (pt-\IBprevidx) -- (pt-\pgfkeysvalueof{/data point/index});%
            \fi
        \fi
    },
},
% Scatter style for data with 6 points per chain (6 layers): each data point
% is a small filled circle node named pt-<index>, coloured from the active
% colormap at position index/2.0. Every point whose index is not a multiple
% of 6 is joined to its predecessor by a semi-transparent line.
%
% The chain length was previously 5, a verbatim copy of `IB plot REAL5`.
% It is now 6, matching the style name and the REAL5 (5) / REAL7 (7) pattern.
IB plot REAL6/.style={
    scatter,
    only marks,
    mark=none,
    scatter/position=absolute,
    scatter/@pre marker code/.code={%
        \node[circle,fill,inner sep=1.5pt,
              color of colormap=\pgfkeysvalueof{/data point/index}/2.0]
            (pt-\pgfkeysvalueof{/data point/index})
            at (\pgfkeysvalueof{/data point/x},\pgfkeysvalueof{/data point/y}) {};%
    },
    scatter/@post marker code/.code={%
        % Index 0 has no predecessor node, so nothing is drawn for it.
        \ifnum\pgfkeysvalueof{/data point/index}=0
        \else
            % 6 layers per chain; remainder 0 starts a new chain.
            \pgfmathtruncatemacro{\itest}{mod(\pgfkeysvalueof{/data point/index},6)}%
            \ifnum\itest=0
            \else
                \pgfmathtruncatemacro{\lastindex}{\pgfkeysvalueof{/data point/index}-1}%
                \draw[line width=1pt,
                      color of colormap=\pgfkeysvalueof{/data point/index}/2.0,
                      opacity=0.65]
                    (pt-\lastindex) -- (pt-\pgfkeysvalueof{/data point/index});%
            \fi
        \fi
    },
},
% Scatter style for data with 7 points per chain (7 layers): each data point
% is a small filled circle node named pt-<index>, coloured from the active
% colormap at position index/2.0. Every point whose index is not a multiple
% of 7 is joined to its predecessor by a semi-transparent line.
IB plot REAL7/.style={
    scatter,
    only marks,
    mark=none,
    scatter/position=absolute,
    scatter/@pre marker code/.code={%
        \node[circle,fill,inner sep=1.5pt,
              color of colormap=\pgfkeysvalueof{/data point/index}/2.0]
            (pt-\pgfkeysvalueof{/data point/index})
            at (\pgfkeysvalueof{/data point/x},\pgfkeysvalueof{/data point/y}) {};%
    },
    scatter/@post marker code/.code={%
        % Index 0 has no predecessor node, so nothing is drawn for it.
        \ifnum\pgfkeysvalueof{/data point/index}>0
            % 7 layers per chain; remainder 0 starts a new chain.
            \pgfmathtruncatemacro{\IBchainpos}{mod(\pgfkeysvalueof{/data point/index},7)}%
            \ifnum\IBchainpos>0
                \pgfmathtruncatemacro{\IBprevidx}{\pgfkeysvalueof{/data point/index}-1}%
                \draw[line width=1pt,
                      color of colormap=\pgfkeysvalueof{/data point/index}/2.0,
                      opacity=0.65]
                    (pt-\IBprevidx) -- (pt-\pgfkeysvalueof{/data point/index});%
            \fi
        \fi
    },
},
% Axis setup for the MNIST I(X,T_i) / I(T_i,Y) plots: same size, colormap and
% grids as `IB axis`, but point meta clamped to [1, 3000], different axis
% limits, and automatic tick placement and labels.
IB axis MNIST/.style={
    width=0.7\linewidth,
    height=0.7\linewidth,
    colormap name=hot,
    point meta min=1,
    point meta max=3000,
    xmajorgrids=true,
    ymajorgrids=true,
    xlabel={$I(X,T_i)$},
    ylabel={$I(T_i,Y)$},
    xmin=3.9, xmax=16.1,
    ymin=0.4, ymax=3.5,
},
% Modifier: turns on the axis colorbar.
IB colorbar single/.style={colorbar},
% Modifier: blanks the y-axis label and all y tick labels.
IB noY/.style={
    ylabel={},
    yticklabels={},
},
% Modifier: blanks the x-axis label and all x tick labels.
IB noX/.style={
    xlabel={},
    xticklabels={},
},
% Axis setup for curves plotted against the epoch: x runs from 0 to 8000 with
% ticks every 2000 up to 6000 (no tick at the right edge), y starts at 0 with
% no fixed upper bound, and horizontal grid lines only.
MI axis/.style={
    width=0.7\linewidth,
    height=0.7\linewidth,
    xmin=0, xmax=8000,
    ymin=0,
    xtick={0,2000,4000,6000},
    xlabel={Epoch},
    ymajorgrids=true,
},
% MNIST variant of the epoch axis: x runs from 0 to 3000 with ticks every
% 1000 (including the right edge), y starts at 0 with no fixed upper bound,
% and horizontal grid lines only.
MI axis MNIST/.style={
    width=0.7\linewidth,
    height=0.7\linewidth,
    xmin=0, xmax=3000,
    ymin=0,
    xtick={0,1000,2000,3000},
    xlabel={Epoch},
    ymajorgrids=true,
},
}
% TikZ style `IB text`: fills the node background with light grey (10% black).
\tikzset{
    IB text/.style={fill=black!10},
}
