# Complete, reproducible script to build and prepare environment
# Directory the script is started from. It is used further down as the
# location for `pip install '.[torchgte240]'`, so start the script from the
# repository root.
PSLM_REPO=$(pwd)

# modify the installation path and env name if you want
# By default everything is installed into the parent directory of the repo.
WRKSPC=$(dirname "${PSLM_REPO}")  # quoted so paths containing spaces survive
INSTALLDIR=${WRKSPC}
ENV_NAME="pslm_conda_25_62"

# Later steps clone and build relative to the current directory, so stop
# here if INSTALLDIR cannot be entered. `return` covers the case where this
# file is sourced, `exit` the case where it is executed.
cd "${INSTALLDIR}" || {
  echo "ERROR: cannot change into INSTALLDIR '${INSTALLDIR}'" >&2
  return 1 2>/dev/null || exit 1
}

# Bootstrap: take conda from the site module system.
# Step 1: leave any conda environment that may already be active. Everything
# this command prints (errors included) is thrown away.
source deactivate 1> /dev/null 2>&1
# Step 2: load the miniforge3 module.
module load miniforge3
# Step 3: report which conda executable `which` resolves. The label says
# "Version", but the value printed is a path.
conda_location=$(which conda)
echo "Conda Version:" ${conda_location}  # unquoted on purpose: same output as the inline $(...) form


# Create conda environment, and print whether it is loaded correctly
# The environment lives at an explicit prefix inside INSTALLDIR.
env_prefix="${INSTALLDIR}/${ENV_NAME}"

# Everything below installs into "whatever environment is active". If the
# environment cannot be created or activated, stop instead of installing
# packages into some other environment. `return` covers the case where this
# file is sourced, `exit` the case where it is executed.
if ! conda create --prefix "${env_prefix}" python=3.11 --yes -c defaults; then
  echo "ERROR: conda create failed for '${env_prefix}'" >&2
  return 1 2>/dev/null || exit 1
fi
if ! source activate "${env_prefix}"; then
  echo "ERROR: could not activate conda environment '${env_prefix}'" >&2
  return 1 2>/dev/null || exit 1
fi
echo "Pip Version:" "$(which pip)"  # should be from the new environment!

# Conda packages:
# conda-pack is used by the packing step at the end of this script.
conda install -c conda-forge conda-pack libstdcxx-ng --yes # install here, for the unpack


# Toolchain selection.
# rocm_version picks the rocm module here and the /opt/rocm-<version> paths
# used by later steps; libfabric_path is passed to the plugin's configure.
rocm_version=6.2.0
libfabric_path=/opt/cray/libfabric/1.15.2.0

# Load modules, one at a time and in this order.
for module_name in \
  PrgEnv-gnu/8.5.0 \
  "rocm/${rocm_version}" \
  craype-accel-amd-gfx90a \
  gcc-native/12.3 \
  cray-mpich/8.1.28; do
  module load "${module_name}"
done

######### COMPILE PIP PACKAGES ########################

# pytorch and core reqs
# Runs in a subshell: each step only runs if the previous one succeeded, and
# a failure cannot leave the rest of the script in the wrong directory.
(
  cd "${PSLM_REPO}" || exit 1
  # torch 2.5.1 from the PyTorch ROCm 6.2 wheel index.
  pip install torch==2.5.1 --index-url https://download.pytorch.org/whl/rocm6.2 || exit 1
  # The repository itself with its 'torchgte240' extra. Skipped when the
  # torch install above failed, so pip is not left to choose a torch build
  # on its own (presumably the extra depends on torch; TODO confirm).
  pip install '.[torchgte240]'
) || echo "ERROR: installing torch / the repository package failed" >&2
cd "${INSTALLDIR}" || echo "WARNING: could not change into '${INSTALLDIR}'" >&2

# amdsmi
# The amd_smi sources shipped with ROCm are copied into the workspace and
# installed from that copy (presumably because the tree under /opt is not
# writable for the build; TODO confirm).
amd_smi_src="${WRKSPC}/amd_smi_${rocm_version}"
# Start from a clean copy: if the destination already exists, `cp -R` would
# place the sources in a nested sub-directory and pip would install the old
# tree instead.
rm -rf -- "${amd_smi_src}"
(
  cp -R "/opt/rocm-${rocm_version}/share/amd_smi/" "${amd_smi_src}" || exit 1
  cd "${amd_smi_src}" || exit 1
  pip install .
) || echo "ERROR: amdsmi installation failed" >&2
cd "${INSTALLDIR}" || echo "WARNING: could not change into '${INSTALLDIR}'" >&2

## FA2/3
# Temporary checkout: cloned, patched, built into the active environment and
# removed again.
fa_src="${INSTALLDIR}/flash-attention_25_62_retrieval"
# Remove leftovers of an interrupted earlier run; git clone refuses to clone
# into an existing non-empty directory.
rm -rf -- "${fa_src}"
(
  git clone https://github.com/ROCmSoftwarePlatform/flash-attention "${fa_src}" || exit 1
  cd "${fa_src}" || exit 1
  # Annoying patch for now, there used to be a particular module config that loads a more modern cc version
  sed -i 's/c++20/c++17/g' setup.py || exit 1
  # --no-build-isolation: build against the packages already installed in
  # the active environment rather than an isolated build environment.
  MAX_JOBS=64 PYTORCH_ROCM_ARCH='gfx90a' GPU_ARCHS='gfx90a' pip install . --no-build-isolation
) || echo "ERROR: flash-attention build failed" >&2
cd "${INSTALLDIR}" || echo "WARNING: could not change into '${INSTALLDIR}'" >&2
rm -rf -- "${fa_src}"

######### interconnects ########################
# Download the plugin repo
# The checkout is also used as the install prefix (PLUG_PREFIX below) and is
# archived by the packing step, so it is kept on disk. An existing checkout
# is reused instead of letting `git clone` fail on it.
plugin_src="${INSTALLDIR}/aws-ofi-rccl_25_62_retrieval"
if [[ ! -d "${plugin_src}" ]]; then
  git clone --recursive https://github.com/ROCmSoftwarePlatform/aws-ofi-rccl "${plugin_src}"
fi

if cd "${plugin_src}"; then
  # Build the plugin
  ./autogen.sh
  autogen_status=$?
  # Prepend the HIP libraries. The ${VAR:+...} form avoids a trailing ':'
  # (which would mean "current directory") when LD_LIBRARY_PATH is empty.
  export LD_LIBRARY_PATH="/opt/rocm-${rocm_version}/hip/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
  PLUG_PREFIX=$PWD

  # Each stage runs only if the previous one succeeded.
  # MPICH_DIR is not set in this script; presumably it comes from the
  # cray-mpich module. TODO confirm.
  if [[ ${autogen_status} -eq 0 ]] \
    && CC=hipcc CFLAGS="-I/opt/rocm-${rocm_version}/rccl/include" ./configure \
      --with-libfabric="${libfabric_path}" --with-rccl="/opt/rocm-${rocm_version}" --enable-trace \
      --prefix="${PLUG_PREFIX}" --with-hip="/opt/rocm-${rocm_version}/hip" --with-mpi="${MPICH_DIR}" \
    && make \
    && make install; then
    # Reminder to export the plugin to your path
    echo "${PLUG_PREFIX}"
    echo "Add the following line in the environment to use the AWS OFI RCCL plugin"
    # \$ keeps LD_LIBRARY_PATH literal so the printed line can be pasted as is.
    echo "export LD_LIBRARY_PATH=${PLUG_PREFIX}/lib:\$LD_LIBRARY_PATH"
  else
    echo "ERROR: building the AWS OFI RCCL plugin failed in '${PLUG_PREFIX}'" >&2
  fi
else
  echo "ERROR: cannot change into '${plugin_src}'; AWS OFI RCCL plugin not built" >&2
fi


######### PACK A STATIC COPY OF THE ENVIRONMENT  and RCCL ########################
# This step needs to be repeated if the env is changed
# export INSTALLDIR=${WRKSPC}
# export ENV_NAME="pslm_conda_25_62"

# Pack up the entire thing
# Both archives are written into INSTALLDIR. Nothing is deleted or written
# unless that directory can be entered, because the paths below are relative.
if cd "${INSTALLDIR}"; then
  # Remove a previous archive first so the output file does not already exist.
  rm -f -- "${ENV_NAME}.tar.gz"
  conda pack -p "${INSTALLDIR}/${ENV_NAME}" -o "${ENV_NAME}.tar.gz" --compress-level=1 \
    || echo "ERROR: conda pack failed; ${ENV_NAME}.tar.gz is missing or incomplete" >&2
  # and pack up the aws-ofi-rccl
  # -C makes the archive contain the directory's contents, not the directory.
  rm -f -- aws-ofi-rccl_25_62_retrieval.tar.gz
  tar -czf aws-ofi-rccl_25_62_retrieval.tar.gz -C aws-ofi-rccl_25_62_retrieval/ . \
    || echo "ERROR: tar failed; aws-ofi-rccl_25_62_retrieval.tar.gz is missing or incomplete" >&2
else
  echo "ERROR: cannot change into '${INSTALLDIR}'; nothing was packed" >&2
fi
