# Copyright 2025 The corr_faith Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Image for LLM faithfulness experiments.
# Start from an NVIDIA PyTorch container. See the list of releases here:
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/
# We pin the image by sha256 digest (in addition to the tag) to avoid the need
# to check for updates on every run.
# To find the sha when incrementing the version, use:
# docker images --digests | grep pytorch
FROM nvcr.io/nvidia/pytorch:25.03-py3@sha256:64887d431df52013f766878a546fad94716cbe0b39d991c3ea5c63a1a7d14c61

# Clear the user and group with id 1000 (these may belong to "ubuntu" in the
# base image). This is necessary to avoid conflicts with the nonroot user we
# create below; nonroot may need UID 1000 due to the way docker containers are
# run on our cluster.
# getent prints nothing when the id is unused, in which case the deletion is
# skipped. The names are quoted so they are always passed as a single argument.
RUN USER_TO_DELETE=$(getent passwd 1000 | cut -d: -f1) && \
    if [ -n "$USER_TO_DELETE" ]; then userdel "$USER_TO_DELETE"; fi
RUN GROUP_TO_DELETE=$(getent group 1000 | cut -d: -f1) && \
    if [ -n "$GROUP_TO_DELETE" ]; then groupdel "$GROUP_TO_DELETE"; fi

# Install system packages. WORKDIR creates /home/nonroot if it does not exist;
# this directory is later handed over to the nonroot user created further down.
WORKDIR /home/nonroot
# libaio-dev: necessary for async_io.
# wget: necessary for downloading CQA.
# update and install share one layer so that a cached, stale package index is
# never reused by the install step; the index is deleted in the same layer so
# it does not end up in the image.
RUN apt-get update -y && \
    apt-get install -y --no-install-recommends \
        libaio-dev \
        wget && \
    rm -rf /var/lib/apt/lists/*
# Upgrade pip first; this prevents the warning about a new pip release being
# available.
RUN pip install --upgrade --no-cache-dir pip

# Download datasets. Dataset processing needs a few Python packages, so they
# are installed here, ahead of the rest of the project's dependencies.
RUN pip install --no-cache-dir \
        spacy \
        nltk \
        pandas
# Datasets live in /home/nonroot/data. The path is absolute so that it does
# not depend on whichever WORKDIR is active at this point.
WORKDIR /home/nonroot/data
RUN python -m spacy download en_core_web_lg
RUN python -m nltk.downloader wordnet -d /home/nonroot/nltk_data/
# Shallow clones: nothing in this file uses the repositories' history, so only
# the latest snapshot is fetched.
RUN git clone --depth 1 https://github.com/OanaMariaCamburu/e-SNLI.git
RUN git clone --depth 1 https://github.com/wangcunxiang/SemEval2020-Task4-Commonsense-Validation-and-Explanation.git
# Download ECQA and join it with CQA.
# Shallow clone: only the latest snapshot of the repository is fetched.
RUN git clone --depth 1 https://github.com/dair-iitd/ECQA-Dataset.git
WORKDIR /home/nonroot/data/ECQA-Dataset
# Note: We use RUN instead of ADD because s3 doesn't seem to support caching;
# ADD re-downloads these files on every run.
# Both CQA splits are fetched in a single layer; wget exits non-zero if either
# download fails, which fails the build.
RUN wget --directory-prefix=cqa/ \
        https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl \
        https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl
# ADD https://s3.amazonaws.com/commensenseqa/train_rand_split.jsonl cqa/
# ADD https://s3.amazonaws.com/commensenseqa/dev_rand_split.jsonl cqa/
# Join ECQA with CQA, using the script from ECQA-Dataset.
RUN python generate_data.py
WORKDIR /home/nonroot/data

# Everything downloaded above is still owned by root at this point. Ownership
# is handed to nonroot by the chown that follows user creation further down
# (the user does not exist yet, so a chown here would fail).

# Separate our package install into layers to take advantage of Docker's cache.
# Only reinstall dependencies when pyproject.toml changes.
WORKDIR /home/nonroot/corr-faith
COPY pyproject.toml .
RUN pip install --no-cache-dir .
# Only rerun pip when directory structure changes.
# NOTE(review): Dockerfile COPY wildcards follow Go's filepath.Match rules,
# which have no recursive `**`, and multiple matched files are copied flat
# into src/. Confirm this yields the package layout the editable install needs.
COPY src/**/__init__.py src/
# --no-cache-dir keeps pip's download cache out of the layer, as in the other
# pip invocations in this file.
RUN pip install --no-cache-dir -e .

# Create a non-root user to run the container, for security.
# Allow configuring its UID and GID to match the host user, so that we can
# read/write bind mounted files.
# https://dev.to/izackv/running-a-docker-container-with-a-custom-non-root-user-syncing-host-and-container-permissions-26mb
ARG UID=1000
ARG GID=1000
# The build args are quoted so an empty or malformed value fails loudly in
# addgroup/adduser instead of silently shifting the argument list.
RUN addgroup --gid "$GID" nonroot && \
    adduser --uid "$UID" --gid "$GID" --disabled-password --gecos "" nonroot
# Hand everything under the home directory (created and populated as root by
# the earlier steps) to nonroot and its group.
WORKDIR /home/nonroot
RUN chown -R nonroot:nonroot .
WORKDIR /home/nonroot/corr-faith

# With an editable install, we don't need to run pip when source changes.
# The group is named explicitly: when --chown is given only a user, the
# numeric UID is reused as the GID, which is wrong if the UID and GID build
# args differ.
COPY --chown=nonroot:nonroot . .

# Drop root privileges for everything that runs in the container.
USER nonroot
# We need to override NVIDIA's default entrypoint. We'll set cmd in the
# launch script so that this container can run any main file.
ENTRYPOINT ["torchrun"]
