# Kubernetes batch Job manifest template for a "devbox" pod.
# Upper-case names wrapped in curly braces (NAME, USER_ID, IMAGE, ...) are
# placeholders; presumably a launcher substitutes them textually before the
# manifest is submitted -- verify against the rendering code.
apiVersion: batch/v1
kind: Job
metadata:
  # Quoted so that a substituted value that looks like a number or a boolean
  # is still parsed as a string, which is what metadata.name requires.
  name: "{NAME}"
  labels:
    # Name of the Kueue queue this Job is submitted to.
    kueue.x-k8s.io/queue-name: farai
spec:
  # The Job is created suspended; with the queue-name label above, Kueue is
  # expected to unsuspend it once the workload is admitted.
  suspend: true
  # Number of retries allowed before the Job is marked as failed. A value of 1
  # therefore permits up to two attempts in total (first run plus one retry).
  backoffLimit: 1
  template:
    metadata:
      name: "{NAME}"
    spec:
      # Priority class requested for the pod.
      priorityClassName: interactive
      # Numeric user and group IDs that the container processes run as.
      securityContext:
        runAsGroup: {GROUP_ID}
        runAsUser: {USER_ID}
      volumes:
      # Pre-existing PersistentVolumeClaim; the container mounts it at /training.
      - name: training
        persistentVolumeClaim:
          claimName: az-learned-planners
      # Memory-backed emptyDir capped at SHM_SIZE; the container mounts it at
      # /dev/shm.
      - name: dshm
        emptyDir:
          sizeLimit: "{SHM_SIZE}"
          medium: Memory

      containers:
      - name: devbox-container
        image: "{IMAGE}"
        # Always re-pull so that a re-pushed tag is picked up.
        imagePullPolicy: Always
        # Startup script, executed by bash:
        #   1. Create /opt/sokoban_cache and /training/cleanba and hand them to
        #      the dev user (uses sudo, so the image must provide it).
        #   2. Clone the Boxoban levels into the cache directory.
        #   3. Update the repository in the container's working directory and
        #      check out the requested commit (presumably the image's WORKDIR
        #      is the project checkout -- verify against the image).
        #   4. Bring submodules in line with that commit. --init is passed so
        #      that submodules not yet initialised in the image are cloned
        #      instead of being silently skipped.
        #   5. Install extra Python packages, then sleep for one day to keep
        #      the container alive.
        # Commands are not chained with "set -e": a failing step does not stop
        # the following ones, so the box still comes up after a partial failure.
        command:
          - bash
          - -c
          - |
            sudo mkdir -p "/opt/sokoban_cache"
            sudo chown -R dev:dev "/opt/"
            sudo mkdir -p "/training/cleanba"
            sudo chown dev:dev "/training/cleanba"
            git clone https://github.com/google-deepmind/boxoban-levels "/opt/sokoban_cache/boxoban-levels-master"
            git pull
            git checkout "{COMMIT_HASH}"
            git submodule update --init --recursive
            pip install matplotlib jupyter
            pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
            sleep 1d
        resources:
          # Upper bounds: GPU count and memory. No CPU limit is set.
          limits:
            nvidia.com/gpu: {GPU}
            memory: "{MEMORY}"
          # CPU is the only resource with an explicit request.
          requests:
            cpu: {CPU}
        env:
        # OpenMP thread count, rendered from the same CPU placeholder that
        # sets the CPU request.
        - name: OMP_NUM_THREADS
          value: "{CPU}"

        # Weights & Biases settings; the API key is read from the "wandb"
        # secret.
        - name: WANDB_API_KEY
          valueFrom:
            secretKeyRef:
              key: api-key
              name: wandb
        - name: WANDB_ENTITY
          value: farai
        - name: WANDB_PROJECT
          value: lp-cleanba
        - name: WANDB_RUN_GROUP
          value: devbox
        - name: WANDB_MODE
          value: offline

        # Git / GitHub access. The personal access token is read from the
        # "github-credentials" secret.
        - name: GITHUB_PAT
          valueFrom:
            secretKeyRef:
              key: pat
              name: github-credentials
        # git runs the program named here when it needs a credential; "true"
        # prints nothing -- presumably so git never waits on a terminal prompt.
        - name: GIT_ASKPASS
          value: "true"
        # Supplies the token as the username for https://github.com.
        # Keep this entry after GITHUB_PAT: a $(VAR) reference is only expanded
        # from variables defined earlier in this list.
        - name: GIT_CONFIG_PARAMETERS
          value: "'credential.https://github.com.username=$(GITHUB_PAT)'"
        # Where the pod-level volumes appear inside the container.
        volumeMounts:
        - mountPath: /training
          name: training
        - mountPath: /dev/shm
          name: dshm
      # Secret holding the registry credentials used to pull the image.
      imagePullSecrets:
      - name: docker
      # Containers are never restarted in place after they exit.
      restartPolicy: Never

