# Code for PanoGen: Text-Conditioned Panoramic Environment Generation for Vision-and-Language Navigation

Step 1: Caption R2R images

```bash
cd BLIP-2
python caption_generation.py
```

Step 2: Generate the first image with Stable Diffusion

```bash
cd Stable-diffusion
python generate_images_from_captions.py
```

Step 3: Inpaint the panorama

```bash
cd Stable-diffusion  # skip if you are already in this directory from the previous step
python image_inpainting.py
```

Step 4: Generate speaker data

```bash
cd mPLUG
bash scripts/vln_train.sh
bash scripts/vln_inference.sh
```

Step 5: VLN training

```bash
cd VLN-DUET
```

Pre-training (from the `VLN-DUET` directory):

```bash
cd pretrain_src
bash run_r2r.sh
```

Fine-tuning (from the `VLN-DUET` directory):

```bash
cd map_nav_src
bash scripts/run_r2r.sh
bash scripts/run_r4r.sh
bash scripts/run_cvdn.sh
```
