#!/bin/bash

# Preamble: announce what is about to happen. The script does not activate or
# verify a Python environment itself; it only reminds the user that one is
# expected to be active.
echo "Assuming you are in a venv or env with the required packages installed..."
echo "Starting FDA data scraping and CSV update..."

# Fail fast when GEMINI_API_KEY is unset OR empty, before any work is started.
# (Presumably the Python stages launched later read this key -- confirm against
# those scripts.) The ':-' default keeps the test safe even if the script is
# ever run under 'set -u'. Diagnostics go to stderr so they are not mixed into
# (or lost with) redirected progress output on stdout.
if [[ -z "${GEMINI_API_KEY:-}" ]]; then
    {
        echo "Error: GEMINI_API_KEY is not set in the environment."
        echo "Please set the GEMINI_API_KEY environment variable before running this script."
        echo "Example: export GEMINI_API_KEY='your_api_key_here'"
    } >&2
    exit 1
fi

echo "GEMINI_API_KEY is set. Proceeding with FDA data scraping..."

# Pipeline: scrape -> synthetic data generation -> embeddings update.
# Each stage runs only if the previous one succeeded. When a stage fails, the
# script reports the failure on stderr and exits with that stage's exit status,
# so callers (cron, CI, '&&' chains) can detect that the run did not complete.
# NOTE: all paths below are relative to the current working directory.

# Print the given message to stderr and terminate the script.
# Arguments: $1 - exit status to terminate with
#            $2.. - message text
abort() {
    local status=$1
    shift
    printf '%s\n' "$*" >&2
    exit "$status"
}

# Stage 1: scrape FDA data and update the CSV.
python py_src/fda_scraper.py
stage_status=$?
if [[ $stage_status -ne 0 ]]; then
    abort "$stage_status" "Error: FDA data scraping and CSV update failed. Aborting further tasks."
fi
echo "FDA data scraping and CSV update completed successfully."

# Stage 2: synthetic data generation.
echo "Starting synthetic data generation (PDF summaries, keywords, questions, concepts, thesis, search_boost_text)..."
# Process a large number of records to ensure all new/incomplete ones are handled
python py_src/process_pdf_summary.py --num_records 999999
stage_status=$?
if [[ $stage_status -ne 0 ]]; then
    abort "$stage_status" "Error: Synthetic data generation failed. Aborting embeddings update."
fi
echo "Synthetic data generation completed successfully."

# Stage 3: update the embeddings from the CSV.
echo "Starting embeddings update..."
python py_src/create_embeddings.py --update --csv_path py_src/fda_ai_records.csv --output_dir embedding_data/
stage_status=$?
if [[ $stage_status -ne 0 ]]; then
    abort "$stage_status" "Error: Embeddings update failed."
fi
echo "Embeddings update completed successfully."
echo "All tasks finished."
