With newer versions of CMake (4.0 and later), you may need to set the environment variable `CMAKE_POLICY_VERSION_MINIMUM=3.5`. This project uses [uv](https://docs.astral.sh/uv/); run the following commands in order:

```bash
mkdir -p data
mkdir -p fetch
wget https://archive.org/download/nf_prize_dataset.tar/nf_prize_dataset.tar.gz -P fetch
wget https://web.archive.org/web/20250407102923/https://eigentaste.berkeley.edu/dataset/Dataset4JokeSet.zip -P fetch
wget https://web.archive.org/web/20250414013636/https://eigentaste.berkeley.edu/dataset/JesterDataset3.zip -P fetch
wget https://web.archive.org/web/20250409000905/https://eigentaste.berkeley.edu/dataset/JesterDataset4.zip -P fetch
wget https://files.grouplens.org/datasets/movielens/ml-32m.zip -P fetch

tar xzvf fetch/nf_prize_dataset.tar.gz -C data/ && mv data/download data/nf_prize_dataset
wget https://www.kamishima.net/asset/sushi3-2016.zip -P fetch
unzip fetch/sushi3-2016.zip -d data
unzip fetch/Dataset4JokeSet.zip -d data
unzip fetch/JesterDataset3.zip -d data
unzip fetch/JesterDataset4.zip -d data
unzip fetch/ml-32m.zip -d data
uv run gen-synth.py # generate synthetic data
uv run load_nf.py
uv run load_ml.py
uv run jokes.py
uv run llm_emebedding.py # requires the TMDB env var to be set to your TMDB API key
uv run run.py all # to see run commands
```
