"""Configurations for preprocessing of pre-trained models."""

from copy import deepcopy

# Registry mapping a method identifier (e.g. "llm-col_t5") to its settings dict.
preprocess_configs = {}


def _base_config(
    model_name,
    *,
    huggingface=True,
    prefix=None,
    cache_embedding=True,
    tokenizer_kwargs=None,
):
    """Return a fresh settings dict for one pre-trained model.

    Keys are inserted in the order ``model_name``, ``huggingface``,
    ``prefix``, ``cache_embedding`` and, only when ``tokenizer_kwargs`` is
    not ``None``, ``tokenizer_kwargs``. Entries that never declare tokenizer
    kwargs therefore do not carry that key at all.
    """
    entry = {
        "model_name": model_name,
        "huggingface": huggingface,
        "prefix": prefix,
        "cache_embedding": cache_embedding,
    }
    if tokenizer_kwargs is not None:
        # Copy so that no two registry entries share the same kwargs object.
        entry["tokenizer_kwargs"] = deepcopy(tokenizer_kwargs)
    return entry


def _add_llm_pair(suffix, model_name, **options):
    """Register the ``llm-col_<suffix>`` / ``llm-row_<suffix>`` twin entries.

    The two entries are independent dicts that differ only in
    ``cache_embedding``: ``True`` for the "col" entry, ``False`` for "row".
    """
    col_entry = _base_config(model_name, cache_embedding=True, **options)
    row_entry = deepcopy(col_entry)
    row_entry["cache_embedding"] = False
    preprocess_configs["llm-col_" + suffix] = col_entry
    preprocess_configs["llm-row_" + suffix] = row_entry


# --- Table models ("tm_*") ---------------------------------------------------

# tm_tabvec: the only entry that carries a "model_base_path" key, placed
# right after "model_name", so it is spelled out literally.
preprocess_configs["tm_tabvec"] = {
    "model_name": "tabvec",
    "model_base_path": None,
    "huggingface": False,
    "prefix": None,
    "cache_embedding": True,
}

# tm_tabula
preprocess_configs["tm_tabula"] = _base_config(
    "mlfoundations/tabula-8b",
    cache_embedding=False,
    tokenizer_kwargs={},
)

# --- Language models ("llm-col_*" / "llm-row_*") -----------------------------

# Qwen3 embedding family (no tokenizer kwargs declared).
_add_llm_pair("qwen3-0.6b", "Qwen/Qwen3-Embedding-0.6B")
_add_llm_pair("qwen3-4b", "Qwen/Qwen3-Embedding-4B")
_add_llm_pair("qwen3-8b", "Qwen/Qwen3-Embedding-8B")

# Llama family; only the "llama3" entry overrides the tokenizer pad token.
_add_llm_pair(
    "llama3",
    "meta-llama/Llama-3.1-8B",
    tokenizer_kwargs={"pad_token": "[PAD]"},
)
_add_llm_pair("llama-3.1-70b", "meta-llama/Llama-3.1-70B")
_add_llm_pair("llama-3.2-1b", "meta-llama/Llama-3.2-1B")
_add_llm_pair("llama-3.2-3b", "meta-llama/Llama-3.2-3B")

# T5
_add_llm_pair("t5-small", "google-t5/t5-small", tokenizer_kwargs={})
_add_llm_pair("t5", "google-t5/t5-base", tokenizer_kwargs={})

# E5: the only entries with a non-None prefix.
_add_llm_pair(
    "e5-base-v2",
    "intfloat/e5-base-v2",
    prefix="query: ",
    tokenizer_kwargs={},
)
_add_llm_pair(
    "e5-small-v2",
    "intfloat/e5-small-v2",
    prefix="query: ",
    tokenizer_kwargs={},
)

# RoBERTa
_add_llm_pair("roberta-base", "FacebookAI/roberta-base", tokenizer_kwargs={})
_add_llm_pair("roberta-large", "FacebookAI/roberta-large", tokenizer_kwargs={})

# Sentence-transformers MiniLM
_add_llm_pair(
    "all-MiniLM-L6-v2",
    "sentence-transformers/all-MiniLM-L6-v2",
    tokenizer_kwargs={},
)

# ERNIE 2.0
_add_llm_pair("ernie-base", "nghuyong/ernie-2.0-base-en", tokenizer_kwargs={})
_add_llm_pair("ernie-large", "nghuyong/ernie-2.0-large-en", tokenizer_kwargs={})

# fastText: flagged as not coming from Hugging Face.
_add_llm_pair("fasttext", "fasttext", huggingface=False)

# OPT
_add_llm_pair("opt-1.3b", "facebook/opt-1.3b", tokenizer_kwargs={})

# --- Knowledge-graph models ("kg_*"), all with cache_embedding=False ---------

# Knowledge-card checkpoints
preprocess_configs["kg_knowledge-card-wiki"] = _base_config(
    "bunsenfeng/knowledge-card-wikidata",
    cache_embedding=False,
    tokenizer_kwargs={},
)
preprocess_configs["kg_knowledge-card-yago"] = _base_config(
    "bunsenfeng/knowledge-card-yago",
    cache_embedding=False,
    tokenizer_kwargs={},
)
preprocess_configs["kg_knowledge-card-atomic"] = _base_config(
    "bunsenfeng/knowledge-card-atomic",
    cache_embedding=False,
    tokenizer_kwargs={},
)
preprocess_configs["kg_knowledge-card-concept"] = _base_config(
    "bunsenfeng/knowledge-card-ConceptNet",
    cache_embedding=False,
    tokenizer_kwargs={},
)

# KGT5 checkpoints
preprocess_configs["kg_kgt5"] = _base_config(
    "apoorvumang/kgt5-base-wikikg90mv2",
    cache_embedding=False,
    tokenizer_kwargs={},
)
preprocess_configs["kg_kgt5-context"] = _base_config(
    "AdrianKs/kgt5-context-wikidata5m",
    cache_embedding=False,
    tokenizer_kwargs={},
)
preprocess_configs["kg_kgt5-small"] = _base_config(
    "apoorvumang/kgt5-wikikg90mv2",
    cache_embedding=False,
    tokenizer_kwargs={},
)

# kg_tarte: flagged as not coming from Hugging Face; no tokenizer kwargs.
preprocess_configs["kg_tarte"] = _base_config(
    "tarte",
    huggingface=False,
    cache_embedding=False,
)

# Keep the module-level scratch name bound to a separate copy of the last
# entry, in case external code imports ``temp_configs`` from this module.
temp_configs = deepcopy(preprocess_configs["kg_tarte"])
