# Vector Translation Configuration
# This file contains all default configurations for the Vector Translation system

# Top-level dataset selectors.
# NOTE(review): test_dataset is fiqa, but dataset.test_dataset_list contains
# only scifact — confirm which key the pipeline actually reads.
train_dataset: arguana
test_dataset: fiqa
# Dataset settings: name, embedding location, sampling, and the train/test
# dataset lists.
dataset:
  name: arguana
  # Same value as paths.embedding_path; keep the two in sync.
  embedding_path: ./data/processed/embeddings/
  # presumably the number of items sampled from each dataset — TODO confirm
  sample_size: 5000
  align_dimension: true
  train_dataset_list:
    - arguana
  test_dataset_list:
    - scifact

# Model configuration
# source_model / target_model are the active pair. Every name chosen here
# must also appear in supported_models (kept in alphabetical order).
model:
  # Alternative pairing (disabled):
  # source_model: mistral
  # target_model: nv-embed
  source_model: gemini
  target_model: openai
  supported_models:
    - fast-text
    # gemini is listed because it is the active source_model above.
    - gemini
    - glove
    - gte
    - mistral
    - nv-embed
    - openai

# Mapper configuration
mapper:
  # Active mapper; gating-moe is currently the only entry in supported_mappers.
  mapper_name: gating-moe
  supported_mappers:
    - gating-moe
  # presumably "cos" selects a cosine-based loss — TODO confirm against the trainer
  loss_type: cos

  # Gating MoE configuration (settings for the gating-moe mapper named above)
  gating_moe:
    num_experts: 8
    # NOTE(review): pq_components / pq_clusters look like they only matter when
    # use_pq_clustering is true, and it is currently false — confirm.
    pq_components: 8
    pq_clusters: 256
    clustering_method: kmeans
    distance_metric: cosine
    confidence_method: percentile
    use_soft_routing: false
    # Parses as an integer; write 1.0 if the consumer requires a float.
    gating_temperature: 1
    use_pq_clustering: false
    random_state: 42

    # Training and architecture settings nested under gating_moe
    # (learning rate, epochs, scheduler, early stopping, layers, dropout).
    mapper_config:
      learning_rate: 0.0001
      num_epochs: 250
      batch_size: 1280
      # Parses as an integer; write 1.0 if the consumer requires a float.
      gradient_clip: 1
      weight_decay: 0.00001
      scheduler_patience: 5
      scheduler_factor: 0.5
      # NOTE(review): alpha, beta and global_weight are undocumented weights —
      # record what each one scales.
      alpha: 0.5
      beta: 0.7
      early_stopping_patience: 10
      min_delta: 0.000001
      # presumably null lets the code pick the device automatically — TODO confirm
      device: null
      layer_num: 4
      activation: relu
      dropout: 0.1
      global_weight: 0.5

# WandB configuration
wandb:
  enabled: true
  project: vector_translation_cross_zenml
  # presumably null falls back to the default W&B entity of the logged-in
  # account — TODO confirm
  entity: null
  # Explicit empty list (a bare "tags:" would parse as null instead).
  tags: []

# Logging configuration
logging:
  level: INFO
  # %-style template with asctime, name, levelname and message fields.
  # Must stay quoted: a plain YAML scalar may not begin with "%".
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  # presumably null means no log file is written — TODO confirm
  file_path: null

# Paths configuration
# All entries are relative paths; presumably resolved against the working
# directory — TODO confirm.
paths:
  # Same value as dataset.embedding_path; keep the two in sync.
  # Note the trailing slash here, which the other directory entries omit.
  embedding_path: ./data/processed/embeddings/
  analysis_path: ./output/analysis
  analysis_cache_path: ./output/analysis_cache
  config_path: ./.vectortranslation/config.yaml

# Additional parameters
# NOTE(review): how run_entity relates to wandb.entity (both null) is not
# evident from this file — confirm whether both are needed.
run_entity: null