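# RAG-Factory configuration file (adapted for the 2wikimultihopqa dataset)
# NOTE(review): dataset.dataset_name below is `legal`, not 2wikimultihopqa — confirm which one is intended.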
# Dataset selection and chunking parameters.
dataset:
  dataset_name: legal
  n_samples: 100  # presumably the number of samples taken from the dataset — confirm
  chunk_size: 1024  # number of tokens in each chunk
  chunk_overlap: 20  # number of overlapping tokens between consecutive chunks

# Generation model settings.
llm:
  type: OpenAICompatible
  # Endpoint serving the model (private-network address).
  base_url: 'http://192.168.190.10:9997/v1'
  # Placeholder value; per its own text the key is not actually used.
  api_key: 'not used actually'
  model: 'qwen2.5-instruct'

# Embedding model settings.
embedding:
  type: OpenAICompatibleEmbedding
  # Endpoint serving the embedding model (private-network address).
  base_url: 'http://192.168.190.3:9997/v1'
  # Placeholder value; per its own text the key is not actually used.
  api_key: 'not used actually'
  model: 'jina-embeddings-v3'
  # Presumably must equal the vector size produced by the model above — confirm.
  dimension: 1024

# Reranker model settings.
# NOTE(review): rag.use_rerank is disabled in this file, so this section is
# presumably not exercised — confirm.
reranker:
  type: XinferenceRerank
  model: 'jina-reranker-v2'
  base_url: 'http://192.168.190.3:9997/v1'
  top_n: 3  # presumably the number of results kept after reranking — confirm

# Graph store connection settings (Bolt URL plus credentials).
storage:
  type: graph_store
  url: 'bolt://localhost:7692'
  username: 'neo4j'
  # NOTE(review): plaintext credential stored in this config file. Consider
  # supplying it via an environment variable or a secret store, and rotate it
  # if this file has been committed to version control.
  password: '4rfvXSW@'
  refresh_schema: false

# RAG pipeline options: solution/mode selection, feature toggles, and
# graph_rag-specific limits.
rag:
  solution: 'graph_rag'
  mode: 'global'
  use_ReAct: false
  use_rerank: false
  use_multi_step: false
  insert_community_nodes: true
  num_workers: 16  # number of workers that process chunks in parallel
  similarity_top_k: 10  # number of top-k nodes retrieved
  stages: ['inference', 'evaluation']
  # graph_rag parameters
  # Maximum number of paths per chunk, i.e. the max_knowledge_triplets
  # extracted from each chunk.
  max_paths_per_chunk: 2
  # Used when clustering the graph to obtain communities.
  max_cluster_size: 5
