# sample.yaml
--- # TODO:
# - Model generation config: temperature, sampling, etc.
# VectorBase
vb_cache_dir: ./caches/sample_experiment
vb_verbose: true
split_overlap: 0.1
## Documents Source
docs_from_hf: true
docs_dataset_id: yixuantt/MultiHopRAG
docs_dataset_subset: corpus
docs_dataset_split: train
docs_dataset_column: body
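## A minimal sketch of how the four docs_dataset_* keys above could map onto the
## Hugging Face `datasets` library (an assumption; this repo's loader may differ):
#   from datasets import load_dataset
#   corpus = load_dataset("yixuantt/MultiHopRAG", "corpus", split="train")
#   documents = corpus["body"]  # one raw article string per row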
## Embedding Model Source
embedding_model_id: thenlper/gte-small # alternatives: BAAI/bge-large-en-v1.5, BAAI/llm-embedder
embedding_model_device: cuda
chunk_size: 256 # must stay <= the embedding model's max sequence length
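## chunk_size and split_overlap imply a sliding window over each document: a
## window of chunk_size tokens advancing by chunk_size * (1 - split_overlap)
## per step. A sketch under that assumption (the actual splitter may differ):
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained("thenlper/gte-small")
#   def chunk(text, size=256, overlap=0.1):
#       ids = tok(text, add_special_tokens=False)["input_ids"]
#       step = max(1, int(size * (1 - overlap)))
#       return [tok.decode(ids[i:i + size]) for i in range(0, len(ids), step)]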
# Retriever
rets_cache_dir: ./caches/sample_experiment
rets_verbose: true
num_retrievals: 10 # line 67: top_k
## Queries Source
queries_from_hf: true
queries_dataset_id: yixuantt/MultiHopRAG
queries_dataset_subset: MultiHopRAG
queries_dataset_split: train
queries_dataset_column: query
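## The retriever keys combined: load the queries the same way as the corpus and
## fetch num_retrievals candidates for each. `index` is a hypothetical handle to
## the vector store built above; only the load_dataset call is a real `datasets` API.
#   queries = load_dataset("yixuantt/MultiHopRAG", "MultiHopRAG", split="train")["query"]
#   hits = index.search(queries[0], top_k=10)  # top_k == num_retrievals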
# Generator
gens_cache_dir: ./caches/sample_experiment
gens_verbose: true
num_selections: 4 # line 139: top_n = top_k
## Generator Model Source
generator_model_id: microsoft/Phi-3.5-mini-instruct
generator_model_trust_remote_code: true
generator_model_device: cuda
generator_model_torch_dtype: bf16
generator_model_config:
  - max_new_tokens: 8
  - return_full_text: false
  - temperature: 0
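## How the generator keys could translate into a `transformers` text-generation
## pipeline (a sketch of the standard API; the repo may wrap it differently):
#   import torch
#   from transformers import pipeline
#   generator = pipeline(
#       "text-generation",
#       model="microsoft/Phi-3.5-mini-instruct",
#       device="cuda",
#       torch_dtype=torch.bfloat16,
#       trust_remote_code=True,
#   )
#   # temperature: 0 corresponds to greedy decoding, the pipeline default
#   out = generator(prompt, max_new_tokens=8, return_full_text=False)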
## Reranker Model
reranker_model_id: colbert-ir/colbertv2.0
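## One way to apply the ColBERT checkpoint as a reranker (assuming the
## `ragatouille` package; the repo may wire colbertv2.0 up differently):
#   from ragatouille import RAGPretrainedModel
#   reranker = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
#   best = reranker.rerank(query=question, documents=hits, k=4)  # num_selections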
## RAG Prompts
system_prompt: |
  Using the information contained in the context, give a comprehensive answer to the question.
  Respond only to the question asked; the response should be concise and relevant to the question.
  If the answer cannot be deduced from the context, do not generate a response of your own.
  Place your response only in the following JSON and do not generate anything else:
  {{
    "found_the_answer": <true or false>,
    "actual_response": <Str>,
    "id_of_relevant_documents": <List(Int)>
  }}
context_prompt: |
  Context:
  {CONTEXT}
  ---
  Now here is the question you need to answer.
  {QUERY}
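## Note the doubled braces in system_prompt: they survive Python's str.format()
## so the model sees literal JSON braces. A sketch of prompt assembly and
## response parsing (variable names are hypothetical):
#   import json
#   prompt = system_prompt.format() + context_prompt.format(
#       CONTEXT=numbered_chunks, QUERY=question)
#   answer = json.loads(generator(prompt)[0]["generated_text"])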