From QueryEngineSettings.schema.yaml (jarvisv2/QueryEngineSettings.schema)
---
$defs:
# AgenticModuleSettings: object schema for a module whose `type` is fixed to
# `agentic-module` and whose `storage_prefix` is fixed to `agentic-module-v0-`.
# Only `name` and `llm_model` are required. The description below is a literal
# block that embeds an example configuration document.
AgenticModuleSettings:
description: |-
Defines settings for Agentic modules
# schema: jarvisv2/QueryEngineSettings.schema
---
name: agentic-module-query-engine
tenants: [__all__]
modules:
- type: agentic-module
name: agentic-module-v0
storage_prefix: agentic-module-v0-
#end agentic-module-v0
#end modules
properties:
early_terminate:
default: false
description: Defines whether to allow early termination (before LLM call).
True when stock phrase functionality is requested. Default is False.
title: Allow early termination
type: boolean
emb_model_to_use:
default: cohere.embed-multilingual-v3
description: Name of model to be used, it should correspond with one of the
values in embedding_model.
title: Model to use
type: string
embedding_model:
description: Defines settings for the embedding model during data and query
flow.
items:
$ref: '#/$defs/EmbeddingModelSettings'
title: Embedding
type: array
llm_model:
$ref: '#/$defs/LLMModelSettings'
title: LLM Settings
metadata_filter_keys:
description: Configure valid keys that will be used to filter metadata. They
should by default be at least any of the types of QueryFeatureSettings.
items:
type: string
title: Metadata Filter keys
type: array
name:
description: The name of the module. This should be unique within the query
engine.
title: Name
type: string
postprocessor_settings:
$ref: '#/$defs/LLMPostprocessorSettings'
description: Configure which postprocessor functions to use
title: Postprocessor settings
preprocessor_settings:
$ref: '#/$defs/PreprocessorSettings'
description: Configure which preprocessor functions to use
title: Preprocessor settings
query_feature_settings:
$ref: '#/$defs/QueryFeatureSettings'
storage_prefix:
const: agentic-module-v0-
default: agentic-module-v0-
title: Storage Prefix
type: string
type:
const: agentic-module
default: agentic-module
title: Type
type: string
required: [name, llm_model]
title: Agentic Module Settings
type: object
# AmazonTranslatorLLMPostprocessorFunction: postprocessor entry whose `type` is
# the constant `amazon-translator`. There is no `required` list, so every
# property is optional; `aws_region_name` defaults to `us-east-1`.
AmazonTranslatorLLMPostprocessorFunction:
description: Amazon Translator LLM Postprocessor function.
properties:
aws_region_name:
default: us-east-1
description: AWS region name
title: AWS region name
type: string
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the LLM Postprocessor function. This is used to identify
the LLM postprocessor functions that has been applied.
title: Name
type:
const: amazon-translator
default: amazon-translator
title: Type
type: string
title: Amazon Translator
type: object
# AnswerFieldRerankerFunction: reranker entry with `type` fixed to
# `answer-field-reranker`. Only `answer_fields` is required: a non-empty list
# whose items are either a string matching `<answer_type>.<answer_field>` or
# the literal `intent`. `match_type` selects `all` (default) or `any`.
AnswerFieldRerankerFunction:
description: Reranker that keeps entries containing matching answer fields.
properties:
answer_fields:
description: The field in the document to use as the answer.
items:
anyOf:
- description: Answer field that we expect to be matched in the answer.
Answer field is specified in `<answer_type>.<answer_field>` format.
pattern: ^[\w\-]+\.[\w\_]+$
title: Answer Field
type: string
- const: intent
type: string
minItems: 1
title: Answer Fields
type: array
keep_original_if_all_discarded:
default: false
description: If all documents are discarded, keep the original candidates.
title: Keep Original If All Discarded
type: boolean
match_type:
default: all
description: Whether to match `all` or `any` of the answer fields.
enum: [all, any]
title: Match Type
type: string
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
type:
const: answer-field-reranker
default: answer-field-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [answer_fields]
title: Answer Field Reranker
type: object
# AnswerFinderSettings: groups three optional sub-settings (`reranker`,
# `retriever`, `scorer`), each a $ref to its own settings definition.
AnswerFinderSettings:
description: Configure how we retrieve, score, and rank answers in the module.
properties:
reranker:
$ref: '#/$defs/RerankerSettings'
description: Configure how we rerank answers.
title: Reranker
retriever:
$ref: '#/$defs/RetrieverSettings'
description: Configure how we retrieve answer.
title: Retriever
scorer:
$ref: '#/$defs/ScorerSettings'
description: Configure how we score answers.
title: Scorer
title: Answer Finder Settings
type: object
# AnswerPhraseFinderSettings: phrase-finder entry with `type` fixed to `answer`.
# Requires `answer_type` (one of eight enumerated values) and `answer_field`;
# the reranker/retriever/scorer sub-settings are optional $refs.
AnswerPhraseFinderSettings:
description: Configure how we retrieve, score, and rank answer phrases in the
module.
properties:
answer_field:
description: The field in the answer where the object is used.
title: Answer Field
type: string
answer_type:
description: The answer type where the object is used.
enum: [clinical-trial, dialogue, directory, document, document-snippet, monograph,
roster, test]
title: Answer Type
type: string
reranker:
$ref: '#/$defs/RerankerSettings'
description: Configure how we rerank phrases.
title: Reranker
# NOTE(review): unlike `reranker` and `scorer`, `retriever` has no
# description/title annotations here — confirm whether that is intentional.
retriever:
$ref: '#/$defs/RetrieverSettings'
scorer:
$ref: '#/$defs/ScorerSettings'
description: Configure how we score phrases.
title: Scorer
type:
const: answer
default: answer
title: Type
type: string
required: [answer_type, answer_field]
title: Answer Phrase Finder Settings
type: object
# AnswerPhraseRetrieverFunction: retriever entry with `type` fixed to
# `answer-phrase-retriever`. No property is required. `size` defaults to 128
# (minimum 0); `tiered_fuzzy_match` may be null and otherwise defaults to the
# inline mapping below.
AnswerPhraseRetrieverFunction:
description: Settings for retrieving answer phrases.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the retriever function. This is used to identify
the retriever functions used.
title: Name
size:
default: 128
description: The `size` parameter to use when retrieving documents from the
KB. Note that `scan` is not used.
minimum: 0
title: Size
type: integer
tiered_fuzzy_match:
anyOf:
- $ref: '#/$defs/TieredFuzzyMatchSettings'
- type: 'null'
default:
fuzzy_transpositions: true
length_thresholds: [3, 6]
prefix_length: 2
scores: [0.8, 0.9, 1.0]
description: Configure how we perform fuzzy matching when retrieving phrases.
title: Tiered Fuzzy Match
type:
const: answer-phrase-retriever
default: answer-phrase-retriever
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Answer Phrase Retriever
type: object
# AnswerRetrieverFunction: retriever entry with `type` fixed to
# `answer-retriever`. No property is required. `max_clause_count` defaults to
# 1000 and `size` to 128 (both minimum 0); `weights` is a list of
# RetrievalWeightsSettings objects.
AnswerRetrieverFunction:
description: Settings for retrieving answers.
properties:
max_clause_count:
default: 1000
description: The maximum number of answer phrase clauses to use in a query.
When there are more clauses, multiple queries will be executed concurrently.
minimum: 0
title: Max Clause Count
type: integer
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the retriever function. This is used to identify
the retriever functions used.
title: Name
size:
default: 128
description: The `size` parameter to use when retrieving documents from the
KB. Note that `scan` is not used.
minimum: 0
title: Size
type: integer
type:
const: answer-retriever
default: answer-retriever
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
weights:
description: The weights to use for each field in the retrieval function.
By default, each field is weighted 1.
items:
$ref: '#/$defs/RetrievalWeightsSettings'
title: Retrieval Weights Settings
type: array
title: Answer Retriever
type: object
# AnswerScorerFunction: scorer entry with `type` fixed to `answer-scorer`.
# No property is required; it exposes only `name`, `type` and `verbose`.
AnswerScorerFunction:
description: Score an answer based on standard token level P/R/F metrics.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the scorer function. This is used to identify the
scorer functions that has been applied.
title: Name
type:
const: answer-scorer
default: answer-scorer
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Basic Answer Scorer
type: object
# CleoDialogueLLMModuleSettings: object schema for a module whose `type` is
# fixed to `dialogue-llm` and whose `storage_prefix` is fixed to
# `cleo-dialogue-llm`. Only `name` and `llm_model` are required.
CleoDialogueLLMModuleSettings:
description: Settings for our Cleo dialogue LLM module.
properties:
early_terminate:
default: false
description: Defines whether to allow early termination (before LLM call).
True when stock phrase functionality is requested. Default is False.
title: Allow early termination
type: boolean
emb_model_to_use:
default: cohere.embed-multilingual-v3
description: Name of model to be used, it should correspond with one of the
values in embedding_model.
title: Model to use
type: string
embedding_model:
description: Defines settings for the embedding model during data and query
flow.
items:
$ref: '#/$defs/EmbeddingModelSettings'
title: Embedding
type: array
llm_model:
$ref: '#/$defs/LLMModelSettings'
title: LLM Settings
metadata_filter_keys:
description: Configure valid keys that will be used to filter metadata. They
should by default be at least any of the types of QueryFeatureSettings.
items:
type: string
title: Metadata Filter keys
type: array
name:
description: The name of the module. This should be unique within the query
engine.
title: Name
type: string
postprocessor_settings:
$ref: '#/$defs/LLMPostprocessorSettings'
description: Configure which postprocessor functions to use
title: Postprocessor settings
preprocessor_settings:
$ref: '#/$defs/PreprocessorSettings'
description: Configure which preprocessor functions to use
title: Preprocessor settings
query_feature_settings:
$ref: '#/$defs/QueryFeatureSettings'
storage_prefix:
const: cleo-dialogue-llm
default: cleo-dialogue-llm
title: Storage Prefix
type: string
type:
const: dialogue-llm
default: dialogue-llm
title: Type
type: string
required: [name, llm_model]
title: Cleo Dialogue LLM Module Settings
type: object
# ClinicalTrialIRModuleResponderSettings: responder entry with `type` fixed to
# `clinical-trial-ir`. Both limits accept a positive integer or null; the load
# limit defaults to null and the render limit defaults to 3.
ClinicalTrialIRModuleResponderSettings:
description: Settings for our clinical trial module responder.
properties:
max_answer_candidates_to_load:
anyOf:
- exclusiveMinimum: 0
type: integer
- type: 'null'
default: null
description: The maximum number of answer candidate bare answers that we will
load in order to sort and display to the user. Default to `None` which means
to load all answer candidates.
title: Max Answer Candidates To Load
max_answer_candidates_to_render:
anyOf:
- exclusiveMinimum: 0
type: integer
- type: 'null'
default: 3
description: The maximum number of answer candidate bare answers that we will
render and display to the user.
title: Max Answer Candidates to Render
type:
const: clinical-trial-ir
default: clinical-trial-ir
title: Type
type: string
title: Clinical Trial Module Responder Settings
type: object
# Condition: a comparison of one score-vector entry against a number. All three
# properties (`operator`, `score_key`, `rhs`) are required. The operator enum
# accepts word forms (gt, gte, ...), ASCII symbols, and the escaped Unicode
# symbols U+2265 (>=), U+2264 (<=) and U+2260 (not equal).
Condition:
description: A condition to check before applying a reranker.
properties:
operator:
description: The comparison operator.
enum: [gt, gte, lt, lte, eq, ne, '>', "\u2265", '>=', <, "\u2264", <=, '=',
==, '!=', <>, "\u2260"]
title: Operator
type: string
rhs:
description: The value to compare against.
title: RHS Operand
type: number
score_key:
description: The key in the score vector to use for the comparison. When the
key doesn't exist or is None, we return false.
minLength: 1
title: Score Key
type: string
required: [operator, score_key, rhs]
title: Condition
type: object
# DirectoryAnswerScorerFunction: scorer entry with `type` fixed to
# `directory-answer-scorer`. No property is required. Besides `name`, `type`
# and `verbose` it accepts `institution_affinities`, a list of
# InstitutionAffinity objects.
# NOTE(review): the description text is identical to AnswerScorerFunction's and
# does not mention institution affinities — confirm it is still accurate.
DirectoryAnswerScorerFunction:
description: Score an answer based on standard token level P/R/F metrics.
properties:
institution_affinities:
description: The list of institution affinities.
items:
$ref: '#/$defs/InstitutionAffinity'
title: Institution Priority Order
type: array
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the scorer function. This is used to identify the
scorer functions that has been applied.
title: Name
type:
const: directory-answer-scorer
default: directory-answer-scorer
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Directory Answer Scorer
type: object
# DirectoryIRModuleResponderSettings: responder entry with `type` fixed to
# `directory-ir`. Both limits accept a positive integer or null; the load limit
# defaults to null and the render limit defaults to 3.
DirectoryIRModuleResponderSettings:
description: Settings for our directory module responder.
properties:
max_answer_candidates_to_load:
anyOf:
- exclusiveMinimum: 0
type: integer
- type: 'null'
default: null
description: The maximum number of answer candidate bare answers that we will
load in order to sort and display to the user. Default to `None` which means
to load all answer candidates.
title: Max Answer Candidates To Load
max_answer_candidates_to_render:
anyOf:
- exclusiveMinimum: 0
type: integer
- type: 'null'
default: 3
description: The maximum number of answer candidate bare answers that we will
render and display to the user.
title: Max Answer Candidates to Render
type:
const: directory-ir
default: directory-ir
title: Type
type: string
title: Directory Module Responder Settings
type: object
# DiscardOrKeepInformationRerankerFunction: reranker entry with `type` fixed to
# `discard-or-keep-information-reranker`. Requires `fields_to_check` (list of
# strings), `discard_or_keep` (`discard` or `keep`) and `value` (string).
DiscardOrKeepInformationRerankerFunction:
description: Reranker that shortlists or discards entry based on the condition.
properties:
discard_or_keep:
description: The field that decides whether to discard or keep the entries.
enum: [discard, keep]
title: Discard Or Keep
type: string
fields_to_check:
description: The fields in the document to check for the values.
items:
type: string
title: Fields To Check
type: array
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
type:
const: discard-or-keep-information-reranker
default: discard-or-keep-information-reranker
title: Type
type: string
value:
description: The field based on which discard or keep is decided.
title: Value
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [fields_to_check, discard_or_keep, value]
title: Discard Or Keep Information Reranker
type: object
# DocumentLLMModuleSettings: object schema for a module whose `type` and
# `storage_prefix` are both fixed to `document-llm`. Only `name` and
# `llm_model` are required. This definition declares no `metadata_filter_keys`
# property.
DocumentLLMModuleSettings:
description: Settings for our document LLM module.
properties:
early_terminate:
default: false
description: Defines whether to allow early termination (before LLM call).
True when stock phrase functionality is requested. Default is False.
title: Allow early termination
type: boolean
emb_model_to_use:
default: cohere.embed-multilingual-v3
description: Name of model to be used, it should correspond with one of the
values in embedding_model.
title: Model to use
type: string
embedding_model:
description: Defines settings for the embedding model during data and query
flow.
items:
$ref: '#/$defs/EmbeddingModelSettings'
title: Embedding
type: array
llm_model:
$ref: '#/$defs/LLMModelSettings'
title: LLM Settings
name:
description: The name of the module. This should be unique within the query
engine.
title: Name
type: string
postprocessor_settings:
$ref: '#/$defs/LLMPostprocessorSettings'
description: Configure which postprocessor functions to use
title: Postprocessor settings
preprocessor_settings:
$ref: '#/$defs/PreprocessorSettings'
description: Configure which preprocessor functions to use
title: Preprocessor settings
query_feature_settings:
$ref: '#/$defs/QueryFeatureSettings'
description: Configure which query feature functions to use
title: Query Feature settings
storage_prefix:
const: document-llm
default: document-llm
title: Storage Prefix
type: string
type:
const: document-llm
default: document-llm
title: Type
type: string
required: [name, llm_model]
title: LLM Document Module Settings
type: object
# EmbeddingModelSettings: embedding-model configuration. No property is
# required; every scalar property carries a default. `hnsw_kwargs` is an
# optional HNSWSettings object (null by default).
# NOTE(review): for `context_size` the title reads like a description and the
# description like a title — they may have been swapped; confirm upstream.
EmbeddingModelSettings:
description: Defines settings for the embedding model during data and query flow.
properties:
context_size:
default: 512
description: Context size
title: Context window size of the embedding model
type: integer
default_prompt_name:
default: query
description: Prompt for the embedding.
enum: [query, text]
title: Prompt Name
type: string
dimension:
default: 1024
description: Dimension of the embedding
title: Dimension
type: integer
emb_model_source:
default: bedrock
description: Where is the model from?
enum: [huggingface, bedrock]
title: Model Source
type: string
hnsw_kwargs:
anyOf:
- $ref: '#/$defs/HNSWSettings'
- type: 'null'
default: null
description: HNSW settings
title: HNSW settings
model:
default: cohere.embed-multilingual-v3
description: Name of embedding model to use.
minLength: 1
title: Model
type: string
query_instruction:
default: 'Represent this sentence for searching relevant passages:'
description: Instruction for the query.
title: Query Instruction
type: string
title: Embedding Model Settings
type: object
# FirstNRerankerFunction: reranker entry with `type` fixed to
# `first-n-reranker`. No property is required; `n` must be a positive integer
# and defaults to 16.
FirstNRerankerFunction:
description: Reranker keeps the first N candidates and discards the rest.
properties:
n:
default: 16
description: Keep the first N candidates.
exclusiveMinimum: 0
title: N
type: integer
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
type:
const: first-n-reranker
default: first-n-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: First N Reranker
type: object
# HNSWSettings: four optional HNSW index parameters, all with defaults
# (`vector_cosine_ops`, ef_construction 64, ef_search 40, m 16). The integer
# properties declare no minimum or maximum in this schema.
HNSWSettings:
description: |-
Defines settings for HNSW graph. See
https://github.com/run-llama/llama_index/blob/977d60a058c691957dae3eb3c66c1894faea24ac/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py#L570
properties:
hnsw_dist_method:
default: vector_cosine_ops
description: Distance metric to use. Note that by default PGVectorStore._build_query
calls cosine_distance
enum: [vector_l2_ops, vector_ip_ops, vector_cosine_ops, vector_l1_ops, bit_hamming_ops,
bit_jaccard_ops]
title: dist_method
type: string
hnsw_ef_construction:
default: 64
description: Size of the dynamic candidate list for constructing the graph.
Higher value provides better recall at the cost of speed
title: ef_construction
type: integer
hnsw_ef_search:
default: 40
description: Size of the dynamic candidate list for search. Higher value provides
better recall at the cost of speed.
title: ef_search
type: integer
hnsw_m:
default: 16
description: Max number of connections per layer.
title: m
type: integer
title: HNSW Graph Settings
type: object
# InstitutionAffinity: pairs a tenant with an ordered, non-empty list of
# institution names. Both `tenant` and `affinities` are required. `tenant` must
# match either the `cleo:<id>` or the `hospital:<id>` pattern. The definition
# itself has no top-level description.
InstitutionAffinity:
properties:
# NOTE(review): "affiniated" in the description below looks like a typo for
# "affiliated"; left unchanged because the text is schema data.
affinities:
description: The list of institutions starting from most to least affiniated.
This field is case insensitive.
items:
type: string
minItems: 1
title: Affinities
type: array
tenant:
anyOf:
- description: The `cleo` app tenant applicable to this object.
pattern: ^cleo\:[a-zA-Z0-9][\w\-\_]*$
title: Cleo Tenant
type: string
- description: The `hospital` app tenant applicable to this object.
pattern: ^hospital\:[a-zA-Z0-9][\w\-\_]*$
title: Hospital Tenant
type: string
description: The tenant for which the priority order is defined.
title: Tenant
required: [tenant, affinities]
title: InstitutionAffinity
type: object
# IntentPhraseFinderSettings: phrase-finder entry with `type` fixed to
# `intent`. No property is required; reranker/retriever/scorer are optional
# $refs. `retriever` carries no description/title annotations here.
IntentPhraseFinderSettings:
description: Configure how we retrieve, score, and rank intent phrases in the
module.
properties:
reranker:
$ref: '#/$defs/RerankerSettings'
description: Configure how we rerank phrases.
title: Reranker
retriever:
$ref: '#/$defs/RetrieverSettings'
scorer:
$ref: '#/$defs/ScorerSettings'
description: Configure how we score phrases.
title: Scorer
type:
const: intent
default: intent
title: Type
type: string
title: Intent Phrase Finder Settings
type: object
# IntentPhraseRetrieverFunction: retriever entry with `type` fixed to
# `intent-phrase-retriever`. No property is required. `size` defaults to 128
# (minimum 0); `tiered_fuzzy_match` may be null and otherwise defaults to the
# inline mapping below.
IntentPhraseRetrieverFunction:
description: Settings for retrieving intent phrases.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the retriever function. This is used to identify
the retriever functions used.
title: Name
size:
default: 128
description: The `size` parameter to use when retrieving documents from the
KB. Note that `scan` is not used.
minimum: 0
title: Size
type: integer
tiered_fuzzy_match:
anyOf:
- $ref: '#/$defs/TieredFuzzyMatchSettings'
- type: 'null'
default:
fuzzy_transpositions: true
length_thresholds: [3, 6]
prefix_length: 2
scores: [0.8, 0.9, 1.0]
description: Configure how we perform fuzzy matching when retrieving phrases.
title: Tiered Fuzzy Match
type:
const: intent-phrase-retriever
default: intent-phrase-retriever
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Intent Phrase Retriever
type: object
# LLMModelSettings: LLM configuration used during the query flow. Only `model`
# (non-empty string) is required; the remaining properties have defaults,
# except `supported_language_codes`, which declares none. `context_template`
# defaults to a multi-line literal block.
# NOTE(review): `guardrail_identifier` defaults to a full ARN that embeds a
# specific AWS account id and region — confirm that shipping this as a schema
# default is intended.
LLMModelSettings:
description: Defines settings for the LLM model during query flow.
properties:
context_size:
anyOf:
- type: integer
- type: 'null'
default: null
description: Context size aka context length
title: Context size
context_template:
default: |-
You are a friendly and warm question-answer hospital assistant, employed as an employee of the hospital.
The only information you know is the context provided. You can use only that information as your hidden knowledge base.
When you answer, the context provided will be known as "my training."
If the context provided provides links, show it to the user.
description: Template for the context
title: Context Template
type: string
guardrail_identifier:
default: arn:aws:bedrock:us-west-2:394252546268:guardrail/f83z6kx1d6hl
description: Name of AWS guardrail if applicable. Should start with arn:aws:bedrock:us-west-2:.
title: Guardrail Identifier
type: string
guardrail_version:
default: '1'
description: AWS Guardrail version (string) if applicable
title: Guardrail Version
type: string
max_tokens:
default: 1000
description: Max tokens returned by LLM
title: Max Tokens
type: integer
model:
description: Name of LLM as per HuggingFace
minLength: 1
title: Model
type: string
reranker_similarity_cutoff:
default: 0
description: Similarity score cutoff for node postprocessing based on reranker
score
title: Reranker Similarity Cutoff
type: number
similarity_cutoff:
default: 0.38
description: Similarity score cutoff for node postprocessing based on embedding
score
title: Similarity Cutoff
type: number
similarity_top_k:
default: 6
description: Number of nodes to return after retrieval
title: Similarity top K
type: integer
supported_language_codes:
description: Restricts bot to only answer only these languages. Provide list
of 2-letter language codes, and double-check that Amazon Comprehend / Translate
supports them.
items:
type: string
title: Supported language codes
type: array
system_prompt:
default: Answer the QUERY below only using the DOCUMENTs below as context,
and not your trained knowledge.
description: Prompt for the LLM
title: System Prompt
type: string
temperature:
default: 0
description: Temperature
title: Temperature
type: number
required: [model]
title: LLM Model Settings
type: object
# LLMPostprocessorSettings: wraps the required `postprocessor_functions` list.
# Each item is one of the two postprocessor function definitions, selected by
# the `type` discriminator (`amazon-translator` or `language-remover`).
LLMPostprocessorSettings:
description: Configure which LLM postprocessor functions to apply.
properties:
postprocessor_functions:
description: Postprocessor functions to be applied one after another in the
model.
items:
discriminator:
mapping:
amazon-translator: '#/$defs/AmazonTranslatorLLMPostprocessorFunction'
language-remover: '#/$defs/LanguageRemoverLLMPostprocessorFunction'
propertyName: type
oneOf:
- $ref: '#/$defs/LanguageRemoverLLMPostprocessorFunction'
- $ref: '#/$defs/AmazonTranslatorLLMPostprocessorFunction'
title: Postprocessor Functions
type: array
required: [postprocessor_functions]
title: LLM Postprocessor Settings
type: object
# LanguageDetectorQueryFeatureFunction: query-feature entry with `type` fixed
# to `language-detector`. No property is required.
# NOTE(review): `aws_region_name` defaults to `us-west-2` here, while the
# `amazon-translator` and `language-filter` definitions in this file default to
# `us-east-1` — confirm the difference is intentional.
LanguageDetectorQueryFeatureFunction:
description: Language Detector Query Feature function.
properties:
aws_region_name:
default: us-west-2
description: AWS region name
title: AWS region name
type: string
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the query feature function. This is used to identify
the query feature functions that has been applied.
title: Name
type:
const: language-detector
default: language-detector
title: Type
type: string
title: Language Detector
type: object
# LanguageFilterPreprocessorFunction: preprocessor entry with `type` fixed to
# `language-filter`. No property is required; `aws_region_name` defaults to
# `us-east-1` and `supported_language_codes` is a list of strings with no
# declared default.
LanguageFilterPreprocessorFunction:
description: Language Filter Preprocessor function.
properties:
aws_region_name:
default: us-east-1
description: AWS region name
title: AWS region name
type: string
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the preprocessor function. This is used to identify
the preprocessor functions that has been applied.
title: Name
supported_language_codes:
description: The language codes that are supported for this client. By default,
if this is empty, all languages are supported. If a language is not supported,
query will be nullified
items:
type: string
title: Supported language codes
type: array
type:
const: language-filter
default: language-filter
title: Type
type: string
title: Language Filter
type: object
# LanguageRemoverLLMPostprocessorFunction: postprocessor entry with `type`
# fixed to `language-remover`. `language_tag` is also a constant
# (`query_language=`), so only `name` is freely configurable.
LanguageRemoverLLMPostprocessorFunction:
description: Language Remover LLM Postprocessor function.
properties:
language_tag:
const: query_language=
default: query_language=
title: Language Tag
type: string
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the LLM Postprocessor function. This is used to identify
the LLM postprocessor functions that has been applied.
title: Name
type:
const: language-remover
default: language-remover
title: Type
type: string
title: Language Remover
type: object
# LastNRerankerFunction: reranker entry with `type` fixed to `last-n-reranker`.
# No property is required; `n` must be a positive integer and defaults to 16.
LastNRerankerFunction:
description: Reranker keeps the last N candidates and discards the rest.
properties:
n:
default: 16
description: Keep the last N candidates.
exclusiveMinimum: 0
title: N
type: integer
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
type:
const: last-n-reranker
default: last-n-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Last N Reranker
type: object
# LoadFullDocumentRerankerFunction: reranker entry with `type` fixed to
# `load-full-document-reranker`. No property is required; it exposes only
# `name`, `type` and `verbose`.
LoadFullDocumentRerankerFunction:
description: A pseudo-reranker that calls the `load` method on all candidate documents.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
type:
const: load-full-document-reranker
default: load-full-document-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Load Full Document Reranker
type: object
# MaxCutoffRerankerFunction: reranker entry with `type` fixed to
# `max-cutoff-reranker`. Requires `score_key` and `cutoff`. `cutoff` accepts a
# number, an integer, or a percentage string such as `12.5%` (digits, optional
# decimal part, trailing `%`). `conditions` defaults to an empty list.
MaxCutoffRerankerFunction:
description: Maximum cutoff reranker function. All resulting candidates will have
scores lesser or equal to the cutoff.
properties:
conditions:
default: []
description: Conditions for documents before we apply the reranker Documents
that fail the condition will be passed through to the next reranker.
items:
$ref: '#/$defs/Condition'
title: Conditions
type: array
cutoff:
anyOf:
- type: number
- type: integer
- pattern: ^\d+(\.\d+)?\%$
type: string
description: The score value to cut at. If specified as a percentage, the
cutoff is calculated as a percentage of the minimum/maximum score.
title: Cutoff
keep_original_if_all_discarded:
default: false
description: If all documents are discarded, keep the original candidates.
title: Keep Original If All Discarded
type: boolean
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
score_key:
description: The key in the score vector to use for the cutoff.
minLength: 1
title: Score Key
type: string
type:
const: max-cutoff-reranker
default: max-cutoff-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [score_key, cutoff]
title: Max Cutoff Reranker
type: object
# MinCutoffRerankerFunction: reranker entry with `type` fixed to
# `min-cutoff-reranker`. Requires `score_key` and `cutoff`. `cutoff` accepts a
# number, an integer, or a percentage string such as `12.5%` (digits, optional
# decimal part, trailing `%`). `conditions` defaults to an empty list.
MinCutoffRerankerFunction:
description: Minimum cutoff reranker function. All resulting candidates will have
scores greater or equal to the cutoff.
properties:
conditions:
default: []
description: Conditions for documents before we apply the reranker Documents
that fail the condition will be passed through to the next reranker.
items:
$ref: '#/$defs/Condition'
title: Conditions
type: array
cutoff:
anyOf:
- type: number
- type: integer
- pattern: ^\d+(\.\d+)?\%$
type: string
description: The score value to cut at. If specified as a percentage, the
cutoff is calculated as a percentage of the minimum/maximum score.
title: Cutoff
keep_original_if_all_discarded:
default: false
description: If all documents are discarded, keep the original candidates.
title: Keep Original If All Discarded
type: boolean
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
score_key:
description: The key in the score vector to use for the cutoff.
minLength: 1
title: Score Key
type: string
type:
const: min-cutoff-reranker
default: min-cutoff-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [score_key, cutoff]
title: Min Cutoff Reranker
type: object
# NoopModuleSettings: module entry with `type` fixed to `noop`. Unlike the LLM
# module definitions in this file, `name` is not required and defaults to
# `noop`.
NoopModuleSettings:
description: Noop module settings.
properties:
name:
default: noop
description: The name of the module. This should be unique within the query
engine.
title: Name
type: string
type:
const: noop
default: noop
title: Type
type: string
title: Noop Module Settings
type: object
# OrderByRerankerFunction: reranker entry with `type` fixed to
# `order-by-reranker`. Only `order_by` is required: a non-empty list of
# non-empty strings. The schema does not validate the `+/-` prefix or the
# JMESPath syntax mentioned in the description.
OrderByRerankerFunction:
description: Reorder the candidates based on scoring keys.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the reranker function. This is used to identify the
reranker functions that has been applied.
title: Name
order_by:
description: The scoring fields to order the candidates by. Prefix with a
`+/-` for ascending/descending order. You can also access `sv`, `doc`, and
`source` fields using JMESPath syntax, e.g., `sv.missed_tokens_count`, `doc.name`,
`source.phone`, etc.
items:
minLength: 1
type: string
minItems: 1
title: Order By
type: array
type:
const: order-by-reranker
default: order-by-reranker
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [order_by]
title: Order By Reranker
type: object
# PhraseScorerFunction: scorer entry with `type` fixed to `phrase-scorer`.
# No property is required; it exposes only `name`, `type` and `verbose`.
PhraseScorerFunction:
description: Settings for a phrase scorer.
properties:
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the scorer function. This is used to identify the
scorer functions that has been applied.
title: Name
type:
const: phrase-scorer
default: phrase-scorer
title: Type
type: string
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
title: Phrase Scorer
type: object
# PreprocessorSettings: wraps the required `preprocessor_functions` list. Each
# item is one of the two preprocessor function definitions, selected by the
# `type` discriminator (`language-filter` or `truncate`).
PreprocessorSettings:
description: Configure how we preprocess queries.
properties:
preprocessor_functions:
description: Preprocessor functions to be applied one after another.
items:
discriminator:
mapping:
language-filter: '#/$defs/LanguageFilterPreprocessorFunction'
truncate: '#/$defs/TruncatePreprocessorFunction'
propertyName: type
oneOf:
- $ref: '#/$defs/TruncatePreprocessorFunction'
- $ref: '#/$defs/LanguageFilterPreprocessorFunction'
title: Preprocessor Functions
type: array
required: [preprocessor_functions]
title: Preprocessor Settings
type: object
# QueryFeatureSettings: wraps the required `query_feature_functions` list. The
# `type` discriminator currently maps a single variant, `language-detector`.
QueryFeatureSettings:
description: Configure how we generate query features.
properties:
query_feature_functions:
description: Query Feature functions to be applied one after another.
items:
discriminator:
mapping:
language-detector: '#/$defs/LanguageDetectorQueryFeatureFunction'
propertyName: type
oneOf:
- $ref: '#/$defs/LanguageDetectorQueryFeatureFunction'
title: Query Feature Functions
type: array
required: [query_feature_functions]
title: Query Feature Settings
type: object
# RerankerSettings: wraps the required `reranker_functions` list plus an
# optional `verbose` flag. Each list item is one of ten reranker function
# definitions, selected by the `type` discriminator; the mapping keys below
# correspond one-to-one with the `oneOf` entries.
RerankerSettings:
description: Configure how we rerank documents.
properties:
reranker_functions:
description: Rerankers are executed one after the other.
items:
discriminator:
mapping:
answer-field-reranker: '#/$defs/AnswerFieldRerankerFunction'
discard-or-keep-information-reranker: '#/$defs/DiscardOrKeepInformationRerankerFunction'
first-n-reranker: '#/$defs/FirstNRerankerFunction'
last-n-reranker: '#/$defs/LastNRerankerFunction'
load-full-document-reranker: '#/$defs/LoadFullDocumentRerankerFunction'
max-cutoff-reranker: '#/$defs/MaxCutoffRerankerFunction'
min-cutoff-reranker: '#/$defs/MinCutoffRerankerFunction'
order-by-reranker: '#/$defs/OrderByRerankerFunction'
reverse-reranker: '#/$defs/ReverseRerankerFunction'
top-k-reranker: '#/$defs/TopKRerankerFunction'
propertyName: type
oneOf:
- $ref: '#/$defs/MinCutoffRerankerFunction'
- $ref: '#/$defs/MaxCutoffRerankerFunction'
- $ref: '#/$defs/TopKRerankerFunction'
- $ref: '#/$defs/LoadFullDocumentRerankerFunction'
- $ref: '#/$defs/OrderByRerankerFunction'
- $ref: '#/$defs/FirstNRerankerFunction'
- $ref: '#/$defs/LastNRerankerFunction'
- $ref: '#/$defs/ReverseRerankerFunction'
- $ref: '#/$defs/AnswerFieldRerankerFunction'
- $ref: '#/$defs/DiscardOrKeepInformationRerankerFunction'
title: Reranker Functions
type: array
verbose:
default: false
description: Whether to log verbose debugging information.
title: Verbose
type: boolean
required: [reranker_functions]
title: Reranker Settings
type: object
# Per answer type/field weight configuration used during retrieval.
RetrievalWeightsSettings:
  title: Retrieval Weights
  type: object
  description: Settings for retrieval weights.
  required: [answer_type, answer_field]
  properties:
    answer_field:
      title: Answer Field
      type: string
      description: The field in the answer where the object is used.
    answer_type:
      title: Answer Type
      type: string
      description: The answer type where the object is used.
      enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
    multiplier:
      title: Multiplier
      type: number
      default: 1
      description: Multiply to the weight.
    weight:
      title: Weight
      default: matched_tokens_precision
      description: >-
        The weight to assign to the field. It can also be one of the
        supported phrase scoring keys or a constant weight value.
      # Either a constant number or the name of a phrase scoring key.
      anyOf:
        - type: number
        - type: string
          enum:
            - phrase_tokens_count
            - matched_tokens_score
            - missed_tokens_score
            - matched_tokens_count
            - missed_tokens_count
            - matched_tokens_precision
            - missed_tokens_precision
# Settings object listing the retriever functions to run, plus a verbose flag.
RetrieverSettings:
  title: Retriever Settings
  type: object
  description: Configure how we retrieve documents.
  required: [retriever_functions]
  properties:
    retriever_functions:
      title: Retriever Functions
      type: array
      description: Retrievers are executed concurrently and then concatenated after.
      # At least one retriever must be configured.
      minItems: 1
      items:
        # Each item must match exactly one of these retriever schemas.
        oneOf:
          - $ref: '#/$defs/AnswerPhraseRetrieverFunction'
          - $ref: '#/$defs/IntentPhraseRetrieverFunction'
          - $ref: '#/$defs/AnswerRetrieverFunction'
        # The item's `type` value selects the schema it is checked against.
        discriminator:
          propertyName: type
          mapping:
            answer-phrase-retriever: '#/$defs/AnswerPhraseRetrieverFunction'
            answer-retriever: '#/$defs/AnswerRetrieverFunction'
            intent-phrase-retriever: '#/$defs/IntentPhraseRetrieverFunction'
    verbose:
      title: Verbose
      type: boolean
      default: false
      description: Whether to log verbose debugging information.
# Reranker function schema identified by `type: reverse-reranker`.
ReverseRerankerFunction:
  title: Reverse Reranker
  type: object
  description: Reranker reverses the order of the candidates.
  properties:
    name:
      title: Name
      default: null
      description: >-
        The name of the reranker function. This is used to identify the
        reranker functions that has been applied.
      # Optional: either a string or an explicit null.
      anyOf:
        - type: string
        - type: 'null'
    type:
      title: Type
      type: string
      # Fixed tag used by discriminated unions to select this schema.
      const: reverse-reranker
      default: reverse-reranker
    verbose:
      title: Verbose
      type: boolean
      default: false
      description: Whether to log verbose debugging information.
# Settings object listing the scorer functions to run, plus a verbose flag.
ScorerSettings:
  title: Scorer Settings
  type: object
  description: Configure how we score documents.
  required: [scorer_functions]
  properties:
    scorer_functions:
      title: Scorer Functions
      type: array
      description: >-
        Scorer functions are executed concurrently and then merged together
        after.
      # At least one scorer must be configured.
      minItems: 1
      items:
        # Each item must match exactly one of these scorer schemas.
        oneOf:
          - $ref: '#/$defs/PhraseScorerFunction'
          - $ref: '#/$defs/AnswerScorerFunction'
          - $ref: '#/$defs/DirectoryAnswerScorerFunction'
        # The item's `type` value selects the schema it is checked against.
        discriminator:
          propertyName: type
          mapping:
            answer-scorer: '#/$defs/AnswerScorerFunction'
            directory-answer-scorer: '#/$defs/DirectoryAnswerScorerFunction'
            phrase-scorer: '#/$defs/PhraseScorerFunction'
    verbose:
      title: Verbose
      type: boolean
      default: false
      description: Whether to log verbose debugging information.
# Module schema identified by `type: standard-ir`.
StandardIRModuleSettings:
  title: Standard IR Module Settings
  type: object
  description: Settings for our standard IR-based module.
  required: [name, module_responder]
  properties:
    answer_finder:
      $ref: '#/$defs/AnswerFinderSettings'
      title: Answer Finder
      description: Configure how we retrieve, score, and rank answers.
    answer_phrase_finders:
      title: Answer Phrase Finders
      type: array
      description: >-
        Configure how we retrieve, score, and rank answer phrases. This
        is done per answer type/field. Multiple finders for each answer
        type/field is supported.
      items:
        $ref: '#/$defs/AnswerPhraseFinderSettings'
    intent_phrase_finder:
      $ref: '#/$defs/IntentPhraseFinderSettings'
      title: Intent Phrase Finder
      description: Configure how we retrieve, score, and rank intent phrases.
    intents:
      title: Intents
      type: array
      description: The intents will likely trigger this module.
      items:
        title: Intent
        type: string
        description: The intent phrase.
        minLength: 1
    module_responder:
      title: Module Responder
      description: >-
        The responder to use for this module. Module responders evaluate
        all the candidate answers to form a cohesive response for a module.
      # Required; must match at least one of the responder schemas.
      anyOf:
        - $ref: '#/$defs/DirectoryIRModuleResponderSettings'
        - $ref: '#/$defs/ClinicalTrialIRModuleResponderSettings'
    module_stopwords:
      title: Module Stopwords
      type: array
      description: Stopwords used in this module.
      items:
        $ref: '#/$defs/StopwordSettings'
    module_synonyms:
      title: Module Synonyms
      type: array
      description: Synonyms used in this module.
      items:
        $ref: '#/$defs/SynonymSettings'
    name:
      title: Name
      type: string
      description: >-
        The name of the module. This should be unique within the query
        engine.
    type:
      title: Type
      type: string
      # Fixed tag used by discriminated unions to select this schema.
      const: standard-ir
      default: standard-ir
# Stopword configuration scoped to one answer type/field pair.
StopwordSettings:
  title: Stopword Settings
  type: object
  description: Configure stopwords on a per answer type/field level.
  required: [answer_type, answer_field, stopwords]
  properties:
    answer_field:
      title: Answer Field
      type: string
      description: The field in the answer where the object is used.
    answer_type:
      title: Answer Type
      type: string
      description: The answer type where the object is used.
      enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
    stopwords:
      title: Stopwords
      description: >-
        The stopwords for this answer type/field. It can be a list of
        phrases or a stopword set name.
      # NOTE(review): in JSON Schema, minLength/maxLength only constrain
      # strings, so the array branch below is not bounded by them. Confirm
      # whether minItems/maxItems was intended for the list form.
      minLength: 1
      maxLength: 512
      # Either the named stopword set or an explicit list of phrases.
      anyOf:
        - type: string
          const: en-standard
        - type: array
          items:
            type: string
            minLength: 1
# Synonym configuration scoped to one answer type/field pair.
SynonymSettings:
  title: Synonym Settings
  type: object
  description: Configure synonyms on a per answer type/field level.
  required: [answer_type, answer_field, synonyms]
  properties:
    answer_field:
      title: Answer Field
      type: string
      description: The field in the answer where the object is used.
    answer_type:
      title: Answer Type
      type: string
      description: The answer type where the object is used.
      enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
    synonym_expansion_rounds:
      title: Synonym Expansion Rounds
      type: integer
      default: 3
      minimum: 0
      description: >-
        The number of rounds to expand the synonyms. Each round will
        generate more candidate synonyms.
    synonyms:
      title: Synonyms
      type: array
      description: >-
        The synonyms for this answer type/field as a list of synonym
        groups.
      # At least one synonym group is required.
      minItems: 1
      items:
        title: Synonym Group
        description: Describes how synonyms are generated
        anyOf:
          # One-way group: an object whose every value is a non-empty list
          # of strings.
          - title: One Way Synonym Group
            type: object
            description: >-
              In a one-way synonym group, all phrases are synonyms of the
              first phrase but not the other way round.
            minProperties: 1
            additionalProperties:
              type: array
              description: Synonyms of the first phrase.
              minItems: 1
              items:
                type: string
          # Two-way group: a list of at least two non-empty phrases.
          - title: Two Way Synonym Group
            type: array
            description: >-
              In a two-way synonym group, all pairs of phrases are synonyms
              of each other and can be used interchangeably.
            minItems: 2
            items:
              title: Synonym
              type: string
              description: A Single synonym phrase.
              minLength: 1
# Fuzzy-match tuning: length tiers, prefix length, transpositions, tier scores.
TieredFuzzyMatchSettings:
  title: TieredFuzzyMatchSettings
  type: object
  description: Settings for tiered fuzzy matching.
  properties:
    fuzzy_transpositions:
      title: Fuzzy Transpositions
      type: boolean
      default: true
      description: Whether to allow transpositions in fuzzy matching. See https://opensearch.org/docs/latest/query-dsl/full-text/match/#transpositions
    length_thresholds:
      title: Length Thresholds
      type: array
      default: [3, 6]
      description: "A tuple `[a, b]` where string lengths in [0, a] do not have\
        \ fuzzy matching, lengths in (a, b] allow 1 Levenshtein distance, and lengths\
        \ (b, \u221E) allow 2 Levenshtein distances."
      # Exactly two positional integers.
      minItems: 2
      maxItems: 2
      # NOTE(review): `prefixItems` is a JSON Schema 2020-12 keyword; confirm
      # the validator in use honours it.
      prefixItems:
        - type: integer
          minimum: 0
        - type: integer
          exclusiveMinimum: 0
    prefix_length:
      title: Prefix Length
      type: integer
      default: 2
      minimum: 0
      description: The length of the prefix to use for fuzzy matching. See https://opensearch.org/docs/latest/query-dsl/full-text/match/#prefix-length
    scores:
      title: Match Scores
      type: array
      default: [0.8, 0.9, 1.0]
      # The tuple has three elements (a, b, c), matching minItems/maxItems.
      description: >-
        A tuple `[a, b, c]` where a is the score for 2 Levenshtein distances,
        b is the score for 1 Levenshtein distance, and c is the score for exact
        matches.
      # Exactly three positional numbers.
      minItems: 3
      maxItems: 3
      prefixItems:
        - type: number
        - type: number
        - type: number
# Reranker function schema identified by `type: top-k-reranker`.
TopKRerankerFunction:
  title: Top K Reranker
  type: object
  description: >-
    Reranker keeps the top K candidates based on distinct score value
    and discards the rest.
  required: [score_key]
  properties:
    k:
      title: K
      type: integer
      default: 1
      # Must be strictly positive.
      exclusiveMinimum: 0
      description: Keep the top K candidates.
    keep_ties:
      title: Keep Ties
      type: boolean
      default: true
      description: >-
        Whether to keep candidates whose score tie for top K. Defaults
        to `true`.
    name:
      title: Name
      default: null
      description: >-
        The name of the reranker function. This is used to identify the
        reranker functions that has been applied.
      # Optional: either a string or an explicit null.
      anyOf:
        - type: string
        - type: 'null'
    score_key:
      title: Score Key
      type: string
      minLength: 1
      description: The key in the score vector to use for the cutoff.
    type:
      title: Type
      type: string
      # Fixed tag used by discriminated unions to select this schema.
      const: top-k-reranker
      default: top-k-reranker
    verbose:
      title: Verbose
      type: boolean
      default: false
      description: Whether to log verbose debugging information.
# Preprocessor function schema identified by `type: truncate`.
TruncatePreprocessorFunction:
  title: Truncate
  type: object
  description: Truncate Preprocessor function.
  properties:
    max_length:
      title: Max length
      type: integer
      default: 1000
      description: Maximum length of query string
    name:
      title: Name
      default: null
      description: >-
        The name of the preprocessor function. This is used to identify
        the preprocessor functions that has been applied.
      # Optional: either a string or an explicit null.
      anyOf:
        - type: string
        - type: 'null'
    type:
      title: Type
      type: string
      # Fixed tag used by discriminated unions to select this schema.
      const: truncate
      default: truncate
# Root schema: identifier, dialect, and the top-level query engine properties.
$id: https://skeleton.botmd.io/jarvisv2/QueryEngineSettings.schema
# NOTE(review): this file uses `$defs` and `prefixItems`, which were introduced
# after draft-07; confirm the declared dialect matches the validator in use.
$schema: http://json-schema.org/draft-07/schema#
title: Query Engine Settings
type: object
description: This contains all the settings needed for the Jarvis v2 query engine.
required: [name]
properties:
  enabled:
    title: Enabled
    type: boolean
    default: true
    description: Whether this query engine is enabled.
  modules:
    title: Modules
    type: array
    description: The modules to be used in the query engine.
    # At least one module must be configured.
    minItems: 1
    items:
      # Each item must match exactly one of these module schemas.
      oneOf:
        - $ref: '#/$defs/StandardIRModuleSettings'
        - $ref: '#/$defs/DocumentLLMModuleSettings'
        - $ref: '#/$defs/CleoDialogueLLMModuleSettings'
        - $ref: '#/$defs/AgenticModuleSettings'
        - $ref: '#/$defs/NoopModuleSettings'
      # The item's `type` value selects the schema it is checked against.
      discriminator:
        propertyName: type
        mapping:
          agentic-module: '#/$defs/AgenticModuleSettings'
          dialogue-llm: '#/$defs/CleoDialogueLLMModuleSettings'
          document-llm: '#/$defs/DocumentLLMModuleSettings'
          noop: '#/$defs/NoopModuleSettings'
          standard-ir: '#/$defs/StandardIRModuleSettings'
  name:
    title: Name
    type: string
    description: The name of the query engine. This should be globally unique.
  version:
    title: Version
    type: string
    default: unknown
    description: Version of this query engine based on the Kondo resource version.