Schema for Jarvisv2 Query Engine Settings

From QueryEngineSettings.schema.yaml (jarvisv2/QueryEngineSettings.schema)

---
$defs:
  # Settings for an agentic module. `type` and `storage_prefix` are pinned by
  # `const`; only `name` and `llm_model` are required.
  AgenticModuleSettings:
    description: |-
      Defines settings for Agentic modules
      # schema: jarvisv2/QueryEngineSettings.schema
      ---
      name: agentic-module-query-engine
      tenants: [__all__]
      modules:
        - type: agentic-module
          name: agentic-module-v0
          storage_prefix: agentic-module-v0-
        #end agentic-module-v0
      #end modules
    properties:
      early_terminate:
        default: false
        description: Defines whether to allow early termination (before LLM call).
          True when stock phrase functionality is requested. Default is False.
        title: Allow early termination
        type: boolean
      # Per its description this should name one of the `embedding_model`
      # entries; the schema itself only checks that it is a string.
      emb_model_to_use:
        default: cohere.embed-multilingual-v3
        description: Name of model to be used, it should correspond with one of the
          values in embedding_model.
        title: Model to use
        type: string
      embedding_model:
        description: Defines settings for the embedding model during data and query
          flow.
        items:
          $ref: '#/$defs/EmbeddingModelSettings'
        title: Embedding
        type: array
      llm_model:
        $ref: '#/$defs/LLMModelSettings'
        title: LLM Settings
      metadata_filter_keys:
        description: Configure valid keys that will be used to filter metadata. They
          should by default be at least any of the types of QueryFeatureSettings.
        items:
          type: string
        title: Metadata Filter keys
        type: array
      name:
        description: The name of the module. This should be unique within the query
          engine.
        title: Name
        type: string
      postprocessor_settings:
        $ref: '#/$defs/LLMPostprocessorSettings'
        description: Configure which postprocessor functions to use
        title: Postprocessor settings
      preprocessor_settings:
        $ref: '#/$defs/PreprocessorSettings'
        description: Configure which preprocessor functions to use
        title: Preprocessor settings
      query_feature_settings:
        $ref: '#/$defs/QueryFeatureSettings'
      storage_prefix:
        const: agentic-module-v0-
        default: agentic-module-v0-
        title: Storage Prefix
        type: string
      type:
        const: agentic-module
        default: agentic-module
        title: Type
        type: string
    required: [name, llm_model]
    title: Agentic Module Settings
    type: object
  # LLM postprocessor variant identified by `type: amazon-translator`.
  # No property is required; `type`, `name` and `aws_region_name` all
  # declare defaults.
  AmazonTranslatorLLMPostprocessorFunction:
    type: object
    title: Amazon Translator
    description: Amazon Translator LLM Postprocessor function.
    properties:
      type:
        type: string
        title: Type
        const: amazon-translator
        default: amazon-translator
      name:
        title: Name
        description: >-
          The name of the LLM Postprocessor function.
          This is used to identify the LLM postprocessor functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      aws_region_name:
        type: string
        title: AWS region name
        description: AWS region name
        default: us-east-1
  # Reranker variant identified by `type: answer-field-reranker`.
  # `answer_fields` is required and must hold at least one entry; each entry
  # is either `<answer_type>.<answer_field>` or the literal `intent`.
  AnswerFieldRerankerFunction:
    description: Reranker that keeps entries containing matching answer fields.
    properties:
      answer_fields:
        description: The field in the document to use as the answer.
        items:
          anyOf:
          # `\w` already matches `_`, so the second character class needs no
          # `\_`; that escape is rejected by regex engines running in
          # ECMA-262 Unicode mode.
          - description: Answer field that we expect to be matched in the answer.
              Answer field is specified in `<answer_type>.<answer_field>` format.
            pattern: '^[\w\-]+\.\w+$'
            title: Answer Field
            type: string
          - const: intent
            type: string
        minItems: 1
        title: Answer Fields
        type: array
      keep_original_if_all_discarded:
        default: false
        description: If all documents are discarded, keep the original candidates.
        title: Keep Original If All Discarded
        type: boolean
      match_type:
        default: all
        description: Whether to match `all` or `any` of the answer fields.
        enum: [all, any]
        title: Match Type
        type: string
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      type:
        const: answer-field-reranker
        default: answer-field-reranker
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required: [answer_fields]
    title: Answer Field Reranker
    type: object
  # Groups the retriever, scorer and reranker settings for answers. None of
  # the three properties is required; each one refers to its own definition.
  AnswerFinderSettings:
    type: object
    title: Answer Finder Settings
    description: Configure how we retrieve, score, and rank answers in the module.
    properties:
      retriever:
        $ref: '#/$defs/RetrieverSettings'
        title: Retriever
        description: Configure how we retrieve answer.
      scorer:
        $ref: '#/$defs/ScorerSettings'
        title: Scorer
        description: Configure how we score answers.
      reranker:
        $ref: '#/$defs/RerankerSettings'
        title: Reranker
        description: Configure how we rerank answers.
  # Phrase finder variant identified by `type: answer`. `answer_type` and
  # `answer_field` are required; the three stage settings are optional.
  AnswerPhraseFinderSettings:
    type: object
    title: Answer Phrase Finder Settings
    description: >-
      Configure how we retrieve, score, and rank answer phrases
      in the module.
    required:
    - answer_type
    - answer_field
    properties:
      type:
        type: string
        title: Type
        const: answer
        default: answer
      answer_type:
        type: string
        title: Answer Type
        description: The answer type where the object is used.
        enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
      answer_field:
        type: string
        title: Answer Field
        description: The field in the answer where the object is used.
      retriever:
        $ref: '#/$defs/RetrieverSettings'
      scorer:
        $ref: '#/$defs/ScorerSettings'
        title: Scorer
        description: Configure how we score phrases.
      reranker:
        $ref: '#/$defs/RerankerSettings'
        title: Reranker
        description: Configure how we rerank phrases.
  # Retriever variant identified by `type: answer-phrase-retriever`.
  # No property is required. `tiered_fuzzy_match` may be set to null; when
  # the key is omitted, the default object below applies.
  AnswerPhraseRetrieverFunction:
    description: Settings for retrieving answer phrases.
    properties:
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: The name of the retriever function. This is used to identify
          the retriever functions used.
        title: Name
      size:
        default: 128
        description: The `size` parameter to use when retrieving documents from the
          KB. Note that `scan` is not used.
        minimum: 0
        title: Size
        type: integer
      tiered_fuzzy_match:
        anyOf:
        - $ref: '#/$defs/TieredFuzzyMatchSettings'
        - type: 'null'
        default:
          fuzzy_transpositions: true
          length_thresholds: [3, 6]
          prefix_length: 2
          scores: [0.8, 0.9, 1.0]
        description: Configure how we perform fuzzy matching when retrieving phrases.
        title: Tiered Fuzzy Match
      type:
        const: answer-phrase-retriever
        default: answer-phrase-retriever
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    title: Answer Phrase Retriever
    type: object
  # Retriever variant identified by `type: answer-retriever`.
  # No property is required; `weights` is the only one without a default.
  AnswerRetrieverFunction:
    type: object
    title: Answer Retriever
    description: Settings for retrieving answers.
    properties:
      type:
        type: string
        title: Type
        const: answer-retriever
        default: answer-retriever
      name:
        title: Name
        description: >-
          The name of the retriever function.
          This is used to identify the retriever functions used.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      size:
        type: integer
        title: Size
        description: >-
          The `size` parameter to use when retrieving documents from the KB.
          Note that `scan` is not used.
        minimum: 0
        default: 128
      max_clause_count:
        type: integer
        title: Max Clause Count
        description: >-
          The maximum number of answer phrase clauses to use in a query.
          When there are more clauses, multiple queries will be executed
          concurrently.
        minimum: 0
        default: 1000
      weights:
        type: array
        title: Retrieval Weights Settings
        description: >-
          The weights to use for each field in the retrieval function.
          By default, each field is weighted 1.
        items:
          $ref: '#/$defs/RetrievalWeightsSettings'
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # Scorer variant identified by `type: answer-scorer`.
  # No property is required; all three declare defaults.
  AnswerScorerFunction:
    type: object
    title: Basic Answer Scorer
    description: Score an answer based on standard token level P/R/F metrics.
    properties:
      type:
        type: string
        title: Type
        const: answer-scorer
        default: answer-scorer
      name:
        title: Name
        description: >-
          The name of the scorer function.
          This is used to identify the scorer functions that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # Settings for the Cleo dialogue LLM module. `type` is pinned to
  # `dialogue-llm` and `storage_prefix` to `cleo-dialogue-llm`; only `name`
  # and `llm_model` are required.
  CleoDialogueLLMModuleSettings:
    description: Settings for our Cleo dialogue LLM module.
    properties:
      early_terminate:
        default: false
        description: Defines whether to allow early termination (before LLM call).
          True when stock phrase functionality is requested. Default is False.
        title: Allow early termination
        type: boolean
      # Per its description this should name one of the `embedding_model`
      # entries; the schema itself only checks that it is a string.
      emb_model_to_use:
        default: cohere.embed-multilingual-v3
        description: Name of model to be used, it should correspond with one of the
          values in embedding_model.
        title: Model to use
        type: string
      embedding_model:
        description: Defines settings for the embedding model during data and query
          flow.
        items:
          $ref: '#/$defs/EmbeddingModelSettings'
        title: Embedding
        type: array
      llm_model:
        $ref: '#/$defs/LLMModelSettings'
        title: LLM Settings
      metadata_filter_keys:
        description: Configure valid keys that will be used to filter metadata. They
          should by default be at least any of the types of QueryFeatureSettings.
        items:
          type: string
        title: Metadata Filter keys
        type: array
      name:
        description: The name of the module. This should be unique within the query
          engine.
        title: Name
        type: string
      postprocessor_settings:
        $ref: '#/$defs/LLMPostprocessorSettings'
        description: Configure which postprocessor functions to use
        title: Postprocessor settings
      preprocessor_settings:
        $ref: '#/$defs/PreprocessorSettings'
        description: Configure which preprocessor functions to use
        title: Preprocessor settings
      query_feature_settings:
        $ref: '#/$defs/QueryFeatureSettings'
      storage_prefix:
        const: cleo-dialogue-llm
        default: cleo-dialogue-llm
        title: Storage Prefix
        type: string
      type:
        const: dialogue-llm
        default: dialogue-llm
        title: Type
        type: string
    required: [name, llm_model]
    title: Cleo Dialogue LLM Module Settings
    type: object
  # Responder variant identified by `type: clinical-trial-ir`.
  # Both limits accept a positive integer or null; nothing is required.
  ClinicalTrialIRModuleResponderSettings:
    type: object
    title: Clinical Trial Module Responder Settings
    description: Settings for our clinical trial module responder.
    properties:
      type:
        type: string
        title: Type
        const: clinical-trial-ir
        default: clinical-trial-ir
      max_answer_candidates_to_load:
        title: Max Answer Candidates To Load
        description: >-
          The maximum number of answer candidate bare answers that we will load
          in order to sort and display to the user.
          Default to `None` which means to load all answer candidates.
        anyOf:
        - type: integer
          exclusiveMinimum: 0
        - type: 'null'
        default: null
      max_answer_candidates_to_render:
        title: Max Answer Candidates to Render
        description: >-
          The maximum number of answer candidate bare answers that we will
          render and display to the user.
        anyOf:
        - type: integer
          exclusiveMinimum: 0
        - type: 'null'
        default: 3
  # A single comparison of one score-vector entry against a number.
  # All three properties are required.
  Condition:
    type: object
    title: Condition
    description: A condition to check before applying a reranker.
    required:
    - operator
    - score_key
    - rhs
    properties:
      score_key:
        type: string
        title: Score Key
        description: >-
          The key in the score vector to use for the comparison.
          When the key doesn't exist or is None, we return false.
        minLength: 1
      # Every operator is a string. Symbolic spellings are quoted so that no
      # YAML indicator character can be misread.
      operator:
        type: string
        title: Operator
        description: The comparison operator.
        enum:
        - gt
        - gte
        - lt
        - lte
        - eq
        - ne
        - '>'
        - "\u2265"
        - '>='
        - '<'
        - "\u2264"
        - '<='
        - '='
        - '=='
        - '!='
        - '<>'
        - "\u2260"
      rhs:
        type: number
        title: RHS Operand
        description: The value to compare against.
  # Scorer variant identified by `type: directory-answer-scorer`. It adds
  # `institution_affinities` on top of the `name`/`type`/`verbose` properties.
  # NOTE(review): the description below is word-for-word the one used by
  # AnswerScorerFunction; confirm it also fits the directory scorer.
  DirectoryAnswerScorerFunction:
    description: Score an answer based on standard token level P/R/F metrics.
    properties:
      institution_affinities:
        description: The list of institution affinities.
        items:
          $ref: '#/$defs/InstitutionAffinity'
        title: Institution Priority Order
        type: array
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: The name of the scorer function. This is used to identify the
          scorer functions that has been applied.
        title: Name
      type:
        const: directory-answer-scorer
        default: directory-answer-scorer
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    title: Directory Answer Scorer
    type: object
  # Responder variant identified by `type: directory-ir`.
  # Both limits accept a positive integer or null; nothing is required.
  DirectoryIRModuleResponderSettings:
    type: object
    title: Directory Module Responder Settings
    description: Settings for our directory module responder.
    properties:
      type:
        type: string
        title: Type
        const: directory-ir
        default: directory-ir
      max_answer_candidates_to_load:
        title: Max Answer Candidates To Load
        description: >-
          The maximum number of answer candidate bare answers that we will load
          in order to sort and display to the user.
          Default to `None` which means to load all answer candidates.
        anyOf:
        - type: integer
          exclusiveMinimum: 0
        - type: 'null'
        default: null
      max_answer_candidates_to_render:
        title: Max Answer Candidates to Render
        description: >-
          The maximum number of answer candidate bare answers that we will
          render and display to the user.
        anyOf:
        - type: integer
          exclusiveMinimum: 0
        - type: 'null'
        default: 3
  # Reranker variant identified by
  # `type: discard-or-keep-information-reranker`.
  # `fields_to_check`, `discard_or_keep` and `value` are required.
  DiscardOrKeepInformationRerankerFunction:
    type: object
    title: Discard Or Keep Information Reranker
    description: Reranker that shortlists or discards entry based on the condition.
    required:
    - fields_to_check
    - discard_or_keep
    - value
    properties:
      type:
        type: string
        title: Type
        const: discard-or-keep-information-reranker
        default: discard-or-keep-information-reranker
      name:
        title: Name
        description: >-
          The name of the reranker function.
          This is used to identify the reranker functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      fields_to_check:
        type: array
        title: Fields To Check
        description: The fields in the document to check for the values.
        items:
          type: string
      value:
        type: string
        title: Value
        description: The field based on which discard or keep is decided.
      discard_or_keep:
        type: string
        title: Discard Or Keep
        description: The field that decides whether to discard or keep the entries.
        enum:
        - discard
        - keep
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # Settings for the document LLM module. `type` and `storage_prefix` are both
  # pinned to `document-llm`; only `name` and `llm_model` are required.
  DocumentLLMModuleSettings:
    description: Settings for our document LLM module.
    properties:
      early_terminate:
        default: false
        description: Defines whether to allow early termination (before LLM call).
          True when stock phrase functionality is requested. Default is False.
        title: Allow early termination
        type: boolean
      # Per its description this should name one of the `embedding_model`
      # entries; the schema itself only checks that it is a string.
      emb_model_to_use:
        default: cohere.embed-multilingual-v3
        description: Name of model to be used, it should correspond with one of the
          values in embedding_model.
        title: Model to use
        type: string
      embedding_model:
        description: Defines settings for the embedding model during data and query
          flow.
        items:
          $ref: '#/$defs/EmbeddingModelSettings'
        title: Embedding
        type: array
      llm_model:
        $ref: '#/$defs/LLMModelSettings'
        title: LLM Settings
      name:
        description: The name of the module. This should be unique within the query
          engine.
        title: Name
        type: string
      postprocessor_settings:
        $ref: '#/$defs/LLMPostprocessorSettings'
        description: Configure which postprocessor functions to use
        title: Postprocessor settings
      preprocessor_settings:
        $ref: '#/$defs/PreprocessorSettings'
        description: Configure which preprocessor functions to use
        title: Preprocessor settings
      query_feature_settings:
        $ref: '#/$defs/QueryFeatureSettings'
        description: Configure which query feature functions to use
        title: Query Feature settings
      storage_prefix:
        const: document-llm
        default: document-llm
        title: Storage Prefix
        type: string
      type:
        const: document-llm
        default: document-llm
        title: Type
        type: string
    required: [name, llm_model]
    title: LLM Document Module Settings
    type: object
  # Embedding model configuration. No property is required; every property
  # declares a default.
  EmbeddingModelSettings:
    description: Defines settings for the embedding model during data and query flow.
    properties:
      # Short label as title, explanatory sentence as description, matching
      # the other properties of this definition.
      context_size:
        default: 512
        description: Context window size of the embedding model
        title: Context size
        type: integer
      default_prompt_name:
        default: query
        description: Prompt for the embedding.
        enum: [query, text]
        title: Prompt Name
        type: string
      dimension:
        default: 1024
        description: Dimension of the embedding
        title: Dimension
        type: integer
      emb_model_source:
        default: bedrock
        description: Where is the model from?
        enum: [huggingface, bedrock]
        title: Model Source
        type: string
      hnsw_kwargs:
        anyOf:
        - $ref: '#/$defs/HNSWSettings'
        - type: 'null'
        default: null
        description: HNSW settings
        title: HNSW settings
      model:
        default: cohere.embed-multilingual-v3
        description: Name of embedding model to use.
        minLength: 1
        title: Model
        type: string
      query_instruction:
        default: 'Represent this sentence for searching relevant passages:'
        description: Instruction for the query.
        title: Query Instruction
        type: string
    title: Embedding Model Settings
    type: object
  # Reranker variant identified by `type: first-n-reranker`.
  # `n` must be an integer greater than 0; nothing is required.
  FirstNRerankerFunction:
    type: object
    title: First N Reranker
    description: Reranker keeps the first N candidates and discards the rest.
    properties:
      type:
        type: string
        title: Type
        const: first-n-reranker
        default: first-n-reranker
      name:
        title: Name
        description: >-
          The name of the reranker function.
          This is used to identify the reranker functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      n:
        type: integer
        title: N
        description: Keep the first N candidates.
        exclusiveMinimum: 0
        default: 16
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # HNSW index parameters. No property is required; each one declares a
  # default. The description is a literal block, so its line break before the
  # URL is part of the value.
  HNSWSettings:
    type: object
    title: HNSW Graph Settings
    description: |-
      Defines settings for HNSW graph. See
      https://github.com/run-llama/llama_index/blob/977d60a058c691957dae3eb3c66c1894faea24ac/llama-index-integrations/vector_stores/llama-index-vector-stores-postgres/llama_index/vector_stores/postgres/base.py#L570
    properties:
      hnsw_m:
        type: integer
        title: m
        description: Max number of connections per layer.
        default: 16
      hnsw_ef_construction:
        type: integer
        title: ef_construction
        description: >-
          Size of the dynamic candidate list for constructing the graph.
          Higher value provides better recall at the cost of speed
        default: 64
      hnsw_ef_search:
        type: integer
        title: ef_search
        description: >-
          Size of the dynamic candidate list for search.
          Higher value provides better recall at the cost of speed.
        default: 40
      hnsw_dist_method:
        type: string
        title: dist_method
        description: >-
          Distance metric to use.
          Note that by default PGVectorStore._build_query calls cosine_distance
        enum:
        - vector_l2_ops
        - vector_ip_ops
        - vector_cosine_ops
        - vector_l1_ops
        - bit_hamming_ops
        - bit_jaccard_ops
        default: vector_cosine_ops
  # Pairs one tenant with its ordered list of institutions.
  # Both `tenant` and `affinities` are required.
  InstitutionAffinity:
    properties:
      affinities:
        description: The list of institutions starting from most to least affiliated.
          This field is case insensitive.
        items:
          type: string
        minItems: 1
        title: Affinities
        type: array
      # The tenant patterns avoid the `\:` and `\_` escapes: `:` needs no
      # escaping and `\w` already matches `_`. Both escapes are rejected by
      # regex engines running in ECMA-262 Unicode mode.
      tenant:
        anyOf:
        - description: The `cleo` app tenant applicable to this object.
          pattern: '^cleo:[a-zA-Z0-9][\w\-]*$'
          title: Cleo Tenant
          type: string
        - description: The `hospital` app tenant applicable to this object.
          pattern: '^hospital:[a-zA-Z0-9][\w\-]*$'
          title: Hospital Tenant
          type: string
        description: The tenant for which the priority order is defined.
        title: Tenant
    required: [tenant, affinities]
    title: InstitutionAffinity
    type: object
  # Phrase finder variant identified by `type: intent`.
  # Nothing is required; the three stage settings refer to shared definitions.
  IntentPhraseFinderSettings:
    type: object
    title: Intent Phrase Finder Settings
    description: >-
      Configure how we retrieve, score, and rank intent phrases
      in the module.
    properties:
      type:
        type: string
        title: Type
        const: intent
        default: intent
      retriever:
        $ref: '#/$defs/RetrieverSettings'
      scorer:
        $ref: '#/$defs/ScorerSettings'
        title: Scorer
        description: Configure how we score phrases.
      reranker:
        $ref: '#/$defs/RerankerSettings'
        title: Reranker
        description: Configure how we rerank phrases.
  # Retriever variant identified by `type: intent-phrase-retriever`.
  # No property is required. `tiered_fuzzy_match` may be set to null; when
  # the key is omitted, the default object below applies.
  IntentPhraseRetrieverFunction:
    type: object
    title: Intent Phrase Retriever
    description: Settings for retrieving intent phrases.
    properties:
      type:
        type: string
        title: Type
        const: intent-phrase-retriever
        default: intent-phrase-retriever
      name:
        title: Name
        description: >-
          The name of the retriever function.
          This is used to identify the retriever functions used.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      size:
        type: integer
        title: Size
        description: >-
          The `size` parameter to use when retrieving documents from the KB.
          Note that `scan` is not used.
        minimum: 0
        default: 128
      tiered_fuzzy_match:
        title: Tiered Fuzzy Match
        description: Configure how we perform fuzzy matching when retrieving phrases.
        anyOf:
        - $ref: '#/$defs/TieredFuzzyMatchSettings'
        - type: 'null'
        default:
          prefix_length: 2
          fuzzy_transpositions: true
          length_thresholds: [3, 6]
          scores: [0.8, 0.9, 1.0]
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # Query-time LLM configuration. `model` is the only required property.
  LLMModelSettings:
    description: Defines settings for the LLM model during query flow.
    properties:
      # Accepts an integer or null; defaults to null.
      context_size:
        anyOf:
        - type: integer
        - type: 'null'
        default: null
        description: Context size aka context length
        title: Context size
      # NOTE(review): the default is a literal block scalar (`|-`), so the
      # extra leading spaces on its second and later lines are part of the
      # value. This looks like leftover source indentation -- confirm it is
      # intended before relying on the exact text.
      context_template:
        default: |-
          You are a friendly and warm question-answer hospital assistant, employed as an employee of the hospital.
                  The only information you know is the context provided. You can use only that information as your hidden knowledge base.
                  When you answer, the context provided will be known as "my training."
                  If the context provided provides links, show it to the user.
        description: Template for the context
        title: Context Template
        type: string
      # NOTE(review): the default ARN embeds a specific AWS account id and
      # region; confirm that this is appropriate for every deployment.
      guardrail_identifier:
        default: arn:aws:bedrock:us-west-2:394252546268:guardrail/f83z6kx1d6hl
        description: Name of AWS guardrail if applicable. Should start with arn:aws:bedrock:us-west-2:.
        title: Guardrail Identifier
        type: string
      # Kept as a quoted string so that the version is not parsed as an integer.
      guardrail_version:
        default: '1'
        description: AWS Guardrail version (string) if applicable
        title: Guardrail Version
        type: string
      max_tokens:
        default: 1000
        description: Max tokens returned by LLM
        title: Max Tokens
        type: integer
      # Required, and must be a non-empty string (`minLength: 1`).
      model:
        description: Name of LLM as per HuggingFace
        minLength: 1
        title: Model
        type: string
      reranker_similarity_cutoff:
        default: 0
        description: Similarity score cutoff for node postprocessing based on reranker
          score
        title: Reranker Similarity Cutoff
        type: number
      similarity_cutoff:
        default: 0.38
        description: Similarity score cutoff for node postprocessing based on embedding
          score
        title: Similarity Cutoff
        type: number
      similarity_top_k:
        default: 6
        description: Number of nodes to return after retrieval
        title: Similarity top K
        type: integer
      # No default is declared and the property is not required.
      supported_language_codes:
        description: Restricts bot to only answer only these languages. Provide list
          of 2-letter language codes, and double-check that Amazon Comprehend / Translate
          supports them.
        items:
          type: string
        title: Supported language codes
        type: array
      system_prompt:
        default: Answer the QUERY below only using the DOCUMENTs below as context,
          and not your trained knowledge.
        description: Prompt for the LLM
        title: System Prompt
        type: string
      temperature:
        default: 0
        description: Temperature
        title: Temperature
        type: number
    required: [model]
    title: LLM Model Settings
    type: object
  # Ordered list of LLM postprocessor functions. `postprocessor_functions` is
  # required; each item must match exactly one of the two referenced function
  # definitions, and the discriminator maps its `type` value to one of them.
  LLMPostprocessorSettings:
    description: Configure which LLM postprocessor functions to apply.
    properties:
      postprocessor_functions:
        description: Postprocessor functions to be applied one after another in the
          model.
        items:
          discriminator:
            mapping:
              amazon-translator: '#/$defs/AmazonTranslatorLLMPostprocessorFunction'
              language-remover: '#/$defs/LanguageRemoverLLMPostprocessorFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/LanguageRemoverLLMPostprocessorFunction'
          - $ref: '#/$defs/AmazonTranslatorLLMPostprocessorFunction'
        title: Postprocessor Functions
        type: array
    required: [postprocessor_functions]
    title: LLM Postprocessor Settings
    type: object
  # Query feature variant identified by `type: language-detector`.
  # No property is required; all three declare defaults.
  LanguageDetectorQueryFeatureFunction:
    type: object
    title: Language Detector
    description: Language Detector Query Feature function.
    properties:
      type:
        type: string
        title: Type
        const: language-detector
        default: language-detector
      name:
        title: Name
        description: >-
          The name of the query feature function.
          This is used to identify the query feature functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      aws_region_name:
        type: string
        title: AWS region name
        description: AWS region name
        default: us-west-2
  # Preprocessor variant identified by `type: language-filter`.
  # No property is required; `supported_language_codes` declares no default.
  LanguageFilterPreprocessorFunction:
    type: object
    title: Language Filter
    description: Language Filter Preprocessor function.
    properties:
      type:
        type: string
        title: Type
        const: language-filter
        default: language-filter
      name:
        title: Name
        description: >-
          The name of the preprocessor function.
          This is used to identify the preprocessor functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      aws_region_name:
        type: string
        title: AWS region name
        description: AWS region name
        default: us-east-1
      supported_language_codes:
        type: array
        title: Supported language codes
        description: >-
          The language codes that are supported for this client.
          By default, if this is empty, all languages are supported.
          If a language is not supported, query will be nullified
        items:
          type: string
  # LLM postprocessor variant identified by `type: language-remover`.
  # `language_tag` is pinned by `const` to `query_language=`; nothing is
  # required.
  LanguageRemoverLLMPostprocessorFunction:
    type: object
    title: Language Remover
    description: Language Remover LLM Postprocessor function.
    properties:
      type:
        type: string
        title: Type
        const: language-remover
        default: language-remover
      name:
        title: Name
        description: >-
          The name of the LLM Postprocessor function.
          This is used to identify the LLM postprocessor functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      language_tag:
        type: string
        title: Language Tag
        const: query_language=
        default: query_language=
  # Reranker variant identified by `type: last-n-reranker`.
  # `n` must be an integer greater than 0; nothing is required.
  LastNRerankerFunction:
    type: object
    title: Last N Reranker
    description: Reranker keeps the last N candidates and discards the rest.
    properties:
      type:
        type: string
        title: Type
        const: last-n-reranker
        default: last-n-reranker
      name:
        title: Name
        description: >-
          The name of the reranker function.
          This is used to identify the reranker functions
          that has been applied.
        anyOf:
        - type: string
        - type: 'null'
        default: null
      n:
        type: integer
        title: N
        description: Keep the last N candidates.
        exclusiveMinimum: 0
        default: 16
      verbose:
        type: boolean
        title: Verbose
        description: Whether to log verbose debugging information.
        default: false
  # Reranker variant tagged `load-full-document-reranker`; it has no tuning
  # parameters beyond the shared `name`/`verbose` fields.
  LoadFullDocumentRerankerFunction:
    description: >-
      A pseudo-reranker that calls the `load` method on all candidate
      documents.
    properties:
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # Constant tag for this function kind.
      type:
        const: 'load-full-document-reranker'
        default: 'load-full-document-reranker'
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    title: Load Full Document Reranker
    type: object
  # Reranker variant tagged `max-cutoff-reranker`. `score_key` and `cutoff`
  # are mandatory; everything else has a default.
  MaxCutoffRerankerFunction:
    description: Maximum cutoff reranker function. All resulting candidates will have
      scores lesser or equal to the cutoff.
    properties:
      conditions:
        default: []
        description: Conditions for documents before we apply the reranker Documents
          that fail the condition will be passed through to the next reranker.
        items:
          $ref: '#/$defs/Condition'
        title: Conditions
        type: array
      # Either a plain number or a percentage string such as `12.5%`.
      cutoff:
        anyOf:
        - type: number
        - type: integer
        # `%` is written without a backslash: `\%` is not a valid escape for
        # ECMAScript regex engines in unicode mode, while a bare `%` matches
        # the same strings in every dialect.
        - pattern: '^\d+(\.\d+)?%$'
          type: string
        description: The score value to cut at. If specified as a percentage, the
          cutoff is calculated as a percentage of the minimum/maximum score.
        title: Cutoff
      keep_original_if_all_discarded:
        default: false
        description: If all documents are discarded, keep the original candidates.
        title: Keep Original If All Discarded
        type: boolean
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # Non-empty string (minLength of 1).
      score_key:
        description: The key in the score vector to use for the cutoff.
        minLength: 1
        title: Score Key
        type: string
      # Constant tag for this function kind.
      type:
        const: max-cutoff-reranker
        default: max-cutoff-reranker
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required: [score_key, cutoff]
    title: Max Cutoff Reranker
    type: object
  # Reranker variant tagged `min-cutoff-reranker`. `score_key` and `cutoff`
  # are mandatory; everything else has a default.
  MinCutoffRerankerFunction:
    description: Minimum cutoff reranker function. All resulting candidates will have
      scores greater or equal to the cutoff.
    properties:
      conditions:
        default: []
        description: Conditions for documents before we apply the reranker Documents
          that fail the condition will be passed through to the next reranker.
        items:
          $ref: '#/$defs/Condition'
        title: Conditions
        type: array
      # Either a plain number or a percentage string such as `12.5%`.
      cutoff:
        anyOf:
        - type: number
        - type: integer
        # `%` is written without a backslash: `\%` is not a valid escape for
        # ECMAScript regex engines in unicode mode, while a bare `%` matches
        # the same strings in every dialect.
        - pattern: '^\d+(\.\d+)?%$'
          type: string
        description: The score value to cut at. If specified as a percentage, the
          cutoff is calculated as a percentage of the minimum/maximum score.
        title: Cutoff
      keep_original_if_all_discarded:
        default: false
        description: If all documents are discarded, keep the original candidates.
        title: Keep Original If All Discarded
        type: boolean
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # Non-empty string (minLength of 1).
      score_key:
        description: The key in the score vector to use for the cutoff.
        minLength: 1
        title: Score Key
        type: string
      # Constant tag for this function kind.
      type:
        const: min-cutoff-reranker
        default: min-cutoff-reranker
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required: [score_key, cutoff]
    title: Min Cutoff Reranker
    type: object
  # Module variant tagged `noop`; both properties default to the string `noop`.
  NoopModuleSettings:
    description: Noop module settings.
    properties:
      name:
        default: 'noop'
        description: >-
          The name of the module. This should be unique within the query
          engine.
        title: Name
        type: string
      # Constant tag for this module kind.
      type:
        const: 'noop'
        default: 'noop'
        title: Type
        type: string
    title: Noop Module Settings
    type: object
  # Reranker variant tagged `order-by-reranker`; `order_by` is the only
  # mandatory property.
  OrderByRerankerFunction:
    description: Reorder the candidates based on scoring keys.
    properties:
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # At least one entry, and each entry must be a non-empty string.
      order_by:
        description: >-
          The scoring fields to order the candidates by. Prefix with a `+/-`
          for ascending/descending order. You can also access `sv`, `doc`, and
          `source` fields using JMESPath syntax, e.g.,
          `sv.missed_tokens_count`, `doc.name`, `source.phone`, etc.
        items:
          minLength: 1
          type: string
        minItems: 1
        title: Order By
        type: array
      # Constant tag for this function kind.
      type:
        const: 'order-by-reranker'
        default: 'order-by-reranker'
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required:
    - order_by
    title: Order By Reranker
    type: object
  # Scorer variant tagged `phrase-scorer`; no property is required.
  PhraseScorerFunction:
    description: Settings for a phrase scorer.
    properties:
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the scorer function. This is used to identify the
          scorer functions that has been applied.
        title: Name
      # Constant tag for this function kind.
      type:
        const: 'phrase-scorer'
        default: 'phrase-scorer'
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    title: Phrase Scorer
    type: object
  # Container for the ordered list of query preprocessor functions.
  # `preprocessor_functions` is mandatory.
  PreprocessorSettings:
    description: Configure how we preprocess queries.
    properties:
      preprocessor_functions:
        description: Preprocessor functions to be applied one after another.
        items:
          # Each item is exactly one of the listed variants; the `type`
          # property selects which one.
          discriminator:
            mapping:
              language-filter: '#/$defs/LanguageFilterPreprocessorFunction'
              truncate: '#/$defs/TruncatePreprocessorFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/TruncatePreprocessorFunction'
          - $ref: '#/$defs/LanguageFilterPreprocessorFunction'
        title: Preprocessor Functions
        type: array
    required: [preprocessor_functions]
    # Title names this definition (it previously read "Scorer Settings",
    # duplicating the title of ScorerSettings).
    title: Preprocessor Settings
    type: object
  # Container for the ordered list of query feature functions.
  # `query_feature_functions` is mandatory.
  QueryFeatureSettings:
    description: Configure how we generate query features.
    properties:
      query_feature_functions:
        description: Query Feature functions to be applied one after another.
        items:
          # Only one variant is currently listed; the `type` property
          # selects it.
          discriminator:
            mapping:
              language-detector: '#/$defs/LanguageDetectorQueryFeatureFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/LanguageDetectorQueryFeatureFunction'
        title: Query Feature Functions
        type: array
    required: [query_feature_functions]
    # Title names this definition (it previously read "Scorer Settings",
    # duplicating the title of ScorerSettings).
    title: Query Feature Settings
    type: object
  # Container for the ordered list of reranker functions.
  # `reranker_functions` is mandatory; `verbose` defaults to false.
  RerankerSettings:
    description: Configure how we rerank documents.
    properties:
      reranker_functions:
        description: Rerankers are executed one after the other.
        items:
          # Each item is exactly one of the listed variants; the `type`
          # property selects which one.
          discriminator:
            mapping:
              answer-field-reranker: '#/$defs/AnswerFieldRerankerFunction'
              discard-or-keep-information-reranker: '#/$defs/DiscardOrKeepInformationRerankerFunction'
              first-n-reranker: '#/$defs/FirstNRerankerFunction'
              last-n-reranker: '#/$defs/LastNRerankerFunction'
              load-full-document-reranker: '#/$defs/LoadFullDocumentRerankerFunction'
              max-cutoff-reranker: '#/$defs/MaxCutoffRerankerFunction'
              min-cutoff-reranker: '#/$defs/MinCutoffRerankerFunction'
              order-by-reranker: '#/$defs/OrderByRerankerFunction'
              reverse-reranker: '#/$defs/ReverseRerankerFunction'
              top-k-reranker: '#/$defs/TopKRerankerFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/MinCutoffRerankerFunction'
          - $ref: '#/$defs/MaxCutoffRerankerFunction'
          - $ref: '#/$defs/TopKRerankerFunction'
          - $ref: '#/$defs/LoadFullDocumentRerankerFunction'
          - $ref: '#/$defs/OrderByRerankerFunction'
          - $ref: '#/$defs/FirstNRerankerFunction'
          - $ref: '#/$defs/LastNRerankerFunction'
          - $ref: '#/$defs/ReverseRerankerFunction'
          - $ref: '#/$defs/AnswerFieldRerankerFunction'
          - $ref: '#/$defs/DiscardOrKeepInformationRerankerFunction'
        title: Reranker Functions
        type: array
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required: [reranker_functions]
    # Title names this definition (it previously read "Scorer Settings",
    # duplicating the title of ScorerSettings).
    title: Reranker Settings
    type: object
  # Per answer type/field weighting entry. `answer_type` and `answer_field`
  # are mandatory; `multiplier` and `weight` have defaults.
  RetrievalWeightsSettings:
    description: Settings for retrieval weights.
    properties:
      answer_field:
        description: The field in the answer where the object is used.
        title: Answer Field
        type: string
      # Restricted to the enumerated answer types below.
      answer_type:
        description: The answer type where the object is used.
        enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
        title: Answer Type
        type: string
      multiplier:
        default: 1
        description: Multiply to the weight.
        title: Multiplier
        type: number
      # Either a literal number or one of the enumerated scoring-key names.
      weight:
        anyOf:
        - type: number
        - enum:
          - phrase_tokens_count
          - matched_tokens_score
          - missed_tokens_score
          - matched_tokens_count
          - missed_tokens_count
          - matched_tokens_precision
          - missed_tokens_precision
          type: string
        default: matched_tokens_precision
        description: >-
          The weight to assign to the field. It can also be one of the
          supported phrase scoring keys or a constant weight value.
        title: Weight
    required:
    - answer_type
    - answer_field
    title: Retrieval Weights
    type: object
  # Container for retriever functions. `retriever_functions` is mandatory and
  # must hold at least one entry (minItems of 1).
  RetrieverSettings:
    description: Configure how we retrieve documents.
    properties:
      retriever_functions:
        description: >-
          Retrievers are executed concurrently and then concatenated after.
        items:
          # Each item is exactly one of the listed variants; the `type`
          # property selects which one.
          discriminator:
            mapping:
              answer-phrase-retriever: '#/$defs/AnswerPhraseRetrieverFunction'
              answer-retriever: '#/$defs/AnswerRetrieverFunction'
              intent-phrase-retriever: '#/$defs/IntentPhraseRetrieverFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/AnswerPhraseRetrieverFunction'
          - $ref: '#/$defs/IntentPhraseRetrieverFunction'
          - $ref: '#/$defs/AnswerRetrieverFunction'
        minItems: 1
        title: Retriever Functions
        type: array
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required:
    - retriever_functions
    title: Retriever Settings
    type: object
  # Reranker variant tagged `reverse-reranker`; no property is required.
  ReverseRerankerFunction:
    description: Reranker reverses the order of the candidates.
    properties:
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # Constant tag for this function kind.
      type:
        const: 'reverse-reranker'
        default: 'reverse-reranker'
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    title: Reverse Reranker
    type: object
  # Container for scorer functions. `scorer_functions` is mandatory and must
  # hold at least one entry (minItems of 1).
  ScorerSettings:
    description: Configure how we score documents.
    properties:
      scorer_functions:
        description: >-
          Scorer functions are executed concurrently and then merged together
          after.
        items:
          # Each item is exactly one of the listed variants; the `type`
          # property selects which one.
          discriminator:
            mapping:
              answer-scorer: '#/$defs/AnswerScorerFunction'
              directory-answer-scorer: '#/$defs/DirectoryAnswerScorerFunction'
              phrase-scorer: '#/$defs/PhraseScorerFunction'
            propertyName: type
          oneOf:
          - $ref: '#/$defs/PhraseScorerFunction'
          - $ref: '#/$defs/AnswerScorerFunction'
          - $ref: '#/$defs/DirectoryAnswerScorerFunction'
        minItems: 1
        title: Scorer Functions
        type: array
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required:
    - scorer_functions
    title: Scorer Settings
    type: object
  # Module variant tagged `standard-ir`. `name` and `module_responder` are
  # mandatory; the finder, stopword and synonym sections are optional.
  StandardIRModuleSettings:
    description: Settings for our standard IR-based module.
    properties:
      answer_finder:
        $ref: '#/$defs/AnswerFinderSettings'
        description: Configure how we retrieve, score, and rank answers.
        title: Answer Finder
      answer_phrase_finders:
        description: >-
          Configure how we retrieve, score, and rank answer phrases. This is
          done per answer type/field. Multiple finders for each answer
          type/field is supported.
        items:
          $ref: '#/$defs/AnswerPhraseFinderSettings'
        title: Answer Phrase Finders
        type: array
      intent_phrase_finder:
        $ref: '#/$defs/IntentPhraseFinderSettings'
        description: Configure how we retrieve, score, and rank intent phrases.
        title: Intent Phrase Finder
      # Each intent must be a non-empty string.
      intents:
        description: The intents will likely trigger this module.
        items:
          description: The intent phrase.
          minLength: 1
          title: Intent
          type: string
        title: Intents
        type: array
      # Accepts either of the two responder definitions referenced below.
      module_responder:
        anyOf:
        - $ref: '#/$defs/DirectoryIRModuleResponderSettings'
        - $ref: '#/$defs/ClinicalTrialIRModuleResponderSettings'
        description: >-
          The responder to use for this module. Module responders evaluate all
          the candidate answers to form a cohesive response for a module.
        title: Module Responder
      module_stopwords:
        description: Stopwords used in this module.
        items:
          $ref: '#/$defs/StopwordSettings'
        title: Module Stopwords
        type: array
      module_synonyms:
        description: Synonyms used in this module.
        items:
          $ref: '#/$defs/SynonymSettings'
        title: Module Synonyms
        type: array
      name:
        description: >-
          The name of the module. This should be unique within the query
          engine.
        title: Name
        type: string
      # Constant tag for this module kind.
      type:
        const: 'standard-ir'
        default: 'standard-ir'
        title: Type
        type: string
    required:
    - name
    - module_responder
    title: Standard IR Module Settings
    type: object
  # Stopword entry scoped to one answer type/field; all three properties are
  # mandatory.
  StopwordSettings:
    description: Configure stopwords on a per answer type/field level.
    properties:
      answer_field:
        description: The field in the answer where the object is used.
        title: Answer Field
        type: string
      # Restricted to the enumerated answer types below.
      answer_type:
        description: The answer type where the object is used.
        enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
        title: Answer Type
        type: string
      # Either the literal set name `en-standard` or a list of non-empty
      # strings.
      # NOTE(review): maxLength/minLength only constrain string values, so
      # they place no bound on the array branch - confirm whether
      # minItems/maxItems was intended.
      stopwords:
        anyOf:
        - const: 'en-standard'
          type: string
        - items:
            minLength: 1
            type: string
          type: array
        description: >-
          The stopwords for this answer type/field. It can be a list of
          phrases or a stopword set name.
        maxLength: 512
        minLength: 1
        title: Stopwords
    required:
    - answer_type
    - answer_field
    - stopwords
    title: Stopword Settings
    type: object
  # Synonym entry scoped to one answer type/field. `answer_type`,
  # `answer_field` and `synonyms` are mandatory.
  SynonymSettings:
    description: Configure synonyms on a per answer type/field level.
    properties:
      answer_field:
        description: The field in the answer where the object is used.
        title: Answer Field
        type: string
      # Restricted to the enumerated answer types below.
      answer_type:
        description: The answer type where the object is used.
        enum:
        - clinical-trial
        - dialogue
        - directory
        - document
        - document-snippet
        - monograph
        - roster
        - test
        title: Answer Type
        type: string
      # Non-negative integer; defaults to 3.
      synonym_expansion_rounds:
        default: 3
        description: >-
          The number of rounds to expand the synonyms. Each round will
          generate more candidate synonyms.
        minimum: 0
        title: Synonym Expansion Rounds
        type: integer
      # Non-empty list; each group is either a mapping (one-way) or a list
      # of at least two phrases (two-way).
      synonyms:
        description: >-
          The synonyms for this answer type/field as a list of synonym
          groups.
        items:
          anyOf:
          - additionalProperties:
              description: Synonyms of the first phrase.
              items:
                type: string
              minItems: 1
              type: array
            description: >-
              In a one-way synonym group, all phrases are synonyms of the
              first phrase but not the other way round.
            minProperties: 1
            title: One Way Synonym Group
            type: object
          - description: >-
              In a two-way synonym group, all pairs of phrases are synonyms
              of each other and can be used interchangeably.
            items:
              description: A Single synonym phrase.
              minLength: 1
              title: Synonym
              type: string
            minItems: 2
            title: Two Way Synonym Group
            type: array
          description: Describes how synonyms are generated
          title: Synonym Group
        minItems: 1
        title: Synonyms
        type: array
    required:
    - answer_type
    - answer_field
    - synonyms
    title: Synonym Settings
    type: object
  # Fuzzy-matching knobs; every property has a default, none is required.
  TieredFuzzyMatchSettings:
    description: Settings for tiered fuzzy matching.
    properties:
      fuzzy_transpositions:
        default: true
        description: Whether to allow transpositions in fuzzy matching. See https://opensearch.org/docs/latest/query-dsl/full-text/match/#transpositions
        title: Fuzzy Transpositions
        type: boolean
      # Exactly two integers (minItems and maxItems are both 2).
      # NOTE(review): `prefixItems` is a JSON Schema 2020-12 keyword, while the
      # root `$schema` of this file declares draft-07 - confirm the validator
      # dialect actually enforces the per-position constraints.
      length_thresholds:
        default: [3, 6]
        description: "A tuple `[a, b]` where string lengths in [0, a] do not have\
          \ fuzzy matching, lengths in (a, b] allow 1 Levenshtein distance, and lengths\
          \ (b, \u221E) allow 2 Levenshtein distances."
        maxItems: 2
        minItems: 2
        prefixItems:
        - minimum: 0
          type: integer
        - exclusiveMinimum: 0
          type: integer
        title: Length Thresholds
        type: array
      prefix_length:
        default: 2
        description: The length of the prefix to use for fuzzy matching. See https://opensearch.org/docs/latest/query-dsl/full-text/match/#prefix-length
        minimum: 0
        title: Prefix Length
        type: integer
      # Exactly three numbers (minItems and maxItems are both 3). The
      # description names all three positions `[a, b, c]`, matching the
      # three-element default.
      scores:
        default: [0.8, 0.9, 1.0]
        description: A tuple `[a, b, c]` where a is the score for 2 Levenshtein distance,
          b is the score for 1 Levenshtein distances, and c is the score for exact
          matches.
        maxItems: 3
        minItems: 3
        prefixItems:
        - type: number
        - type: number
        - type: number
        title: Match Scores
        type: array
    title: TieredFuzzyMatchSettings
    type: object
  # Reranker variant tagged `top-k-reranker`; `score_key` is the only
  # mandatory property.
  TopKRerankerFunction:
    description: >-
      Reranker keeps the top K candidates based on distinct score value and
      discards the rest.
    properties:
      # Strictly positive integer (exclusiveMinimum of 0); defaults to 1.
      k:
        default: 1
        description: Keep the top K candidates.
        exclusiveMinimum: 0
        title: K
        type: integer
      keep_ties:
        default: true
        description: >-
          Whether to keep candidates whose score tie for top K. Defaults to
          `true`.
        title: Keep Ties
        type: boolean
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the reranker function. This is used to identify the
          reranker functions that has been applied.
        title: Name
      # Non-empty string (minLength of 1).
      score_key:
        description: The key in the score vector to use for the cutoff.
        minLength: 1
        title: Score Key
        type: string
      # Constant tag for this function kind.
      type:
        const: 'top-k-reranker'
        default: 'top-k-reranker'
        title: Type
        type: string
      verbose:
        default: false
        description: Whether to log verbose debugging information.
        title: Verbose
        type: boolean
    required:
    - score_key
    title: Top K Reranker
    type: object
  # Preprocessor variant tagged `truncate`; no property is required.
  TruncatePreprocessorFunction:
    description: Truncate Preprocessor function.
    properties:
      # Integer with a default of 1000; the schema sets no lower bound.
      max_length:
        default: 1000
        description: Maximum length of query string
        title: Max length
        type: integer
      # Nullable label; defaults to null.
      name:
        anyOf:
        - type: string
        - type: 'null'
        default: null
        description: >-
          The name of the preprocessor function. This is used to identify
          the preprocessor functions that has been applied.
        title: Name
      # Constant tag for this function kind.
      type:
        const: 'truncate'
        default: 'truncate'
        title: Type
        type: string
    title: Truncate
    type: object
# Root object of the schema; `name` is the only mandatory property.
# NOTE(review): `$schema` declares draft-07, yet this file relies on `$defs`
# and `prefixItems`, which were introduced in later JSON Schema drafts -
# confirm which dialect the consuming validator applies.
$id: 'https://skeleton.botmd.io/jarvisv2/QueryEngineSettings.schema'
$schema: 'http://json-schema.org/draft-07/schema#'
description: >-
  This contains all the settings needed for the Jarvis v2 query engine.
properties:
  enabled:
    default: true
    description: Whether this query engine is enabled.
    title: Enabled
    type: boolean
  # At least one module when present (minItems of 1); each item is exactly
  # one of the listed module variants, selected by its `type` property.
  modules:
    description: The modules to be used in the query engine.
    items:
      discriminator:
        mapping:
          agentic-module: '#/$defs/AgenticModuleSettings'
          dialogue-llm: '#/$defs/CleoDialogueLLMModuleSettings'
          document-llm: '#/$defs/DocumentLLMModuleSettings'
          noop: '#/$defs/NoopModuleSettings'
          standard-ir: '#/$defs/StandardIRModuleSettings'
        propertyName: type
      oneOf:
      - $ref: '#/$defs/StandardIRModuleSettings'
      - $ref: '#/$defs/DocumentLLMModuleSettings'
      - $ref: '#/$defs/CleoDialogueLLMModuleSettings'
      - $ref: '#/$defs/AgenticModuleSettings'
      - $ref: '#/$defs/NoopModuleSettings'
    minItems: 1
    title: Modules
    type: array
  name:
    description: The name of the query engine. This should be globally unique.
    title: Name
    type: string
  # Free-form string; falls back to `unknown` when omitted.
  version:
    default: 'unknown'
    description: >-
      Version of this query engine based on the Kondo resource version.
    title: Version
    type: string
required:
- name
title: Query Engine Settings
type: object

[Main Page] [Schema Documentation] [Examples]