# From TestSuite.schema.yaml (jarvis_tf/TestSuite.schema)
---
# Reusable sub-schemas; the rest of this document points at them with
# `$ref: '#/$defs/<Name>'`.
$defs:
# AnswerRelevancyAssert: assert variant tagged `type: answer_relevancy`.
# Only `llm_settings` is required; the score bounds and metric kwargs are optional.
AnswerRelevancyAssert:
description: 'Implements AnswerRelevancy from Deepeval: https://docs.confident-ai.com/docs/metrics-answer-relevancy'
properties:
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: answer_relevancy
default: answer_relevancy
title: Type
type: string
required: [llm_settings]
title: Answer Relevancy Assert
type: object
# CleoQuerySessionOptions: referenced by the `cleo` property of
# QuerySessionOptions. Every property is an array of strings and the schema
# declares no `required` list, so all of them are optional.
CleoQuerySessionOptions:
properties:
delivery_uid:
items:
type: string
title: Delivery Uid
type: array
device_uid:
items:
type: string
title: Device Uid
type: array
heimdall_session_uid:
items:
type: string
title: Heimdall Session Uid
type: array
user_uid:
items:
type: string
title: User Uid
type: array
title: CleoQuerySessionOptions
type: object
# ContextualPrecisionAssert: assert variant tagged `type: contextual_precision`.
# Requires both `llm_settings` and a non-empty `expected_output`.
ContextualPrecisionAssert:
description: 'Implements ContextualPrecision from Deepeval: https://docs.confident-ai.com/docs/metrics-contextual-precision'
properties:
# Reference answer to evaluate against; must be at least one character long.
expected_output:
description: The expected output for the query which we will evaluate against.
minLength: 1
title: Expected Output
type: string
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: contextual_precision
default: contextual_precision
title: Type
type: string
required: [llm_settings, expected_output]
title: Contextual Precision Assert
type: object
# ContextualRecallAssert: assert variant tagged `type: contextual_recall`.
# Requires both `llm_settings` and a non-empty `expected_output`.
ContextualRecallAssert:
description: 'Implements ContextualRecall from Deepeval: https://docs.confident-ai.com/docs/metrics-contextual-recall'
properties:
# Reference answer to evaluate against; must be at least one character long.
expected_output:
description: The expected output for the query which we will evaluate against.
minLength: 1
title: Expected Output
type: string
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: contextual_recall
default: contextual_recall
title: Type
type: string
required: [llm_settings, expected_output]
title: Contextual Recall Assert
type: object
# ContextualRelevancyAssert: assert variant tagged `type: contextual_relevancy`.
# Only `llm_settings` is required.
ContextualRelevancyAssert:
description: 'Implements ContextualRelevancy from Deepeval: https://docs.confident-ai.com/docs/metrics-contextual-relevancy'
properties:
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: contextual_relevancy
default: contextual_relevancy
title: Type
type: string
required: [llm_settings]
title: Contextual Relevancy Assert
type: object
# CorrectnessAssert: assert variant tagged `type: correctness`.
# Requires both `llm_settings` and a non-empty `expected_output`.
CorrectnessAssert:
description: 'Implements Correctness from Deepeval: https://docs.confident-ai.com/docs/guides-answer-correctness-metric#:~:text=Answer%20Correctness%20(or%20Correctness)%20is,0%20indicating%20an%20incorrect%20one.'
properties:
# Reference answer to evaluate against; must be at least one character long.
expected_output:
description: The expected output for the query which we will evaluate against.
minLength: 1
title: Expected Output
type: string
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: correctness
default: correctness
title: Type
type: string
required: [llm_settings, expected_output]
title: Correctness Assert
type: object
# EinsteinQuerySessionOptions: referenced by the `einstein` property of
# QuerySessionOptions. No `required` list is declared, so every property is optional.
EinsteinQuerySessionOptions:
properties:
document_ids:
items:
type: string
title: Document Ids
type: array
# Restricted to one of the two listed mode names.
llm_search_mode:
enum: [retrieval, search_summarize]
title: Llm Search Mode
type: string
user_acl_dnfs:
items:
type: string
title: User Acl Dnfs
type: array
title: EinsteinQuerySessionOptions
type: object
# FaithfulnessAssert: assert variant tagged `type: faithfulness`.
# Only `llm_settings` is required.
FaithfulnessAssert:
description: 'Implements Faithfulness from Deepeval: https://docs.confident-ai.com/docs/metrics-faithfulness'
properties:
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: faithfulness
default: faithfulness
title: Type
type: string
required: [llm_settings]
title: Faithfulness Assert
type: object
# HallucinationAssert: assert variant tagged `type: hallucination`.
# Requires `llm_settings` and a non-empty `context` list.
HallucinationAssert:
description: 'Implements Hallucination from Deepeval: https://docs.confident-ai.com/docs/metrics-hallucination'
properties:
# At least one entry, and each entry must be a non-empty string.
context:
description: The context to evaluate hallucination against.
items:
minLength: 1
type: string
minItems: 1
title: Context
type: array
# Evaluation LLM configuration; shape is defined by LLMModelSettings.
llm_settings:
$ref: '#/$defs/LLMModelSettings'
description: Settings for the evaluation LLM to process LLM metrics.
title: Evaluation LLM settings
# Upper score bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum metric score required. Defaults to `None` meaning
there is no upper bound.
title: Maximum
# Open-ended object (additionalProperties: true) of extra metric arguments.
metric_kwargs:
additionalProperties: true
description: Additional metric keyword arguments that you supply to the metric
class
title: Metric Keyword args
type: object
# Lower score bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum metric score required. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: hallucination
default: hallucination
title: Type
type: string
required: [llm_settings, context]
title: Hallucination Assert
type: object
# HospitalProfileMetadata: referenced by JarvisProfile.hospital.
# Holds a single optional field; nothing is required.
HospitalProfileMetadata:
description: Profile metadata associated with a hospital user.
properties:
# Either a string or null; null is the default.
institution:
anyOf:
- type: string
- type: 'null'
default: null
description: The institution that the user belongs to. This is usually similar
to the tenant
title: Institution
title: HospitalProfileMetadata
type: object
# InverseRegexesAssert: assert variant tagged `type: inverse_regexes`.
# No `required` list is declared, so `regexes` and `substrings` may both be omitted.
InverseRegexesAssert:
description: Assert that one or more regular expressions don't match.
properties:
case_sensitive:
default: false
description: Whether the regexes should be case sensitive.
title: Case Sensitive
type: boolean
# Either an integer strictly greater than 0 or the literal string `all`
# (the default).
minimum_should_not_match:
anyOf:
- exclusiveMinimum: 0
type: integer
- const: all
type: string
default: all
description: The minimum number of regexes or substrings that should not match
message text. If set to `all`, it must not match any of the regexes. Defaults
to `all`.
title: Minimum Should Not Match
regexes:
description: The list of regexes for the assert.
items:
type: string
title: Regexes
type: array
substrings:
description: The list of substrings for the assert.
items:
type: string
title: Substrings
type: array
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: inverse_regexes
default: inverse_regexes
title: Type
type: string
title: Inverse Regexes Assert
type: object
# JarvisProfile: one of the shapes accepted by TestQuery.profile.
# Only `tenant` is required.
JarvisProfile:
description: Profile for Jarvis V2.
properties:
# Optional hospital metadata object, or null (the default).
hospital:
anyOf:
- $ref: '#/$defs/HospitalProfileMetadata'
- type: 'null'
default: null
description: Profile metadata associated with a hospital user.
title: Hospital
# Must match one of two prefixed patterns, `cleo:<id>` or `hospital:<id>`;
# in both, <id> has to start with an ASCII letter or digit.
tenant:
anyOf:
- description: The `cleo` app tenant applicable to this object.
pattern: ^cleo\:[a-zA-Z0-9][\w\-\_]*$
title: Cleo Tenant
type: string
- description: The `hospital` app tenant applicable to this object.
pattern: ^hospital\:[a-zA-Z0-9][\w\-\_]*$
title: Hospital Tenant
type: string
description: The tenant of this profile.
title: Tenant
required: [tenant]
title: JarvisProfile
type: object
# JarvisV1Endpoint: endpoint variant tagged `type: v1` (the root `endpoint`
# property discriminates on `type`). No property is required.
JarvisV1Endpoint:
description: Describes the endpoint for Jarvis V1.
properties:
# A URI string of 1 to 2083 characters, or null (the default).
endpoint:
anyOf:
- format: uri
maxLength: 2083
minLength: 1
type: string
- type: 'null'
default: null
description: The HTTPS endpoint for Jarvis.
title: Endpoint URL
# Seconds, per the description; the schema sets no lower bound on the value.
http_timeout:
default: 10
description: The timeout in seconds for HTTP requests.
title: HTTP Timeout
type: number
# Integer with default 10; the schema sets no lower bound on the value.
max_concurrency:
default: 10
description: The maximum number of concurrent queries to run. Defaults to
10.
title: Max Concurrency
type: integer
# Tag value fixed by `const`.
type:
const: v1
default: v1
title: Type
type: string
title: Jarvis V1 Endpoint
type: object
# JarvisV1Profile: one of the shapes accepted by TestQuery.profile.
# Only `organization_key` is required.
JarvisV1Profile:
description: Profile for Jarvis V1.
properties:
department:
default: ''
description: The department of the profile user
title: Department
type: string
designation:
default: ''
description: The designation of the profile user
title: Designation
type: string
# NOTE(review): the title below reads `Hospital` although the key is
# `organization_key` - confirm this is intended.
organization_key:
description: The organization key associated with the hospital profile.
minLength: 1
title: Hospital
type: string
tags:
description: The tags associated with the profile.
items:
type: string
title: Tags
type: array
# The default is the literal string `undefined`, not a null value.
uid:
default: undefined
description: The unique identifier for the profile.
minLength: 1
title: UID
type: string
required: [organization_key]
title: Jarvis V1 Profile
type: object
# JarvisV2Endpoint: endpoint variant tagged `type: v2` (the root `endpoint`
# property discriminates on `type`). Only `query_engine` is required.
JarvisV2Endpoint:
description: Describes the endpoint for Jarvis V2.
properties:
# Seconds, per the description; default 60.
async_timeout:
default: 60
description: The timeout in seconds for async queries. Only applicable when
`use_async` is True.
title: Async Timeout
type: number
# A URI string of 1 to 2083 characters, or null (the default).
endpoint:
anyOf:
- format: uri
maxLength: 2083
minLength: 1
type: string
- type: 'null'
default: null
description: The HTTPS endpoint for Jarvis.
title: Endpoint URL
# Seconds, per the description; default 10.
http_timeout:
default: 10
description: The timeout in seconds for HTTP requests.
title: HTTP Timeout
type: number
# Optional index name: a string, or null (the default).
index_name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the index to run the query against.
title: Index Name
max_concurrency:
default: 10
description: The maximum number of concurrent queries to run. Defaults to
10.
title: Max Concurrency
type: integer
# The only required property; any string is accepted by this schema.
query_engine:
description: The query engine to use.
title: Query Engine
type: string
# Tag value fixed by `const`.
type:
const: v2
default: v2
title: Type
type: string
use_async:
default: true
description: Whether to use the async query flow. This is only applicable
when querying against a remote server. Local queries are async by default.
Defaults to `true`.
title: Use Async
type: boolean
required: [query_engine]
title: Jarvis V2 Endpoint
type: object
# LLMModelSettings: LLM configuration referenced by the `llm_settings` property
# of the metric asserts. Only `model` is required; every other property has a
# default or is optional.
# NOTE(review): the `supported_language_codes` description wording was corrected
# here (duplicated "only", missing article). If this schema is generated from
# model code, apply the same fix at the source - TODO confirm.
LLMModelSettings:
description: Defines settings for the LLM model during query flow.
properties:
# Optional integer, or null (the default).
context_size:
anyOf:
- type: integer
- type: 'null'
default: null
description: Context size aka context length
title: Context size
# The default is a multi-line literal block; `|-` strips its trailing newline.
context_template:
default: |-
You are a friendly and warm question-answer hospital assistant, employed as an employee of the hospital.
The only information you know is the context provided. You can use only that information as your hidden knowledge base.
When you answer, the context provided will be known as "my training."
If the context provided provides links, show it to the user.
description: Template for the context
title: Context Template
type: string
# The default is a full guardrail ARN; the schema itself enforces no pattern.
guardrail_identifier:
default: arn:aws:bedrock:us-west-2:394252546268:guardrail/f83z6kx1d6hl
description: Name of AWS guardrail if applicable. Should start with arn:aws:bedrock:us-west-2:.
title: Guardrail Identifier
type: string
# Quoted so the default stays the string '1' rather than an integer.
guardrail_version:
default: '1'
description: AWS Guardrail version (string) if applicable
title: Guardrail Version
type: string
max_tokens:
default: 1000
description: Max tokens returned by LLM
title: Max Tokens
type: integer
# The only required property; must be a non-empty string.
model:
description: Name of LLM as per HuggingFace
minLength: 1
title: Model
type: string
reranker_similarity_cutoff:
default: 0
description: Similarity score cutoff for node postprocessing based on reranker
score
title: Reranker Similarity Cutoff
type: number
similarity_cutoff:
default: 0.38
description: Similarity score cutoff for node postprocessing based on embedding
score
title: Similarity Cutoff
type: number
similarity_top_k:
default: 6
description: Number of nodes to return after retrieval
title: Similarity top K
type: integer
# Array of strings; the 2-letter format named in the description is not
# enforced by this schema.
supported_language_codes:
description: Restricts bot to answer only in these languages. Provide a list
of 2-letter language codes, and double-check that Amazon Comprehend / Translate
supports them.
items:
type: string
title: Supported language codes
type: array
system_prompt:
default: Answer the QUERY below only using the DOCUMENTs below as context,
and not your trained knowledge.
description: Prompt for the LLM
title: System Prompt
type: string
temperature:
default: 0
description: Temperature
title: Temperature
type: number
required: [model]
title: LLM Model Settings
type: object
# MessageTypeAssert: assert variant tagged `type: message_type`.
# Only `message_types` is required.
MessageTypeAssert:
description: Assert that a messages response has a certain type.
properties:
any_order:
default: false
description: If `True`, the message types can be in any order.
title: Any Order
type: boolean
match_all:
default: true
description: If `True`, all message types should be present.
title: Match All
type: boolean
# At least one entry, and each entry must be a non-empty string.
message_types:
description: The message types that we want to assert.
items:
minLength: 1
type: string
minItems: 1
title: Message Types
type: array
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: message_type
default: message_type
title: Type
type: string
required: [message_types]
title: Message Type Assert
type: object
# QueryElapsedTimeAssert: assert variant tagged `type: query_elapsed_time`.
# No property is required. NOTE(review): the time unit is not stated in this
# schema - confirm against the runner.
QueryElapsedTimeAssert:
description: Assert that a query took less than a certain time.
properties:
# Upper bound: a number >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: number
- type: 'null'
default: null
description: The maximum amount of time that a query took. Defaults to `None`
meaning there is no upper bound.
title: Maximum
# Lower bound: a number >= 0, defaulting to 0.0.
minimum:
default: 0.0
description: The minimum amount of time that a query took. Defaults to 0.
minimum: 0
title: Minimum
type: number
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: query_elapsed_time
default: query_elapsed_time
title: Type
type: string
title: Query Elapsed Time Assert
type: object
# QuerySessionOptions: referenced by TestQuery.query_session_options.
# No `required` list is declared, so every property is optional.
QuerySessionOptions:
properties:
cleo:
$ref: '#/$defs/CleoQuerySessionOptions'
einstein:
$ref: '#/$defs/EinsteinQuerySessionOptions'
# The second anyOf branch accepts any string, so a value that is not a
# date-time still passes this schema.
override_now:
anyOf:
- format: date-time
type: string
- type: string
title: Override Now
raise_module_exceptions:
title: Raise Module Exceptions
type: boolean
title: QuerySessionOptions
type: object
# RegexesAssert: assert variant tagged `type: regexes`.
# No `required` list is declared, so `regexes` and `substrings` may both be omitted.
RegexesAssert:
description: Assert that one or more regular expressions match.
properties:
case_sensitive:
default: false
description: Whether the regexes should be case sensitive.
title: Case Sensitive
type: boolean
# Defaults to true, so ordered matching applies unless switched off.
is_ordered:
default: true
description: Whether the regexes should be matched in order within the message
text. We assume `regexes` first followed by `substrings`. Defaults to `true`.
title: Is Ordered
type: boolean
# Either an integer strictly greater than 0 or the literal string `all`
# (the default).
minimum_should_match:
anyOf:
- exclusiveMinimum: 0
type: integer
- const: all
type: string
default: all
description: The minimum number of regexes or substrings that should match
message text. If set to `all`, it must match all the regexes. Defaults to
`all`.
title: Minimum Should Match
regexes:
description: The list of regexes for the assert.
items:
type: string
title: Regexes
type: array
substrings:
description: The list of substrings for the assert.
items:
type: string
title: Substrings
type: array
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: regexes
default: regexes
title: Type
type: string
title: Regexes Assert
type: object
# RegexesMatchCountAssert: assert variant tagged `type: regexes_count`.
# No property is required. Per its description, a non-null `equals` makes
# `minimum` and `maximum` ignored.
RegexesMatchCountAssert:
description: Assert that the occurrences of regular expressions that match are
in a certain range.
properties:
case_sensitive:
default: false
description: Whether the regexes should be case sensitive.
title: Case Sensitive
type: boolean
# Exact count: an integer >= 0, or null (the default) to leave it unset.
equals:
anyOf:
- minimum: 0
type: integer
- type: 'null'
default: null
description: The number of occurrences of regexes or substrings that match
message text should be equal to this. If this is set, `minimum` and `maximum`
will be ignored. Defaults to `None`.
title: Equals
# Upper count bound: an integer >= 0, or null (the default) for no upper bound.
maximum:
anyOf:
- minimum: 0
type: integer
- type: 'null'
default: null
description: The maximum number of occurrences of regexes or substrings that
match message text. Defaults to `None` meaning there is no upper bound.
title: Maximum
# Lower count bound: an integer >= 0; the default is 1, not 0.
minimum:
default: 1
description: The minimum number of occurrences of regexes or substrings that
match message text. Defaults to 1.
minimum: 0
title: Minimum
type: integer
regexes:
description: The list of regexes for the assert.
items:
type: string
title: Regexes
type: array
substrings:
description: The list of substrings for the assert.
items:
type: string
title: Substrings
type: array
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: regexes_count
default: regexes_count
title: Type
type: string
title: Regexes Match Count Assert
type: object
# ResponseMetadataAssert: assert variant tagged `type: response_metadata`.
# Only `path` is required; `equals`, `minimum` and `maximum` each accept a
# number, integer, string or null and default to null.
# NOTE(review): the `equals` description grammar was corrected here
# ("should equals" -> "should equal"). If this schema is generated from model
# code, apply the same fix at the source - TODO confirm.
ResponseMetadataAssert:
description: Asserts that a response metadata has a certain value.
properties:
# The `integer` branch is already covered by `number`; it is kept as listed.
equals:
anyOf:
- type: number
- type: integer
- type: string
- type: 'null'
default: null
description: The value that this response metadata should equal.
title: Equals
# Inclusive upper bound, per the description.
maximum:
anyOf:
- type: number
- type: integer
- type: string
- type: 'null'
default: null
description: The maximum value (inclusive) that this response metadata should
have.
title: Maximum
# Inclusive lower bound, per the description.
minimum:
anyOf:
- type: number
- type: integer
- type: string
- type: 'null'
default: null
description: The minimum value (inclusive) that this response metadata should
have.
title: Minimum
# Non-empty JMESPath expression selecting the metadata value to check.
path:
description: The [JMES path](https://jmespath.org/tutorial.html) to the metadata
value that we want to assert.
minLength: 1
title: Path
type: string
# Tag value fixed by `const`; TestCase.asserts discriminates on this property.
type:
const: response_metadata
default: response_metadata
title: Type
type: string
required: [path]
title: Response Metadata Assert
type: object
# TestCase: one entry of the root `test_cases` list.
# Requires `query` and a non-empty `asserts` list; the other properties are
# nullable strings defaulting to null.
TestCase:
description: Test cases are a single unit of test within a test suite.
properties:
asserts:
description: The asserts for this test case.
items:
# Maps each value of the `type` property to the sub-schema for that variant.
discriminator:
mapping:
answer_relevancy: '#/$defs/AnswerRelevancyAssert'
contextual_precision: '#/$defs/ContextualPrecisionAssert'
contextual_recall: '#/$defs/ContextualRecallAssert'
contextual_relevancy: '#/$defs/ContextualRelevancyAssert'
correctness: '#/$defs/CorrectnessAssert'
faithfulness: '#/$defs/FaithfulnessAssert'
hallucination: '#/$defs/HallucinationAssert'
inverse_regexes: '#/$defs/InverseRegexesAssert'
message_type: '#/$defs/MessageTypeAssert'
query_elapsed_time: '#/$defs/QueryElapsedTimeAssert'
regexes: '#/$defs/RegexesAssert'
regexes_count: '#/$defs/RegexesMatchCountAssert'
response_metadata: '#/$defs/ResponseMetadataAssert'
propertyName: type
# Each item must validate against exactly one of the thirteen assert variants.
oneOf:
- $ref: '#/$defs/ResponseMetadataAssert'
- $ref: '#/$defs/QueryElapsedTimeAssert'
- $ref: '#/$defs/MessageTypeAssert'
- $ref: '#/$defs/RegexesAssert'
- $ref: '#/$defs/InverseRegexesAssert'
- $ref: '#/$defs/RegexesMatchCountAssert'
- $ref: '#/$defs/AnswerRelevancyAssert'
- $ref: '#/$defs/ContextualPrecisionAssert'
- $ref: '#/$defs/ContextualRecallAssert'
- $ref: '#/$defs/ContextualRelevancyAssert'
- $ref: '#/$defs/CorrectnessAssert'
- $ref: '#/$defs/FaithfulnessAssert'
- $ref: '#/$defs/HallucinationAssert'
# A test case must carry at least one assert.
minItems: 1
title: Asserts
type: array
description:
anyOf:
- type: string
- type: 'null'
default: null
description: The description of the test case. This can be used to describe
the motivation for the test case.
title: Description
name:
anyOf:
- type: string
- type: 'null'
default: null
description: The name of the test case. Defaults to the query.
title: Name
# Query text plus user profile; shape is defined by TestQuery.
query:
$ref: '#/$defs/TestQuery'
description: The query for the test case, which includes actual message and
user profile.
title: Query
tc_id:
anyOf:
- type: string
- type: 'null'
default: null
description: The test case ID. Note that this is usually set during loading
of the test suite.
title: Test Case ID
# Any string or null here; the prefixed tenant patterns of JarvisProfile are
# not applied to this property.
tenant:
anyOf:
- type: string
- type: 'null'
default: null
description: The tenant for which the docs come from/are to be answered.
title: Tenant
required: [query, asserts]
title: Test Case
type: object
# TestQuery: referenced by TestCase.query.
# Requires `query_text` and `profile`.
TestQuery:
description: Query model for Jarvis test cases.
properties:
# Accepts a V1 profile object, a JarvisProfile object, or a plain string
# naming a predefined profile (per the description).
profile:
anyOf:
- $ref: '#/$defs/JarvisV1Profile'
- $ref: '#/$defs/JarvisProfile'
- type: string
description: The profile of the user executing the query. If specified as
a string, it will attempt to load the predefined profiles.
title: Profile
# Optional QuerySessionOptions object, or null (the default).
query_session_options:
anyOf:
- $ref: '#/$defs/QuerySessionOptions'
- type: 'null'
default: null
description: Additional Options regarding the session related changes, like
overriding certain values, etc.
title: Session Options
# Must be a non-empty string.
query_text:
description: The text of the query.
minLength: 1
title: Query Text
type: string
required: [query_text, profile]
title: Test Query
type: object
# TestSuiteMetadata: referenced by the root `metadata` property.
# Only `name` is required.
TestSuiteMetadata:
description: Metadata associated with a test suite. This is mostly for UX and
organizational purposes.
properties:
description:
default: ''
description: The description of the test suite.
title: Description
type: string
# Must be a non-empty string.
name:
description: The name of the test suite.
minLength: 1
title: Name
type: string
# Defaults to the literal string `unknown`.
version:
default: unknown
description: Version of this test suite based on Kondo.
title: Version
type: string
required: [name]
title: Test Suite Metadata
type: object
# Root schema: a test suite object with `metadata`, `endpoint` and a non-empty
# `test_cases` list; all three are required.
$id: https://skeleton.botmd.io/jarvis_tf/TestSuite.schema
# NOTE(review): `$defs` is the keyword name introduced in JSON Schema 2019-09
# (draft-07 names it `definitions`), and `discriminator` is an OpenAPI keyword
# rather than a draft-07 one. The `$ref` values still address `#/$defs/...` by
# JSON Pointer, but confirm the validator in use handles both under this dialect.
$schema: http://json-schema.org/draft-07/schema#
description: Test suite containing test cases and associated metadata.
properties:
# Exactly one of the V1 or V2 endpoint shapes, told apart by their `type` tag.
endpoint:
description: The Jarvis endpoint for running the test. Defaults to the runner's
environment.
discriminator:
mapping:
v1: '#/$defs/JarvisV1Endpoint'
v2: '#/$defs/JarvisV2Endpoint'
propertyName: type
oneOf:
- $ref: '#/$defs/JarvisV1Endpoint'
- $ref: '#/$defs/JarvisV2Endpoint'
title: Endpoint
metadata:
$ref: '#/$defs/TestSuiteMetadata'
description: The metadata for the root model
title: Metadata
# At least one TestCase entry is required.
test_cases:
description: The list of test cases
items:
$ref: '#/$defs/TestCase'
minItems: 1
title: Test Cases
type: array
required: [metadata, endpoint, test_cases]
title: Test Suite
type: object