From DataSource.schema.yaml (einstein/DataSource.schema)
---
# JSON Schema (draft-07) for the settings object of an Einstein data source.
$id: 'https://skeleton.botmd.io/einstein/DataSource.schema'
$schema: 'http://json-schema.org/draft-07/schema#'
title: Data Source
description: Settings for configuring an Einstein data source.
type: object
# Keys that are not declared under `properties` are rejected.
additionalProperties: false
# At least one of the data source types below has to be present.
anyOf:
  - required: [google_drive]
  - required: [ingestor]
  - required: [mock]
  - required: [microsoft_sharepoint]
  - required: [noop]
  - required: [noop_dialogue]
  - required: [s3]
  - required: [tabular]
properties:
# Flags
# Boolean flag; the default is documented in the description only
# (no `default` keyword is declared).
allow_user_upload:
  type: boolean
  title: Allow User Upload
  description: >-
    Whether this data source allows user upload. Defaults to `false`.
# Boolean flag; the default is documented in the description only
# (no `default` keyword is declared).
convert_to_pdf:
  title: Convert To PDF
  description: Determines if entries from this data source should be converted to PDF. Defaults to `true`.
  type: boolean
# Boolean flag; the default is documented in the description only
# (no `default` keyword is declared).
delete_deactivated_entries:
  title: Permanently delete deactivated entries.
  description: Boolean flag to indicate that deactivated entries should be deleted. Defaults to `false`.
  type: boolean
# Boolean flag; the default is documented in the description only
# (no `default` keyword is declared).
recreate_entries:
  type: boolean
  title: Recreate all entries.
  description: >-
    Recreate all entries skipping `find` and `is_same` in syncer
    (faster for large number of entries eg. 50k directory).
    Defaults to `false`.
# Object grouping the UI-related boolean flags. Undeclared keys are not
# rejected here because no `additionalProperties` is set on this object.
ui_flags:
  title: UI Flags
  description: UI-related flags for the entries in this `DataSource`.
  type: object
  properties:
    deletable:
      title: Deletable
      description: Should UI be able to delete entries in this `DataSource`. Defaults to `true`.
      type: boolean
    user_created:
      title: User Created
      description: Indicates if the entries in the `DataSource` are user created or preset. Defaults to `true`.
      type: boolean
# Settings
# Boolean setting; the default is documented in the description only
# (no `default` keyword is declared).
generate_pages:
  type: boolean
  title: Generate file pages.json
  description: >-
    Determines if a file `pages.json` containing a mapping of text page
    number to text will be generated and uploaded to storages.
    Defaults to `false`.
# Choice of PDF conversion backend. `type: string` is stated explicitly to
# match the other scalar properties; both enum members are strings, so the
# set of accepted values is unchanged.
pdf_export_method:
  title: Method For PDF Export
  description: The method used for converting office documents to PDF. Defaults to `convertapi`.
  type: string
  enum: [convertapi, libreoffice]
# Jarvis provider mapping
# Each of these reuses the same-named property definition from the
# ModuleSettings schema.
chernobyl:
  $ref: '/einstein/ModuleSettings.schema#/properties/chernobyl'
eliza:
  $ref: '/einstein/ModuleSettings.schema#/properties/eliza'
rosters:
  $ref: '/einstein/ModuleSettings.schema#/properties/rosters'
# SingHealth-specific settings. `institutions`, when given, must be a
# non-empty list of unique, non-empty strings.
singhealth:
  type: object
  title: SingHealth
  description: Settings related to singhealth datasources.
  properties:
    institutions:
      type: array
      minItems: 1
      uniqueItems: true
      title: Institutions
      description: The Singhealth institution names as they appear in the column to filter the rows by.
      items:
        type: string
        minLength: 1
# DataSource types
# Google Drive source: needs both a drive/folder URL and service account
# credentials; no other keys are accepted at this level.
google_drive:
  type: object
  title: Google Drive
  description: Settings for using Google Drive as a data source.
  required: [url, credentials]
  additionalProperties: false
  properties:
    credentials:
      type: object
      title: Google Cloud Credentials
      description: The service account credentials.
      required: [type, private_key]
      # Further keys beyond the two declared ones are accepted.
      additionalProperties: true
      properties:
        type:
          title: Type
          description: Credential type. Must be `service_account`.
          const: service_account
        private_key:
          type: string
          minLength: 1
          title: Private Key
          description: Private key associated with the credential.
    url:
      type: string
      format: uri
      minLength: 1
      title: Google Drive URL
      description: The unique URL of the Google Drive or folder.
# Ingestor source: selects `IngestorContent` by UID, by worker key(s), or by
# a worker key combined with a URI, URI list, URI prefix or URI regex.
ingestor:
  title: Ingestor
  description: Settings for using an ingestor as a data source.
  type: object
  # At least one of these key combinations must be present.
  anyOf:
    - required: [content_uid]
    - required: [worker_keys]
    - required: [content_uri, worker_key]
    - required: [content_uris, worker_key]
    - required: [content_uri__startswith, worker_key]
    - required: [content_uri__regex, worker_key]
  properties:
    content_uid:
      title: Content UID
      description: UID of the `IngestorContent` to sync (may change on ingestor wipe, use `content_uri` and `worker_key` instead).
      type: string
      minLength: 1
    content_uri:
      title: Content URI
      description: URI of the `IngestorContent` to sync (use with `worker_key`).
      type: string
      minLength: 1
    content_uris:
      title: Content URI List
      description: List of URIs of the `IngestorContent` to sync (use with `worker_key`).
      type: array
      minItems: 1
      uniqueItems: true
      items:
        type: string
        minLength: 1
    worker_key:
      title: Worker Key
      # Lists every selector that `anyOf` pairs with `worker_key`.
      description: >-
        Worker key of the `IngestorContent` to sync (use with `content_uri`,
        `content_uris`, `content_uri__startswith` or `content_uri__regex`).
      type: string
      minLength: 1
    content_uri__startswith:
      title: URI Starts With
      description: Prefix for the `IngestorContent` content URI (use with `worker_key`).
      type: string
      minLength: 1
    content_uri__regex:
      title: URI Match for Regex
      description: Regex filter for `IngestorContent` content URI (use with `worker_key`).
      type: string
      minLength: 1
    worker_keys:
      title: Worker Keys
      description: The worker keys of `IngestorContent` to sync.
      type: array
      minItems: 1
      uniqueItems: true
      items:
        type: string
        minLength: 1
    active_sheets:
      title: Active Excel Sheets
      description: The sheet names that should be synced.
      type: array
      minItems: 1
      uniqueItems: true
      items:
        type: string
        minLength: 1
    start_from_row:
      title: Start From Row
      description: The excel row to start parsing from (the first row should default to the column header, defaults to 1).
      type: integer
# No properties are declared and extra keys are rejected, so only an empty
# object (`{}`) validates.
mock:
  title: Mock
  description: Settings for Mock data source. This is used in unit tests and should not be used in real life.
  type: object
  required: []
  additionalProperties: false
# Microsoft Sharepoint data source. `url` and `credentials` are required.
# NOTE(review): indentation is not visible in this copy, so the nesting of the
# keys below is inferred from their order — confirm against the original file.
microsoft_sharepoint:
title: Microsoft Sharepoint
description: Settings for using Microsoft Sharepoint as a data source.
type: object
additionalProperties: false
required: [url, credentials]
properties:
# Credentials object: four keys are required, and two of them
# (`grant_type`, `scope`) are pinned to a single value via `const`.
credentials:
title: Microsoft Sharepoint Credentials
description: Sharepoint client secret
type: object
required: [grant_type, client_secret, scope, client_id]
# Keys beyond the declared ones are tolerated here.
additionalProperties: true
properties:
grant_type:
title: grant_type
description: Grant permission type. Must be `client_credentials`
const: client_credentials
client_secret:
title: client_secret
description: Client secret generated from MS Azure AD.
type: string
minLength: 1
scope:
title: scope
description: Scope of the token generated. Must be `https://graph.microsoft.com/.default`
type: string
const: https://graph.microsoft.com/.default
client_id:
title: client_id
description: client_id generated from MS Azure AD.
type: string
minLength: 1
# NOTE(review): it is unclear from this copy whether `tenant_id` belongs to
# `credentials` or sits beside `url` — confirm. It appears in neither
# `required` list and, unlike the other strings here, has no `minLength`.
tenant_id:
title: Tenant id
description: Tenant id of the app.
type: string
url:
title: Sharepoint Drive url
description: Url of sharepoint drive id that contains all the files.
type: string
format: uri
minLength: 1
# No properties are declared and extra keys are rejected, so only an empty
# object (`{}`) validates.
noop:
  type: object
  title: Noop
  description: >-
    Settings for using a Noop data source. This is useful as a dummy data
    source for user uploaded entries.
  required: []
  additionalProperties: false
# No properties are declared and extra keys are rejected, so only an empty
# object (`{}`) validates.
noop_dialogue:
  type: object
  title: Noop Dialogue
  description: >-
    Settings for using a Noop dialogue data source. This is useful as a
    dummy data source for user uploaded entries.
  required: []
  additionalProperties: false
# S3 source: configured with either a single `uri` or a list of `uris`.
s3:
  type: object
  title: S3
  description: >-
    Settings for using S3 as a data source. Access control will depend on
    permissions being assigned to the Hippocrates task role:
    `staging-hippo-ecs_task_role-20180611204658589300000001` and
    `product-hippo-ecs_task_role-20180620231135019400000002`.
  additionalProperties: false
  # Exactly one of the two keys must be present; supplying both fails `oneOf`.
  oneOf:
    - required: [uri]
    - required: [uris]
  properties:
    uri:
      type: string
      format: uri
      minLength: 1
      title: URI
      description: The `s3://` URI to the bucket along with key prefix. A trailing slash is always added.
    uris:
      type: array
      minItems: 1
      uniqueItems: true
      title: URIs
      description: The `s3://` URIs to the buckets along with key prefix. A trailing slash is always added.
      # NOTE(review): unlike `uri`, the items carry no `format: uri` —
      # confirm whether that difference is intentional.
      items:
        type: string
        minLength: 1
# Tabular source: the definition is taken from the TabularInterfaceSettings
# schema.
# NOTE(review): draft-07 tells validators to ignore keywords placed beside
# `$ref`, so `title`/`description` here may not take effect — confirm with
# the tooling that consumes this schema.
tabular:
  title: Tabular interface
  description: Settings for using a tabular interface as a data source.
  $ref: '/scalpel/TabularInterfaceSettings.schema#/properties/tabular'