Schema for Einstein Data Source

---
# Identity of this schema and the JSON Schema draft it is written against.
$id: 'https://skeleton.botmd.io/einstein/DataSource.schema'
$schema: 'http://json-schema.org/draft-07/schema#'

title: Data Source
description: Settings for configuring an Einstein data source.

# A data source is an object limited to the keys declared under `properties`.
type: object
additionalProperties: false

# At least one data source type key must be present. `anyOf` (rather than
# `oneOf`) means that supplying several type keys at once is not rejected here.
anyOf:
  - required: [google_drive]
  - required: [ingestor]
  - required: [mock]
  - required: [microsoft_sharepoint]
  - required: [noop]
  - required: [noop_dialogue]
  - required: [s3]
  - required: [tabular]

properties:
  # Flags
  # NOTE: the defaults quoted in the descriptions below are documentation only;
  # none of these flags declares a `default` keyword in this schema.
  allow_user_upload:
    title: Allow User Upload
    description: Whether this data source allows user upload. Defaults to `false`.
    type: boolean

  convert_to_pdf:
    title: Convert To PDF
    description: Determines if entries from this data source should be converted to PDF. Defaults to `true`.
    type: boolean

  delete_deactivated_entries:
    title: Permanently delete deactivated entries.
    description: Boolean flag to indicate that deactivated entries should be deleted. Defaults to `false`.
    type: boolean

  recreate_entries:
    title: Recreate all entries.
    description: Recreate all entries skipping `find` and `is_same` in syncer (faster for large number of entries, e.g. 50k directory). Defaults to `false`.
    type: boolean

  # Grouping object for UI-facing switches. Unlike the data source type objects
  # further down, it does not set `additionalProperties: false`, so unknown keys
  # inside `ui_flags` are accepted.
  ui_flags:
    title: UI Flags
    description: Flags describing how the UI should treat entries in this `DataSource`.
    type: object
    properties:
      deletable:
        title: Deletable
        description: Should UI be able to delete entries in this `DataSource`. Defaults to `true`.
        type: boolean
      user_created:
        title: User Created
        description: Indicates if the entries in the `DataSource` are user created or preset. Defaults to `true`.
        type: boolean

  # Settings
  # Boolean switch; the default named in the description is not declared with a
  # `default` keyword.
  generate_pages:
    type: boolean
    title: Generate file pages.json
    description: >-
      Determines if a file `pages.json` containing a mapping of text page
      number to text will be generated and uploaded to storages. Defaults to
      `false`.

  # Restricted to the two converter names listed in `enum`.
  pdf_export_method:
    title: Method For PDF Export
    description: >-
      The method used for converting office documents to PDF. Defaults to
      `convertapi`.
    enum:
      - convertapi
      - libreoffice

  # Jarvis provider mapping
  # Each provider's settings are pulled in by reference from the matching
  # property of the ModuleSettings schema rather than being redefined here.
  chernobyl:
    $ref: '/einstein/ModuleSettings.schema#/properties/chernobyl'

  eliza:
    $ref: '/einstein/ModuleSettings.schema#/properties/eliza'

  rosters:
    $ref: '/einstein/ModuleSettings.schema#/properties/rosters'

  # SingHealth-specific settings. `institutions` is optional; when given it must
  # be a non-empty list of unique, non-empty strings.
  singhealth:
    type: object
    title: SingHealth
    description: Settings related to SingHealth data sources.
    properties:
      institutions:
        type: array
        minItems: 1
        uniqueItems: true
        title: Institutions
        description: The SingHealth institution names, as they appear in the column, to filter the rows by.
        items:
          type: string
          minLength: 1

  # DataSource types
  google_drive:
    type: object
    title: Google Drive
    description: Settings for using Google Drive as a data source.

    # Both keys are mandatory and no other keys are accepted at this level.
    required:
      - url
      - credentials
    additionalProperties: false

    properties:
      # Only `type` and `private_key` are validated; any further credential
      # fields pass through unchecked because of `additionalProperties: true`.
      credentials:
        type: object
        title: Google Cloud Credentials
        description: The service account credentials.
        required:
          - type
          - private_key
        additionalProperties: true

        properties:
          type:
            title: Type
            description: Credential type. Must be `service_account`.
            const: service_account

          private_key:
            type: string
            minLength: 1
            title: Private Key
            description: Private key associated with the credential.

      url:
        type: string
        format: uri
        minLength: 1
        title: Google Drive URL
        description: The unique URL of the Google Drive or folder.

  ingestor:
    title: Ingestor
    description: Settings for using an ingestor as a data source.
    type: object
    # At least one of these key combinations must be present. `content_uid` and
    # `worker_keys` stand alone; every URI-based selector must be paired with
    # `worker_key`.
    anyOf:
      - required: [content_uid]
      - required: [worker_keys]
      - required: [content_uri, worker_key]
      - required: [content_uris, worker_key]
      - required: [content_uri__startswith, worker_key]
      - required: [content_uri__regex, worker_key]
    properties:
      content_uid:
        title: Content UID
        description: UID of the `IngestorContent` to sync (may change on ingestor wipe, use `content_uri` and `worker_key` instead).
        type: string
        minLength: 1
      content_uri:
        title: Content URI
        description: URI of the `IngestorContent` to sync (use with `worker_key`).
        type: string
        minLength: 1
      content_uris:
        title: Content URI List
        description: List of URIs of the `IngestorContent` to sync (use with `worker_key`).
        type: array
        minItems: 1
        uniqueItems: true
        items:
          type: string
          minLength: 1
      worker_key:
        title: Worker Key
        description: Worker key of the `IngestorContent` to sync (use with `content_uri`, `content_uris`, `content_uri__startswith` or `content_uri__regex`).
        type: string
        minLength: 1
      content_uri__startswith:
        title: URI Starts With
        description: Prefix for the `IngestorContent` content URI.
        type: string
        minLength: 1
      content_uri__regex:
        title: URI Match for Regex
        description: Regex filter for `IngestorContent` content URI.
        type: string
        minLength: 1
      worker_keys:
        title: Worker Keys
        description: The worker keys of `IngestorContent` to sync.
        type: array
        minItems: 1
        uniqueItems: true
        items:
          type: string
          minLength: 1
      active_sheets:
        title: Active Excel Sheets
        description: The sheet names that should be synced.
        type: array
        minItems: 1
        uniqueItems: true
        items:
          type: string
          minLength: 1
      # NOTE(review): no lower bound is declared, so zero and negative values
      # validate. Confirm with the consumer whether `minimum: 1` should be added.
      start_from_row:
        title: Start From Row
        description: The Excel row to start parsing from (the first row should default to the column header, defaults to 1).
        type: integer

  # Takes no settings: with no declared properties and
  # `additionalProperties: false`, the only valid value is an empty object.
  mock:
    title: Mock
    description: Settings for Mock data source. This is used in unit tests and should not be used in real life.

    type: object
    required: []
    additionalProperties: false

  microsoft_sharepoint:
    type: object
    title: Microsoft Sharepoint
    description: Settings for using Microsoft Sharepoint as a data source.

    # Both keys are mandatory and no other keys are accepted at this level.
    required:
      - url
      - credentials
    additionalProperties: false

    properties:
      # Four fields are mandatory; `tenant_id` is optional, and any further
      # fields pass through unchecked because of `additionalProperties: true`.
      credentials:
        type: object
        title: Microsoft Sharepoint Credentials
        description: Sharepoint client secret
        required:
          - grant_type
          - client_secret
          - scope
          - client_id
        additionalProperties: true

        properties:
          # Pinned to a single allowed value.
          grant_type:
            title: grant_type
            description: Grant permission type. Must be `client_credentials`
            const: client_credentials

          client_secret:
            type: string
            minLength: 1
            title: client_secret
            description: Client secret generated from MS Azure AD.

          # Pinned to a single allowed value.
          scope:
            type: string
            title: scope
            description: Scope of the token generated. Must be `https://graph.microsoft.com/.default`
            const: 'https://graph.microsoft.com/.default'

          client_id:
            type: string
            minLength: 1
            title: client_id
            description: client_id generated from MS Azure AD.

          # Unlike the other string fields here, no `minLength` is declared.
          tenant_id:
            type: string
            title: Tenant id
            description: Tenant id of the app.

      url:
        type: string
        format: uri
        minLength: 1
        title: Sharepoint Drive url
        description: Url of sharepoint drive id that contains all the files.

  # Both Noop variants take no settings: with no declared properties and
  # `additionalProperties: false`, the only valid value is an empty object.
  noop:
    type: object
    title: Noop
    description: >-
      Settings for using a Noop data source. This is useful as a dummy data
      source for user uploaded entries.

    additionalProperties: false
    required: []

  noop_dialogue:
    type: object
    title: Noop Dialogue
    description: >-
      Settings for using a Noop dialogue data source. This is useful as a dummy
      data source for user uploaded entries.

    additionalProperties: false
    required: []

  s3:
    type: object
    title: S3
    description: >-
      Settings for using S3 as a data source. Access control will depend on
      permissions being assigned to the Hippocrates task role:
      `staging-hippo-ecs_task_role-20180611204658589300000001` and
      `product-hippo-ecs_task_role-20180620231135019400000002`.

    additionalProperties: false

    # Exactly one of `uri` / `uris` must be given; supplying both fails `oneOf`.
    oneOf:
      - required: [uri]
      - required: [uris]

    properties:
      uri:
        type: string
        format: uri
        minLength: 1
        title: URI
        description: The `s3://` URI to the bucket along with key prefix. A trailing slash is always added.
      # NOTE(review): the items below do not declare `format: uri`, while the
      # single `uri` above does. Confirm whether that difference is intended.
      uris:
        type: array
        minItems: 1
        uniqueItems: true
        title: URIs
        description: The `s3://` URIs to the buckets along with key prefix. A trailing slash is always added.
        items:
          type: string
          minLength: 1

  # NOTE(review): in draft-07, keywords placed next to `$ref` are ignored by
  # validators, so `title` and `description` here act as reader-facing
  # annotations only. Confirm that the tooling consuming this schema agrees.
  tabular:
    $ref: '/scalpel/TabularInterfaceSettings.schema#/properties/tabular'
    title: Tabular interface
    description: Settings for using a tabular interface as a data source.