# microproduct/deformation-sentiral/smallbaselineApp/dask/dask-schema.yaml

properties:
  # Directory used for spill-to-disk data; accepts a string path or null.
  temporary-directory:
    type:
    - string
    - "null"
    description: |
      Temporary directory for local disk storage, such as /tmp, /scratch,
      or /local. This directory is used during dask spill-to-disk operations.

      When the value is null (the default), dask will create a directory in
      the location from which dask was launched: `cwd/dask-worker-space`

  # Options that control how tokenize behaves.
  tokenize:
    type: object
    properties:
      ensure-deterministic:
        # A one-element type list validates exactly like the bare type name
        # in JSON Schema, so the scalar form is used here, as for the other
        # single-type options in this file.
        type: boolean
        description: |
          If ``true``, tokenize will error instead of falling back to uuids
          when a deterministic token cannot be generated. Defaults to
          ``false``.

  # Options for dataframe collections: shuffle compression and parquet
  # metadata processing.
  dataframe:
    type: object
    properties:

      # Name of the compression algorithm, or null.
      shuffle-compression:
        type:
        - string
        - "null"
        description: |
          Compression algorithm used for on-disk shuffling. Partd, the library
          used for compression, supports ZLib, BZ2, SNAPPY, and BLOSC.

      parquet:
        type: object
        properties:

          # NOTE(review): no `minimum` is declared for the two task-size
          # options below, so negative integers pass validation; confirm
          # whether `minimum: 0` should be added.
          metadata-task-size-local:
            type: integer
            description: |
              The number of files to handle within each metadata-processing
              task when reading a parquet dataset from a LOCAL file system.
              Specifying 0 will result in serial execution on the client.

          metadata-task-size-remote:
            type: integer
            description: |
              The number of files to handle within each metadata-processing
              task when reading a parquet dataset from a REMOTE file system.
              Specifying 0 will result in serial execution on the client.

  # Options for array collections: SVG rendering and slicing behaviour.
  array:
    type: object
    properties:

      svg:
        type: object
        properties:

          size:
            type: integer
            description: |
              The size in pixels used when displaying a dask array as an SVG image.
              This is used, for example, for nice rendering in a Jupyter notebook.

      slicing:
        type: object
        properties:
          # Tri-state option: true, false, or null.
          split-large-chunks:
            type: [boolean, 'null']
            description: |
              How to handle large chunks created when slicing Arrays. By default a
              warning is produced. Set to ``False`` to silence the warning
              and allow large output chunks. Set to ``True`` to silence the
              warning and avoid large output chunks.

  # Optimization settings; `fuse` holds the task-fusion options.
  optimization:
    type: object
    properties:

      fuse:
        type: object
        description: Options for Dask's task fusion optimizations
        properties:

          active:
            type: [boolean, 'null']
            description: |
              Turn task fusion on/off. This option refers to the fusion of a
              fully-materialized task graph (not a high-level graph). By default
              (None), the active task-fusion option will be treated as ``False``
              for Dask-Dataframe collections, and as ``True`` for all other graphs
              (including Dask-Array collections).

          # The multi-line descriptions below use `>-` (folded, stripped)
          # block scalars, which parse to the same single-line strings as
          # multi-line plain scalars but make the scalar boundaries explicit.
          ave-width:
            type: number
            minimum: 0
            description: >-
              Upper limit for width, where width = num_nodes / height, a good measure
              of parallelizability

          max-width:
            type: [number, 'null']
            minimum: 0
            description: >-
              Don't fuse if total width is greater than this. Set to null to dynamically
              adjust to 1.5 + ave_width * log(ave_width + 1)

          max-height:
            type: number
            minimum: 0
            description: Don't fuse more than this many levels

          max-depth-new-edges:
            type: [number, 'null']
            minimum: 0
            description: >-
              Don't fuse if new dependencies are added after this many levels.
              Set to null to dynamically adjust to ave_width * 1.5.

          subgraphs:
            type: [boolean, 'null']
            description: |
              Set to True to fuse multiple tasks into SubgraphCallable objects. Set to
              None to let the default optimizer of individual dask collections decide.
              If no collection-specific default exists, None defaults to False.

          # NOTE(review): the description says a function is also accepted,
          # but the declared type only admits booleans; confirm whether the
          # schema is meant to cover only file-based (YAML) configuration.
          rename-keys:
            type: boolean
            description: >-
              Set to true to rename the fused keys with `default_fused_keys_renamer`.
              Renaming fused keys can keep the graph more understandable and
              comprehensible, but it comes at the cost of additional processing. If
              False, then the top-most key will be used. For advanced usage, a function
              to create the new name is also accepted.