cvat/cvat-sdk/cvat_sdk/masks.py

# Copyright (C) CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import math
from collections.abc import Sequence
from typing import Optional

import numpy as np
from numpy.typing import ArrayLike, NDArray


def encode_mask(bitmap: ArrayLike, /, bbox: Optional[Sequence[float]] = None) -> list[float]:
    """
    Encodes an image mask into an array of numbers suitable for the "points"
    attribute of a LabeledShapeRequest object of type "mask".

    bitmap must be a non-empty boolean array of shape (H, W), where H is the
    height and W is the width of the image that the mask applies to.

    bbox, if specified, must have the form [x1, y1, x2, y2],
    where (0, 0) <= (x1, y1) < (x2, y2) <= (W, H).
    Fractional coordinates are expanded outwards: (x1, y1) is rounded down
    and (x2, y2) is rounded up.
    The encoded mask will be limited to points between (x1, y1) and (x2, y2).
    If bbox is None, the encoded mask will include all non-zero points of the bitmap;
    if the bitmap has no non-zero points, the single point (0, 0) is encoded.

    The result consists of the lengths of alternating runs of False and True
    items of the cropped bitmap (in row-major order, starting with a possibly
    zero-length run of False items), followed by the four bounding box
    coordinates [x1, y1, x2 - 1, y2 - 1] (i.e. the bottom-right corner is inclusive).

    Raises ValueError if the bitmap is not a non-empty 2D boolean array
    or if bbox has invalid coordinates.
    """

    bitmap = np.asanyarray(bitmap)
    if bitmap.ndim != 2:
        raise ValueError("bitmap must have 2 dimensions")
    if bitmap.dtype != np.bool_:
        raise ValueError("bitmap must have boolean items")
    if bitmap.size == 0:
        # A mask must cover at least one pixel; without this check the code
        # below would fail with an obscure IndexError on flat[0].
        raise ValueError("bitmap must not be empty")

    if bbox is None:
        nz_y, nz_x = bitmap.nonzero()
        if nz_x.size == 0 or nz_y.size == 0:
            # No non-zero points: fall back to a 1x1 box at the origin.
            x1 = y1 = 0
            x2 = y2 = 1
        else:
            # Tightest box around the non-zero points (x2/y2 are exclusive).
            x1, y1 = (np.min(nz).item() for nz in (nz_x, nz_y))
            x2, y2 = (np.max(nz).item() + 1 for nz in (nz_x, nz_y))

    else:
        x1, y1 = map(math.floor, bbox[0:2])
        x2, y2 = map(math.ceil, bbox[2:4])

        if not (0 <= x1 < x2 <= bitmap.shape[1] and 0 <= y1 < y2 <= bitmap.shape[0]):
            raise ValueError("bbox has invalid coordinates")

    flat = bitmap[y1:y2, x1:x2].ravel()

    # Padding both ends with the opposite value guarantees a transition at
    # index 0 and at index len(flat), so run_indices holds every run boundary.
    (run_indices,) = np.diff(flat, prepend=[not flat[0]], append=[not flat[-1]]).nonzero()
    if flat[0]:
        # The encoding always starts with a run of False items, so a bitmap
        # starting with True gets a leading zero-length run.
        run_lengths = np.diff(run_indices, prepend=[0])
    else:
        run_lengths = np.diff(run_indices)

    return run_lengths.tolist() + [x1, y1, x2 - 1, y2 - 1]


def decode_mask(
    encoded: Sequence[float], /, *, image_width: int, image_height: int
) -> NDArray[bool]:
    """
    Decodes a "points" attribute of a LabeledShape/LabeledShapeRequest object of type "mask"
    into a 2D boolean array representing the mask.

    `encoded` must consist of run lengths (alternating runs of False and True
    items, starting with False) followed by the four bounding box coordinates
    [x1, y1, x2, y2], where (x2, y2) is inclusive. All items must have
    integral values; Python ints, NumPy integers and integral floats are accepted.

    `image_width` and `image_height` must be set to the dimensions
    of the image that the mask applies to.
    The returned mask will have shape (image_height, image_width).

    Raises ValueError if the image dimensions are not positive, or if `encoded`
    is too short, contains non-integer or negative run lengths, has a bounding
    box that does not fit in the image, or has run lengths that do not add up
    to the bounding box area.
    """

    if len(encoded) < 5:
        raise ValueError("too few elements in encoded mask")

    if image_width <= 0 or image_height <= 0:
        raise ValueError("invalid image dimensions")

    def to_int(x: float) -> int:
        if isinstance(x, int):
            return x
        if isinstance(x, np.integer):
            # NumPy integer scalars (e.g. items of an integer ndarray) are not
            # instances of int, but are just as valid.
            return int(x)
        if isinstance(x, float) and x.is_integer():
            return int(x)
        raise ValueError(f"non-integer value in encoded mask: {x!r}")

    *run_lengths, x1, y1, x2, y2 = map(to_int, encoded)
    # The encoded bottom-right corner is inclusive; make it exclusive.
    x2 += 1
    y2 += 1

    if not (0 <= x1 < x2 <= image_width and 0 <= y1 < y2 <= image_height):
        raise ValueError("invalid encoded bounding box")

    if any(length < 0 for length in run_lengths):
        raise ValueError("negative run length in encoded mask")

    bbox_width = x2 - x1
    bbox_height = y2 - y1

    # Validate the total size *before* expanding the runs, so that a corrupt
    # run length cannot trigger a huge allocation in np.repeat.
    if sum(run_lengths) != bbox_width * bbox_height:
        raise ValueError("encoded bitmap does not match encoded bounding box")

    # Runs at even positions are False, runs at odd positions are True.
    alternating_bools = (np.arange(len(run_lengths)) & 1) != 0
    mask = np.repeat(alternating_bools, run_lengths)

    full_mask = np.zeros((image_height, image_width), dtype=bool)
    full_mask[y1:y2, x1:x2] = mask.reshape((bbox_height, bbox_width))
    return full_mask
继承于2.44.3版本 2025-09-16 01:19:40 +00:00			`# Copyright (C) CVAT.ai Corporation`
			`#`
			`# SPDX-License-Identifier: MIT`

			`import math`
			`from collections.abc import Sequence`
			`from typing import Optional`

			`import numpy as np`
			`from numpy.typing import ArrayLike, NDArray`


			`def encode_mask(bitmap: ArrayLike, /, bbox: Optional[Sequence[float]] = None) -> list[float]:`
			`"""`
			`Encodes an image mask into an array of numbers suitable for the "points"`
			`attribute of a LabeledShapeRequest object of type "mask".`

			`bitmap must be a boolean array of shape (H, W), where H is the height and`
			`W is the width of the image that the mask applies to.`

			`bbox, if specified, must have the form [x1, y1, x2, y2],`
			`where (0, 0) <= (x1, y1) < (x2, y2) <= (W, H).`
			`The encoded mask will be limited to points between (x1, y1) and (x2, y2).`
			`If bbox is None, the encoded mask will include all non-zero points of the bitmap.`
			`"""`

			`bitmap = np.asanyarray(bitmap)`
			`if bitmap.ndim != 2:`
			`raise ValueError("bitmap must have 2 dimensions")`
			`if bitmap.dtype != np.bool_:`
			`raise ValueError("bitmap must have boolean items")`

			`if bbox is None:`
			`nz_y, nz_x = bitmap.nonzero()`
			`if nz_x.size == 0 or nz_y.size == 0:`
			`x1 = y1 = 0`
			`x2 = y2 = 1`
			`else:`
			`x1, y1 = (np.min(nz).item() for nz in (nz_x, nz_y))`
			`x2, y2 = (np.max(nz).item() + 1 for nz in (nz_x, nz_y))`

			`else:`
			`x1, y1 = map(math.floor, bbox[0:2])`
			`x2, y2 = map(math.ceil, bbox[2:4])`

			`if not (0 <= x1 < x2 <= bitmap.shape[1] and 0 <= y1 < y2 <= bitmap.shape[0]):`
			`raise ValueError("bbox has invalid coordinates")`

			`flat = bitmap[y1:y2, x1:x2].ravel()`

			`(run_indices,) = np.diff(flat, prepend=[not flat[0]], append=[not flat[-1]]).nonzero()`
			`if flat[0]:`
			`run_lengths = np.diff(run_indices, prepend=[0])`
			`else:`
			`run_lengths = np.diff(run_indices)`

			`return run_lengths.tolist() + [x1, y1, x2 - 1, y2 - 1]`


			`def decode_mask(`
			`encoded: Sequence[float], /, *, image_width: int, image_height: int`
			`) -> NDArray[bool]:`
			`"""`
			`Decodes a "points" attribute of a LabeledShape/LabeledShapeRequest object of type "mask"`
			`into a 2D boolean array representing the mask.`

			`image_width` and `image_height` must be set to the dimensions
			`of the image that the mask applies to.`
			`The returned mask will have shape (image_height, image_width).`
			`"""`

			`if len(encoded) < 5:`
			`raise ValueError("too few elements in encoded mask")`

			`if image_width <= 0 or image_height <= 0:`
			`raise ValueError("invalid image dimensions")`

			`def to_int(x: float) -> int:`
			`if isinstance(x, int):`
			`return x`
			`if isinstance(x, float) and x.is_integer():`
			`return int(x)`
			`raise ValueError(f"non-integer value in encoded mask: {x!r}")`

			`*run_lengths, x1, y1, x2, y2 = map(to_int, encoded)`
			`x2 += 1`
			`y2 += 1`

			`if not (0 <= x1 < x2 <= image_width and 0 <= y1 < y2 <= image_height):`
			`raise ValueError("invalid encoded bounding box")`

			`alternating_bools = (np.arange(len(run_lengths)) & 1) != 0`
			`mask = np.repeat(alternating_bools, run_lengths)`
			`if mask.size != (y2 - y1) * (x2 - x1):`
			`raise ValueError("encoded bitmap does not match encoded bounding box")`

			`full_mask = np.zeros((image_height, image_width), dtype=bool)`
			`full_mask[y1:y2, x1:x2] = mask.reshape((y2 - y1, x2 - x1))`
			`return full_mask`