# Source: cvat/tests/python/rest_api/test_requests.py
# (original file: ~608 lines, 22 KiB, Python)

# Copyright (C) CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT
import io
import json
from http import HTTPStatus
from typing import Optional
from urllib.parse import parse_qsl, urlparse
import pytest
from cvat_sdk.api_client import ApiClient, models
from cvat_sdk.api_client.api_client import Endpoint
from cvat_sdk.core.helpers import get_paginated_collection
from shared.fixtures.data import Container
from shared.fixtures.init import docker_exec_redis_inmem, kube_exec_redis_inmem
from shared.utils.config import make_api_client
from shared.utils.helpers import generate_image_files
from .utils import (
CollectionSimpleFilterTestBase,
create_task,
export_job_dataset,
export_project_backup,
export_project_dataset,
export_task_backup,
export_task_dataset,
import_job_annotations,
import_project_backup,
import_project_dataset,
import_task_annotations,
import_task_backup,
wait_background_request,
)
@pytest.mark.usefixtures("restore_db_per_class")
@pytest.mark.usefixtures("restore_redis_inmem_per_function")
@pytest.mark.usefixtures("restore_redis_ondisk_per_function")
@pytest.mark.timeout(30)
class TestRequestsListFilters(CollectionSimpleFilterTestBase):
    """Tests for simple filters on the background-requests list endpoint.

    The fixtures below build a pool of projects/tasks/jobs and launch a mix of
    export/import background requests against them; the parametrized test then
    verifies that each supported filter narrows the request listing correctly.
    """

    # Maps each filter name to the lookup path inside a request object that the
    # base class reads the comparable value from; a trailing callable
    # post-processes the raw value (e.g. "export:dataset" yields subresource
    # "dataset" and action "export").
    field_lookups = {
        "target": ["operation", "target"],
        "subresource": ["operation", "type", lambda x: x.split(":")[1]],
        "action": ["operation", "type", lambda x: x.split(":")[0]],
        "project_id": ["operation", "project_id"],
        "task_id": ["operation", "task_id"],
        "job_id": ["operation", "job_id"],
        "format": ["operation", "format"],
    }

    def _get_endpoint(self, api_client: ApiClient) -> Endpoint:
        """Return the endpoint under test: GET /api/requests."""
        return api_client.requests_api.list_endpoint

    @pytest.fixture(autouse=True)
    def setup(self, find_users):
        # Run every request in this class on behalf of a regular (non-admin) user.
        self.user = find_users(privilege="user")[0]["username"]

    @pytest.fixture
    def fxt_resources_ids(self):
        """Create 3 projects and 3 small tasks; return (project_ids, task_ids, job_ids)."""
        with make_api_client(self.user) as api_client:
            project_ids = [
                api_client.projects_api.create(
                    {"name": f"Test project {idx + 1}", "labels": [{"name": "car"}]}
                )[0].id
                for idx in range(3)
            ]

            task_ids = [
                create_task(
                    self.user,
                    spec={"name": f"Test task {idx + 1}", "labels": [{"name": "car"}]},
                    data={
                        "image_quality": 75,
                        "client_files": generate_image_files(2),
                        # NOTE: segment_size=1 presumably splits each 2-image
                        # task into one job per image, so several job ids are
                        # collected per task below.
                        "segment_size": 1,
                    },
                )[0]
                for idx in range(3)
            ]

            # collect the ids of all jobs belonging to the created tasks
            job_ids = []
            for task_id in task_ids:
                jobs, _ = api_client.jobs_api.list(task_id=task_id)
                job_ids.extend([j.id for j in jobs.results])

        return project_ids, task_ids, job_ids

    @pytest.fixture
    def fxt_make_requests(
        self,
        fxt_make_export_project_requests,
        fxt_make_export_task_requests,
        fxt_make_export_job_requests,
        fxt_download_file,
    ):
        """Return a callable that fires a representative mix of background requests:
        exports for most resources, plus download + re-import for the first ones."""

        def _make_requests(project_ids: list[int], task_ids: list[int], job_ids: list[int]):
            # make requests to export projects|tasks|jobs annotations|datasets|backups
            fxt_make_export_project_requests(project_ids[1:])
            fxt_make_export_task_requests(task_ids[1:])
            fxt_make_export_job_requests(job_ids[1:])

            # make requests to download files and then import them
            for resource_type, first_resource in zip(
                ("project", "task", "job"), (project_ids[0], task_ids[0], job_ids[0])
            ):
                for subresource in ("dataset", "annotations", "backup"):
                    # jobs have no "backup" subresource
                    if resource_type == "job" and subresource == "backup":
                        continue

                    data = fxt_download_file(resource_type, first_resource, subresource)

                    tmp_file = io.BytesIO(data)
                    tmp_file.name = f"{resource_type}_{subresource}.zip"

                    # only the task backup is re-imported here (produces an
                    # "import" request for the filter tests)
                    if resource_type == "task" and subresource == "backup":
                        import_task_backup(
                            self.user,
                            file_content=tmp_file,
                        )

            empty_file = io.BytesIO(b"empty_file")
            empty_file.name = "empty.zip"

            # import corrupted backup (expected to produce a "failed" request,
            # so the "status" filter has both finished and failed samples)
            import_task_backup(
                self.user,
                file_content=empty_file,
            )

        return _make_requests

    @pytest.fixture
    def fxt_download_file(self):
        """Return a helper that exports one resource subresource and downloads
        the resulting file, returning its raw bytes."""

        def download_file(resource: str, rid: int, subresource: str):
            # Dispatch table: (resource, subresource) -> export helper.
            # "dataset" exports include images, "annotations" exports do not.
            func = {
                ("project", "dataset"): lambda *args, **kwargs: export_project_dataset(
                    *args, **kwargs, save_images=True
                ),
                ("project", "annotations"): lambda *args, **kwargs: export_project_dataset(
                    *args, **kwargs, save_images=False
                ),
                ("project", "backup"): export_project_backup,
                ("task", "dataset"): lambda *args, **kwargs: export_task_dataset(
                    *args, **kwargs, save_images=True
                ),
                ("task", "annotations"): lambda *args, **kwargs: export_task_dataset(
                    *args, **kwargs, save_images=False
                ),
                ("task", "backup"): export_task_backup,
                ("job", "dataset"): lambda *args, **kwargs: export_job_dataset(
                    *args, **kwargs, save_images=True
                ),
                ("job", "annotations"): lambda *args, **kwargs: export_job_dataset(
                    *args, **kwargs, save_images=False
                ),
            }[(resource, subresource)]
            data = func(self.user, id=rid, download_result=True)
            assert data, f"Failed to download {resource} {subresource} locally"
            return data

        return download_file

    @pytest.fixture
    def fxt_make_export_project_requests(self):
        """Return a helper that starts backup/dataset/annotations exports for
        each given project, without downloading the results."""

        def make_requests(project_ids: list[int]):
            for project_id in project_ids:
                export_project_backup(self.user, id=project_id, download_result=False)
                export_project_dataset(
                    self.user, save_images=True, id=project_id, download_result=False
                )
                export_project_dataset(
                    self.user,
                    save_images=False,
                    id=project_id,
                    download_result=False,
                )

        return make_requests

    @pytest.fixture
    def fxt_make_export_task_requests(self):
        """Return a helper that starts backup/dataset/annotations exports for
        each given task, without downloading the results."""

        def make_requests(task_ids: list[int]):
            for task_id in task_ids:
                export_task_backup(self.user, id=task_id, download_result=False)
                export_task_dataset(self.user, save_images=True, id=task_id, download_result=False)
                export_task_dataset(self.user, save_images=False, id=task_id, download_result=False)

        return make_requests

    @pytest.fixture
    def fxt_make_export_job_requests(self):
        """Return a helper that starts dataset/annotations exports for each
        given job, using two different formats so the "format" filter has
        multiple values to match."""

        def make_requests(job_ids: list[int]):
            for job_id in job_ids:
                export_job_dataset(
                    self.user,
                    save_images=True,
                    id=job_id,
                    format="COCO 1.0",
                    download_result=False,
                )
                export_job_dataset(
                    self.user,
                    save_images=False,
                    id=job_id,
                    format="YOLO 1.1",
                    download_result=False,
                )

        return make_requests

    @pytest.mark.parametrize(
        "simple_filter, values",
        [
            ("subresource", ["annotations", "dataset", "backup"]),
            ("action", ["create", "export", "import"]),
            ("status", ["finished", "failed"]),
            # id-based filter values are resolved at runtime from the fixture
            ("project_id", []),
            ("task_id", []),
            ("job_id", []),
            ("format", ["CVAT for images 1.1", "COCO 1.0", "YOLO 1.1"]),
            ("target", ["project", "task", "job"]),
        ],
    )
    def test_can_use_simple_filter_for_object_list(
        self, simple_filter: str, values: list, fxt_resources_ids, fxt_make_requests
    ):
        """Each simple filter on /api/requests returns exactly the matching requests."""
        project_ids, task_ids, job_ids = fxt_resources_ids
        fxt_make_requests(project_ids, task_ids, job_ids)

        if simple_filter in ("project_id", "task_id", "job_id"):
            # check last project|task|job
            if simple_filter == "project_id":
                values = project_ids[-1:]
            elif simple_filter == "task_id":
                values = task_ids[-1:]
            else:
                values = job_ids[-1:]

        # snapshot the full (unfiltered) collection for the base-class check
        with make_api_client(self.user) as api_client:
            self.samples = get_paginated_collection(
                self._get_endpoint(api_client), return_json=True
            )

        return super()._test_can_use_simple_filter_for_object_list(simple_filter, values)

    def test_list_requests_when_there_is_job_with_non_regular_or_corrupted_meta(
        self, jobs: Container, admin_user: str, request: pytest.FixtureRequest
    ):
        """The list endpoint must skip RQ jobs whose Redis "meta" hash was lost
        instead of failing the whole listing."""
        job = next(iter(jobs))
        # create two background requests to have one to corrupt and one to keep
        export_job_dataset(admin_user, save_images=True, id=job["id"], download_result=False)
        export_job_dataset(admin_user, save_images=False, id=job["id"], download_result=False)

        with make_api_client(admin_user) as api_client:
            background_requests, response = api_client.requests_api.list(_check_status=False)
            assert response.status == HTTPStatus.OK
            assert 2 == background_requests.count

            # corrupt the first listed request by deleting its RQ job "meta"
            # field directly in Redis
            corrupted_job, normal_job = background_requests.results
            corrupted_job_key = f"rq:job:{corrupted_job['id']}"
            remove_meta_command = f'redis-cli -e HDEL "{corrupted_job_key}" meta'

            if request.config.getoption("--platform") == "local":
                stdout, _ = docker_exec_redis_inmem(["sh", "-c", remove_meta_command])
            else:
                # in-cluster redis requires authentication via REDISCLI_AUTH
                stdout, _ = kube_exec_redis_inmem(
                    [
                        "sh",
                        "-c",
                        'export REDISCLI_AUTH="${REDIS_PASSWORD}" && ' + remove_meta_command,
                    ]
                )
            # HDEL prints the number of removed fields; expect a non-zero count
            assert bool(int(stdout.strip()))

            # the corrupted request should now be filtered out of the listing
            _, response = api_client.requests_api.list(_check_status=False, _parse_response=False)
            assert response.status == HTTPStatus.OK
            background_requests = json.loads(response.data)

            assert 1 == background_requests["count"]
            assert normal_job.id == background_requests["results"][0]["id"]
@pytest.mark.usefixtures("restore_db_per_class")
@pytest.mark.usefixtures("restore_redis_inmem_per_function")
class TestGetRequests:
    """Tests for retrieving a single background request, including access
    control and lookup via deprecated ("legacy") request IDs."""

    def _test_get_request_200(
        self, api_client: ApiClient, rq_id: str, validate_rq_id: bool = True, **kwargs
    ) -> models.Request:
        """Retrieve a request and assert HTTP 200.

        When ``validate_rq_id`` is False the returned request id is allowed to
        differ from ``rq_id`` (used for legacy-id lookups, where the server
        resolves the old id to the current one).
        """
        (background_request, response) = api_client.requests_api.retrieve(rq_id, **kwargs)
        assert response.status == HTTPStatus.OK
        if validate_rq_id:
            assert background_request.id == rq_id

        return background_request

    def _test_get_request_403(self, api_client: ApiClient, rq_id: str):
        """Retrieve a request and assert access is denied (HTTP 403)."""
        (_, response) = api_client.requests_api.retrieve(
            rq_id, _parse_response=False, _check_status=False
        )
        assert response.status == HTTPStatus.FORBIDDEN

    @pytest.mark.parametrize("format_name", ("CVAT for images 1.1",))
    @pytest.mark.parametrize("save_images", (True, False))
    def test_owner_can_retrieve_request(self, format_name: str, save_images: bool, projects):
        """A project owner can retrieve their own export request, and its
        fields (timestamps, operation info, owner, result URL) are consistent."""
        # pick a project that has an owner and a local target storage
        project = next(
            p
            for p in projects
            if p["owner"] and (p["target_storage"] or {}).get("location") == "local"
        )
        owner = project["owner"]
        subresource = "dataset" if save_images else "annotations"

        request_id = export_project_dataset(
            owner["username"],
            save_images=save_images,
            id=project["id"],
            download_result=False,
            format=format_name,
        )

        with make_api_client(owner["username"]) as owner_client:
            bg_request = self._test_get_request_200(owner_client, request_id)

            # lifecycle timestamps must be strictly ordered
            assert (
                bg_request.created_date
                < bg_request.started_date
                < bg_request.finished_date
                < bg_request.expiry_date
            )
            assert bg_request.operation.format == format_name
            assert bg_request.operation.project_id == project["id"]
            assert bg_request.operation.target == "project"
            assert bg_request.operation.task_id is None
            assert bg_request.operation.job_id is None
            assert bg_request.operation.type == f"export:{subresource}"
            assert bg_request.owner.id == owner["id"]
            assert bg_request.owner.username == owner["username"]

            # the download link must be a fully-formed URL with a query string
            parsed_url = urlparse(bg_request.result_url)
            assert all([parsed_url.scheme, parsed_url.netloc, parsed_url.path, parsed_url.query])

    def test_non_owner_cannot_retrieve_request(self, find_users, projects):
        """A user unrelated to the export request gets 403 when retrieving it."""
        project = next(
            p
            for p in projects
            if p["owner"] and (p["target_storage"] or {}).get("location") == "local"
        )
        owner = project["owner"]
        malefactor = find_users(exclude_username=owner["username"])[0]

        request_id = export_project_dataset(
            owner["username"],
            save_images=True,
            id=project["id"],
            download_result=False,
        )

        with make_api_client(malefactor["username"]) as malefactor_client:
            self._test_get_request_403(malefactor_client, request_id)

    def _test_get_request_using_legacy_id(
        self,
        legacy_request_id: str,
        username: str,
        *,
        action: str,
        target_type: str,
        subresource: Optional[str] = None,
    ):
        """Check that retrieving by a legacy request ID resolves to the same
        request that the list endpoint returns for the matching filters."""
        with make_api_client(username) as api_client:
            # exactly one request is expected to match the given filters
            bg_requests, _ = api_client.requests_api.list(
                target=target_type,
                action=action,
                **({"subresource": subresource} if subresource else {}),
            )
            assert len(bg_requests.results) == 1
            request_id = bg_requests.results[0].id

            # legacy id differs from the current id, so skip the id equality
            # check inside the helper and compare explicitly below
            bg_request = self._test_get_request_200(
                api_client, legacy_request_id, validate_rq_id=False
            )
            assert bg_request.id == request_id

    @pytest.mark.parametrize("target_type", ("project", "task", "job"))
    @pytest.mark.parametrize("save_images", (True, False))
    @pytest.mark.parametrize("export_format", ("CVAT for images 1.1",))
    @pytest.mark.parametrize("import_format", ("CVAT 1.1",))
    def test_can_retrieve_dataset_import_export_requests_using_legacy_ids(
        self,
        target_type: str,
        save_images: bool,
        export_format: str,
        import_format: str,
        projects,
        tasks,
        jobs,
    ):
        """Dataset/annotations export and import requests remain retrievable
        via their pre-redesign ("legacy") request IDs."""

        # reconstructs the old export request id scheme, e.g.
        # "export:project-1-dataset-in-CVAT_for_images_1@1-format-by-2"
        def build_legacy_id_for_export_request(
            *,
            target_type: str,
            target_id: int,
            subresource: str,
            format_name: str,
            user_id: int,
        ):
            return f"export:{target_type}-{target_id}-{subresource}-in-{format_name.replace(' ', '_').replace('.', '@')}-format-by-{user_id}"

        def build_legacy_id_for_import_request(
            *,
            target_type: str,
            target_id: int,
            subresource: str,
        ):
            return f"import:{target_type}-{target_id}-{subresource}"

        # pick a target resource and the matching export/import helpers
        if target_type == "project":
            export_func, import_func = export_project_dataset, import_project_dataset
            target = next(iter(projects))
            owner = target["owner"]
        elif target_type == "task":
            export_func, import_func = export_task_dataset, import_task_annotations
            target = next(iter(tasks))
            owner = target["owner"]
        else:
            assert target_type == "job"
            export_func, import_func = export_job_dataset, import_job_annotations
            target = next(iter(jobs))
            # jobs carry no owner field; use the parent task's owner
            owner = tasks[target["task_id"]]["owner"]

        target_id = target["id"]
        subresource = "dataset" if save_images else "annotations"

        # export and keep the file for the import half of the test
        file_content = io.BytesIO(
            export_func(
                owner["username"],
                save_images=save_images,
                format=export_format,
                id=target_id,
            )
        )
        file_content.name = "file.zip"

        legacy_request_id = build_legacy_id_for_export_request(
            target_type=target_type,
            target_id=target["id"],
            subresource=subresource,
            format_name=export_format,
            user_id=owner["id"],
        )
        self._test_get_request_using_legacy_id(
            legacy_request_id,
            owner["username"],
            action="export",
            target_type=target_type,
            subresource=subresource,
        )

        # check import requests
        # ("and" binds tighter than "or" here, so this skips exactly the two
        # unsupported combinations listed below)
        if not save_images and target_type == "project" or save_images and target_type != "project":
            # skip:
            # importing annotations into a project
            # importing datasets into a task or job
            return

        import_func(
            owner["username"],
            file_content=file_content,
            id=target_id,
            format=import_format,
        )

        legacy_request_id = build_legacy_id_for_import_request(
            target_type=target_type, target_id=target_id, subresource=subresource
        )
        self._test_get_request_using_legacy_id(
            legacy_request_id,
            owner["username"],
            action="import",
            target_type=target_type,
            subresource=subresource,
        )

    @pytest.mark.parametrize("target_type", ("project", "task"))
    def test_can_retrieve_backup_import_export_requests_using_legacy_ids(
        self,
        target_type: str,
        projects,
        tasks,
    ):
        """Backup export and import requests remain retrievable via legacy IDs."""

        def build_legacy_id_for_export_request(
            *,
            target_type: str,
            target_id: int,
            user_id: int,
        ):
            return f"export:{target_type}-{target_id}-backup-by-{user_id}"

        # backup imports create a new resource, so the legacy id is keyed by a
        # uuid instead of a target id
        def build_legacy_id_for_import_request(
            *,
            target_type: str,
            uuid_: str,
        ):
            return f"import:{target_type}-{uuid_}-backup"

        if target_type == "project":
            export_func, import_func = export_project_backup, import_project_backup
            target = next(iter(projects))
        else:
            assert target_type == "task"
            export_func, import_func = export_task_backup, import_task_backup
            target = next(iter(tasks))

        owner = target["owner"]

        # check export requests
        backup_file = io.BytesIO(
            export_func(
                owner["username"],
                id=target["id"],
            )
        )
        backup_file.name = "file.zip"

        legacy_request_id = build_legacy_id_for_export_request(
            target_type=target_type, target_id=target["id"], user_id=owner["id"]
        )
        self._test_get_request_using_legacy_id(
            legacy_request_id,
            owner["username"],
            action="export",
            target_type=target_type,
            subresource="backup",
        )

        # check import requests
        result_id = import_func(
            owner["username"],
            file_content=backup_file,
        ).id
        # the returned id embeds a query string; its "id" parameter is the
        # uuid used by the legacy import id scheme
        legacy_request_id = build_legacy_id_for_import_request(
            target_type=target_type, uuid_=dict(parse_qsl(result_id))["id"]
        )
        self._test_get_request_using_legacy_id(
            legacy_request_id,
            owner["username"],
            action="import",
            target_type=target_type,
            subresource="backup",
        )

    def test_can_retrieve_task_creation_requests_using_legacy_ids(self, admin_user: str):
        """Task-creation requests remain retrievable via the legacy
        "create:task-<id>" request ID."""
        task_id = create_task(
            admin_user,
            spec={"name": "Test task", "labels": [{"name": "car"}]},
            data={
                "image_quality": 75,
                "client_files": generate_image_files(2),
                "segment_size": 1,
            },
        )[0]

        legacy_request_id = f"create:task-{task_id}"
        self._test_get_request_using_legacy_id(
            legacy_request_id, admin_user, action="create", target_type="task"
        )

    def test_can_retrieve_quality_calculation_requests_using_legacy_ids(self, jobs, tasks):
        """Quality-report calculation requests remain retrievable via their
        legacy "quality-check-task-..." IDs, both through the common request
        API and the deprecated rq_id parameter of the quality API."""
        # a completed ground-truth job is required for quality calculation
        gt_job = next(
            j
            for j in jobs
            if (
                j["type"] == "ground_truth"
                and j["stage"] == "acceptance"
                and j["state"] == "completed"
            )
        )
        task_id = gt_job["task_id"]
        owner = tasks[task_id]["owner"]
        legacy_request_id = f"quality-check-task-{task_id}-user-{owner['id']}"

        with make_api_client(owner["username"]) as api_client:
            # initiate quality report calculation
            (_, response) = api_client.quality_api.create_report(
                quality_report_create_request=models.QualityReportCreateRequest(task_id=task_id),
                _parse_response=False,
            )
            assert response.status == HTTPStatus.ACCEPTED
            request_id = json.loads(response.data)["rq_id"]

            # get background request details using common request API
            bg_request = self._test_get_request_200(
                api_client, legacy_request_id, validate_rq_id=False
            )
            assert bg_request.id == request_id

            # get quality report by legacy request ID using the deprecated API endpoint
            wait_background_request(api_client, request_id)
            api_client.quality_api.create_report(
                quality_report_create_request=models.QualityReportCreateRequest(task_id=task_id),
                rq_id=request_id,
            )