# Copyright (C) CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import cv2
import numpy as np
import onnxruntime as ort


class ModelHandler:
    def __init__(self, labels):
        self.model = None
        self.load_network(model="yolov7-nms-640.onnx")
        self.labels = labels

    def load_network(self, model):
        # Prefer the CUDA execution provider when onnxruntime reports a GPU device
        device = ort.get_device()
        cuda = device == 'GPU'
        try:
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            so = ort.SessionOptions()
            so.log_severity_level = 3

            self.model = ort.InferenceSession(model, providers=providers, sess_options=so)
            self.output_details = [i.name for i in self.model.get_outputs()]
            self.input_details = [i.name for i in self.model.get_inputs()]

            self.is_initiated = True
        except Exception as e:
            raise RuntimeError(f"Cannot load model {model}: {e}") from e

    def letterbox(self, im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
        # Resize and pad image while meeting stride-multiple constraints.
        # Returns the padded image, the scale ratio and the (dw, dh) padding per side.
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding

        if auto:  # minimum rectangle
            dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding

        dw /= 2  # divide padding into 2 sides
        dh /= 2

        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return im, r, (dw, dh)
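
    # Worked example (illustrative, not part of the original handler): for a
    # 480x640 BGR frame with new_shape=(640, 640) and auto=False, r = 1.0,
    # new_unpad = (640, 480), dw = 0 and dh = 160, so the frame is padded with
    # 80 gray pixels on top and bottom and letterbox returns
    # (padded_image, 1.0, (0.0, 80.0)).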

    def _infer(self, inputs: np.ndarray):
        try:
            # Preprocess: BGR -> RGB, letterbox to the network input size, NCHW float32 in [0, 1]
            img = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB)
            image = img.copy()
            image, ratio, dwdh = self.letterbox(image, auto=False)
            image = image.transpose((2, 0, 1))
            image = np.expand_dims(image, 0)
            image = np.ascontiguousarray(image)

            im = image.astype(np.float32)
            im /= 255

            # ONNX inference; the NMS-enabled export returns one row per detection:
            # [batch_id, x1, y1, x2, y2, class_id, score]
            inp = {self.input_details[0]: im}
            detections = self.model.run(self.output_details, inp)[0]

            boxes = detections[:, 1:5]
            labels = detections[:, 5]
            scores = detections[:, -1]

            # Undo the letterbox transform: subtract the padding (repeated for
            # x1, y1, x2, y2) and divide by the scale ratio
            boxes -= np.array(dwdh * 2)
            boxes /= ratio
            boxes = boxes.round().astype(np.int32)

            return [boxes, labels, scores]

        except Exception as e:
            # Inference errors are logged and surface as an empty result downstream
            print(e)

    def infer(self, image, threshold):
        # CVAT passes a PIL (RGB) image; convert it to a BGR numpy array for OpenCV
        image = np.array(image)
        image = image[:, :, ::-1].copy()
        h, w, _ = image.shape
        detections = self._infer(image)

        results = []
        if detections:
            boxes, labels, scores = detections

            for label, score, box in zip(labels, scores, boxes):
                if score >= threshold:
                    # Clamp the box to the image bounds
                    xtl = max(int(box[0]), 0)
                    ytl = max(int(box[1]), 0)
                    xbr = min(int(box[2]), w)
                    ybr = min(int(box[3]), h)

                    results.append({
                        "confidence": str(score),
                        "label": self.labels.get(int(label), "unknown"),
                        "points": [xtl, ytl, xbr, ybr],
                        "type": "rectangle",
                    })

        return results
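

# Minimal usage sketch (assumption: not part of the original handler, which is
# normally driven by the CVAT/Nuclio entry point). The label mapping and the
# image path below are hypothetical placeholders; the ONNX model file must be
# present in the working directory.
if __name__ == "__main__":
    from PIL import Image

    labels = {0: "person", 1: "bicycle", 2: "car"}  # hypothetical class-id -> name mapping
    handler = ModelHandler(labels)

    image = Image.open("sample.jpg")  # hypothetical input image
    for detection in handler.infer(image, threshold=0.5):
        print(detection["label"], detection["confidence"], detection["points"])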