cvat/serverless/openvino/omz/public/yolo-v3-tf/nuclio/model_handler.py

176 lines
7.6 KiB
Python

# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os
from math import exp
import ngraph
from model_loader import ModelLoader
class YoloParams:
    """Decoding parameters of a single YOLO output layer.

    The values are read from the layer's attribute mapping; the fallback
    values (magic numbers) are copied from the YOLO samples.
    """

    def __init__(self, param, side):
        """Extract the layer parameters.

        Args:
            param: mapping of layer attributes, queried with ``get`` for the
                keys ``num``, ``coords``, ``classes``, ``anchors`` and
                ``mask``.
            side: grid size of the layer; stored unchanged as ``self.side``.
        """
        fallback_anchors = [
            10.0, 13.0, 16.0, 30.0, 33.0, 23.0, 30.0, 61.0, 62.0, 45.0, 59.0,
            119.0, 116.0, 90.0, 156.0, 198.0, 373.0, 326.0,
        ]

        self.num = param.get('num', 3)
        self.coords = param.get('coords', 4)
        self.classes = param.get('classes', 80)
        self.side = side
        self.anchors = param.get('anchors', fallback_anchors)
        self.isYoloV3 = False

        selected = param.get('mask')
        if not selected:
            # A missing or empty mask leaves the defaults read above in place.
            return

        # Keep only the (first, second) anchor pairs picked by the mask, in
        # mask order; the number of boxes per cell follows the mask length.
        self.num = len(selected)
        self.anchors = [
            self.anchors[idx * 2 + part] for idx in selected for part in (0, 1)
        ]
        # Weak way to determine the topology, but the only one available.
        self.isYoloV3 = True
def entry_index(side, coord, classes, location, entry):
    """Return the flat offset of one entry for a given location.

    ``location`` is split into a group number (``location // side**2``) and
    a cell offset (``location % side**2``).  Each group occupies
    ``coord + classes + 1`` consecutive planes of ``side**2`` values, and
    ``entry`` selects the plane inside the group.

    Returns:
        The computed offset converted to ``int``.
    """
    plane_size = side ** 2
    group, cell = divmod(location, plane_size)
    planes_per_group = coord + classes + 1
    return int(plane_size * (group * planes_per_group + entry) + cell)
def scale_bbox(x, y, h, w, class_id, confidence, h_scale, w_scale):
    """Convert a centre/size box into scaled integer corner coordinates.

    ``x``/``y`` are treated as the box centre and ``w``/``h`` as its size;
    horizontal values are multiplied by ``w_scale`` and vertical ones by
    ``h_scale``.  The far corner is derived from the already truncated near
    corner plus the scaled size.

    Returns:
        dict with the keys ``xmin``, ``xmax``, ``ymin``, ``ymax``,
        ``class_id`` and ``confidence``.
    """
    left = int((x - w / 2) * w_scale)
    top = int((y - h / 2) * h_scale)
    return {
        'xmin': left,
        'xmax': int(left + w * w_scale),
        'ymin': top,
        'ymax': int(top + h * h_scale),
        'class_id': class_id,
        'confidence': confidence,
    }
def parse_yolo_region(blob, resized_image_shape, original_im_shape, params, threshold):
    """Decode one YOLO output blob into a list of candidate detections.

    Args:
        blob: output array; its ``shape`` is unpacked as four dimensions and
            the last two must be equal.
        resized_image_shape: ``(height, width)`` used to normalise box sizes
            when ``params.isYoloV3`` is set.
        original_im_shape: ``(height, width)`` the boxes are scaled to.
        params: layer parameters providing ``side``, ``num``, ``coords``,
            ``classes``, ``anchors`` and ``isYoloV3``.
        threshold: minimal value for both the object score and the per-class
            confidence (object score multiplied by the class score).

    Returns:
        list of dicts as produced by ``scale_bbox``, one per (cell, anchor,
        class) combination that reaches the threshold.
    """
    # ------------------------------------------ Validating output parameters ------------------------------------------
    _, _, blob_rows, blob_cols = blob.shape
    assert blob_cols == blob_rows, (
        "Invalid size of output blob. It should be in NCHW layout and height should "
        "be equal to width. Current height = {}, current width = {}"
        "".format(blob_rows, blob_cols)
    )

    # ------------------------------------------ Extracting layer parameters -------------------------------------------
    image_h, image_w = original_im_shape
    net_h, net_w = resized_image_shape
    detections = []
    flat = blob.flatten()
    cells = params.side * params.side

    def offset(location, entry):
        # Flat index of plane `entry` for the prediction at `location`.
        return entry_index(params.side, params.coords, params.classes, location, entry)

    # ------------------------------------------- Parsing YOLO Region output -------------------------------------------
    for cell in range(cells):
        row, col = divmod(cell, params.side)
        for anchor in range(params.num):
            location = anchor * cells + cell

            objectness = flat[offset(location, params.coords)]
            if objectness < threshold:
                continue

            # Box planes follow each other at a distance of `cells` values.
            box = offset(location, 0)

            # Network produces location predictions in absolute coordinates of feature maps.
            # Scale it to relative coordinates.
            x = (col + flat[box]) / params.side
            y = (row + flat[box + cells]) / params.side

            # exp() may overflow for very large raw values; such predictions are dropped.
            try:
                w_exp = exp(flat[box + 2 * cells])
                h_exp = exp(flat[box + 3 * cells])
            except OverflowError:
                continue

            # Depending on topology, sizes are normalised by the feature map (up to YOLOv3)
            # or by the network input shape (YOLOv3).
            if params.isYoloV3:
                w_norm, h_norm = net_w, net_h
            else:
                w_norm = h_norm = params.side
            w = w_exp * params.anchors[2 * anchor] / w_norm
            h = h_exp * params.anchors[2 * anchor + 1] / h_norm

            for class_id in range(params.classes):
                score = flat[offset(location, params.coords + 1 + class_id)]
                confidence = objectness * score
                if confidence < threshold:
                    continue
                detections.append(scale_bbox(
                    x=x, y=y, h=h, w=w, class_id=class_id, confidence=confidence,
                    h_scale=image_h, w_scale=image_w))

    return detections
def intersection_over_union(box_1, box_2):
    """Return the intersection-over-union ratio of two boxes.

    Both arguments are mappings with the keys ``xmin``, ``xmax``, ``ymin``
    and ``ymax``.  Boxes that do not overlap contribute an intersection of
    zero, and ``0`` is returned when the union area is zero.
    """
    def area(box):
        return (box['ymax'] - box['ymin']) * (box['xmax'] - box['xmin'])

    overlap_w = min(box_1['xmax'], box_2['xmax']) - max(box_1['xmin'], box_2['xmin'])
    overlap_h = min(box_1['ymax'], box_2['ymax']) - max(box_1['ymin'], box_2['ymin'])

    # A negative extent on either axis means the boxes are disjoint.
    overlap = 0 if (overlap_w < 0 or overlap_h < 0) else overlap_w * overlap_h

    union = area(box_1) + area(box_2) - overlap
    return 0 if union == 0 else overlap / union
class ModelHandler:
    """Loads the yolo-v3-tf model and turns its raw output into rectangles."""

    def __init__(self, labels):
        """Load the network and collect decoding parameters of its outputs.

        The model directory is taken from the ``MODEL_PATH`` environment
        variable, falling back to the Open Model Zoo FP32 location.

        Args:
            labels: mapping queried with ``get(class_id, "unknown")`` when
                results are built; stored as ``self.labels``.
        """
        base_dir = os.path.abspath(os.environ.get("MODEL_PATH",
            "/opt/nuclio/open_model_zoo/public/yolo-v3-tf/FP32"))
        model_xml = os.path.join(base_dir, "yolo-v3-tf.xml")
        model_bin = os.path.join(base_dir, "yolo-v3-tf.bin")
        self.model = ModelLoader(model_xml, model_bin)
        self.labels = labels

        # Maps output layer name -> (shape of the layer's input, YoloParams).
        ng_func = ngraph.function_from_cnn(self.model.network)
        self.output_info = {}
        for node in ng_func.get_ordered_ops():
            layer_name = node.get_friendly_name()
            if layer_name not in self.model.network.outputs:
                continue
            # The shape is read from the node feeding the output layer; its
            # third dimension is used as the grid size.
            parent_node = node.inputs()[0].get_source_output().get_node()
            shape = list(parent_node.shape)
            yolo_params = YoloParams(node._get_attributes(), shape[2])
            self.output_info[layer_name] = (shape, yolo_params)

    def infer(self, image, threshold):
        """Run the model on ``image`` and return the surviving detections.

        Args:
            image: object accepted by ``self.model.infer`` that exposes
                ``height`` and ``width`` attributes.
            threshold: minimal confidence of a returned detection; also
                passed to ``parse_yolo_region``.

        Returns:
            list of dicts with the keys ``confidence`` (string), ``label``,
            ``points`` (``[xtl, ytl, xbr, ybr]`` clipped to the image) and
            ``type`` (always ``"rectangle"``).
        """
        output_layer = self.model.infer(image)

        # Collecting object detection results
        objects = []
        origin_im_size = (image.height, image.width)
        for layer_name, out_blob in output_layer.items():
            shape, yolo_params = self.output_info[layer_name]
            out_blob = out_blob.reshape(shape)
            objects += parse_yolo_region(out_blob, self.model.input_size(),
                origin_im_size, yolo_params, threshold)

        # Filtering overlapping boxes (non-maximum suppression).
        # Suppressed boxes are tracked by index rather than by overwriting
        # their confidence with 0: with the sentinel approach a threshold
        # of 0 (or below) let every suppressed box pass the final
        # `confidence >= threshold` filter and be reported.
        IOU_THRESHOLD = 0.4
        objects = sorted(objects, key=lambda obj: obj['confidence'], reverse=True)
        suppressed = set()
        for i, obj in enumerate(objects):
            if i in suppressed:
                continue
            for j in range(i + 1, len(objects)):
                if j in suppressed:
                    continue
                if intersection_over_union(obj, objects[j]) > IOU_THRESHOLD:
                    suppressed.add(j)

        results = []
        for i, obj in enumerate(objects):
            if i in suppressed:
                continue
            if obj['confidence'] >= threshold:
                # Clip the box to the image bounds.
                xtl = max(obj['xmin'], 0)
                ytl = max(obj['ymin'], 0)
                xbr = min(obj['xmax'], image.width)
                ybr = min(obj['ymax'], image.height)
                obj_class = int(obj['class_id'])

                results.append({
                    "confidence": str(obj['confidence']),
                    "label": self.labels.get(obj_class, "unknown"),
                    "points": [xtl, ytl, xbr, ybr],
                    "type": "rectangle",
                })

        return results