import math

import numpy as np
import scipy.ndimage as ndi

from .._shared.utils import check_nD, _supported_float_type
from ..feature.util import DescriptorExtractor, FeatureDetector
from .._shared.filters import gaussian
from ..transform import rescale
from ..util import img_as_float
from ._sift import (_local_max, _ori_distances, _update_histogram)


def _edgeness(hxx, hyy, hxy):
    """Compute edgeness (eq. 18 of Otero et al. IPOL paper)"""
    trace = hxx + hyy
    determinant = hxx * hyy - hxy * hxy
    return (trace * trace) / determinant
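

# Note on _edgeness: writing the two Hessian eigenvalues as r * lam and lam,
# the ratio tr(H)^2 / det(H) equals (r + 1)^2 / r, so comparing it against
# (c_edge + 1)^2 / c_edge rejects extrema whose principal curvatures differ
# by more than a factor of c_edge.  A minimal numeric sketch (values are
# illustrative only)::
#
#     hxx, hyy, hxy = 10.0, 1.0, 0.0    # eigenvalue ratio r = 10
#     _edgeness(hxx, hyy, hxy)          # -> (10 + 1)**2 / 10 = 12.1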


def _sparse_gradient(vol, positions):
    """Gradient of a 3D volume at the provided `positions`.

    For SIFT we only need the gradient at specific positions and do not need
    the gradient at the edge positions, so we can just use this simple
    implementation instead of numpy.gradient.
    """
    p0 = positions[..., 0]
    p1 = positions[..., 1]
    p2 = positions[..., 2]
    g0 = vol[p0 + 1, p1, p2] - vol[p0 - 1, p1, p2]
    g0 *= 0.5
    g1 = vol[p0, p1 + 1, p2] - vol[p0, p1 - 1, p2]
    g1 *= 0.5
    g2 = vol[p0, p1, p2 + 1] - vol[p0, p1, p2 - 1]
    g2 *= 0.5
    return g0, g1, g2
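

# Illustrative equivalence sketch for _sparse_gradient (not executed at import
# time): at interior positions it matches numpy.gradient, which also uses
# central differences with unit spacing::
#
#     vol = np.random.rand(8, 8, 8)
#     pos = np.array([[3, 4, 5]])
#     g0, g1, g2 = _sparse_gradient(vol, pos)
#     full = np.gradient(vol)
#     assert np.allclose(
#         [g0[0], g1[0], g2[0]],
#         [full[0][3, 4, 5], full[1][3, 4, 5], full[2][3, 4, 5]])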


def _hessian(d, positions):
    """Compute the non-redundant 3D Hessian terms at the requested positions.

    Source: "Anatomy of the SIFT Method" p.380 (13)
    """
    p0 = positions[..., 0]
    p1 = positions[..., 1]
    p2 = positions[..., 2]
    two_d0 = 2 * d[p0, p1, p2]
    # 0 = row, 1 = col, 2 = scale (within the octave)
    h00 = d[p0 - 1, p1, p2] + d[p0 + 1, p1, p2] - two_d0
    h11 = d[p0, p1 - 1, p2] + d[p0, p1 + 1, p2] - two_d0
    h22 = d[p0, p1, p2 - 1] + d[p0, p1, p2 + 1] - two_d0
    h01 = 0.25 * (d[p0 + 1, p1 + 1, p2] - d[p0 - 1, p1 + 1, p2]
                  - d[p0 + 1, p1 - 1, p2] + d[p0 - 1, p1 - 1, p2])
    h02 = 0.25 * (d[p0 + 1, p1, p2 + 1] - d[p0 + 1, p1, p2 - 1]
                  + d[p0 - 1, p1, p2 - 1] - d[p0 - 1, p1, p2 + 1])
    h12 = 0.25 * (d[p0, p1 + 1, p2 + 1] - d[p0, p1 + 1, p2 - 1]
                  + d[p0, p1 - 1, p2 - 1] - d[p0, p1 - 1, p2 + 1])
    return (h00, h11, h22, h01, h02, h12)
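

# The six values returned by _hessian are the unique entries of the symmetric
# 3x3 Hessian.  A sketch (for a single position triple) of how they would
# assemble into the full matrix consumed conceptually by _offsets::
#
#     h00, h11, h22, h01, h02, h12 = _hessian(d, positions)
#     H = np.array([[h00, h01, h02],
#                   [h01, h11, h12],
#                   [h02, h12, h22]])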


def _offsets(grad, hess):
    """Compute position refinement offsets from gradient and Hessian.

    This is equivalent to np.linalg.solve(-H, J) where H is the Hessian
    matrix and J is the gradient (Jacobian).

    This analytical solution is adapted from (BSD-licensed) C code by
    Otero et al. (see SIFT docstring References).
    """
    h00, h11, h22, h01, h02, h12 = hess
    g0, g1, g2 = grad
    det = h00 * h11 * h22
    det -= h00 * h12 * h12
    det -= h01 * h01 * h22
    det += 2 * h01 * h02 * h12
    det -= h02 * h02 * h11
    aa = (h11 * h22 - h12 * h12) / det
    ab = (h02 * h12 - h01 * h22) / det
    ac = (h01 * h12 - h02 * h11) / det
    bb = (h00 * h22 - h02 * h02) / det
    bc = (h01 * h02 - h00 * h12) / det
    cc = (h00 * h11 - h01 * h01) / det
    offset0 = -aa * g0 - ab * g1 - ac * g2
    offset1 = -ab * g0 - bb * g1 - bc * g2
    offset2 = -ac * g0 - bc * g1 - cc * g2
    return np.stack((offset0, offset1, offset2), axis=-1)
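

# _offsets evaluates the closed-form inverse of the symmetric 3x3 Hessian.
# An equivalent (but slower, per-keypoint) formulation using NumPy, sketched
# here for clarity only::
#
#     H = np.array([[h00, h01, h02],
#                   [h01, h11, h12],
#                   [h02, h12, h22]])
#     offset = np.linalg.solve(-H, np.array([g0, g1, g2]))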


class SIFT(FeatureDetector, DescriptorExtractor):
    """SIFT feature detection and descriptor extraction.

    Parameters
    ----------
    upsampling : int, optional
        Prior to the feature detection the image is upscaled by a factor
        of 1 (no upscaling), 2 or 4. Method: Bi-cubic interpolation.
    n_octaves : int, optional
        Maximum number of octaves. With every octave the image size is
        halved and the sigma doubled. The number of octaves will be
        reduced as needed to keep at least 12 pixels along each dimension
        at the smallest scale.
    n_scales : int, optional
        Maximum number of scales in every octave.
    sigma_min : float, optional
        The blur level of the seed image. If upsampling is enabled,
        sigma_min is scaled by a factor of 1/upsampling.
    sigma_in : float, optional
        The assumed blur level of the input image.
    c_dog : float, optional
        Threshold to discard low contrast extrema in the DoG. Its final
        value is dependent on n_scales by the relation:
        final_c_dog = (2^(1/n_scales)-1) / (2^(1/3)-1) * c_dog
    c_edge : float, optional
        Threshold to discard extrema that lie in edges. If H is the
        Hessian of an extremum, its "edgeness" is described by
        tr(H)²/det(H). If the edgeness is higher than
        (c_edge + 1)²/c_edge, the extremum is discarded.
    n_bins : int, optional
        Number of bins in the histogram that describes the gradient
        orientations around the keypoint.
    lambda_ori : float, optional
        The window used to find the reference orientation of a keypoint
        has a width of 6 * lambda_ori * sigma and is weighted by a
        standard deviation of 2 * lambda_ori * sigma.
    c_max : float, optional
        The threshold at which a secondary peak in the orientation
        histogram is accepted as an orientation.
    lambda_descr : float, optional
        The window used to define the descriptor of a keypoint has a width
        of 2 * lambda_descr * sigma * (n_hist+1)/n_hist and is weighted by
        a standard deviation of lambda_descr * sigma.
    n_hist : int, optional
        The window used to define the descriptor of a keypoint consists of
        n_hist * n_hist histograms.
    n_ori : int, optional
        The number of bins in the histograms of the descriptor patch.

    Attributes
    ----------
    delta_min : float
        The sampling distance of the first octave. Its final value is
        1/upsampling.
    float_dtype : type
        The datatype of the image.
    scalespace_sigmas : (n_octaves, n_scales + 3) array
        The sigma value of all scales in all octaves.
    keypoints : (N, 2) array
        Keypoint coordinates as ``(row, col)``.
    positions : (N, 2) array
        Subpixel-precision keypoint coordinates as ``(row, col)``.
    sigmas : (N, ) array
        The corresponding sigma (blur) value of a keypoint.
    scales : (N, ) array
        The corresponding scale of a keypoint.
    orientations : (N, ) array
        The orientations of the gradient around every keypoint.
    octaves : (N, ) array
        The corresponding octave of a keypoint.
    descriptors : (N, n_hist*n_hist*n_ori) array
        The descriptors of a keypoint.

    Notes
    -----
    The SIFT algorithm was developed by David Lowe [1]_, [2]_ and later
    patented by the University of British Columbia. Since the patent expired
    in 2020 it is free to use. The implementation here closely follows the
    detailed description in [3]_, including use of the same default
    parameters.

    References
    ----------
    .. [1] D.G. Lowe. "Object recognition from local scale-invariant
           features", Proceedings of the Seventh IEEE International
           Conference on Computer Vision, 1999, vol.2, pp. 1150-1157.
           :DOI:`10.1109/ICCV.1999.790410`

    .. [2] D.G. Lowe. "Distinctive Image Features from Scale-Invariant
           Keypoints", International Journal of Computer Vision, 2004,
           vol. 60, pp. 91–110.
           :DOI:`10.1023/B:VISI.0000029664.99615.94`

    .. [3] I. R. Otero and M. Delbracio. "Anatomy of the SIFT Method",
           Image Processing On Line, 4 (2014), pp. 370–396.
           :DOI:`10.5201/ipol.2014.82`

    Examples
    --------
    >>> from skimage.feature import SIFT, match_descriptors
    >>> from skimage.data import camera
    >>> from skimage.transform import rotate
    >>> img1 = camera()
    >>> img2 = rotate(camera(), 90)
    >>> detector_extractor1 = SIFT()
    >>> detector_extractor2 = SIFT()
    >>> detector_extractor1.detect_and_extract(img1)
    >>> detector_extractor2.detect_and_extract(img2)
    >>> matches = match_descriptors(detector_extractor1.descriptors,
    ...                             detector_extractor2.descriptors,
    ...                             max_ratio=0.6)
    >>> matches[10:15]
    array([[ 10, 412],
           [ 11, 417],
           [ 12, 407],
           [ 13, 411],
           [ 14, 406]])
    >>> detector_extractor1.keypoints[matches[10:15, 0]]
    array([[ 95, 214],
           [ 97, 211],
           [ 97, 218],
           [102, 215],
           [104, 218]])
    >>> detector_extractor2.keypoints[matches[10:15, 1]]
    array([[297,  95],
           [301,  97],
           [294,  97],
           [297, 102],
           [293, 104]])

    """

    def __init__(self, upsampling=2, n_octaves=8, n_scales=3, sigma_min=1.6,
                 sigma_in=0.5,
                 c_dog=0.04 / 3, c_edge=10, n_bins=36, lambda_ori=1.5,
                 c_max=0.8, lambda_descr=6, n_hist=4, n_ori=8):
        if upsampling in [1, 2, 4]:
            self.upsampling = upsampling
        else:
            raise ValueError("upsampling must be 1, 2 or 4")
        self.n_octaves = n_octaves
        self.n_scales = n_scales
        self.sigma_min = sigma_min / upsampling
        self.sigma_in = sigma_in
        self.c_dog = (2 ** (1 / n_scales) - 1) / (2 ** (1 / 3) - 1) * c_dog
        self.c_edge = c_edge
        self.n_bins = n_bins
        self.lambda_ori = lambda_ori
        self.c_max = c_max
        self.lambda_descr = lambda_descr
        self.n_hist = n_hist
        self.n_ori = n_ori
        self.delta_min = 1 / upsampling
        self.float_dtype = None
        self.scalespace_sigmas = None
        self.keypoints = None
        self.positions = None
        self.sigmas = None
        self.scales = None
        self.orientations = None
        self.octaves = None
        self.descriptors = None
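
    # With the default n_scales=3 the rescaling of c_dog in __init__ is the
    # identity: (2**(1/3) - 1) / (2**(1/3) - 1) * c_dog == c_dog, so the
    # threshold stays at 0.04 / 3.  For other n_scales it compensates for the
    # smaller DoG response between more finely spaced scales.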

    @property
    def deltas(self):
        """The sampling distances of all octaves"""
        deltas = self.delta_min * np.power(2, np.arange(self.n_octaves),
                                           dtype=self.float_dtype)
        return deltas

    def _set_number_of_octaves(self, image_shape):
        size_min = 12  # minimum size of last octave
        s0 = min(image_shape) * self.upsampling
        max_octaves = int(math.log2(s0 / size_min) + 1)
        if max_octaves < self.n_octaves:
            self.n_octaves = max_octaves
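
    # Worked example for _set_number_of_octaves: a 512 x 512 image with
    # upsampling=2 gives s0 = 1024 and int(log2(1024 / 12) + 1) = 7, so at
    # most 7 octaves are built even if n_octaves=8 was requested.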

    def _create_scalespace(self, image):
        """Source: "Anatomy of the SIFT Method" Alg. 1

        Construction of the scalespace by gradually blurring (scales) and
        downscaling (octaves) the image.
        """
        scalespace = []
        if self.upsampling > 1:
            image = rescale(image, self.upsampling, order=1)

        # smooth to sigma_min, assuming sigma_in
        image = gaussian(image,
                         self.upsampling * math.sqrt(
                             self.sigma_min ** 2 - self.sigma_in ** 2),
                         mode='reflect')

        # Eq. 10: sigmas.shape = (n_octaves, n_scales + 3).
        # The three extra scales are:
        # One for the differences needed for DoG and two auxiliary
        # images (one at either end) for peak_local_max with exclude
        # border = True (see Fig. 5)
        # The smoothing doubles after n_scales steps.
        tmp = np.power(2, np.arange(self.n_scales + 3) / self.n_scales)
        tmp *= self.sigma_min
        # all sigmas for the gaussian scalespace
        sigmas = (self.deltas[:, np.newaxis]
                  / self.deltas[0] * tmp[np.newaxis, :])
        self.scalespace_sigmas = sigmas

        # Eq. 7: Gaussian smoothing depends on difference with previous sigma
        # gaussian_sigmas.shape = (n_octaves, n_scales + 2)
        var_diff = np.diff(sigmas * sigmas, axis=1)
        gaussian_sigmas = np.sqrt(var_diff) / self.deltas[:, np.newaxis]

        # one octave is represented by a 3D image with depth (n_scales + 3)
        for o in range(self.n_octaves):
            # Temporarily put scales axis first so octave[i] is C-contiguous
            # (this makes Gaussian filtering faster).
            octave = np.empty((self.n_scales + 3,) + image.shape,
                              dtype=self.float_dtype, order='C')
            octave[0] = image
            for s in range(1, self.n_scales + 3):
                # blur new scale assuming sigma of the last one
                gaussian(octave[s - 1],
                         gaussian_sigmas[o, s - 1],
                         mode='reflect', output=octave[s])
            # move scales to last axis as expected by other methods
            scalespace.append(np.moveaxis(octave, 0, -1))
            if o < self.n_octaves - 1:
                # downscale the image by taking every second pixel
                image = octave[self.n_scales][::2, ::2]
        return scalespace
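
    # Numerical sketch of the scale-space sigmas with the default parameters
    # (n_scales=3, sigma_min=1.6, upsampling=2, hence self.sigma_min=0.8 and
    # delta_min=0.5), shown for illustration only:
    #
    #     octave 0 sigmas: 0.8 * 2**(np.arange(6) / 3)
    #                      -> [0.8, 1.008, 1.270, 1.6, 2.016, 2.540]
    #     octave o sigmas: the octave-0 values multiplied by 2**o
    #
    # and the incremental blur applied at scale s of octave o is
    # sqrt(sigmas[o, s]**2 - sigmas[o, s - 1]**2) / deltas[o] (Eq. 7).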

    def _inrange(self, a, dim):
        return ((a[:, 0] > 0) & (a[:, 0] < dim[0] - 1)
                & (a[:, 1] > 0) & (a[:, 1] < dim[1] - 1))

    def _find_localize_evaluate(self, dogspace, img_shape):
        """Source: "Anatomy of the SIFT Method" Alg. 4-9

        1) first find all extrema of a (3, 3, 3) neighborhood
        2) use second order Taylor development to refine the positions to
           sub-pixel precision
        3) filter out extrema that have low contrast and lie on edges or close
           to the image borders
        """
        extrema_pos = []
        extrema_scales = []
        extrema_sigmas = []
        threshold = self.c_dog * 0.8
        for o, (octave, delta) in enumerate(zip(dogspace, self.deltas)):
            # find extrema
            keys = _local_max(np.ascontiguousarray(octave), threshold)
            if keys.size == 0:
                continue

            # localize extrema
            oshape = octave.shape
            refinement_iterations = 5
            offset_max = 0.6
            for i in range(refinement_iterations):
                if i > 0:
                    # exclude any keys that have moved out of bounds
                    keys = keys[self._inrange(keys, oshape), :]

                # Jacobian and Hessian of all extrema
                grad = _sparse_gradient(octave, keys)
                hess = _hessian(octave, keys)

                # solve for offset of the extremum
                off = _offsets(grad, hess)
                if i == refinement_iterations - 1:
                    break
                # offset is too big and an increase would not bring us out of
                # bounds
                wrong_position_pos = np.logical_and(
                    off > offset_max,
                    keys + 1 < tuple([a - 1 for a in oshape])
                )
                wrong_position_neg = np.logical_and(
                    off < -offset_max,
                    keys - 1 > 0
                )
                if (not np.any(np.logical_or(wrong_position_neg,
                                             wrong_position_pos))):
                    break
                keys[wrong_position_pos] += 1
                keys[wrong_position_neg] -= 1

            # mask for all extrema that have been localized successfully
            finished = np.all(np.abs(off) < offset_max, axis=1)
            keys = keys[finished]
            off = off[finished]
            grad = [g[finished] for g in grad]

            # value of extremum in octave
            vals = octave[keys[:, 0], keys[:, 1], keys[:, 2]]
            # values at interpolated point
            w = vals
            for i in range(3):
                w += 0.5 * grad[i] * off[:, i]

            h00, h11, h01 = \
                hess[0][finished], hess[1][finished], hess[3][finished]

            sigmaratio = (self.scalespace_sigmas[0, 1]
                          / self.scalespace_sigmas[0, 0])

            # filter for contrast, edgeness and borders
            contrast_threshold = self.c_dog
            contrast_filter = np.abs(w) > contrast_threshold

            edge_threshold = np.square(self.c_edge + 1) / self.c_edge
            edge_response = _edgeness(h00[contrast_filter],
                                      h11[contrast_filter],
                                      h01[contrast_filter])
            edge_filter = np.abs(edge_response) <= edge_threshold

            keys = keys[contrast_filter][edge_filter]
            off = off[contrast_filter][edge_filter]
            yx = ((keys[:, :2] + off[:, :2]) * delta).astype(self.float_dtype)

            sigmas = self.scalespace_sigmas[o, keys[:, 2]] * np.power(
                sigmaratio, off[:, 2])
            border_filter = np.all(
                np.logical_and((yx - sigmas[:, np.newaxis]) > 0.0,
                               (yx + sigmas[:, np.newaxis]) < img_shape),
                axis=1)
            extrema_pos.append(yx[border_filter])
            extrema_scales.append(keys[border_filter, 2])
            extrema_sigmas.append(sigmas[border_filter])

        if not extrema_pos:
            raise RuntimeError(
                "SIFT found no features. Try passing in an image containing "
                "greater intensity contrasts between adjacent pixels.")

        octave_indices = np.concatenate([np.full(len(p), i)
                                         for i, p in enumerate(extrema_pos)])
        extrema_pos = np.concatenate(extrema_pos)
        extrema_scales = np.concatenate(extrema_scales)
        extrema_sigmas = np.concatenate(extrema_sigmas)
        return extrema_pos, extrema_scales, extrema_sigmas, octave_indices

    def _fit(self, h):
        """Refine the position of the peak by fitting it to a parabola"""
        return (h[0] - h[2]) / (2 * (h[0] + h[2] - 2 * h[1]))
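
    # _fit returns the sub-bin offset of the vertex of a parabola through
    # three equally spaced samples h[0], h[1], h[2] taken at x = -1, 0, +1:
    # the vertex lies at x = (h[0] - h[2]) / (2 * (h[0] + h[2] - 2 * h[1])).
    # For example h = [1, 3, 2] gives an offset of +1/6, i.e. slightly
    # towards h[2].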

    def _compute_orientation(self, positions_oct, scales_oct, sigmas_oct,
                             octaves, gaussian_scalespace):
        """Source: "Anatomy of the SIFT Method" Alg. 11

        Calculates the orientation of the gradient around every keypoint.
        """
        gradient_space = []
        # list for keypoints that have more than one reference orientation
        keypoint_indices = []
        keypoint_angles = []
        keypoint_octave = []
        orientations = np.zeros_like(sigmas_oct, dtype=self.float_dtype)
        key_count = 0
        for o, (octave, delta) in enumerate(zip(gaussian_scalespace,
                                                self.deltas)):
            in_oct = octaves == o
            positions = positions_oct[in_oct]
            scales = scales_oct[in_oct]
            sigmas = sigmas_oct[in_oct]

            gradient_space.append(np.gradient(octave))

            oshape = octave.shape[:2]
            # convert to octave's dimensions
            yx = positions / delta
            sigma = sigmas / delta

            # dimensions of the patch
            radius = 3 * self.lambda_ori * sigma
            p_min = np.maximum(0,
                               yx - radius[:, np.newaxis] + 0.5).astype(int)
            p_max = np.minimum(yx + radius[:, np.newaxis] + 0.5,
                               (oshape[0] - 1, oshape[1] - 1)).astype(int)
            # orientation histogram
            hist = np.empty(self.n_bins, dtype=self.float_dtype)
            avg_kernel = np.full((3,), 1 / 3, dtype=self.float_dtype)
            for k in range(len(yx)):
                hist[:] = 0

                # use the patch coordinates to get the gradient and then
                # normalize them
                r, c = np.meshgrid(np.arange(p_min[k, 0], p_max[k, 0] + 1),
                                   np.arange(p_min[k, 1], p_max[k, 1] + 1),
                                   indexing='ij', sparse=True)
                gradient_row = gradient_space[o][0][r, c, scales[k]]
                gradient_col = gradient_space[o][1][r, c, scales[k]]
                r = r.astype(self.float_dtype, copy=False)
                c = c.astype(self.float_dtype, copy=False)
                r -= yx[k, 0]
                c -= yx[k, 1]

                # gradient magnitude and angles
                magnitude = np.sqrt(np.square(gradient_row)
                                    + np.square(gradient_col))
                theta = np.mod(np.arctan2(gradient_col, gradient_row),
                               2 * np.pi)

                # more weight to center values
                kernel = np.exp(np.divide(r * r + c * c,
                                          -2 * (self.lambda_ori
                                                * sigma[k]) ** 2))

                # fill the histogram
                bins = np.floor((theta / (2 * np.pi) * self.n_bins + 0.5)
                                % self.n_bins).astype(int)
                np.add.at(hist, bins, kernel * magnitude)

                # smooth the histogram and find the maximum
                hist = np.concatenate((hist[-6:], hist, hist[:6]))
                for _ in range(6):  # number of smoothings
                    hist = np.convolve(hist, avg_kernel, mode='same')
                hist = hist[6:-6]
                max_filter = ndi.maximum_filter(hist, [3], mode='wrap')

                # if an angle lies within 80% (c_max) of the maximum, a
                # new keypoint is created for it
                maxima = np.nonzero(np.logical_and(
                    hist >= (self.c_max * np.max(hist)),
                    max_filter == hist))

                # save the angles
                for c, m in enumerate(maxima[0]):
                    neigh = np.arange(m - 1, m + 2) % len(hist)
                    # use neighbors to fit a parabola, to get a more accurate
                    # result
                    ori = ((m + self._fit(hist[neigh]) + 0.5)
                           * 2 * np.pi / self.n_bins)
                    if ori > np.pi:
                        ori -= 2 * np.pi
                    if c == 0:
                        orientations[key_count] = ori
                    else:
                        keypoint_indices.append(key_count)
                        keypoint_angles.append(ori)
                        keypoint_octave.append(o)
                key_count += 1
        self.positions = np.concatenate(
            (positions_oct, positions_oct[keypoint_indices]))
        self.scales = np.concatenate(
            (scales_oct, scales_oct[keypoint_indices]))
        self.sigmas = np.concatenate(
            (sigmas_oct, sigmas_oct[keypoint_indices]))
        self.orientations = np.concatenate(
            (orientations, keypoint_angles))
        self.octaves = np.concatenate(
            (octaves, keypoint_octave))
        # return the gradient_space to reuse it to find the descriptor
        return gradient_space
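
    # Note on the orientation histogram above: the histogram is padded with
    # 6 wrapped-around bins on each side before being box-filtered six times
    # with a length-3 kernel, which approximates a circular Gaussian
    # smoothing; secondary peaks within c_max (default 0.8) of the global
    # maximum spawn additional keypoints with their own reference
    # orientation.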

    def _rotate(self, row, col, angle):
        c = math.cos(angle)
        s = math.sin(angle)
        rot_row = c * row + s * col
        rot_col = -s * row + c * col
        return rot_row, rot_col
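
    # _rotate applies the plane rotation
    #     [rot_row]   [ cos(a)  sin(a)] [row]
    #     [rot_col] = [-sin(a)  cos(a)] [col]
    # i.e. it expresses the patch coordinates in the keypoint's own reference
    # orientation before they are binned into the descriptor histograms.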

    def _compute_descriptor(self, gradient_space):
        """Source: "Anatomy of the SIFT Method" Alg. 12

        Calculates the descriptor for every keypoint.
        """
        n_key = len(self.scales)
        self.descriptors = np.empty((n_key, self.n_hist ** 2 * self.n_ori),
                                    dtype=np.uint8)

        # indices of the histograms
        hists = np.arange(1, self.n_hist + 1, dtype=self.float_dtype)
        # indices of the bins
        bins = np.arange(1, self.n_ori + 1, dtype=self.float_dtype)

        key_numbers = np.arange(n_key)
        for o, (gradient, delta) in enumerate(zip(gradient_space,
                                                  self.deltas)):
            in_oct = self.octaves == o
            if not np.any(in_oct):
                continue
            positions = self.positions[in_oct]
            scales = self.scales[in_oct]
            sigmas = self.sigmas[in_oct]
            orientations = self.orientations[in_oct]
            numbers = key_numbers[in_oct]

            dim = gradient[0].shape[:2]
            center_pos = positions / delta
            sigma = sigmas / delta

            # dimensions of the patch
            radius = self.lambda_descr * (1 + 1 / self.n_hist) * sigma
            radius_patch = math.sqrt(2) * radius
            p_min = np.asarray(
                np.maximum(0, center_pos - radius_patch[:, np.newaxis] + 0.5),
                dtype=int)
            p_max = np.asarray(
                np.minimum(center_pos + radius_patch[:, np.newaxis] + 0.5,
                           (dim[0] - 1, dim[1] - 1)), dtype=int)

            for k in range(len(p_max)):
                rad_k = radius[k]
                ori = orientations[k]
                histograms = np.zeros((self.n_hist, self.n_hist, self.n_ori),
                                      dtype=self.float_dtype)
                # the patch
                r, c = np.meshgrid(np.arange(p_min[k, 0], p_max[k, 0]),
                                   np.arange(p_min[k, 1], p_max[k, 1]),
                                   indexing='ij', sparse=True)
                # normalized coordinates
                r_norm = np.subtract(r, center_pos[k, 0],
                                     dtype=self.float_dtype)
                c_norm = np.subtract(c, center_pos[k, 1],
                                     dtype=self.float_dtype)
                r_norm, c_norm = self._rotate(r_norm, c_norm, ori)

                # select coordinates and gradient values within the patch
                inside = np.maximum(np.abs(r_norm), np.abs(c_norm)) < rad_k
                r_norm, c_norm = r_norm[inside], c_norm[inside]
                r_idx, c_idx = np.nonzero(inside)
                r = r[r_idx, 0]
                c = c[0, c_idx]
                gradient_row = gradient[0][r, c, scales[k]]
                gradient_col = gradient[1][r, c, scales[k]]
                # compute the (relative) gradient orientation
                theta = np.arctan2(gradient_col, gradient_row) - ori
                lam_sig = self.lambda_descr * sigma[k]
                # Gaussian weighted kernel magnitude
                kernel = np.exp((r_norm * r_norm + c_norm * c_norm)
                                / (-2 * lam_sig ** 2))
                magnitude = np.sqrt(gradient_row * gradient_row
                                    + gradient_col * gradient_col) * kernel

                lam_sig_ratio = 2 * lam_sig / self.n_hist
                rc_bins = (hists - (1 + self.n_hist) / 2) * lam_sig_ratio
                rc_bin_spacing = lam_sig_ratio
                ori_bins = (2 * np.pi * bins) / self.n_ori

                # distances to the histograms and bins
                dist_r = np.abs(np.subtract.outer(rc_bins, r_norm))
                dist_c = np.abs(np.subtract.outer(rc_bins, c_norm))

                # the orientation histograms/bins that get the contribution
                near_t, near_t_val = _ori_distances(ori_bins, theta)

                # create the histogram
                _update_histogram(histograms, near_t, near_t_val, magnitude,
                                  dist_r, dist_c, rc_bin_spacing)

                # convert the histograms to a 1d descriptor
                histograms = histograms.reshape(-1)
                # saturate the descriptor
                histograms = np.minimum(histograms,
                                        0.2 * np.linalg.norm(histograms))
                # normalize the descriptor
                descriptor = (512 * histograms) / np.linalg.norm(histograms)
                # quantize the descriptor
                descriptor = np.minimum(np.floor(descriptor), 255)
                self.descriptors[numbers[k], :] = descriptor
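
    # Descriptor post-processing sketch (with the default parameters each
    # descriptor has n_hist * n_hist * n_ori = 128 entries): the histogram
    # vector is clipped at 0.2 times its L2 norm to limit the influence of
    # single strong gradients, rescaled by 512 divided by its new L2 norm,
    # floored and capped at 255 so it fits the uint8 descriptor array.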

    def _preprocess(self, image):
        check_nD(image, 2)
        image = img_as_float(image)
        self.float_dtype = _supported_float_type(image.dtype)
        image = image.astype(self.float_dtype, copy=False)

        self._set_number_of_octaves(image.shape)
        return image

    def detect(self, image):
        """Detect the keypoints.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        dog_scalespace = [np.diff(layer, axis=2) for layer in
                          gaussian_scalespace]

        positions, scales, sigmas, octaves = self._find_localize_evaluate(
            dog_scalespace, image.shape)

        self._compute_orientation(positions, scales, sigmas, octaves,
                                  gaussian_scalespace)

        self.keypoints = self.positions.round().astype(int)

    def extract(self, image):
        """Extract the descriptors for all keypoints in the image.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        gradient_space = [np.gradient(octave) for octave in
                          gaussian_scalespace]

        self._compute_descriptor(gradient_space)

    def detect_and_extract(self, image):
        """Detect the keypoints and extract their descriptors.

        Parameters
        ----------
        image : 2D array
            Input image.

        """
        image = self._preprocess(image)

        gaussian_scalespace = self._create_scalespace(image)

        dog_scalespace = [np.diff(layer, axis=2) for layer in
                          gaussian_scalespace]

        positions, scales, sigmas, octaves = self._find_localize_evaluate(
            dog_scalespace, image.shape)

        gradient_space = self._compute_orientation(positions, scales, sigmas,
                                                   octaves,
                                                   gaussian_scalespace)

        self._compute_descriptor(gradient_space)

        self.keypoints = self.positions.round().astype(int)