From 837eb6bb182c2020571ded2f4cf155a55e16bd0c Mon Sep 17 00:00:00 2001 From: Keir Mierle Date: Fri, 18 May 2012 12:05:10 +0000 Subject: [PATCH] Support normalization in the tracking prepass The last tracker commit added normalized tracking. This makes tracking patches undergoing uniform illumination change easier. However, the prepass which computes a quick translation-only estimate of the warp did not take this into account. This commit fixes that. This works reasonably well but in some examples the brute initialization fails. I suspect this is due to the warped template estimate in the current frame being too different from the original, so there are multiple peaks in the normalized-SAD correlation function. The solution is to use the previous frame for the brute initialization and the keyframe for refinement, but that requires architecture changes. --- extern/libmv/libmv/tracking/track_region.cc | 67 ++++++++++++++++++--- 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/extern/libmv/libmv/tracking/track_region.cc b/extern/libmv/libmv/tracking/track_region.cc index ababd0cee90..6fad3f7af3f 100644 --- a/extern/libmv/libmv/tracking/track_region.cc +++ b/extern/libmv/libmv/tracking/track_region.cc @@ -19,6 +19,11 @@ // IN THE SOFTWARE. // // Author: mierle@google.com (Keir Mierle) +// +// TODO(keir): While this tracking code works rather well, it has some +// outragous inefficiencies. There is probably a 5-10x speedup to be had if a +// smart coder went through the TODO's and made the suggested performance +// enhancements. // Necessary for M_E when building with MSVC. #define _USE_MATH_DEFINES @@ -868,14 +873,17 @@ bool PointOnRightHalfPlane(const Vec2 &a, const Vec2 &b, double x, double y) { // Determine if a point is in a quad. The quad is arranged as: // -// +--> x +// +-------> x // | -// | 0 1 -// v 3 2 +// | a0------a1 +// | | | +// | | | +// | | | +// | a3------a2 +// v // y // -// The idea is to take the maximum x or y distance. This may be oversampling. -// TODO(keir): Investigate the various choices; perhaps average is better? +// The implementation does up to four half-plane comparisons. bool PointInQuad(const double *xs, const double *ys, double x, double y) { Vec2 a0(xs[0], ys[0]); Vec2 a1(xs[1], ys[1]); @@ -888,6 +896,8 @@ bool PointInQuad(const double *xs, const double *ys, double x, double y) { PointOnRightHalfPlane(a3, a0, x, y); } +// This makes it possible to map between Eigen float arrays and FloatImage +// without using comparisons. typedef Eigen::Array FloatArray; // This creates a pattern in the frame of image2, from the pixel is image1, @@ -946,11 +956,26 @@ void CreateBrutePattern(const double *x1, const double *y1, *origin_y = min_y; } +// Compute a translation-only estimate of the warp, using brute force search. A +// smarter implementation would use the FFT to compute the normalized cross +// correlation. Instead, this is a dumb implementation. Surprisingly, it is +// fast enough in practice. +// +// TODO(keir): The normalization is less effective for the brute force search +// than it is with the Ceres solver. It's unclear if this is a bug or due to +// the original frame being too different from the reprojected reference in the +// destination frame. +// +// The likely solution is to use the previous frame, instead of the original +// pattern, when doing brute initialization. Unfortunately that implies a +// totally different warping interface, since access to more than a the source +// and current destination frame is necessary. template void BruteTranslationOnlyInitialize(const FloatImage &image1, const FloatImage *image1_mask, const FloatImage &image2, const int num_extra_points, + const bool use_normalized_intensities, const double *x1, const double *y1, double *x2, double *y2) { // Create the pattern to match in the space of image2, assuming our inital @@ -959,8 +984,18 @@ void BruteTranslationOnlyInitialize(const FloatImage &image1, FloatArray pattern; FloatArray mask; int origin_x = -1, origin_y = -1; - CreateBrutePattern(x1, y1, x2, y2, image1, image1_mask, - &pattern, &mask, &origin_x, &origin_y); + CreateBrutePattern(x1, y1, x2, y2, + image1, image1_mask, + &pattern, &mask, + &origin_x, &origin_y); + + // For normalization, premultiply the pattern by the inverse pattern mean. + double mask_sum = 1.0; + if (use_normalized_intensities) { + mask_sum = mask.sum(); + double inverse_pattern_mean = mask_sum / ((mask * pattern).sum()); + pattern *= inverse_pattern_mean; + } // Use Eigen on the images via maps for strong vectorization. Map search(image2.Data(), image2.Height(), image2.Width()); @@ -984,8 +1019,21 @@ void BruteTranslationOnlyInitialize(const FloatImage &image1, int h = pattern.rows(); for (int r = 0; r < (image2.Height() - h); ++r) { for (int c = 0; c < (image2.Width() - w); ++c) { - // Compute the weighted sum of absolute differences, Eigen style. - double sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum(); + // Compute the weighted sum of absolute differences, Eigen style. Note + // that the block from the search image is never stored in a variable, to + // avoid copying overhead and permit inlining. + double sad; + if (use_normalized_intensities) { + // TODO(keir): It's really dumb to recompute the search mean for every + // shift. A smarter implementation would use summed area tables + // instead, reducing the mean calculation to an O(1) operation. + double inverse_search_mean = + mask_sum / ((mask * search.block(r, c, h, w)).sum()); + sad = (mask * (pattern - (search.block(r, c, h, w) * + inverse_search_mean))).abs().sum(); + } else { + sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum(); + } if (sad < best_sad) { best_r = r; best_c = c; @@ -1053,6 +1101,7 @@ void TemplatedTrackRegion(const FloatImage &image1, options.image1_mask, image2, options.num_extra_points, + options.use_normalized_intensities, x1, y1, x2, y2); for (int i = 0; i < 4; ++i) { LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); brute ("