Support normalization in the tracking prepass
The last tracker commit added normalized tracking. This makes tracking patches undergoing uniform illumination change easier. However, the prepass which computes a quick translation-only estimate of the warp did not take this into account. This commit fixes that. This works reasonably well but in some examples the brute initialization fails. I suspect this is due to the warped template estimate in the current frame being too different from the original, so there are multiple peaks in the normalized-SAD correlation function. The solution is to use the previous frame for the brute initialization and the keyframe for refinement, but that requires architecture changes.
This commit is contained in:
67
extern/libmv/libmv/tracking/track_region.cc
vendored
67
extern/libmv/libmv/tracking/track_region.cc
vendored
@@ -19,6 +19,11 @@
|
||||
// IN THE SOFTWARE.
|
||||
//
|
||||
// Author: mierle@google.com (Keir Mierle)
|
||||
//
|
||||
// TODO(keir): While this tracking code works rather well, it has some
|
||||
// outragous inefficiencies. There is probably a 5-10x speedup to be had if a
|
||||
// smart coder went through the TODO's and made the suggested performance
|
||||
// enhancements.
|
||||
|
||||
// Necessary for M_E when building with MSVC.
|
||||
#define _USE_MATH_DEFINES
|
||||
@@ -868,14 +873,17 @@ bool PointOnRightHalfPlane(const Vec2 &a, const Vec2 &b, double x, double y) {
|
||||
|
||||
// Determine if a point is in a quad. The quad is arranged as:
|
||||
//
|
||||
// +--> x
|
||||
// +-------> x
|
||||
// |
|
||||
// | 0 1
|
||||
// v 3 2
|
||||
// | a0------a1
|
||||
// | | |
|
||||
// | | |
|
||||
// | | |
|
||||
// | a3------a2
|
||||
// v
|
||||
// y
|
||||
//
|
||||
// The idea is to take the maximum x or y distance. This may be oversampling.
|
||||
// TODO(keir): Investigate the various choices; perhaps average is better?
|
||||
// The implementation does up to four half-plane comparisons.
|
||||
bool PointInQuad(const double *xs, const double *ys, double x, double y) {
|
||||
Vec2 a0(xs[0], ys[0]);
|
||||
Vec2 a1(xs[1], ys[1]);
|
||||
@@ -888,6 +896,8 @@ bool PointInQuad(const double *xs, const double *ys, double x, double y) {
|
||||
PointOnRightHalfPlane(a3, a0, x, y);
|
||||
}
|
||||
|
||||
// This makes it possible to map between Eigen float arrays and FloatImage
|
||||
// without using comparisons.
|
||||
typedef Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> FloatArray;
|
||||
|
||||
// This creates a pattern in the frame of image2, from the pixel is image1,
|
||||
@@ -946,11 +956,26 @@ void CreateBrutePattern(const double *x1, const double *y1,
|
||||
*origin_y = min_y;
|
||||
}
|
||||
|
||||
// Compute a translation-only estimate of the warp, using brute force search. A
|
||||
// smarter implementation would use the FFT to compute the normalized cross
|
||||
// correlation. Instead, this is a dumb implementation. Surprisingly, it is
|
||||
// fast enough in practice.
|
||||
//
|
||||
// TODO(keir): The normalization is less effective for the brute force search
|
||||
// than it is with the Ceres solver. It's unclear if this is a bug or due to
|
||||
// the original frame being too different from the reprojected reference in the
|
||||
// destination frame.
|
||||
//
|
||||
// The likely solution is to use the previous frame, instead of the original
|
||||
// pattern, when doing brute initialization. Unfortunately that implies a
|
||||
// totally different warping interface, since access to more than a the source
|
||||
// and current destination frame is necessary.
|
||||
template<typename Warp>
|
||||
void BruteTranslationOnlyInitialize(const FloatImage &image1,
|
||||
const FloatImage *image1_mask,
|
||||
const FloatImage &image2,
|
||||
const int num_extra_points,
|
||||
const bool use_normalized_intensities,
|
||||
const double *x1, const double *y1,
|
||||
double *x2, double *y2) {
|
||||
// Create the pattern to match in the space of image2, assuming our inital
|
||||
@@ -959,8 +984,18 @@ void BruteTranslationOnlyInitialize(const FloatImage &image1,
|
||||
FloatArray pattern;
|
||||
FloatArray mask;
|
||||
int origin_x = -1, origin_y = -1;
|
||||
CreateBrutePattern<Warp>(x1, y1, x2, y2, image1, image1_mask,
|
||||
&pattern, &mask, &origin_x, &origin_y);
|
||||
CreateBrutePattern<Warp>(x1, y1, x2, y2,
|
||||
image1, image1_mask,
|
||||
&pattern, &mask,
|
||||
&origin_x, &origin_y);
|
||||
|
||||
// For normalization, premultiply the pattern by the inverse pattern mean.
|
||||
double mask_sum = 1.0;
|
||||
if (use_normalized_intensities) {
|
||||
mask_sum = mask.sum();
|
||||
double inverse_pattern_mean = mask_sum / ((mask * pattern).sum());
|
||||
pattern *= inverse_pattern_mean;
|
||||
}
|
||||
|
||||
// Use Eigen on the images via maps for strong vectorization.
|
||||
Map<const FloatArray> search(image2.Data(), image2.Height(), image2.Width());
|
||||
@@ -984,8 +1019,21 @@ void BruteTranslationOnlyInitialize(const FloatImage &image1,
|
||||
int h = pattern.rows();
|
||||
for (int r = 0; r < (image2.Height() - h); ++r) {
|
||||
for (int c = 0; c < (image2.Width() - w); ++c) {
|
||||
// Compute the weighted sum of absolute differences, Eigen style.
|
||||
double sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
|
||||
// Compute the weighted sum of absolute differences, Eigen style. Note
|
||||
// that the block from the search image is never stored in a variable, to
|
||||
// avoid copying overhead and permit inlining.
|
||||
double sad;
|
||||
if (use_normalized_intensities) {
|
||||
// TODO(keir): It's really dumb to recompute the search mean for every
|
||||
// shift. A smarter implementation would use summed area tables
|
||||
// instead, reducing the mean calculation to an O(1) operation.
|
||||
double inverse_search_mean =
|
||||
mask_sum / ((mask * search.block(r, c, h, w)).sum());
|
||||
sad = (mask * (pattern - (search.block(r, c, h, w) *
|
||||
inverse_search_mean))).abs().sum();
|
||||
} else {
|
||||
sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
|
||||
}
|
||||
if (sad < best_sad) {
|
||||
best_r = r;
|
||||
best_c = c;
|
||||
@@ -1053,6 +1101,7 @@ void TemplatedTrackRegion(const FloatImage &image1,
|
||||
options.image1_mask,
|
||||
image2,
|
||||
options.num_extra_points,
|
||||
options.use_normalized_intensities,
|
||||
x1, y1, x2, y2);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); brute ("
|
||||
|
||||
Reference in New Issue
Block a user