Camera tracking integration

===========================

Bundle new libmv to fix crash caused by some errors in detector.

This commit makes SAD tracking much slower because now it supports
affine tracking. Not implemented in Blender yet to keep the commits
clearer.
This commit is contained in:
Sergey Sharybin
2011-08-18 21:20:12 +00:00
parent a6e4029697
commit 83b2be3749
7 changed files with 163 additions and 100 deletions

View File

@@ -1,3 +1,39 @@
commit 75520f4bc4ccbb272a1b4149d3b8d05a90f7f896
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 23:14:17 2011 +0200
Fix affine iteration.
commit 4e8e0aa6018e3eb2fbebdad7f1cbd6c909d26e79
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 23:03:26 2011 +0200
Handle rotations.
commit 3ce41cf3c1b5c136a61d8f4c63ccae3cafbdb8da
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 22:24:47 2011 +0200
Slow brute-force affine diamond search implementation.
commit 1c4acd03e030c1c50dc6fc36c419c72ea69a0713
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 20:51:43 2011 +0200
Fix detect.cc.
commit ec18cc5ea9ae2e641075a847e82d0aacb8415ad8
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 17:45:37 2011 +0200
Compute and return Pearson product-moment correlation coefficient between reference and matched pattern.
commit 21d4245c63a01bfc736192d55baf10983e7c9ec7
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Thu Aug 18 16:18:44 2011 +0200
UI and API support for affine tracking.
commit a4876d8c40dcde615b44009c38c49e9a1b1d4698
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Wed Aug 17 20:26:01 2011 +0200
@@ -406,11 +442,3 @@ Date: Tue Jul 19 14:54:43 2011 +0200
Add optimized version of KLT tracker.
FIXME: It currently doesn't work.
commit 07a27f214c98e92b876c0878d05397f2031b35cd
Author: Matthias Fauconneau <matthias.fauconneau@gmail.com>
Date: Sun Jul 17 23:01:58 2011 +0200
Add some #if (QT_VERSION < QT_VERSION_CHECK(4, 7, 0)) and #define constBits bits macros to support Qt < 4.7 which is still used on older systems.
Build using Qt < 4.7 will see increased memory usage and unnecessary copying.

View File

@@ -315,25 +315,28 @@ void libmv_regionTrackerDestroy(libmv_RegionTracker *libmv_tracker)
/* ************ Tracks ************ */
void libmv_SADSamplePattern(unsigned char *image, int stride,
float warp[3][3], unsigned char *pattern)
float warp[3][2], unsigned char *pattern)
{
float mat3[9];
libmv::mat32 mat32;
memcpy(mat3, warp, sizeof(float)*9);
memcpy(mat32.data, warp, sizeof(float)*3*2);
libmv::SamplePattern(image, stride, mat3, pattern);
libmv::SamplePattern(image, stride, mat32, pattern, 16);
}
int libmv_SADTrackerTrack(unsigned char *pattern, unsigned char *image, int stride,
int width, int height, double *x, double *y)
{
int result;
float x2, y2;
libmv::mat32 mat32;
result = libmv::Track(pattern, image, stride, width, height, &x2, &y2);
mat32(0, 2)= *x;
mat32(1, 2)= *y;
*x= x2;
*y= y2;
result = libmv::Track(pattern, 16, image, stride, width, height, &mat32);
*x= mat32(0, 2);
*y= mat32(1, 2);
return result;
}

View File

@@ -52,7 +52,7 @@ void libmv_regionTrackerDestroy(struct libmv_RegionTracker *libmv_tracker);
/* SAD Tracker */
void libmv_SADSamplePattern(unsigned char *image, int stride,
float warp[3][3], unsigned char *pattern);
float warp[3][2], unsigned char *pattern);
int libmv_SADTrackerTrack(unsigned char *pattern, unsigned char *image, int stride,
int width, int height, double *x, double *y);

View File

@@ -56,7 +56,7 @@ void Detect(ubyte* image, int stride, int width, int height, Feature* detected,
unsigned short histogram[256];
memset(histogram,0,sizeof(histogram));
ubyte* scores = new ubyte[width*height];
memset(scores,0,sizeof(scores));
memset(scores,0,width*height);
const int r = 1; //radius for self similarity comparison
for(int y=distance; y<height-distance; y++) {
for(int x=distance; x<width-distance; x++) {
@@ -104,7 +104,7 @@ void Detect(ubyte* image, int stride, int width, int height, Feature* detected,
}
}
*count = i;
free(scores);
delete[] scores;
}
}

View File

@@ -28,15 +28,12 @@
namespace libmv {
typedef unsigned int uint;
struct vec2 {
float x,y;
inline vec2(float x, float y):x(x),y(y){}
};
static vec2 operator*(mat3 m, vec2 v) {
float z = v.x*m[6]+v.y*m[7]+m[8];
return vec2((v.x*m[0]+v.y*m[1]+m[2])/z,(v.x*m[3]+v.y*m[4]+m[5])/z);
inline vec2 operator*(mat32 m, vec2 v) {
return vec2(v.x*m(0,0)+v.y*m(0,1)+m(0,2),v.x*m(1,0)+v.y*m(1,1)+m(1,2));
}
//! fixed point bilinear sample with precision k
@@ -48,95 +45,109 @@ template <int k> inline int sample(const ubyte* image,int stride, int x, int y,
#ifdef __SSE__
#include <xmmintrin.h>
#endif
void SamplePattern(ubyte* image, int stride, mat3 warp, ubyte* pattern) {
const int k = 256;
for (int i = 0; i < 16; i++) for (int j = 0; j < 16; j++) {
vec2 p = warp*vec2(j-8,i-8);
#ifdef __SSE__
//MSVC apparently doesn't support any float rounding.
int fx = _mm_cvtss_si32(_mm_set_ss(p.x*k));
int fy = _mm_cvtss_si32(_mm_set_ss(p.y*k));
int lround(float x) { return _mm_cvtss_si32(_mm_set_ss(x)); }
#elif defined(_MSC_VER)
int fx = int(p.x*k+0.5), fy = int(p.y*k+0.5);
#else
int fx = lround(p.x*k), fy = lround(p.y*k);
int lround(float x) { return x+0.5; }
#endif
//TODO(MatthiasF): SSE optimization
void SamplePattern(ubyte* image, int stride, mat32 warp, ubyte* pattern, int size) {
const int k = 256;
for (int i = 0; i < size; i++) for (int j = 0; j < size; j++) {
vec2 p = warp*vec2(j-size/2,i-size/2);
int fx = lround(p.x*k), fy = lround(p.y*k);
int ix = fx/k, iy = fy/k;
int u = fx%k, v = fy%k;
pattern[i*16+j] = sample<k>(image,stride,ix,iy,u,v);
pattern[i*size+j] = sample<k>(image,stride,ix,iy,u,v);
}
}
#ifdef __SSE2__
#include <emmintrin.h>
static uint SAD(const ubyte* pattern, const ubyte* image, int stride) {
static uint SAD(const ubyte* pattern, const ubyte* image, int stride, int size) {
__m128i a = _mm_setzero_si128();
for(int i = 0; i < 16; i++) {
a = _mm_adds_epu16(a, _mm_sad_epu8( _mm_loadu_si128((__m128i*)(pattern+i*16)),
_mm_loadu_si128((__m128i*)(image+i*stride))));
for(int i = 0; i < size; i++) {
for(int j = 0; j < size/16; j++) {
a = _mm_adds_epu16(a, _mm_sad_epu8( _mm_loadu_si128((__m128i*)(pattern+i*size+j*16)),
_mm_loadu_si128((__m128i*)(image+i*stride+j*16))));
}
}
return _mm_extract_epi16(a,0) + _mm_extract_epi16(a,4);
}
#else
static uint SAD(const ubyte* pattern, const ubyte* image, int stride) {
static uint SAD(const ubyte* pattern, const ubyte* image, int stride, int size) {
uint sad=0;
for(int i = 0; i < 16; i++) {
for(int j = 0; j < 16; j++) {
sad += abs((int)pattern[i*16+j] - image[i*stride+j]);
for(int i = 0; i < size; i++) {
for(int j = 0; j < size; j++) {
sad += abs((int)pattern[i*size+j] - image[i*stride+j]);
}
}
return sad;
}
#endif
//float sq( float x ) { return x*x; }
int Track(ubyte* pattern, ubyte* image, int stride, int w, int h, float* px, float* py) {
int ix = *px-8, iy = *py-8;
float Track(ubyte* reference, int size, ubyte* image, int stride, int w, int h, mat32* warp) {
mat32 m=*warp;
uint min=-1;
// integer pixel
for(int y = 0; y < h-16; y++) {
for(int x = 0; x < w-16; x++) {
uint d = SAD(pattern,&image[y*stride+x],stride); //image L1 distance
//d += sq(x-w/2-8)+sq(y-h/2-8); //spatial L2 distance (need feature prediction first)
if(d < min) {
min = d;
// exhaustive search integer pixel translation
int ix = m(0,2), iy = m(1,2);
for(int y = size/2; y < h-size/2; y++) {
for(int x = size/2; x < w-size/2; x++) {
m(0,2) = x, m(1,2) = y;
ubyte match[size*size];
SamplePattern(image,stride,m,match,size);
uint sad = SAD(reference,match,size,size);
if(sad < min) {
min = sad;
ix = x, iy = y;
}
}
}
m(0,2) = ix, m(1,2) = iy;
const int kPrecision = 4; //subpixel precision in bits
const int kScale = 1<<kPrecision;
int fx=0,fy=0;
for(int k = 1; k <= kPrecision; k++) {
fx *= 2, fy *= 2;
int nx = fx, ny = fy;
int p = kPrecision-k;
for(int y = -1; y <= 1; y++) {
for(int x = -1; x <= 1; x++) {
uint sad=0;
int sx = ix, sy = iy;
int u = (fx+x)<<p, v = (fy+y)<<p;
if( u < 0 ) u+=kScale, sx--;
if( v < 0 ) v+=kScale, sy--;
for(int i = 0; i < 16; i++) {
for(int j = 0; j < 16; j++) {
sad += abs((int)pattern[i*16+j] - sample<kScale>(image,stride,sx+j,sy+i,u,v));
// 6D coordinate descent to find affine transform
float step = 0.5;
for(int p = 0; p < 4; p++) { //foreach precision level
step /= 2;
for(int i = 0; i < 2; i++) { // iterate twice per precision level
for(int d = 0; d < 6; d++) { // foreach dimension
for(float x = -step*2; x <= step*2; x+=step*2) { //test small steps
mat32 t = m;
t.data[d] += x;
if( d<4 && (t.data[d] > 2 || t.data[d] < -2) ) continue; // avoid big distortion
ubyte match[size*size];
SamplePattern(image,stride,t,match,size);
uint sad = SAD(reference,match,size,size);
if(sad < min) {
min = sad;
m = t;
}
}
if(sad < min) {
min = sad;
nx = fx + x, ny = fy + y;
}
}
}
fx = nx, fy = ny;
}
*warp = m;
*px = float((ix*kScale)+fx)/kScale+8;
*py = float((iy*kScale)+fy)/kScale+8;
return min;
// Compute Pearson product-moment correlation coefficient
uint sX=0,sY=0,sXX=0,sYY=0,sXY=0;
ubyte match[size*size];
SamplePattern(image,stride,m,match,size);
SAD(reference,match,size,size);
for(int i = 0; i < size; i++) {
for(int j = 0; j < size; j++) {
int x = reference[i*size+j];
int y = match[i*size+j];
sX += x;
sY += y;
sXX += x*x;
sYY += y*y;
sXY += x*y;
}
}
const int N = size*size;
sX /= N, sY /= N, sXX /= N, sYY /= N, sXY /= N;
return (sXY-sX*sY)/sqrt((sXX-sX*sX)*(sYY-sY*sY));
}
} // namespace libmv

View File

@@ -30,17 +30,28 @@ namespace libmv {
#endif
typedef unsigned char ubyte;
typedef float mat3[9];
typedef unsigned int uint;
/// Affine transformation matrix in column major order.
struct mat32 {
float data[3*2];
#ifdef __cplusplus
inline mat32(int d=1) { for(int i=0;i<3*2;i++) data[i]=0; if(d!=0) for(int i=0;i<2;i++) m(i,i)=d; }
inline float m(int i, int j) const { return data[j*2+i]; }
inline float& m(int i, int j) { return data[j*2+i]; }
inline float operator()(int i, int j) const { return m(i,j); }
inline float& operator()(int i, int j) { return m(i,j); }
inline operator bool() const { for (int i=0; i<3*2; i++) if(data[i]!=0) return true; return false; }
#endif
};
/*!
Sample \a pattern from \a image.
\a pattern is a 16x16 buffer
\a warp is the transformation to apply when sampling the \a pattern in \a image.
\note \a warp might be used by higher level tracking methods (e.g planar)
\a warp is the transformation to apply to \a image when sampling the \a pattern.
*/
void SamplePattern(ubyte* image, int stride, mat3 warp, ubyte* pattern);
void SamplePattern(ubyte* image, int stride, mat32 warp, ubyte* pattern, int size);
/*!
Track \a pattern in \a image.
@@ -51,20 +62,26 @@ void SamplePattern(ubyte* image, int stride, mat3 warp, ubyte* pattern);
refine subpixel position using a square search.
A similar method is used for motion estimation in video encoders.
\a pattern is a 16x16 single channel image to track.
\a x, \a y is the initial estimated position in \a image.
On return, \a x, \a y is the tracked position.
\a image is a reference to the single channel image to search.
\a reference is the pattern to track.
the \a size of the pattern should be aligned to 16.
\a image is a reference to the region to search.
\a stride is size of \a image lines.
\note For a 16x speedup, compile this tracker with SSE2 support.
\note \a stride allow you to reference your search region instead of copying.
On input, \a warp is the predicted affine transformation (e.g from previous frame)
On return, \a warp is the affine transformation which best match the reference \a pattern
\return Sum of absolute difference between reference and matched pattern.
A lower value indicate a better match. Divide this sum by the pattern area (16x16)
to compute the per pixel deviation (0=perfect match, 255=worst match).
\return Pearson product-moment correlation coefficient between reference and matched pattern.
This measure of the linear dependence between the patterns
ranges from -1 (negative correlation) to 1 (positive correlation).
A value of 0 implies that there is no linear correlation between the variables.
\note To track affine features:
- Sample reference pattern using estimated (e.g previous frame) warp.
-
\note \a stride allow you to reference your search region instead of copying.
\note For a 16x speedup, compile this tracker with SSE2 support.
*/
int Track(ubyte* pattern, ubyte* image, int stride, int width, int height, float* x, float* y);
float Track(ubyte* reference, int size, ubyte* image, int stride, int width, int height, mat32* warp);
#ifdef __cplusplus
} // namespace libmv

View File

@@ -952,16 +952,17 @@ int BKE_tracking_next(MovieTrackingContext *context)
if(track_context->pattern==NULL) {
unsigned char *image;
float warp[3][3];
float warp[3][2]={0};
/* calculate pattern for keyframed position */
ibuf= acquire_keyframed_ibuf(context, track, marker);
image= acquire_search_bytebuf(ibuf, track, marker, &width, &height, pos, origin);
unit_m3(warp);
warp[0][2]= pos[0];
warp[1][2]= pos[1];
warp[0][0]= 1;
warp[1][1]= 1;
warp[2][0]= pos[0];
warp[2][1]= pos[1];
/* pattern size is hardcoded to 16x16px in libmv */
track_context->patsize[0]= 16;
@@ -976,6 +977,9 @@ int BKE_tracking_next(MovieTrackingContext *context)
image_new= acquire_search_bytebuf(ibuf_new, track, marker, &width, &height, pos, origin);
x2= pos[0];
y2= pos[1];
sad= libmv_SADTrackerTrack(track_context->pattern, image_new, width, width, height, &x2, &y2);
error= sad/(track_context->patsize[0]*track_context->patsize[1]);