Add new planar tracker features and use the new planar API
authorKeir Mierle <mierle@gmail.com>
Thu, 17 May 2012 02:31:52 +0000 (02:31 +0000)
committerKeir Mierle <mierle@gmail.com>
Thu, 17 May 2012 02:31:52 +0000 (02:31 +0000)
This commit removes the use of the legacy RegionTracker API from
Blender, and replaces it with the new TrackRegion API. This also
adds several features to the planar tracker in libmv:

- Do a brute-force initialization of tracking similar to "Hybrid"
  mode in the stable release, but using all floats. This is slower
  but more accurate. It is still necessary to evaluate if the
  performance loss is worth it. In particular, this change is
  necessary to support high bit depth imagery.

- Add support for masks over the search window. This is a step
  towards supporting user-defined tracker masks. The tracker masks
  will make it easy for users to make a mask for e.g. a ball.

- Add Pearson product moment correlation coefficient checking (aka
  "Correlation" in the UI). This causes tracking failure if the
  tracked patch is not linearly related to the template.

- Add support for warping a few points in addition to the supplied
  points. This is useful because the tracking code deliberately
  does not expose the underlying warp representation. Instead,
  warps are specified in a non-parametric way via the correspondences.

- Remove the "num_samples_xy" concept and replace it with
  automatic determination of the number of samples. This makes the
  API easier for users.

- Fix various bugs in the parameterizations.

There remains a bug with subpixel precision tracking when in
"keyframe" mode; this will get fixed shortly.

extern/libmv/libmv-capi.cpp
extern/libmv/libmv-capi.h
extern/libmv/libmv/tracking/esm_region_tracker.cc
extern/libmv/libmv/tracking/track_region.cc
extern/libmv/libmv/tracking/track_region.h
source/blender/blenkernel/intern/tracking.c
source/blender/makesdna/DNA_tracking_types.h

index ddf69ce5e28fe329cd9b01f6c88a3eb1da654cf9..7d3dca5c4e8835c5a89c93884c8bd142773588c7 100644 (file)
    tracking between which failed */
 #undef DUMP_FAILURE
 
+/* define this to generate PNG images with content of search areas
+   on every iteration of tracking */
+#undef DUMP_ALWAYS
+
 #include "libmv-capi.h"
 
 #include "third_party/gflags/gflags/gflags.h"
@@ -60,7 +64,7 @@
 #include <stdlib.h>
 #include <assert.h>
 
-#ifdef DUMP_FAILURE
+#if defined(DUMP_FAILURE) || defined (DUMP_ALWAYS)
 #  include <png.h>
 #endif
 
@@ -172,7 +176,7 @@ static void floatBufToImage(const float *buf, int width, int height, libmv::Floa
        }
 }
 
-#ifdef DUMP_FAILURE
+#if defined(DUMP_FAILURE) || defined (DUMP_ALWAYS)
 void savePNGImage(png_bytep *row_pointers, int width, int height, int depth, int color_type, char *file_name)
 {
        png_infop info_ptr;
@@ -306,16 +310,20 @@ int libmv_regionTrackerTrack(libmv_RegionTracker *libmv_tracker, const float *im
        floatBufToImage(ima1, width, height, &old_patch);
        floatBufToImage(ima2, width, height, &new_patch);
 
-#ifndef DUMP_FAILURE
+#if !defined(DUMP_FAILURE) && !defined(DUMP_ALWAYS)
        return region_tracker->Track(old_patch, new_patch, x1, y1, x2, y2);
 #else
        {
-               double sx2 = *x2, sy2 = *y2;
+               /* double sx2 = *x2, sy2 = *y2; */
                int result = region_tracker->Track(old_patch, new_patch, x1, y1, x2, y2);
 
+#if defined(DUMP_ALWAYS)
+               {
+#else
                if (!result) {
+#endif
                        saveImage("old_patch", old_patch, x1, y1);
-                       saveImage("new_patch", new_patch, sx2, sy2);
+                       saveImage("new_patch", new_patch, *x2, *y2);
                }
 
                return result;
@@ -334,65 +342,81 @@ void libmv_regionTrackerDestroy(libmv_RegionTracker *libmv_tracker)
 
 /* TrackRegion (new planar tracker) */
 int libmv_trackRegion(const struct libmv_trackRegionOptions *options,
-                       const float *image1, const float *image2,
-                       int width, int height, 
-                       const double *x1, const double *y1,
-                       struct libmv_trackRegionResult *result,
-                       double *x2, double *y2) {
-  double xx1[4], yy1[4];
-  double xx2[4], yy2[4];
-
-  // Convert to doubles for the libmv api.
-  for (int i = 0; i < 4; ++i) {
-    xx1[i] = x1[i];
-    yy1[i] = y1[i];
-    xx2[i] = x2[i];
-    yy2[i] = y2[i];
-  }
-
-  libmv::TrackRegionOptions track_region_options;
-  switch (options->motion_model) {
+                      const float *image1, const float *image2,
+                      int width, int height, 
+                      const double *x1, const double *y1,
+                      struct libmv_trackRegionResult *result,
+                      double *x2, double *y2)
+{
+       double xx1[5], yy1[5];
+       double xx2[5], yy2[5];
+       bool tracking_result = false;
+
+       /* Convert to doubles for the libmv api. The four corners and the center. */
+       for (int i = 0; i < 5; ++i) {
+               xx1[i] = x1[i];
+               yy1[i] = y1[i];
+               xx2[i] = x2[i];
+               yy2[i] = y2[i];
+       }
+
+       libmv::TrackRegionOptions track_region_options;
+       switch (options->motion_model) {
 #define LIBMV_CONVERT(the_model) \
     case libmv::TrackRegionOptions::the_model: \
                track_region_options.mode = libmv::TrackRegionOptions::the_model; \
                break;
-    LIBMV_CONVERT(TRANSLATION)
-    LIBMV_CONVERT(TRANSLATION_ROTATION)
-    LIBMV_CONVERT(TRANSLATION_SCALE)
-    LIBMV_CONVERT(TRANSLATION_ROTATION_SCALE)
-    LIBMV_CONVERT(AFFINE)
-    LIBMV_CONVERT(HOMOGRAPHY)
+               LIBMV_CONVERT(TRANSLATION)
+               LIBMV_CONVERT(TRANSLATION_ROTATION)
+               LIBMV_CONVERT(TRANSLATION_SCALE)
+               LIBMV_CONVERT(TRANSLATION_ROTATION_SCALE)
+               LIBMV_CONVERT(AFFINE)
+               LIBMV_CONVERT(HOMOGRAPHY)
 #undef LIBMV_CONVERT
-  }
-  track_region_options.num_samples_x = options->num_samples_x;
-  track_region_options.num_samples_y = options->num_samples_y;
-  track_region_options.minimum_correlation = options->minimum_correlation;
-  track_region_options.max_iterations = options->num_iterations;
-  track_region_options.sigma = options->sigma;
-  
-  // Convert from raw float buffers to libmv's FloatImage.
-  libmv::FloatImage old_patch, new_patch;
-  floatBufToImage(image1, width, height, &old_patch);
-  floatBufToImage(image2, width, height, &new_patch);
-
-  libmv::TrackRegionResult track_region_result;
-  libmv::TrackRegion(old_patch, new_patch, xx1, yy1, track_region_options, xx2, yy2, &track_region_result);
-
-  // Convert to floats for the blender api.
-  for (int i = 0; i < 4; ++i) {
-    x2[i] = xx2[i];
-    y2[i] = yy2[i];
-  }
-
-  // TODO(keir): Update the termination string with failure details.
-  if (track_region_result.termination == libmv::TrackRegionResult::PARAMETER_TOLERANCE ||
-      track_region_result.termination == libmv::TrackRegionResult::FUNCTION_TOLERANCE  ||
-      track_region_result.termination == libmv::TrackRegionResult::GRADIENT_TOLERANCE  ||
-      track_region_result.termination == libmv::TrackRegionResult::NO_CONVERGENCE      ||
-      track_region_result.termination == libmv::TrackRegionResult::INSUFFICIENT_CORRELATION) {
-    return true;
-  }
-  return false;
+       }
+
+       track_region_options.minimum_correlation = options->minimum_correlation;
+       track_region_options.max_iterations = options->num_iterations;
+       track_region_options.sigma = options->sigma;
+       track_region_options.num_extra_points = 1;
+       track_region_options.image1_mask = NULL;
+
+       /* Convert from raw float buffers to libmv's FloatImage. */
+       libmv::FloatImage old_patch, new_patch;
+       floatBufToImage(image1, width, height, &old_patch);
+       floatBufToImage(image2, width, height, &new_patch);
+
+       libmv::TrackRegionResult track_region_result;
+       libmv::TrackRegion(old_patch, new_patch, xx1, yy1, track_region_options, xx2, yy2, &track_region_result);
+
+       /* Convert to floats for the blender api. */
+       for (int i = 0; i < 5; ++i) {
+               x2[i] = xx2[i];
+               y2[i] = yy2[i];
+       }
+
+       /* TODO(keir): Update the termination string with failure details. */
+       if (track_region_result.termination == libmv::TrackRegionResult::PARAMETER_TOLERANCE ||
+           track_region_result.termination == libmv::TrackRegionResult::FUNCTION_TOLERANCE  ||
+           track_region_result.termination == libmv::TrackRegionResult::GRADIENT_TOLERANCE  ||
+           track_region_result.termination == libmv::TrackRegionResult::NO_CONVERGENCE      ||
+           track_region_result.termination == libmv::TrackRegionResult::INSUFFICIENT_CORRELATION)
+       {
+               tracking_result = true;
+       }
+
+#if defined(DUMP_FAILURE) || defined(DUMP_ALWAYS)
+#if defined(DUMP_ALWAYS)
+       {
+#else
+       if (!tracking_result) {
+#endif
+               saveImage("old_patch", old_patch, x1[4], y1[4]);
+               saveImage("new_patch", new_patch, x2[4], y2[4]);
+       }
+#endif
+
+       return tracking_result;
 }
 
 /* ************ Tracks ************ */
index d7d132d4f21ac0081c31299f13a84df11b192660..d3f02f99e517ad0328147ac3e03607534a22b30b 100644 (file)
@@ -53,8 +53,6 @@ void libmv_regionTrackerDestroy(struct libmv_RegionTracker *libmv_tracker);
 /* TrackRegion (new planar tracker) */
 struct libmv_trackRegionOptions {
   int motion_model;
-  int num_samples_x;
-  int num_samples_y;
   int num_iterations;
   double minimum_correlation;
   double sigma;
index e6d0b9830f4eb2576a2b2377c5ff4b33ba01db9f..a8dc46d439b6838a3ed9c6a9ab39258c7583e542 100644 (file)
@@ -98,8 +98,6 @@ bool EsmRegionTracker::Track(const FloatImage &image1,
 
   TrackRegionOptions options;
   options.mode = TrackRegionOptions::TRANSLATION;
-  options.num_samples_x = 2 * half_window_size + 1;
-  options.num_samples_y = 2 * half_window_size + 1;
   options.max_iterations = 20;
   options.sigma = sigma;
   options.use_esm = true;
index 0bb8ae221b1dc76b03b31ae197f29b4143ef30cc..0dac56aeff70908e11d19d670765410307a5fb0c 100644 (file)
 
 namespace libmv {
 
+TrackRegionOptions::TrackRegionOptions()
+    : mode(TRANSLATION),
+      minimum_correlation(0),
+      max_iterations(20),
+      use_esm(true),
+      use_brute_initialization(true),
+      sigma(0.9),
+      num_extra_points(0),
+      image1_mask(NULL) {
+}
+
+namespace {
+
 // TODO(keir): Consider adding padding.
 template<typename T>
 bool InBounds(const FloatImage &image,
@@ -161,22 +174,26 @@ class WarpCostFunctor {
                   const FloatImage &image_and_gradient1,
                   const FloatImage &image_and_gradient2,
                   const Mat3 &canonical_to_image1,
+                  int num_samples_x,
+                  int num_samples_y,
                   const Warp &warp)
       : options_(options),
         image_and_gradient1_(image_and_gradient1),       
         image_and_gradient2_(image_and_gradient2),       
         canonical_to_image1_(canonical_to_image1),
+        num_samples_x_(num_samples_x),
+        num_samples_y_(num_samples_y),
         warp_(warp) {}
 
- template<typename T>
- bool operator()(const T *warp_parameters, T *residuals) const {
-   for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
-     VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i];
-   }
 template<typename T>
 bool operator()(const T *warp_parameters, T *residuals) const {
+    for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
+      VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i];
+    }
 
-   int cursor = 0;
-   for (int r = 0; r < options_.num_samples_y; ++r) {
-     for (int c = 0; c < options_.num_samples_x; ++c) {
+    int cursor = 0;
+    for (int r = 0; r < num_samples_y_; ++r) {
+      for (int c = 0; c < num_samples_x_; ++c) {
         // Compute the location of the source pixel (via homography).
         Vec3 image1_position = canonical_to_image1_ * Vec3(c, r, 1);
         image1_position /= image1_position(2);
@@ -223,17 +240,102 @@ class WarpCostFunctor {
         }
 
         // The difference is the error.
+        T error = src_sample - dst_sample;
+
+        // Weight the error by the mask, if one is present.
+        if (options_.image1_mask != NULL) {
+          error *= T(AutoDiff<double>::Sample(*options_.image1_mask,
+                                              image1_position[0],
+                                              image1_position[1]));
+        }
         residuals[cursor++] = src_sample - dst_sample;
       }
     }
     return true;
   }
 
+ // TODO(keir): Consider also computing the cost here.
+ double PearsonProductMomentCorrelationCoefficient(
+     const double *warp_parameters) const {
+   for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
+     VLOG(2) << "Correlation warp_parameters[" << i << "]: "
+             << warp_parameters[i];
+   }
+
+   // The single-pass PMCC computation is somewhat numerically unstable, but
+   // it's sufficient for the tracker.
+   double sX = 0, sY = 0, sXX = 0, sYY = 0, sXY = 0;
+
+   // Due to masking, it's important to account for fractional samples.
+   // Samples with a 50% mask get counted as a half sample.
+   double num_samples = 0;
+
+   for (int r = 0; r < num_samples_y_; ++r) {
+     for (int c = 0; c < num_samples_x_; ++c) {
+        // Compute the location of the source pixel (via homography).
+        // TODO(keir): Cache these projections.
+        Vec3 image1_position = canonical_to_image1_ * Vec3(c, r, 1);
+        image1_position /= image1_position(2);
+        
+        // Compute the location of the destination pixel.
+        double image2_position[2];
+        warp_.Forward(warp_parameters,
+                      image1_position[0],
+                      image1_position[1],
+                      &image2_position[0],
+                      &image2_position[1]);
+
+        double x = AutoDiff<double>::Sample(image_and_gradient2_,
+                                            image2_position[0],
+                                            image2_position[1]);
+
+        double y = AutoDiff<double>::Sample(image_and_gradient1_,
+                                            image1_position[0],
+                                            image1_position[1]);
+
+        // Weight the signals by the mask, if one is present.
+        if (options_.image1_mask != NULL) {
+          double mask_value = AutoDiff<double>::Sample(*options_.image1_mask,
+                                                       image1_position[0],
+                                                       image1_position[1]);
+          x *= mask_value;
+          y *= mask_value;
+          num_samples += mask_value;
+        } else {
+          num_samples++;
+        }
+        sX += x;
+        sY += y;
+        sXX += x*x;
+        sYY += y*y;
+        sXY += x*y;
+      }
+    }
+    // Normalize.
+    sX /= num_samples;
+    sY /= num_samples;
+    sXX /= num_samples;
+    sYY /= num_samples;
+    sXY /= num_samples;
+
+    double var_x = sXX - sX*sX;
+    double var_y = sYY - sY*sY;
+    double covariance_xy = sXY - sX*sY;
+
+    double correlation = covariance_xy / sqrt(var_x * var_y);
+    LG << "Covariance xy: " << covariance_xy
+       << ", var 1: " << var_x << ", var 2: " << var_y
+       << ", correlation: " << correlation;
+    return correlation;
+  }
+
  private:
   const TrackRegionOptions &options_;
   const FloatImage &image_and_gradient1_;
   const FloatImage &image_and_gradient2_;
   const Mat3 &canonical_to_image1_;
+  int num_samples_x_;
+  int num_samples_y_;
   const Warp &warp_;
 };
 
@@ -260,9 +362,7 @@ Mat3 ComputeCanonicalHomography(const double *x1,
 
 class Quad {
  public:
-  Quad(const double *x, const double *y)
-      : x_(x), y_(y) {
-
+  Quad(const double *x, const double *y) : x_(x), y_(y) {
     // Compute the centroid and store it.
     centroid_ = Vec2(0.0, 0.0);
     for (int i = 0; i < 4; ++i) {
@@ -298,7 +398,7 @@ class Quad {
 struct TranslationWarp {
   TranslationWarp(const double *x1, const double *y1,
                   const double *x2, const double *y2) {
-    Vec2 t = Quad(x1, y1).Centroid() - Quad(x2, y2).Centroid();
+    Vec2 t = Quad(x2, y2).Centroid() - Quad(x1, y1).Centroid();
     parameters[0] = t[0];
     parameters[1] = t[1];
   }
@@ -310,6 +410,13 @@ struct TranslationWarp {
     *y2 = y1 + warp_parameters[1];
   }
 
+  template<typename T>
+  void Backward(const T *warp_parameters,
+                const T &x2, const T& y2, T *x1, T* y1) const {
+    *x1 = x2 - warp_parameters[0];
+    *y1 = y2 - warp_parameters[1];
+  }
+
   // Translation x, translation y.
   enum { NUM_PARAMETERS = 2 };
   double parameters[NUM_PARAMETERS];
@@ -322,7 +429,7 @@ struct TranslationScaleWarp {
     Quad q2(x2, y2);
 
     // The difference in centroids is the best guess for translation.
-    Vec2 t = q1.Centroid() - q2.Centroid();
+    Vec2 t = q2.Centroid() - q1.Centroid();
     parameters[0] = t[0];
     parameters[1] = t[1];
 
@@ -354,6 +461,12 @@ struct TranslationScaleWarp {
     *y2 = y1_scaled + warp_parameters[1];
   }
 
+  template<typename T>
+  void Backward(const T *warp_parameters,
+                const T &x2, const T& y2, T *x1, T* y1) const {
+    // XXX
+  }
+
   // Translation x, translation y, scale.
   enum { NUM_PARAMETERS = 3 };
   double parameters[NUM_PARAMETERS];
@@ -375,7 +488,7 @@ struct TranslationRotationWarp {
     Quad q2(x2, y2);
 
     // The difference in centroids is the best guess for translation.
-    Vec2 t = q1.Centroid() - q2.Centroid();
+    Vec2 t = q2.Centroid() - q1.Centroid();
     parameters[0] = t[0];
     parameters[1] = t[1];
 
@@ -387,6 +500,10 @@ struct TranslationRotationWarp {
     }
     Mat2 R = OrthogonalProcrustes(correlation_matrix);
     parameters[2] = acos(R(0, 0));
+
+    std::cout << "correlation_matrix:\n" << correlation_matrix << "\n";
+    std::cout << "R:\n" << R << "\n";
+    std::cout << "theta:" << parameters[2] << "\n";
   }
 
   // The strange way of parameterizing the translation and rotation is to make
@@ -422,6 +539,12 @@ struct TranslationRotationWarp {
     *y2 = y1_rotated + warp_parameters[1];
   }
 
+  template<typename T>
+  void Backward(const T *warp_parameters,
+                const T &x2, const T& y2, T *x1, T* y1) const {
+    // XXX
+  }
+
   // Translation x, translation y, rotation about the center of Q1 degrees.
   enum { NUM_PARAMETERS = 3 };
   double parameters[NUM_PARAMETERS];
@@ -436,7 +559,7 @@ struct TranslationRotationScaleWarp {
     Quad q2(x2, y2);
 
     // The difference in centroids is the best guess for translation.
-    Vec2 t = q1.Centroid() - q2.Centroid();
+    Vec2 t = q2.Centroid() - q1.Centroid();
     parameters[0] = t[0];
     parameters[1] = t[1];
 
@@ -449,8 +572,11 @@ struct TranslationRotationScaleWarp {
       correlation_matrix += q1.CornerRelativeToCentroid(i) * 
                             q2.CornerRelativeToCentroid(i).transpose();
     }
+    std::cout << "correlation_matrix:\n" << correlation_matrix << "\n";
     Mat2 R = OrthogonalProcrustes(correlation_matrix);
+    std::cout << "R:\n" << R << "\n";
     parameters[3] = acos(R(0, 0));
+    std::cout << "theta:" << parameters[3] << "\n";
   }
 
   // The strange way of parameterizing the translation and rotation is to make
@@ -471,7 +597,7 @@ struct TranslationRotationScaleWarp {
     const T y1_origin = y1 - q1.Centroid()(1);
 
     // Rotate about the origin (i.e. centroid of Q1).
-    const T theta = warp_parameters[2];
+    const T theta = warp_parameters[3];
     const T costheta = cos(theta);
     const T sintheta = sin(theta);
     const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin;
@@ -491,6 +617,12 @@ struct TranslationRotationScaleWarp {
     *y2 = y1_rotated_scaled + warp_parameters[1];
   }
 
+  template<typename T>
+  void Backward(const T *warp_parameters,
+                const T &x2, const T& y2, T *x1, T* y1) const {
+    // XXX
+  }
+
   // Translation x, translation y, rotation about the center of Q1 degrees,
   // scale.
   enum { NUM_PARAMETERS = 4 };
@@ -542,10 +674,216 @@ struct HomographyWarp {
     *y2 = yy2 / zz2;
   }
 
+  template<typename T>
+  void Backward(const T *warp_parameters,
+                const T &x2, const T& y2, T *x1, T* y1) const {
+    // XXX
+  }
+
   enum { NUM_PARAMETERS = 8 };
   double parameters[NUM_PARAMETERS];
 };
 
+// Determine the number of samples to use for x and y. Quad winding goes:
+//
+//    0 1
+//    3 2
+//
+// The idea is to take the maximum x or y distance. This may be oversampling.
+// TODO(keir): Investigate the various choices; perhaps average is better?
+// Determine the number of samples to use for x and y. Quad winding goes:
+//
+//    0 1
+//    3 2
+//
+// The idea is to take the maximum x or y distance over BOTH quads. This may
+// be oversampling.
+// TODO(keir): Investigate the various choices; perhaps average is better?
+void PickSampling(const double *x1, const double *y1,
+                  const double *x2, const double *y2,
+                  int *num_samples_x, int *num_samples_y) {
+  Vec2 a0(x1[0], y1[0]);
+  Vec2 a1(x1[1], y1[1]);
+  Vec2 a2(x1[2], y1[2]);
+  Vec2 a3(x1[3], y1[3]);
+
+  // The b quad must come from (x2, y2); otherwise it duplicates the a quad
+  // and the guessed destination quad has no influence on the sampling.
+  Vec2 b0(x2[0], y2[0]);
+  Vec2 b1(x2[1], y2[1]);
+  Vec2 b2(x2[2], y2[2]);
+  Vec2 b3(x2[3], y2[3]);
+
+  double x_dimensions[4] = {
+    (a1 - a0).norm(),
+    (a3 - a2).norm(),
+    (b1 - b0).norm(),
+    (b3 - b2).norm()
+  };
+
+  double y_dimensions[4] = {
+    (a3 - a0).norm(),
+    (a1 - a2).norm(),
+    (b3 - b0).norm(),
+    (b1 - b2).norm()
+  };
+  const double kScaleFactor = 1.0;
+  *num_samples_x = static_cast<int>(
+      kScaleFactor * *std::max_element(x_dimensions, x_dimensions + 4));
+  *num_samples_y = static_cast<int>(
+      kScaleFactor * *std::max_element(y_dimensions, y_dimensions + 4));
+  LG << "Automatic num_samples_x: " << *num_samples_x
+     << ", num_samples_y: " << *num_samples_y;
+}
+
+bool SearchAreaTooBigForDescent(const FloatImage &image2,
+                                const double *x2, const double *y2) {
+  // TODO(keir): Check the bounds and enable only when it makes sense.
+  return true;
+}
+
+bool PointOnRightHalfPlane(const Vec2 &a, const Vec2 &b, double x, double y) {
+  Vec2 ba = b - a;
+  return ((Vec2(x, y) - b).transpose() * Vec2(-ba.y(), ba.x())) > 0;
+}
+
+// Determine if a point is in a quad. The quad is arranged as:
+//
+//    +--> x
+//    |
+//    |  0 1
+//    v  3 2
+//    y
+//
+// The point is inside iff it lies on the right half-plane of every edge,
+// which assumes the quad is convex and wound 0-1-2-3 as shown above.
+bool PointInQuad(const double *xs, const double *ys, double x, double y) {
+  Vec2 a0(xs[0], ys[0]);
+  Vec2 a1(xs[1], ys[1]);
+  Vec2 a2(xs[2], ys[2]);
+  Vec2 a3(xs[3], ys[3]);
+
+  return PointOnRightHalfPlane(a0, a1, x, y) &&
+         PointOnRightHalfPlane(a1, a2, x, y) &&
+         PointOnRightHalfPlane(a2, a3, x, y) &&
+         PointOnRightHalfPlane(a3, a0, x, y);
+}
+
+typedef Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> FloatArray;
+
+// This creates a pattern in the frame of image2, from the pixel is image1,
+// based on the initial guess represented by the two quads x1, y1, and x2, y2.
+template<typename Warp>
+void CreateBrutePattern(const double *x1, const double *y1,
+                        const double *x2, const double *y2,
+                        const FloatImage &image1,
+                        const FloatImage *image1_mask,
+                        FloatArray *pattern,
+                        FloatArray *mask,
+                        int *origin_x,
+                        int *origin_y) {
+  // Get integer bounding box of quad2 in image2.
+  int min_x = static_cast<int>(floor(*std::min_element(x2, x2 + 4)));
+  int min_y = static_cast<int>(floor(*std::min_element(y2, y2 + 4)));
+  int max_x = static_cast<int>(ceil (*std::max_element(x2, x2 + 4)));
+  int max_y = static_cast<int>(ceil (*std::max_element(y2, y2 + 4)));
+
+  int w = max_x - min_x;
+  int h = max_y - min_y;
+
+  pattern->resize(h, w);
+  mask->resize(h, w);
+
+  Warp inverse_warp(x2, y2, x1, y1);
+
+  // r,c are in the coordinate frame of image2.
+  for (int r = min_y; r < max_y; ++r) {
+    for (int c = min_x; c < max_x; ++c) {
+      // i and j are in the coordinate frame of the pattern in image2.
+      int i = r - min_y;
+      int j = c - min_x;
+
+      double dst_x = c;
+      double dst_y = r;
+      double src_x;
+      double src_y;
+      inverse_warp.Forward(inverse_warp.parameters,
+                           dst_x, dst_y,
+                           &src_x, &src_y);
+      
+      if (PointInQuad(x1, y1, src_x, src_y)) {
+        (*pattern)(i, j) = SampleLinear(image1, src_y, src_x);
+        (*mask)(i, j) = 1.0;
+        if (image1_mask) {
+          (*mask)(i, j) = SampleLinear(*image1_mask, src_y, src_x);
+        }
+      } else {
+        (*pattern)(i, j) = 0.0;
+        (*mask)(i, j) = 0.0;
+      }
+    }
+  }
+  *origin_x = min_x;
+  *origin_y = min_y;
+}
+
+template<typename Warp>
+void BruteTranslationOnlyInitialize(const FloatImage &image1,
+                                    const FloatImage *image1_mask,
+                                    const FloatImage &image2,
+                                    const int num_extra_points,
+                                    const double *x1, const double *y1,
+                                    double *x2, double *y2) {
+  // Create the pattern to match in the space of image2, assuming our inital
+  // guess isn't too far from the template in image1. If there is no image1
+  // mask, then the resulting mask is binary.
+  FloatArray pattern;
+  FloatArray mask;
+  int origin_x = -1, origin_y = -1;
+  CreateBrutePattern<Warp>(x1, y1, x2, y2, image1, image1_mask,
+                           &pattern, &mask, &origin_x, &origin_y);
+
+  // Use Eigen on the images via maps for strong vectorization.
+  Map<const FloatArray> search(image2.Data(), image2.Height(), image2.Width());
+
+  // Try all possible locations inside the search area. Yes, everywhere.
+  //
+  // TODO(keir): There are a number of possible optimizations here. One choice
+  // is to make a grid and only try one out of every N possible samples.
+  // 
+  // Another, slightly more clever idea, is to compute some sort of spatial
+  // frequency distribution of the pattern patch. If the spatial resolution is
+  // high (e.g. a grating pattern or fine lines) then checking every possible
+  // translation is necessary, since a 1-pixel shift may induce a massive
+  // change in the cost function. If the image is a blob or splotch with blurry
+  // edges, then fewer samples are necessary since a few pixels offset won't
+  // change the cost function much.
+  double best_sad = std::numeric_limits<double>::max();
+  int best_r = -1;
+  int best_c = -1;
+  int w = pattern.cols();
+  int h = pattern.rows();
+  for (int r = 0; r < (image2.Height() - h); ++r) {
+    for (int c = 0; c < (image2.Width() - w); ++c) {
+      // Compute the weighted sum of absolute differences, Eigen style.
+      double sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
+      if (sad < best_sad) {
+        best_r = r;
+        best_c = c;
+        best_sad = sad;
+      }
+    }
+  }
+  CHECK_NE(best_r, -1);
+  CHECK_NE(best_c, -1);
+
+  LG << "Brute force translation found a shift. "
+     << "best_c: " << best_c << ", best_r: " << best_r << ", "
+     << "origin_x: " << origin_x << ", origin_y: " << origin_y << ", "
+     << "dc: " << (best_c - origin_x) << ", "
+     << "dr: " << (best_r - origin_y)
+     << ", tried " << ((image2.Height() - h) * (image2.Width() - w))
+     << " shifts.";
+
+  // Apply the shift.
+  for (int i = 0; i < 4 + num_extra_points; ++i) {
+    x2[i] += best_c - origin_x;
+    y2[i] += best_r - origin_y;
+  }
+}
+
+}  // namespace
+
 template<typename Warp>
 void TemplatedTrackRegion(const FloatImage &image1,
                           const FloatImage &image2,
@@ -553,6 +891,12 @@ void TemplatedTrackRegion(const FloatImage &image1,
                           const TrackRegionOptions &options,
                           double *x2, double *y2,
                           TrackRegionResult *result) {
+  for (int i = 0; i < 4; ++i) {
+    LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); guess ("
+       << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
+       << (y2[i] - y1[i]) << ").";
+  }
+
   // Bail early if the points are already outside.
   if (!AllInBounds(image1, x1, y1)) {
     result->termination = TrackRegionResult::SOURCE_OUT_OF_BOUNDS;
@@ -564,6 +908,19 @@ void TemplatedTrackRegion(const FloatImage &image1,
   }
   // TODO(keir): Check quads to ensure there is some area.
 
+  // Prepare the initial warp parameters from the four correspondences.
+  Warp warp(x1, y1, x2, y2);
+
+  // Decide how many samples to use in the x and y dimensions.
+  int num_samples_x;
+  int num_samples_y;
+  PickSampling(x1, y1, x2, y2, &num_samples_x, &num_samples_y);
+
+  // Compute the warp from rectangular coordinates.
+  Mat3 canonical_homography = ComputeCanonicalHomography(x1, y1,
+                                                         num_samples_x,
+                                                         num_samples_y);
+
   // Prepare the image and gradient.
   Array3Df image_and_gradient1;
   Array3Df image_and_gradient2;
@@ -572,8 +929,16 @@ void TemplatedTrackRegion(const FloatImage &image1,
   BlurredImageAndDerivativesChannels(image2, options.sigma,
                                      &image_and_gradient2);
 
-  // Prepare the initial warp parameters from the four correspondences.
-  Warp warp(x1, y1, x2, y2);
+  // Possibly do a brute-force translation-only initialization.
+  if (SearchAreaTooBigForDescent(image2, x2, y2) &&
+      options.use_brute_initialization) {
+    LG << "Running brute initialization...";
+    BruteTranslationOnlyInitialize<Warp>(image_and_gradient1,
+                                         options.image1_mask,
+                                         image2,
+                                         options.num_extra_points,
+                                         x1, y1, x2, y2);
+  }
 
   ceres::Solver::Options solver_options;
   solver_options.linear_solver_type = ceres::DENSE_QR;
@@ -591,17 +956,14 @@ void TemplatedTrackRegion(const FloatImage &image1,
   BoundaryCheckingCallback<Warp> callback(image2, warp, x1, y1);
   solver_options.callbacks.push_back(&callback);
 
-  // Compute the warp from rectangular coordinates.
-  Mat3 canonical_homography = ComputeCanonicalHomography(x1, y1,
-                                                         options.num_samples_x,
-                                                         options.num_samples_y);
-
   // Construct the warp cost function. AutoDiffCostFunction takes ownership.
-  WarpCostFunctor<Warp> *cost_function =
+  WarpCostFunctor<Warp> *warp_cost_function =
       new WarpCostFunctor<Warp>(options,
                                 image_and_gradient1,
                                 image_and_gradient2,
                                 canonical_homography,
+                                num_samples_x,
+                                num_samples_y,
                                 warp);
 
   // Construct the problem with a single residual.
@@ -612,8 +974,8 @@ void TemplatedTrackRegion(const FloatImage &image1,
       new ceres::AutoDiffCostFunction<
           WarpCostFunctor<Warp>,
           ceres::DYNAMIC,
-          Warp::NUM_PARAMETERS>(cost_function,
-                                options.num_samples_x * options.num_samples_y),
+          Warp::NUM_PARAMETERS>(warp_cost_function,
+                                num_samples_x * num_samples_y),
       NULL,
       warp.parameters);
 
@@ -624,11 +986,17 @@ void TemplatedTrackRegion(const FloatImage &image1,
 
   // Update the four points with the found solution; if the solver failed, then
   // the warp parameters are the identity (so ignore failure).
-  for (int i = 0; i < 4; ++i) {
+  //
+  // Also warp any extra points on the end of the array.
+  for (int i = 0; i < 4 + options.num_extra_points; ++i) {
     warp.Forward(warp.parameters, x1[i], y1[i], x2 + i, y2 + i);
+    LG << "Warped point " << i << ": (" << x1[i] << ", " << y1[i] << ") -> ("
+       << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
+       << (y2[i] - y1[i]) << ").";
   }
 
   // TODO(keir): Update the result statistics.
+  // TODO(keir): Add a normalize-cross-correlation variant.
 
   CHECK_NE(summary.termination_type, ceres::USER_ABORT) << "Libmv bug.";
   if (summary.termination_type == ceres::USER_ABORT) {
@@ -640,12 +1008,26 @@ void TemplatedTrackRegion(const FloatImage &image1,
     result->termination = TrackRegionResult::termination_enum; \
     return; \
   }
+
+  // Avoid computing correlation for tracking failures.
+  HANDLE_TERMINATION(DID_NOT_RUN);
+  HANDLE_TERMINATION(NUMERICAL_FAILURE);
+
+  // Otherwise, run a final correlation check.
+  if (options.minimum_correlation > 0.0) {
+    result->correlation = warp_cost_function->
+          PearsonProductMomentCorrelationCoefficient(warp.parameters);
+    if (result->correlation < options.minimum_correlation) {
+      LG << "Failing with insufficient correlation.";
+      result->termination = TrackRegionResult::INSUFFICIENT_CORRELATION;
+      return;
+    }
+  }
+
   HANDLE_TERMINATION(PARAMETER_TOLERANCE);
   HANDLE_TERMINATION(FUNCTION_TOLERANCE);
   HANDLE_TERMINATION(GRADIENT_TOLERANCE);
   HANDLE_TERMINATION(NO_CONVERGENCE);
-  HANDLE_TERMINATION(DID_NOT_RUN);
-  HANDLE_TERMINATION(NUMERICAL_FAILURE);
 #undef HANDLE_TERMINATION
 };
 
index 7c937c04794131f67aa2f14906729b103a9f30d9..1a1346f544f5b2c8486b9e0885db216f06b26c02 100644 (file)
@@ -32,6 +32,8 @@
 namespace libmv {
 
 struct TrackRegionOptions {
+  TrackRegionOptions();
+
   enum Mode {
     TRANSLATION,
     TRANSLATION_ROTATION,
@@ -42,9 +44,6 @@ struct TrackRegionOptions {
   };
   Mode mode;
 
-  int num_samples_x;
-  int num_samples_y;
-
   double minimum_correlation;
   int max_iterations;
 
@@ -52,13 +51,28 @@ struct TrackRegionOptions {
   // convergence speed at the cost of more per-iteration work.
   bool use_esm;
 
+  // If true, apply a brute-force translation-only search before attempting the
+  // full search. This is not enabled if the destination image ("image2") is
+  // too small; in that case either the basin of attraction is close enough
+  // that the nearby minimum is correct, or the search area is too small.
+  bool use_brute_initialization;
+
   double sigma;
+
+  // Extra points that should get transformed by the warp. This is useful
+  // because the actual warp parameters are not exposed.
+  int num_extra_points;
+
+  // If non-null, this is used as the pattern mask. It should match the size of
+  // image1, even though only values inside the image1 quad are examined. The
+  // values must be in the range 0.0 to 1.0.
+  FloatImage *image1_mask;
 };
 
 struct TrackRegionResult {
   enum Termination {
-    // Ceres termination types, duplicated.
-    PARAMETER_TOLERANCE = 0,
+    // Ceres termination types, duplicated; though, not the int values.
+    PARAMETER_TOLERANCE,
     FUNCTION_TOLERANCE,
     GRADIENT_TOLERANCE,
     NO_CONVERGENCE,
@@ -70,6 +84,7 @@ struct TrackRegionResult {
     DESTINATION_OUT_OF_BOUNDS,
     FELL_OUT_OF_BOUNDS,
     INSUFFICIENT_CORRELATION,
+    CONFIGURATION_ERROR,
   };
   Termination termination;
 
@@ -77,6 +92,7 @@ struct TrackRegionResult {
   double correlation;
 
   // Final parameters?
+  bool used_brute_translation_initialization;
 };
 
 // Always needs 4 correspondences.
@@ -87,8 +103,6 @@ void TrackRegion(const FloatImage &image1,
                  double *x2, double *y2,
                  TrackRegionResult *result);
 
-// TODO(keir): May need a "samplewarp" function.
-
 }  // namespace libmv
 
 #endif  // LIBMV_TRACKING_TRACK_REGION_H_
index abb4560882242b1c3a991a73618383a2f9d47c6b..3bf5c735cb73dcc96d01b014d74d09fa9883b6a8 100644 (file)
@@ -20,6 +20,7 @@
  *
  * Contributor(s): Blender Foundation,
  *                 Sergey Sharybin
+ *                 Keir Mierle
  *
  * ***** END GPL LICENSE BLOCK *****
  */
@@ -943,10 +944,14 @@ static void tracks_map_free(TracksMap *map, void (*customdata_free) (void *custo
 
 typedef struct TrackContext {
 #ifdef WITH_LIBMV
-       float keyframed_pos[2];
+       /* the reference marker and cutout search area */
+       MovieTrackingMarker marker;
 
-       struct libmv_RegionTracker *region_tracker;
-       float *patch;                   /* keyframed patch */
+       /* keyframed patch. This is the search area */
+       float *search_area;
+       int search_area_height;
+       int search_area_width;
+       int framenr;
 #else
        int pad;
 #endif
@@ -1011,59 +1016,7 @@ MovieTrackingContext *BKE_tracking_context_new(MovieClip *clip, MovieClipUser *u
 
                                if ((marker->flag & MARKER_DISABLED) == 0) {
                                        TrackContext track_context;
-
                                        memset(&track_context, 0, sizeof(TrackContext));
-
-#ifdef WITH_LIBMV
-                                       {
-                                               float search_size_x, search_size_y;
-                                               float pattern_size_x, pattern_size_y;
-                                               float pat_min[2], pat_max[2], patx, paty;
-                                               float search_to_pattern_ratio, log2_search_to_pattern_ratio;
-                                               int wndx, wndy, half_wnd, max_pyramid_levels, level;
-
-                                               struct libmv_RegionTracker *region_tracker;
-
-                                               /* XXX: use boundbox of marker's pattern for now
-                                                *      no idea how it should behave with non-affine tracking */
-                                               BKE_tracking_marker_pattern_minmax(marker, pat_min, pat_max);
-
-                                               patx = (int)((pat_max[0] - pat_min[0]) * width);
-                                               paty = (int)((pat_max[1] - pat_min[1]) * height);
-                                               wndx = (int)patx / 2;
-                                               wndy = (int)paty / 2;
-                                               half_wnd = MAX2(wndx, wndy);
-
-                                               search_size_x = (track->search_max[0] - track->search_min[0]) * width;
-                                               search_size_y = (track->search_max[1] - track->search_min[1]) * height;
-                                               pattern_size_x = (pat_max[0] - pat_min[0]) * width;
-                                               pattern_size_y = (pat_max[1] - pat_min[1]) * height;
-
-                                               /* compute the maximum pyramid size */
-                                               search_to_pattern_ratio = MIN2(search_size_x,  search_size_y) / MAX2(pattern_size_x, pattern_size_y);
-                                               log2_search_to_pattern_ratio = log(floor(search_to_pattern_ratio)) / M_LN2;
-                                               max_pyramid_levels = floor(log2_search_to_pattern_ratio + 1);
-
-                                               /* try to accommodate the user's choice of pyramid level in a way
-                                                * that doesn't cause the coarsest pyramid pattern to be larger
-                                                * than the search size */
-                                               level = MIN2(track->pyramid_levels, max_pyramid_levels);
-
-                                               if (track->tracker == TRACKER_KLT) {
-                                                       region_tracker = libmv_pyramidRegionTrackerNew(100, level, half_wnd,
-                                                                                                      track->minimum_correlation);
-                                               }
-                                               else if (track->tracker == TRACKER_HYBRID) {
-                                                       region_tracker = libmv_hybridRegionTrackerNew(100, half_wnd, track->minimum_correlation);
-                                               }
-                                               else if (track->tracker == TRACKER_SAD) {
-                                                       region_tracker = libmv_bruteRegionTrackerNew(MAX2(wndx, wndy), track->minimum_correlation);
-                                               }
-
-                                               track_context.region_tracker = region_tracker;
-                                       }
-#endif
-
                                        tracks_map_insert(context->tracks_map, track, &track_context);
                                }
                        }
@@ -1099,11 +1052,8 @@ static void track_context_free(void *customdata)
        TrackContext *track_context = (TrackContext *)customdata;
 
 #if WITH_LIBMV
-       if (track_context->region_tracker)
-               libmv_regionTrackerDestroy(track_context->region_tracker);
-
-       if (track_context->patch)
-               MEM_freeN(track_context->patch);
+       if (track_context->search_area)
+               MEM_freeN(track_context->search_area);
 
 #else
                (void) track_context;
@@ -1370,50 +1320,172 @@ ImBuf *BKE_tracking_track_mask_get(MovieTracking *tracking, MovieTrackingTrack *
 /* Convert from float and byte RGBA to grayscale. Supports different coefficients for RGB. */
 static void float_rgba_to_gray(const float *rgba, float *gray, int num_pixels, float weight_red, float weight_green, float weight_blue) {
        int i;
-       for (i = 0; i < num_pixels; ++i) {
+
+       for (i = 0; i < num_pixels; i++) {
                const float *pixel = rgba + 4 * i;
+
                gray[i] = weight_red * pixel[0] + weight_green * pixel[1] + weight_blue * pixel[2];
        }
 }
 
 static void uint8_rgba_to_float_gray(const unsigned char *rgba, float *gray, int num_pixels, float weight_red, float weight_green, float weight_blue) {
        int i;
-       for (i = 0; i < num_pixels; ++i) {
+
+       for (i = 0; i < num_pixels; i++) {
                const unsigned char *pixel = rgba + i * 4;
+
                *gray++ = (weight_red * pixel[0] + weight_green * pixel[1] + weight_blue * pixel[2]) / 255.0f;
        }
 }
 
-static float *get_search_floatbuf(ImBuf *ibuf, MovieTrackingTrack *track, MovieTrackingMarker *marker,
-                                  int *width_r, int *height_r, float pos[2], int origin[2])
+static float *get_search_floatbuf(ImBuf *ibuf, MovieTrackingTrack *track, MovieTrackingMarker *marker, int *width_r, int *height_r)
 {
-       ImBuf *tmpibuf;
+       ImBuf *searchibuf;
        float *gray_pixels;
-       int x, y, width, height;
+       int width, height;
+       /* ignored */
+       float pos[2];
+       int origin[2];
 
-       width = (track->search_max[0] - track->search_min[0]) * ibuf->x;
-       height = (track->search_max[1] - track->search_min[1]) * ibuf->y;
+       searchibuf = BKE_tracking_get_search_imbuf(ibuf, track, marker, 0, 0, pos, origin);
+       disable_imbuf_channels(searchibuf, track, FALSE /* don't grayscale */);
 
-       tmpibuf = BKE_tracking_get_search_imbuf(ibuf, track, marker, 0, 0, pos, origin);
-       disable_imbuf_channels(tmpibuf, track, FALSE /* don't grayscale */);
+       width = searchibuf->x;
+       height = searchibuf->y;
 
-       *width_r = width;
-       *height_r = height;
+       *width_r = searchibuf->x;
+       *height_r = searchibuf->y;
 
        gray_pixels = MEM_callocN(width * height * sizeof(float), "tracking floatBuf");
 
-       if (tmpibuf->rect_float) {
-               float_rgba_to_gray(tmpibuf->rect_float, gray_pixels, height * width, 0.2126f, 0.7152f, 0.0722f);
+       if (searchibuf->rect_float) {
+               float_rgba_to_gray(searchibuf->rect_float, gray_pixels, width * height, 0.2126f, 0.7152f, 0.0722f);
        }
        else {
-               uint8_rgba_to_float_gray((unsigned char *)tmpibuf->rect, gray_pixels, height * width, 0.2126f, 0.7152f, 0.0722f);
+               uint8_rgba_to_float_gray((unsigned char *)searchibuf->rect, gray_pixels, width * height, 0.2126f, 0.7152f, 0.0722f);
        }
 
-       IMB_freeImBuf(tmpibuf);
+       IMB_freeImBuf(searchibuf);
 
        return gray_pixels;
 }
 
+/* Three coordinate frames: Frame, Search, and Marker
+ * Two units: Pixels, Unified
+ * Notation: {coordinate frame}_{unit}; for example, "search_pixel" are search
+ * window relative coordinates in pixels, and "frame_unified" are unified 0..1
+ * coordinates relative to the entire frame.
+ */
+static void unified_to_pixel(const ImBuf *ibuf, const float unified_coords[2], float pixel_coords[2])
+{
+       pixel_coords[0] = unified_coords[0] * ibuf->x;
+       pixel_coords[1] = unified_coords[1] * ibuf->y;
+}
+
+static void pixel_to_unified(const ImBuf *ibuf, const float pixel_coords[2], float unified_coords[2])
+{
+       unified_coords[0] = pixel_coords[0] / ibuf->x;
+       unified_coords[1] = pixel_coords[1] / ibuf->y;
+}
+
+static void marker_to_frame_unified(const MovieTrackingMarker *marker, const float marker_unified_coords[2], float frame_unified_coords[2])
+{
+       frame_unified_coords[0] = marker_unified_coords[0] + marker->pos[0];
+       frame_unified_coords[1] = marker_unified_coords[1] + marker->pos[1];
+}
+
+static void marker_unified_to_frame_pixel_coordinates(const ImBuf *ibuf, const MovieTrackingMarker *marker,
+               const float marker_unified_coords[2], float frame_pixel_coords[2])
+{
+       marker_to_frame_unified(marker, marker_unified_coords, frame_pixel_coords);
+       unified_to_pixel(ibuf, frame_pixel_coords, frame_pixel_coords);
+}
+
+static void get_search_origin_frame_pixel(const ImBuf *ibuf, const MovieTrackingTrack *track, const MovieTrackingMarker *marker, float frame_pixel[2])
+{
+       /* Get the lower left coordinate of the search window and snap to pixel coordinates */
+       marker_unified_to_frame_pixel_coordinates(ibuf, marker, track->search_min, frame_pixel);
+       frame_pixel[0] = (int)frame_pixel[0];
+       frame_pixel[1] = (int)frame_pixel[1];
+}
+
+static void marker_unified_to_search_pixel(const ImBuf *ibuf, const MovieTrackingTrack *track, const MovieTrackingMarker *marker,
+               const float marker_unified[2], float search_pixel[2])
+{
+       float frame_pixel[2];
+       float search_origin_frame_pixel[2];
+       marker_unified_to_frame_pixel_coordinates(ibuf, marker, marker_unified, frame_pixel);
+       get_search_origin_frame_pixel(ibuf, track, marker, search_origin_frame_pixel);
+       sub_v2_v2v2(search_pixel, frame_pixel, search_origin_frame_pixel);
+}
+
+static void search_pixel_to_marker_unified(const ImBuf *ibuf, const MovieTrackingTrack *track, const MovieTrackingMarker *marker,
+               const float search_pixel[2], float marker_unified[2])
+{
+       float frame_unified[2];
+       float search_origin_frame_pixel[2];
+       get_search_origin_frame_pixel(ibuf, track, marker, search_origin_frame_pixel);
+       add_v2_v2v2(frame_unified, search_pixel, search_origin_frame_pixel);
+       pixel_to_unified(ibuf, frame_unified, frame_unified);
+       sub_v2_v2v2(marker_unified, frame_unified, marker->pos /* marker pos is in frame unified */);
+}
+
+/* Each marker has 5 coordinates associated with it that get warped with
+ * tracking: the four corners ("pattern_corners"), and the center ("pos").
+ * This function puts those 5 points into the appropriate frame for tracking
+ * (the "search" coordinate frame).
+ */
+static void get_marker_coords_for_tracking(const ImBuf *ibuf, const MovieTrackingTrack *track, const MovieTrackingMarker *marker,
+               double search_pixel_x[5], double search_pixel_y[5]) {
+       int i;
+       float unified_coords[2];
+       float pixel_coords[2];
+
+       /* Convert the corners into search space coordinates. */
+       for (i = 0; i < 4; ++i) {
+               marker_unified_to_search_pixel(ibuf, track, marker, marker->pattern_corners[i], pixel_coords);
+               search_pixel_x[i] = pixel_coords[0];
+               search_pixel_y[i] = pixel_coords[1];
+       }
+       /* Convert the center position (aka "pos"); this is the origin */
+       unified_coords[0] = 0.0;
+       unified_coords[1] = 0.0;
+       marker_unified_to_search_pixel(ibuf, track, marker, unified_coords, pixel_coords);
+       search_pixel_x[4] = pixel_coords[0];
+       search_pixel_y[4] = pixel_coords[1];
+}
+
+/* Inverse of above. */
+static void set_marker_coords_from_tracking(const ImBuf *ibuf, const MovieTrackingTrack *track, MovieTrackingMarker *marker,
+               const double search_pixel_x[5], const double search_pixel_y[5])
+{
+       int i;
+       float marker_unified[2];
+       float search_pixel[2];
+
+       /* Convert the corners into search space coordinates. */
+       for (i = 0; i < 4; ++i) {
+               search_pixel[0] = search_pixel_x[i];
+               search_pixel[1] = search_pixel_y[i];
+               search_pixel_to_marker_unified(ibuf, track, marker, search_pixel, marker->pattern_corners[i]);
+       }
+
+       /* Convert the center position (aka "pos"); this is the origin */
+       search_pixel[0] = search_pixel_x[4];
+       search_pixel[1] = search_pixel_y[4];
+       search_pixel_to_marker_unified(ibuf, track, marker, search_pixel, marker_unified);
+
+       /* If the tracker tracked nothing, then "marker_unified" would be zero.
+        * Otherwise, the entire patch shifted, and that delta should be applied to
+        * all the coordinates. */
+       for (i = 0; i < 4; ++i) {
+               marker->pattern_corners[i][0] -= marker_unified[0];
+               marker->pattern_corners[i][1] -= marker_unified[1];
+       }
+       marker->pos[0] += marker_unified[0];
+       marker->pos[1] += marker_unified[1];
+}
+
 static unsigned char *get_ucharbuf(ImBuf *ibuf)
 {
        int x, y;
@@ -1535,7 +1607,7 @@ void BKE_tracking_sync_user(MovieClipUser *user, MovieTrackingContext *context)
 
 int BKE_tracking_next(MovieTrackingContext *context)
 {
-       ImBuf *ibuf_new;
+       ImBuf *destination_ibuf;
        int curfra = context->user.framenr;
        int a, ok = FALSE, map_size;
 
@@ -1550,26 +1622,27 @@ int BKE_tracking_next(MovieTrackingContext *context)
        else
                context->user.framenr++;
 
-       ibuf_new = BKE_movieclip_get_ibuf_flag(context->clip, &context->user, context->clip_flag, MOVIECLIP_CACHE_SKIP);
-       if (!ibuf_new)
+       destination_ibuf = BKE_movieclip_get_ibuf_flag(context->clip, &context->user, context->clip_flag, MOVIECLIP_CACHE_SKIP);
+       if (!destination_ibuf)
                return FALSE;
 
-       #pragma omp parallel for private(a) shared(ibuf_new, ok) if (map_size>1)
+       #pragma omp parallel for private(a) shared(destination_ibuf, ok) if (map_size>1)
        for (a = 0; a < map_size; a++) {
                TrackContext *track_context = NULL;
                MovieTrackingTrack *track;
                MovieTrackingMarker *marker;
 
+               double dst_pixel_x[5];
+               double dst_pixel_y[5];
+
                tracks_map_get(context->tracks_map, a, &track, (void**)&track_context);
 
                marker = BKE_tracking_exact_marker(track, curfra);
 
                if (marker && (marker->flag & MARKER_DISABLED) == 0) {
 #ifdef WITH_LIBMV
-                       int width, height, origin[2], tracked = 0, need_readjust = 0;
-                       float pos[2], margin[2], dim[2], pat_min[2], pat_max[2];
-                       double x1, y1, x2, y2;
-                       ImBuf *ibuf = NULL;
+                       int width, height, tracked = 0, need_readjust = 0;
+                       float margin[2], dim[2], pat_min[2], pat_max[2];
                        MovieTrackingMarker marker_new, *marker_keyed;
                        int onbound = FALSE, nextfra;
 
@@ -1588,8 +1661,8 @@ int BKE_tracking_next(MovieTrackingContext *context)
                        sub_v2_v2v2(dim, pat_max, pat_min);
                        margin[0] = margin[1] = MAX2(dim[0], dim[1]) / 2.0f;
 
-                       margin[0] = MAX2(margin[0], (float)track->margin / ibuf_new->x);
-                       margin[1] = MAX2(margin[1], (float)track->margin / ibuf_new->y);
+                       margin[0] = MAX2(margin[0], (float)track->margin / destination_ibuf->x);
+                       margin[1] = MAX2(margin[1], (float)track->margin / destination_ibuf->y);
 
                        /* do not track markers which are too close to boundary */
                        if (marker->pos[0] < margin[0] || marker->pos[0] > 1.0f - margin[0] ||
@@ -1598,62 +1671,97 @@ int BKE_tracking_next(MovieTrackingContext *context)
                                onbound = TRUE;
                        }
                        else {
+                               /* to convert to the x/y split array format for libmv. */
+                               double src_pixel_x[5];
+                               double src_pixel_y[5];
+
+                               /* settings for the tracker */
+                               struct libmv_trackRegionOptions options;
+                               struct libmv_trackRegionResult result;
+
                                float *patch_new;
 
                                if (need_readjust) {
+                                       ImBuf *reference_ibuf = NULL;
                                        /* calculate patch for keyframed position */
-                                       ibuf = get_adjust_ibuf(context, track, marker, curfra, &marker_keyed);
+                                       reference_ibuf = get_adjust_ibuf(context, track, marker, curfra, &marker_keyed);
+                                       track_context->marker = *marker_keyed;
 
-                                       if (track_context->patch)
-                                               MEM_freeN(track_context->patch);
+                                       if (track_context->search_area)
+                                               MEM_freeN(track_context->search_area);
 
-                                       track_context->patch = get_search_floatbuf(ibuf, track, marker_keyed, &width, &height,
-                                                                                  track_context->keyframed_pos, origin);
+                                       track_context->search_area = get_search_floatbuf(reference_ibuf, track, marker_keyed, &width, &height);
+                                       track_context->search_area_height = height;
+                                       track_context->search_area_width = width;
 
-                                       IMB_freeImBuf(ibuf);
+                                       IMB_freeImBuf(reference_ibuf);
                                }
 
-                               patch_new = get_search_floatbuf(ibuf_new, track, marker, &width, &height, pos, origin);
-
-                               x1 = track_context->keyframed_pos[0];
-                               y1 = track_context->keyframed_pos[1];
-
-                               x2 = pos[0];
-                               y2 = pos[1];
+                               /* XXX for now, the width & height always match because the
+                                * settings are per-track. This will change soon and in that
+                                * case different sizes must be used */
+                               patch_new = get_search_floatbuf(destination_ibuf, track, marker, &width, &height);
+
+                               /* Configure the tracker */
+                               options.motion_model = 0;
+                               options.num_iterations = 10;
+                               options.minimum_correlation = track->minimum_correlation;
+                               options.sigma = 0.9;
+
+                               /* Convert the marker corners and center into pixel coordinates in the search/destination images. */
+                               printf("track_context: %p\n", track_context);
+                               printf("track_context->marker.framenr: %d\n", track_context->marker.framenr);
+                               printf("marker: %p\n", marker);
+                               printf("marker.framenr: %d\n", marker->framenr);
+                               printf("track: %p\n", track);
+                               printf("destination_ibuf: %p\n", destination_ibuf);
+                               printf("\n");
+                               get_marker_coords_for_tracking(destination_ibuf, track, &track_context->marker, src_pixel_x, src_pixel_y);
+                               get_marker_coords_for_tracking(destination_ibuf, track, marker, dst_pixel_x, dst_pixel_y);
+
+#if 1
+                               /* Run the tracker! */
+                               tracked = libmv_trackRegion(&options,
+                                                           track_context->search_area, patch_new,
+                                                           width, height,
+                                                           src_pixel_x, src_pixel_y,
+                                                           &result,
+                                                           dst_pixel_x, dst_pixel_y);
+#else
+                               {
+                                       /* XXX: just for debug
+                                        *      easy to see that marker isn't getting updated properly if it's got
+                                        *      non-integer initial coordinates */
+                                       int i;
+
+                                       for (i = 0; i < 5; i++) {
+                                               dst_pixel_x[i] = src_pixel_x[i] + 0.5f;
+                                               dst_pixel_y[i] = src_pixel_y[i] + 0.5f;
+                                       }
 
-                               tracked = libmv_regionTrackerTrack(track_context->region_tracker, track_context->patch, patch_new,
-                                                       width, height, x1, y1, &x2, &y2);
+                                       tracked = TRUE;
+                               }
+#endif
 
                                MEM_freeN(patch_new);
                        }
 
-                       if (tracked && !onbound && finite(x2) && finite(y2)) {
+                       if (tracked && !onbound) {
+                               memset(&marker_new, 0, sizeof(marker_new));
+                               marker_new = *marker;
+                               set_marker_coords_from_tracking(destination_ibuf, track, &marker_new, dst_pixel_x, dst_pixel_y);
+                               marker_new.flag |= MARKER_TRACKED;
+                               marker_new.framenr = nextfra;
+
                                if (context->first_time) {
                                        #pragma omp critical
                                        {
                                                /* check if there's no keyframe/tracked markers before tracking marker.
                                                 * if so -- create disabled marker before currently tracking "segment" */
-                                               put_disabled_marker(track, marker, !context->backwards, 0);
+                                               put_disabled_marker(track, &marker_new, !context->backwards, 0);
                                        }
                                }
 
-                               memset(&marker_new, 0, sizeof(marker_new));
-
-                               if (!onbound) {
-                                       marker_new.pos[0] = (origin[0] + x2) / ibuf_new->x;
-                                       marker_new.pos[1] = (origin[1] + y2) / ibuf_new->y;
-                               }
-                               else {
-                                       copy_v2_v2(marker_new.pos, marker->pos);
-                               }
-
-                               /* XXX: currently trackers does not change corners of a track, so
-                                *      just copy them from previous position */
-                               memcpy(marker_new.pattern_corners, marker->pattern_corners, sizeof(marker_new.pattern_corners));
-
-                               marker_new.flag |= MARKER_TRACKED;
-                               marker_new.framenr = nextfra;
-
                                #pragma omp critical
                                {
                                        BKE_tracking_insert_marker(track, &marker_new);
@@ -1682,7 +1790,7 @@ int BKE_tracking_next(MovieTrackingContext *context)
                }
        }
 
-       IMB_freeImBuf(ibuf_new);
+       IMB_freeImBuf(destination_ibuf);
 
        context->first_time = FALSE;
        context->frames++;
index feb77c5c9ec16e65ce4c3e0a79521e08955949be..6f2e017b1f11367eeef4641bed05ae2bbf839369 100644 (file)
@@ -73,14 +73,16 @@ typedef struct MovieTrackingMarker {
 
        /* corners of pattern in the following order:
         *
-        * Y
-        * ^
-        * | (3) --- (2)
-        * |  |       |
-        * |  |       |
-        * |  |       |
-        * | (0) --- (1)
-        * +-------------> X
+        *       Y
+        *       ^
+        *       | (3) --- (2)
+        *       |  |       |
+        *       |  |       |
+        *       |  |       |
+        *       | (0) --- (1)
+        *       +-------------> X
+        *
+        * the coordinates are stored relative to pos.
         */
        float pattern_corners[4][2];
 
@@ -95,10 +97,10 @@ typedef struct MovieTrackingTrack {
 
        /* ** setings ** */
 
-       /* positions of left-bottom and right-top corners of pattern (in unified 0..1 space) */
+       /* positions of left-bottom and right-top corners of pattern (in unified 0..1 units, relative to marker->pos) */
        float pat_min[2], pat_max[2]            DNA_DEPRECATED;
 
-       /* positions of left-bottom and right-top corners of search area (in unified 0..1 space) */
+       /* positions of left-bottom and right-top corners of search area (in unified 0..1 units, relative to marker->pos) */
        float search_min[2], search_max[2];
 
        float offset[2];                                        /* offset to "parenting" point */