Add light-normalized tracking to the planar tracker
author Keir Mierle <mierle@gmail.com>
Fri, 18 May 2012 02:12:47 +0000 (02:12 +0000)
committer Keir Mierle <mierle@gmail.com>
Fri, 18 May 2012 02:12:47 +0000 (02:12 +0000)
This commit adds the ability to normalize patterns by their
average value while tracking, to make them invariant to global
illumination changes.

To see this in action, check out the "Lobby" scene from Hollywood
VFX. If you track the markers that are shadowed by the actress,
previously they would not track. With the scale adaption on, the
tracker would shrink the area to compensate for the changed
illumination, losing the track. With "Normalize" turned on, the
patch is correctly tracked and scale is maintained.

A remaining problem is that only the Ceres cost function is
updated to handle the normalization. The brute translation search
does not take this into account. Perhaps "Prepass" (see below)
should get disabled if normalization is enabled until I fix the
prepass to normalize as well.

There are a few other changes:

- Cleanups in tracking RNA comments.

- Bail out of the sampling loop early if the mask is zero; this
  saves expensive samples of the image derivatives.

- Rename the wordy "Translation initialization" to "Prepass" at
  Sebastian's suggestion.

- Fix a bug where the mask was ignored when sampling in the cost
  functor.

extern/libmv/libmv-capi.cpp
extern/libmv/libmv-capi.h
extern/libmv/libmv/tracking/track_region.cc
extern/libmv/libmv/tracking/track_region.h
release/scripts/startup/bl_ui/space_clip.py
source/blender/blenkernel/intern/tracking.c
source/blender/makesdna/DNA_tracking_types.h
source/blender/makesrna/intern/rna_tracking.c

index 1781eaa747c00d327c32c5e7ab1e19acfdcb4c91..eba4698f1986410143edd2295ed650fe6ee52cef 100644 (file)
@@ -381,6 +381,7 @@ int libmv_trackRegion(const struct libmv_trackRegionOptions *options,
        track_region_options.num_extra_points = 1;
        track_region_options.image1_mask = NULL;
        track_region_options.use_brute_initialization = options->use_brute;
+       track_region_options.use_normalized_intensities = options->use_normalization;
 
        /* Convert from raw float buffers to libmv's FloatImage. */
        libmv::FloatImage old_patch, new_patch;
index 9fedb9b968311070844f3534d43c40561d33d7da..bbd8f0c30d07454edfa60c5b9b3cc43f378954c1 100644 (file)
@@ -55,6 +55,7 @@ struct libmv_trackRegionOptions {
   int motion_model;
   int num_iterations;
   int use_brute;
+  int use_normalization;
   double minimum_correlation;
   double sigma;
 };
index 58a2077acce86dac06ffba245eda4bbd9b43a715..ababd0cee9030de67249fb9b1e069102d65898dc 100644 (file)
@@ -44,6 +44,7 @@ TrackRegionOptions::TrackRegionOptions()
       max_iterations(20),
       use_esm(true),
       use_brute_initialization(true),
+      use_normalized_intensities(false),
       sigma(0.9),
       num_extra_points(0),
       image1_mask(NULL) {
@@ -191,6 +192,14 @@ class WarpCostFunctor {
       VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i];
     }
 
+    T src_mean = T(1.0);
+    T dst_mean = T(1.0);
+    if (options_.use_normalized_intensities) {
+      ComputeNormalizingCoefficients(warp_parameters,
+                                     &src_mean,
+                                     &dst_mean);
+    }
+
     int cursor = 0;
     for (int r = 0; r < num_samples_y_; ++r) {
       for (int c = 0; c < num_samples_x_; ++c) {
@@ -198,6 +207,19 @@ class WarpCostFunctor {
         Vec3 image1_position = canonical_to_image1_ * Vec3(c, r, 1);
         image1_position /= image1_position(2);
         
+        // Sample the mask early; if it's zero, this pixel has no effect. This
+        // allows early bailout from the expensive sampling that happens below.
+        double mask_value = 1.0;
+        if (options_.image1_mask != NULL) {
+          mask_value = AutoDiff<double>::Sample(*options_.image1_mask,
+                                                image1_position[0],
+                                                image1_position[1]);
+          if (mask_value == 0.0) {
+            residuals[cursor++] = T(0.0);
+            continue;
+          }
+        }
+
         // Compute the location of the destination pixel.
         T image2_position[2];
         warp_.Forward(warp_parameters,
@@ -206,7 +228,6 @@ class WarpCostFunctor {
                       &image2_position[0],
                       &image2_position[1]);
 
-
         // Sample the destination, propagating derivatives.
         T dst_sample = AutoDiff<T>::Sample(image_and_gradient2_,
                                            image2_position[0],
@@ -239,6 +260,15 @@ class WarpCostFunctor {
                                                   image1_position[1]));
         }
 
+        // Normalize the samples by the mean values of each signal. The typical
+        // light model assumes multiplicative intensity changes with changing
+        // light, so this is a reasonable choice. Note that dst_mean has
+        // derivative information attached thanks to autodiff.
+        if (options_.use_normalized_intensities) {
+          src_sample /= src_mean;
+          dst_sample /= dst_mean;
+        }
+
         // The difference is the error.
         T error = src_sample - dst_sample;
 
@@ -248,12 +278,80 @@ class WarpCostFunctor {
                                               image1_position[0],
                                               image1_position[1]));
         }
-        residuals[cursor++] = src_sample - dst_sample;
+        residuals[cursor++] = error;
       }
     }
     return true;
   }
 
+  // Computes the mask-weighted mean intensity of the source patch (image 1)
+  // and of the warped destination patch (image 2) over the sample grid. The
+  // cost functor divides each sample by these means when
+  // use_normalized_intensities is set, which makes the residuals invariant
+  // to a global multiplicative change in illumination.
+  //
+  // warp_parameters: current warp parameters, passed to warp_.Forward().
+  // src_mean:        output; weighted mean of the source samples.
+  // dst_mean:        output; weighted mean of the destination samples. The
+  //                  destination is sampled with AutoDiff<T>, so derivative
+  //                  information is carried along when T supports it.
+  //
+  // If no sample has a non-zero mask weight, both means are set to 1.0 so
+  // that the result is never 0/0.
+  template<typename T>
+  void ComputeNormalizingCoefficients(const T *warp_parameters,
+                                      T *src_mean,
+                                      T *dst_mean) const {
+    *src_mean = T(0.0);
+    *dst_mean = T(0.0);
+    // Sum of the mask weights; equals the sample count when there is no mask.
+    double num_samples = 0.0;
+    for (int r = 0; r < num_samples_y_; ++r) {
+      for (int c = 0; c < num_samples_x_; ++c) {
+        // Compute the location of the source pixel (via homography).
+        Vec3 image1_position = canonical_to_image1_ * Vec3(c, r, 1);
+        image1_position /= image1_position(2);
+
+        // Sample the mask early; if it's zero, this pixel has no effect. This
+        // allows early bailout from the expensive sampling that happens below.
+        double mask_value = 1.0;
+        if (options_.image1_mask != NULL) {
+          mask_value = AutoDiff<double>::Sample(*options_.image1_mask,
+                                                image1_position[0],
+                                                image1_position[1]);
+          if (mask_value == 0.0) {
+            continue;
+          }
+        }
+
+        // Compute the location of the destination pixel.
+        T image2_position[2];
+        warp_.Forward(warp_parameters,
+                      T(image1_position[0]),
+                      T(image1_position[1]),
+                      &image2_position[0],
+                      &image2_position[1]);
+
+        // Sample the destination, propagating derivatives.
+        // TODO(keir): This accumulation can, surprisingly, be done as a
+        // pre-pass by using integral images. This is complicated by the need
+        // to store the jets in the integral image, but it is possible.
+        T dst_sample = AutoDiff<T>::Sample(image_and_gradient2_,
+                                           image2_position[0],
+                                           image2_position[1]);
+
+        // Sample the source.
+        // TODO(keir): There is no reason to do this inside the loop;
+        // precompute this and reuse it.
+        T src_sample = T(AutoDiff<double>::Sample(image_and_gradient1_,
+                                                  image1_position[0],
+                                                  image1_position[1]));
+
+        // Weight the sample by the mask, if one is present.
+        if (options_.image1_mask != NULL) {
+          src_sample *= T(mask_value);
+          dst_sample *= T(mask_value);
+        }
+
+        *src_mean += src_sample;
+        *dst_mean += dst_sample;
+        num_samples += mask_value;
+      }
+    }
+
+    // Every sample was masked out (or the grid is empty): fall back to the
+    // neutral scale instead of dividing zero by zero.
+    if (num_samples == 0.0) {
+      *src_mean = T(1.0);
+      *dst_mean = T(1.0);
+      return;
+    }
+
+    *src_mean /= T(num_samples);
+    *dst_mean /= T(num_samples);
+
+    // This runs on every cost evaluation, so report through verbose logging
+    // rather than printing unconditionally to stdout.
+    VLOG(2) << "Normalization for src: " << *src_mean;
+    VLOG(2) << "Normalization for dst: " << *dst_mean;
+  }
+
  // TODO(keir): Consider also computing the cost here.
  double PearsonProductMomentCorrelationCoefficient(
      const double *warp_parameters) const {
index 1a1346f544f5b2c8486b9e0885db216f06b26c02..4a1427a6b9fb12b38f4c1a0b12ffa90eeea41e6e 100644 (file)
@@ -57,6 +57,16 @@ struct TrackRegionOptions {
   // that the nearby minima is correct, or the search area is too small.
   bool use_brute_initialization;
 
+  // If true, normalize the image patches by their mean before doing the sum of
+  // squared error calculation. This is reasonable since the effect of
+  // increasing light intensity is multiplicative on the pixel intensities.
+  //
+  // Note: This does nearly double the solving time, so it is not advised to
+  // turn this on all the time.
+  bool use_normalized_intensities;
+
+  // The size in pixels of the blur kernel used to both smooth the image and
+  // take the image derivative.
   double sigma;
 
   // Extra points that should get transformed by the warp. This is useful
index c17266cf303ac4e328518f0b9038bf5ddcc3d1f3..ac5b4fe52f436b29213c919c59c137a03da27ff2 100644 (file)
@@ -189,6 +189,7 @@ class CLIP_PT_tools_marker(CLIP_PT_tracking_panel, Panel):
             col.label(text="Tracker:")
             col.prop(settings, "default_motion_model")
             col.prop(settings, "default_use_brute")
+            col.prop(settings, "default_use_normalization")
             col.prop(settings, "default_correlation_min")
 
             col.separator()
@@ -488,6 +489,7 @@ class CLIP_PT_track_settings(CLIP_PT_tracking_panel, Panel):
         if active:
             col.prop(active, "motion_model")
             col.prop(active, "use_brute")
+            col.prop(active, "use_normalization")
             col.prop(active, "correlation_min")
 
             col.separator()
index 1956492fce919424d0defc431405a35d7f78d2f7..2456411c2c274177f277d4f3ebc4cd786eda076b 100644 (file)
@@ -1735,6 +1735,7 @@ int BKE_tracking_next(MovieTrackingContext *context)
                                /* Configure the tracker */
                                options.motion_model = track->motion_model;
                                options.use_brute = ((track->algorithm_flag & TRACK_ALGORITHM_FLAG_USE_BRUTE) == 0);
+                               options.use_normalization = ((track->algorithm_flag & TRACK_ALGORITHM_FLAG_USE_NORMALIZATION) == 0);
                                options.num_iterations = 50;
                                options.minimum_correlation = track->minimum_correlation;
                                options.sigma = 0.9;
index 2be7722df8c88e5ade353f37a8b15add3b09a920..a720a14fdcb059801bc081e086902d3985e01d5f 100644 (file)
@@ -287,6 +287,7 @@ enum {
 
 /* MovieTrackingTrack->algorithm_flag */
 #define TRACK_ALGORITHM_FLAG_USE_BRUTE 1
+#define TRACK_ALGORITHM_FLAG_USE_NORMALIZATION 2
 
 /* MovieTrackingTrack->adjframes */
 #define TRACK_MATCH_KEYFRAME           0
index 34dd471e9c1056e3e465eb4b107d8a8318a15518..565655ccf37a7d7ba2df0d991a1bb19bb09cda1c 100644 (file)
@@ -590,14 +590,14 @@ static void rna_def_trackingSettings(BlenderRNA *brna)
        RNA_def_property_boolean_sdna(prop, NULL, "motion_flag", TRACKING_MOTION_TRIPOD);
        RNA_def_property_ui_text(prop, "Tripod Motion", "Use special solver to track a stable camera position, such as a tripod");
 
-       /* limit frames */
+       /* default_frames_limit */
        prop = RNA_def_property(srna, "default_frames_limit", PROP_INT, PROP_NONE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
        RNA_def_property_int_sdna(prop, NULL, "default_frames_limit");
        RNA_def_property_range(prop, 0, SHRT_MAX);
        RNA_def_property_ui_text(prop, "Frames Limit", "Every tracking cycle, this number of frames are tracked");
 
-       /* pattern match */
+       /* default_pattern_match */
        prop = RNA_def_property(srna, "default_pattern_match", PROP_ENUM, PROP_NONE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
        RNA_def_property_enum_sdna(prop, NULL, "default_pattern_match");
@@ -605,14 +605,14 @@ static void rna_def_trackingSettings(BlenderRNA *brna)
        RNA_def_property_ui_text(prop, "Pattern Match",
                                 "Track pattern from given frame when tracking marker to next frame");
 
-       /* margin */
+       /* default_margin */
        prop = RNA_def_property(srna, "default_margin", PROP_INT, PROP_NONE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
        RNA_def_property_int_sdna(prop, NULL, "default_margin");
        RNA_def_property_range(prop, 0, 300);
        RNA_def_property_ui_text(prop, "Margin", "Default distance from image boudary at which marker stops tracking");
 
-       /* tracking motion model */
+       /* default_tracking_motion_model */
        prop = RNA_def_property(srna, "default_motion_model", PROP_ENUM, PROP_NONE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
@@ -622,10 +622,16 @@ static void rna_def_trackingSettings(BlenderRNA *brna)
        /* use_brute */
        prop = RNA_def_property(srna, "default_use_brute", PROP_BOOLEAN, PROP_NONE);
        RNA_def_property_boolean_negative_sdna(prop, NULL, "default_algorithm_flag", TRACK_ALGORITHM_FLAG_USE_BRUTE);
-       RNA_def_property_ui_text(prop, "Translation-only initialization", "Use a brute-force translation-only initialization when tracking");
+       RNA_def_property_ui_text(prop, "Prepass", "Use a brute-force translation-only initialization when tracking");
        RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
 
-       /* minmal correlation */
+       /* default use_normalization */
+       prop = RNA_def_property(srna, "default_use_normalization", PROP_BOOLEAN, PROP_NONE);
+       RNA_def_property_boolean_negative_sdna(prop, NULL, "default_algorithm_flag", TRACK_ALGORITHM_FLAG_USE_NORMALIZATION);
+       RNA_def_property_ui_text(prop, "Normalize", "Normalize light intensities while tracking. Slower.");
+       RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
+
+       /* default minimal correlation */
        prop = RNA_def_property(srna, "default_correlation_min", PROP_FLOAT, PROP_NONE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
@@ -652,19 +658,19 @@ static void rna_def_trackingSettings(BlenderRNA *brna)
        RNA_def_property_update(prop, 0, "rna_tracking_defaultSettings_searchUpdate");
        RNA_def_property_ui_text(prop, "Search Size", "Size of search area for newly created tracks");
 
-       /* use_red_channel */
+       /* default use_red_channel */
        prop = RNA_def_property(srna, "use_default_red_channel", PROP_BOOLEAN, PROP_NONE);
        RNA_def_property_boolean_negative_sdna(prop, NULL, "default_flag", TRACK_DISABLE_RED);
        RNA_def_property_ui_text(prop, "Use Red Channel", "Use red channel from footage for tracking");
        RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
 
-       /* use_green_channel */
+       /* default_use_green_channel */
        prop = RNA_def_property(srna, "use_default_green_channel", PROP_BOOLEAN, PROP_NONE);
        RNA_def_property_boolean_negative_sdna(prop, NULL, "default_flag", TRACK_DISABLE_GREEN);
        RNA_def_property_ui_text(prop, "Use Green Channel", "Use green channel from footage for tracking");
        RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
 
-       /* use_blue_channel */
+       /* default_use_blue_channel */
        prop = RNA_def_property(srna, "use_default_blue_channel", PROP_BOOLEAN, PROP_NONE);
        RNA_def_property_boolean_negative_sdna(prop, NULL, "default_flag", TRACK_DISABLE_BLUE);
        RNA_def_property_ui_text(prop, "Use Blue Channel", "Use blue channel from footage for tracking");
@@ -928,7 +934,14 @@ static void rna_def_trackingTrack(BlenderRNA *brna)
        prop = RNA_def_property(srna, "use_brute", PROP_BOOLEAN, PROP_NONE);
        RNA_def_property_boolean_negative_sdna(prop, NULL, "algorithm_flag", TRACK_ALGORITHM_FLAG_USE_BRUTE);
        RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
-       RNA_def_property_ui_text(prop, "Translation-only initialization", "Use a brute-force translation only pre-track before refinement");
+       RNA_def_property_ui_text(prop, "Prepass", "Use a brute-force translation only pre-track before refinement");
+       RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
+
+       /* use_normalization */
+       prop = RNA_def_property(srna, "use_normalization", PROP_BOOLEAN, PROP_NONE);
+       RNA_def_property_boolean_negative_sdna(prop, NULL, "algorithm_flag", TRACK_ALGORITHM_FLAG_USE_NORMALIZATION);
+       RNA_def_property_clear_flag(prop, PROP_ANIMATABLE);
+       RNA_def_property_ui_text(prop, "Normalize", "Normalize light intensities while tracking. Slower.");
        RNA_def_property_update(prop, NC_MOVIECLIP|ND_DISPLAY, NULL);
 
        /* markers */