From 455061d09378a16f48217b5bee9f31a66fda27bc Mon Sep 17 00:00:00 2001
From: Shantanu Chhabra <shantanu_chhabra@apple.com>
Date: Tue, 13 Aug 2019 18:21:15 -0700
Subject: [PATCH] Software change to resize backgrounds as part of OSOD toolkit
 (#2190)

---
 .../util/_augmentation.py                     |  9 ++++++
 .../one_shot_object_detector.cpp              | 31 ++++++++++---------
 .../one_shot_object_detector.hpp              |  5 ++-
 .../util/superposition.cpp                    |  7 +++--
 .../util/superposition.hpp                    |  7 +++--
 5 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/src/python/turicreate/toolkits/one_shot_object_detector/util/_augmentation.py b/src/python/turicreate/toolkits/one_shot_object_detector/util/_augmentation.py
index 5367ca8019..4c13bfc71f 100644
--- a/src/python/turicreate/toolkits/one_shot_object_detector/util/_augmentation.py
+++ b/src/python/turicreate/toolkits/one_shot_object_detector/util/_augmentation.py
@@ -52,6 +52,15 @@ def preview_synthetic_training_data(data,
         backgrounds_tar = _tarfile.open(backgrounds_tar_path)
         backgrounds_tar.extractall()
         backgrounds = _tc.SArray("one_shot_backgrounds.sarray")
+        # We resize the background dimensions by half along each axis to reduce
+        # the disk footprint during augmentation, and also reduce the time
+        # taken to synthesize data. 
+        backgrounds = backgrounds.apply(lambda im: _tc.image_analysis.resize(
+            im,
+            int(im.width/2),
+            int(im.height/2),
+            im.channels
+            ))
     # Option arguments to pass in to C++ Object Detector, if we use it:
     # {'mlmodel_path':'darknet.mlmodel', 'max_iterations' : 25}
     options_for_augmentation = {
diff --git a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp
index f5c8a7e9f0..372ab97c7b 100644
--- a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp
+++ b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.cpp
@@ -100,27 +100,21 @@ static std::map<std::string, size_t> generate_column_index_map(
   return index_map;
 }
 
-boost::gil::rgba8_image_t::view_t create_starter_image_view(
-    const flex_image &object_input) {
+flex_image create_rgba_flex_image(const flex_image &object_input) {
   if (!(object_input.is_decoded())) {
     log_and_throw("Input object starter image is not decoded.");
   }
-  flex_image object =
+  flex_image rgba_flex_image =
       image_util::resize_image(object_input, object_input.m_width,
                                object_input.m_height, 4, true)
           .to<flex_image>();
-  if (!(object.is_decoded())) {
+  if (!(rgba_flex_image.is_decoded())) {
     log_and_throw("Resized object starter image is not decoded.");
   }
-  if (object.m_channels != 4) {
+  if (rgba_flex_image.m_channels != 4) {
     log_and_throw("Object image is not resized to be 4.");
   }
-  boost::gil::rgba8_image_t::view_t starter_image_view = interleaved_view(
-      object.m_width, object.m_height,
-      (boost::gil::rgba8_pixel_t *)(object.get_image_data()),
-      object.m_channels * object.m_width  // row length in bytes
-  );
-  return starter_image_view;
+  return rgba_flex_image;
 }
 
 std::pair<flex_image, flex_dict>
@@ -149,12 +143,19 @@ create_synthetic_image_from_background_and_starter(const flex_image &starter,
     log_and_throw("Background image is not decoded into raw format.");
   }
 
-  boost::gil::rgba8_image_t::view_t starter_image_view =
-      create_starter_image_view(starter);
+  flex_image rgba_flex_image = create_rgba_flex_image(starter);
+  boost::gil::rgba8_image_t::const_view_t starter_image_view =
+      interleaved_view(rgba_flex_image.m_width, rgba_flex_image.m_height,
+                       reinterpret_cast<const boost::gil::rgba8_pixel_t *>(
+                           rgba_flex_image.get_image_data()),
+                       rgba_flex_image.m_channels *
+                           rgba_flex_image.m_width  // row length in bytes
+      );
 
-  boost::gil::rgb8_image_t::view_t background_view = interleaved_view(
+  boost::gil::rgb8_image_t::const_view_t background_view = interleaved_view(
       background.m_width, background.m_height,
-      (boost::gil::rgb8_pixel_t *)(background.get_image_data()),
+      reinterpret_cast<const boost::gil::rgb8_pixel_t *>(
+          background.get_image_data()),
       background.m_channels * background.m_width  // row length in bytes
   );
   flex_image synthetic_image = create_synthetic_image(
diff --git a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.hpp b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.hpp
index 179d37177b..a61a12710c 100644
--- a/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.hpp
+++ b/src/toolkits/object_detection/one_shot_object_detection/one_shot_object_detector.hpp
@@ -26,9 +26,8 @@ class EXPORT one_shot_object_detector: public ml_model_base {
   // Interface exposed via Unity server
 
   // TODO: augment -> train
-  gl_sframe augment(const gl_sframe &data,
-                    const std::string& image_column_name,
-                    const std::string& target_column_name,
+  gl_sframe augment(const gl_sframe &data, const std::string &image_column_name,
+                    const std::string &target_column_name,
                     const gl_sarray &backgrounds,
                     std::map<std::string, flexible_type> &options);
 
diff --git a/src/toolkits/object_detection/one_shot_object_detection/util/superposition.cpp b/src/toolkits/object_detection/one_shot_object_detection/util/superposition.cpp
index 53750aeb0c..b0dbf1f136 100644
--- a/src/toolkits/object_detection/one_shot_object_detection/util/superposition.cpp
+++ b/src/toolkits/object_detection/one_shot_object_detection/util/superposition.cpp
@@ -38,9 +38,10 @@ void superimpose_image(const boost::gil::rgb8_image_t::view_t &superimposed,
   });
 }
 
-flex_image create_synthetic_image(const boost::gil::rgba8_image_t::view_t &starter_image_view,
-                                  const boost::gil::rgb8_image_t::view_t &background_view,
-                                  ParameterSampler &parameter_sampler) {
+flex_image create_synthetic_image(
+    const boost::gil::rgba8_image_t::const_view_t &starter_image_view,
+    const boost::gil::rgb8_image_t::const_view_t &background_view,
+    ParameterSampler &parameter_sampler) {
   boost::gil::rgba8_image_t background_rgba(boost::gil::rgba8_image_t::point_t(background_view.dimensions()));
   boost::gil::copy_and_convert_pixels(
     background_view,
diff --git a/src/toolkits/object_detection/one_shot_object_detection/util/superposition.hpp b/src/toolkits/object_detection/one_shot_object_detection/util/superposition.hpp
index 442856b86e..8a7f1da297 100644
--- a/src/toolkits/object_detection/one_shot_object_detection/util/superposition.hpp
+++ b/src/toolkits/object_detection/one_shot_object_detection/util/superposition.hpp
@@ -15,9 +15,10 @@ namespace turi {
 namespace one_shot_object_detection {
 namespace data_augmentation {
 
-flex_image create_synthetic_image(const boost::gil::rgba8_image_t::view_t &starter_image_view,
-                                  const boost::gil::rgb8_image_t::view_t &background_view,
-                                  ParameterSampler &parameter_sampler);
+flex_image create_synthetic_image(
+    const boost::gil::rgba8_image_t::const_view_t &starter_image_view,
+    const boost::gil::rgb8_image_t::const_view_t &background_view,
+    ParameterSampler &parameter_sampler);
 
 } // data_augmentation
 } // one_shot_object_detection