Merge branch 'feat/face_recognition' into 'master'

feat: add face recognition. See merge request ai/esp-dl!57
espressif · Oct 15, 2024 · d08d644 · d08d644
2 parents d5bd7ba + 2731e81
commit d08d644
Show file tree

Hide file tree

Showing 40 changed files with 1,711 additions and 238 deletions.
diff --git a/.gitlab/ci/build.yml b/.gitlab/ci/build.yml
@@ -81,6 +81,17 @@ build_espdl_ops:
  variables:
  EXAMPLE_DIR: test_apps/esp-dl
  MODEL_PATH: test_apps/esp-dl/models
+
+build_example_human_face_recognition:
+ extends:
+ - .build_examples_template
+ - .rules:build:example_human_face_recognition
+ parallel:
+ matrix:
+ - IMAGE: espressif/idf:release-v5.3
+ TARGET: [esp32p4, esp32s3]
+ variables:
+ EXAMPLE_DIR: examples/human_face_recognition
 
 build_example_human_face_detect:
  extends:
@@ -89,7 +100,7 @@ build_example_human_face_detect:
  parallel:
  matrix:
  - IMAGE: espressif/idf:release-v5.3
- TARGET: esp32p4
+ TARGET: [esp32p4, esp32s3]
  variables:
  EXAMPLE_DIR: examples/human_face_detect
 
@@ -100,7 +111,7 @@ build_example_pedestrian_detect:
  parallel:
  matrix:
  - IMAGE: espressif/idf:release-v5.3
- TARGET: esp32p4
+ TARGET: [esp32p4, esp32s3]
  variables:
  EXAMPLE_DIR: examples/pedestrian_detect
 

diff --git a/.gitlab/ci/rules.yml b/.gitlab/ci/rules.yml
@@ -31,13 +31,19 @@
  - "esp-dl/vision/**/*"
 
 # models folder, in the alphabetic order
+.patterns-model_human_face_recognition: &patterns-model_human_face_recognition
+ - "models/human_face_recognition/**/*"
+
 .patterns-model_human_face_detect: &patterns-model_human_face_detect
- - "models/model_human_face_detect/**/*"
+ - "models/human_face_detect/**/*"
 
 .patterns-model_pedestrian_detect: &patterns-model_pedestrian_detect
  - "models/pedestrian_detect/**/*"
 
 # examples folder, in the alphabetic order
+.patterns-example_human_face_recognition: &patterns-example_human_face_recognition
+ - "examples/human_face_recognition/**/*"
+
 .patterns-example_human_face_detect: &patterns-example_human_face_detect
  - "examples/human_face_detect/**/*"
 
@@ -79,6 +85,19 @@
  changes: *patterns-gitlab-ci
 
 # rules for examples
+.rules:build:example_human_face_recognition:
+ rules:
+ - <<: *if-protected
+ - <<: *if-label-build
+ - <<: *if-dev-push
+ changes: *patterns-esp-dl
+ - <<: *if-dev-push
+ changes: *patterns-model_human_face_recognition
+ - <<: *if-dev-push
+ changes: *patterns-example_human_face_recognition
+ - <<: *if-dev-push
+ changes: *patterns-gitlab-ci
+
 .rules:build:example_human_face_detect:
  rules:
  - <<: *if-protected

diff --git a/README.md b/README.md
@@ -19,7 +19,8 @@ In general, the ESP-DL features will be supported, as shown below:
 ## Support models
 
 [Pedestrian Detection](./models/pedestrian_detect/) 
-[Human Face Detection](./models/human_face_detect/)
+[Human Face Detection](./models/human_face_detect/) 
+[Human Face Recognition](./models/human_face_recognition/)
 
 ## Getting Started
 

diff --git a/README_cn.md b/README_cn.md
@@ -23,7 +23,8 @@ ESP-DL 的以下功能将得到支持，如下所示：
 ## Support models
 
 [行人检测](./models/pedestrian_detect/) 
-[人脸检测](./models/human_face_detect/)
+[人脸检测](./models/human_face_detect/) 
+[人脸识别](./models/human_face_recognition/)
 
 ## Getting Started
 

diff --git a/esp-dl/CMakeLists.txt b/esp-dl/CMakeLists.txt
@@ -9,6 +9,7 @@ set(src_dirs ./dl/tool/src
  ./fbs_loader/src
  ./vision/detect
  ./vision/image
+ ./vision/recognition
  )
 
 set(include_dirs ./dl
@@ -23,6 +24,7 @@ set(include_dirs ./dl
  ./fbs_loader/include
  ./vision/detect
  ./vision/image
+ ./vision/recognition
  )
 
 if(CONFIG_IDF_TARGET_ESP32)

diff --git a/esp-dl/vision/image/dl_image.cpp b/esp-dl/vision/image/dl_image.cpp
@@ -831,14 +831,13 @@ uint32_t get_moving_point_number(uint8_t *f1,
 }
 
 template <typename T>
-void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv)
+void warp_affine(uint8_t *input,
+ const std::vector<int> &input_shape,
+ T *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap)
 {
- // Matrix<float> *M_inv = M->get_inverse();
- T *dst = (T *)output->get_element_ptr();
- T *src = (T *)input->get_element_ptr();
- std::vector<int> input_shape = input->shape;
- std::vector<int> output_shape = output->shape;
-
  int input_stride = input_shape[1] * input_shape[2]; // stride = w * c
  int c = input_shape[2];
  int output_h = output_shape[0];
@@ -859,34 +858,46 @@ void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<f
  (M_inv->array[2][0] * j + M_inv->array[2][1] * i + M_inv->array[2][2]);
  if ((x_src < 0) || (y_src < 0) || (x_src >= (input_shape[1] - 1)) || (y_src >= (input_shape[0] - 1))) {
  for (int k = 0; k < c; k++) {
- *dst++ = 0;
+ *output++ = 0;
  }
  } else {
  x1 = floor(x_src);
  x2 = x1 + 1;
  y1 = floor(y_src);
  y2 = y1 + 1;
  for (int k = 0; k < c; k++) {
- *dst++ = (T)rintf(((src[y1 * input_stride + x1 * c + k]) * (x2 - x_src) * (y2 - y_src)) +
- ((src[y1 * input_stride + x2 * c + k]) * (x_src - x1) * (y2 - y_src)) +
- ((src[y2 * input_stride + x1 * c + k]) * (x2 - x_src) * (y_src - y1)) +
- ((src[y2 * input_stride + x2 * c + k]) * (x_src - x1) * (y_src - y1)));
+ *output++ = (T)rintf(((input[y1 * input_stride + x1 * c + k]) * (x2 - x_src) * (y2 - y_src)) +
+  ((input[y1 * input_stride + x2 * c + k]) * (x_src - x1) * (y2 - y_src)) +
+  ((input[y2 * input_stride + x1 * c + k]) * (x2 - x_src) * (y_src - y1)) +
+  ((input[y2 * input_stride + x2 * c + k]) * (x_src - x1) * (y_src - y1)));
  }
  }
  }
  }
- // matrix_free(M_inv);
 }
-template void warp_affine(dl::Tensor<uint8_t> *input, dl::Tensor<uint8_t> *output, dl::math::Matrix<float> *M_inv);
+template void warp_affine(uint8_t *input,
+ const std::vector<int> &input_shape,
+ uint8_t *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap);
+template void warp_affine(uint8_t *input,
+ const std::vector<int> &input_shape,
+ int16_t *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap);
 
 template <typename T>
-void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv)
+void warp_affine(uint16_t *input,
+ const std::vector<int> &input_shape,
+ T *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap)
 {
- // Matrix<float> *M_inv = M->get_inverse();
- std::vector<int> output_shape = output->shape;
- T *dst = (T *)output->get_element_ptr();
- int input_stride = shape[1]; // stride = w
- int c = shape[2];
+ int input_stride = input_shape[1]; // stride = w
+ int c = input_shape[2];
  assert(c == 3);
  int output_h = output_shape[0];
  int output_w = output_shape[1];
@@ -898,52 +909,57 @@ void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output,
  int y1 = 0;
  int y2 = 0;
 
- T src_x1y1[3] = {0};
- T src_x1y2[3] = {0};
- T src_x2y1[3] = {0};
- T src_x2y2[3] = {0};
+ uint8_t src_x1y1[3] = {0};
+ uint8_t src_x1y2[3] = {0};
+ uint8_t src_x2y1[3] = {0};
+ uint8_t src_x2y2[3] = {0};
 
  for (int i = 0; i < output_h; i++) {
  for (int j = 0; j < output_w; j++) {
  x_src = (M_inv->array[0][0] * j + M_inv->array[0][1] * i + M_inv->array[0][2]) /
  (M_inv->array[2][0] * j + M_inv->array[2][1] * i + M_inv->array[2][2]);
  y_src = (M_inv->array[1][0] * j + M_inv->array[1][1] * i + M_inv->array[1][2]) /
  (M_inv->array[2][0] * j + M_inv->array[2][1] * i + M_inv->array[2][2]);
- if ((x_src < 0) || (y_src < 0) || (x_src >= (shape[1] - 1)) || (y_src >= (shape[0] - 1))) {
+ if ((x_src < 0) || (y_src < 0) || (x_src >= (input_shape[1] - 1)) || (y_src >= (input_shape[0] - 1))) {
  for (int k = 0; k < c; k++) {
- *dst++ = 0;
+ *output++ = 0;
  }
  } else {
  x1 = floor(x_src);
  x2 = x1 + 1;
  y1 = floor(y_src);
  y2 = y1 + 1;
+
  dl::image::convert_pixel_rgb565_to_rgb888(input[y1 * input_stride + x1], src_x1y1);
  dl::image::convert_pixel_rgb565_to_rgb888(input[y2 * input_stride + x1], src_x1y2);
  dl::image::convert_pixel_rgb565_to_rgb888(input[y1 * input_stride + x2], src_x2y1);
  dl::image::convert_pixel_rgb565_to_rgb888(input[y2 * input_stride + x2], src_x2y2);
 
- *dst++ =
+ *output++ =
  (T)rintf((src_x1y1[0] * (x2 - x_src) * (y2 - y_src)) + (src_x2y1[0] * (x_src - x1) * (y2 - y_src)) +
  (src_x1y2[0] * (x2 - x_src) * (y_src - y1)) + (src_x2y2[0] * (x_src - x1) * (y_src - y1)));
- *dst++ =
+ *output++ =
  (T)rintf((src_x1y1[1] * (x2 - x_src) * (y2 - y_src)) + (src_x2y1[1] * (x_src - x1) * (y2 - y_src)) +
  (src_x1y2[1] * (x2 - x_src) * (y_src - y1)) + (src_x2y2[1] * (x_src - x1) * (y_src - y1)));
- *dst++ =
+ *output++ =
  (T)rintf((src_x1y1[2] * (x2 - x_src) * (y2 - y_src)) + (src_x2y1[2] * (x_src - x1) * (y2 - y_src)) +
  (src_x1y2[2] * (x2 - x_src) * (y_src - y1)) + (src_x2y2[2] * (x_src - x1) * (y_src - y1)));
  }
  }
  }
 }
 template void warp_affine(uint16_t *input,
- std::vector<int> shape,
- dl::Tensor<uint8_t> *output,
- dl::math::Matrix<float> *M_inv);
+ const std::vector<int> &input_shape,
+ uint8_t *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap);
 template void warp_affine(uint16_t *input,
- std::vector<int> shape,
- dl::Tensor<int16_t> *output,
- dl::math::Matrix<float> *M_inv);
+ const std::vector<int> &input_shape,
+ int16_t *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap);
 
 uint8_t get_otsu_thresh(Tensor<uint8_t> &image)
 {

diff --git a/esp-dl/vision/image/dl_image.hpp b/esp-dl/vision/image/dl_image.hpp
@@ -450,7 +450,12 @@ uint32_t get_moving_point_number(uint8_t *f1,
  * @param M_inv the inverse transformation matrix.
  */
 template <typename T>
-void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
+void warp_affine(uint8_t *input,
+ const std::vector<int> &input_shape,
+ T *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap = false);
 
 /**
  * @brief Apply an affine transformation to an image.
@@ -462,7 +467,12 @@ void warp_affine(dl::Tensor<T> *input, dl::Tensor<T> *output, dl::math::Matrix<f
  * @param M_inv the inverse transformation matrix.
  */
 template <typename T>
-void warp_affine(uint16_t *input, std::vector<int> shape, dl::Tensor<T> *output, dl::math::Matrix<float> *M_inv);
+void warp_affine(uint16_t *input,
+ const std::vector<int> &input_shape,
+ T *output,
+ const std::vector<int> &output_shape,
+ dl::math::Matrix<float> *M_inv,
+ bool byte_swap = false);
 
 /**
  * @brief Get the otsu thresh object.