Code cleanup; fixed errors in the examples caused by library changes

tarun-bisht · Dec 14, 2021 · 9d59720 · 9d59720
1 parent 99935d7
commit 9d59720
Show file tree

Hide file tree

Showing 13 changed files with 44 additions and 125 deletions.
diff --git a/README.md b/README.md
@@ -6,7 +6,7 @@ Security camera application powered by AI. It uses live video stream from camera
 
 ### System
 
-System requirements depends on complexity or size of object detection model, larger model will require more compute power and will be good at detection. I have used this in a raspberry pi 3b with a pi camera using mobilenet backbone and it gave around 1-2 fps. For final implementation tflite model was used with pi which boost fps of application. With my Nvidia Geforce 940 MX GPU based system it is giving around 30 fps with mobilenet backbone. So here is tradeoff between compute power and accuracy.
+System requirements depend on the complexity and size of the object detection model: a larger model requires more compute power but is better at detection. I have used this on a Raspberry Pi 3B with a Pi camera using a MobileNet backbone, and it gave around 1-2 fps. For the final implementation a TFLite model was used on the Pi, which boosts the fps of the application. With my Nvidia GeForce 940MX GPU based system it gives around 20-30 fps with a MobileNet backbone. So there is a tradeoff between compute power and accuracy.
 
 ### Python 3
 

diff --git a/app.py b/app.py
@@ -48,6 +48,7 @@ def main(_argv):
     record_path = args.get("recording_save_path")
     recipients = args.get("send_mail_to")
     neglect_categories = args.get("neglect_categories")
+    tf.keras.backend.clear_session()
     model = tf.saved_model.load(args.get("model"))
     logging.info("...model loaded...")
 

diff --git a/configs/security_cam.cfg b/configs/security_cam.cfg
@@ -15,7 +15,7 @@
     "recording_save_path": "data/outputs",
     "temp_dir": "data/temp",
     "detector_model_path": "data/models/ssd_mobilenet_v2_320x320_coco17_tpu-8-theft-inf/saved_model",
-    "labels_path": "data/labels/security_cam.txt"
+    "labels_path": "data/labels/security_cam.txt",
     "neglect_categories": ["dog", "cat"],
     "min_threshold": 0.65,
     "min_recording_time": 10

diff --git a/data/labels/coco.txt b/data/labels/coco.txt
@@ -1,3 +1,4 @@
+???
 person
 bicycle
 car

diff --git a/data/labels/security_cam.txt b/data/labels/security_cam.txt
@@ -1,3 +1,4 @@
+???
 cat
 dog
 monkey

diff --git a/data/outputs/detection_output.jpg b/data/outputs/detection_output.jpg
diff --git a/examples/detect_cam.py b/examples/detect_cam.py
@@ -3,23 +3,23 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from utils.utility import draw_boxes
-from utils.category import theft_category_index
+from src.utils import draw_boxes
+from src.category import read_label_pbtxt
 from absl import app, flags, logging
 from absl.flags import FLAGS
 
 flags.DEFINE_string("model", None, "path to model inference graph")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string("output", "data/outputs/cam_output.avi", "path to output video")
 flags.DEFINE_integer("cam", 0, "camera number or id to access")
 flags.DEFINE_float("threshold", 0.5, "detection threshold")
 
 
 def main(_argv):
     flags.mark_flag_as_required("model")
+    flags.mark_flag_as_required("labels")
 
-    physical_devices = tf.config.experimental.list_physical_devices("GPU")
-    for physical_device in physical_devices:
-        tf.config.experimental.set_memory_growth(physical_device, True)
+    labels = read_label_pbtxt(FLAGS.labels)
 
     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
@@ -52,7 +52,7 @@ def main(_argv):
             boxes,
             classes,
             scores,
-            theft_category_index,
+            labels,
             height,
             width,
             min_threshold=FLAGS.threshold,

diff --git a/examples/detect_image.py b/examples/detect_image.py
@@ -2,15 +2,16 @@
 import time
 import tensorflow as tf
 import numpy as np
-from PIL import Image
+import cv2
 from absl import app, flags, logging
 from absl.flags import FLAGS
 from src.utils import draw_boxes
-from src.category import theft_category_index
-from src.utils import load_image
+from src.category import read_label_pbtxt
+from src.utils import load_image, preprocess_input
 
 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("image", None, "path to input image")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string(
     "output", "data/outputs/detection_output.jpg", "path to output image"
 )
@@ -20,6 +21,9 @@
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("image")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)
 
     start_time = time.time()
     tf.keras.backend.clear_session()
@@ -30,6 +34,7 @@ def main(_argv):
 
     image_np = load_image(FLAGS.image)
     image_tensor = np.expand_dims(image_np, axis=0)
+    image_tensor = preprocess_input(image_tensor)
     height, width, _ = image_np.shape
     start_time = time.time()
     detections = model(image_tensor)
@@ -44,15 +49,16 @@ def main(_argv):
         boxes,
         classes,
         scores,
-        theft_category_index,
+        labels,
         height,
         width,
         min_threshold=FLAGS.threshold,
     )
 
-    output = Image.fromarray(output_image)
-    output.save(FLAGS.output)
-    output.show()
+    output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
+    cv2.imwrite(FLAGS.output, output_image)
+    cv2.imshow("Object Detection", output_image)
+    cv2.waitKey(0)
     logging.info(f"Elapsed time: {str(end_time - start_time)}sec")
 
 

diff --git a/examples/detect_ipcam.py b/examples/detect_ipcam.py
@@ -3,11 +3,11 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from object_detection.utils import visualization_utils
-from utils.category import theft_category_index
+from src.category import read_label_pbtxt
+from src.utils import draw_boxes
 from absl import app, flags, logging
 from absl.flags import FLAGS
-from utils.utility import VideoStream
+from src.utility import VideoStream
 
 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("output", "data/outputs/ipcam_output.avi", "path to output video")
@@ -20,15 +20,14 @@
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("ip")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)
 
     stream_url = f"rtsp://{FLAGS.ip}:{FLAGS.port}/h264_ulaw.sdp"
     if FLAGS.username and FLAGS.password:
         stream_url = f"rtsp://{FLAGS.username}:{FLAGS.password}@{FLAGS.ip}:{FLAGS.port}/h264_ulaw.sdp"
 
-    physical_devices = tf.config.experimental.list_physical_devices("GPU")
-    for physical_device in physical_devices:
-        tf.config.experimental.set_memory_growth(physical_device, True)
-
     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
     end_time = time.time()
@@ -55,16 +54,15 @@ def main(_argv):
         classes = detections["detection_classes"][0].numpy().astype(np.int32)
         scores = detections["detection_scores"][0].numpy()
 
-        output_image = visualization_utils.visualize_boxes_and_labels_on_image_array(
+        output_image = draw_boxes(
             img.copy(),
             boxes,
             classes,
             scores,
-            theft_category_index,
-            use_normalized_coordinates=True,
-            max_boxes_to_draw=200,
-            min_score_thresh=0.6,
-            agnostic_mode=False,
+            labels,
+            height,
+            width,
+            min_threshold=FLAGS.threshold,
         )
         cv2.imshow("Object Detection", cv2.resize(output_image, (800, 600)))
         if out:

diff --git a/examples/detect_video.py b/examples/detect_video.py
@@ -3,20 +3,24 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from utils.utility import draw_boxes
-from utils.category import theft_category_index
+from src.utils import draw_boxes
+from src.category import read_label_pbtxt
 from absl import app, flags, logging
 from absl.flags import FLAGS
 
 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("video", None, "path to input video")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string("output", "data/outputs/video_output.avi", "path to output video")
 flags.DEFINE_float("threshold", 0.5, "detection threshold")
 
 
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("video")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)
 
     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
@@ -55,7 +59,7 @@ def main(_argv):
             boxes,
             classes,
             scores,
-            theft_category_index,
+            labels,
             height,
             width,
             min_threshold=FLAGS.threshold,

diff --git a/src/category.py b/src/category.py
@@ -1,98 +1,7 @@
-coco_category_index = {
-    1: {"id": 1, "name": "person"},
-    2: {"id": 2, "name": "bicycle"},
-    3: {"id": 3, "name": "car"},
-    4: {"id": 4, "name": "motorcycle"},
-    5: {"id": 5, "name": "airplane"},
-    6: {"id": 6, "name": "bus"},
-    7: {"id": 7, "name": "train"},
-    8: {"id": 8, "name": "truck"},
-    9: {"id": 9, "name": "boat"},
-    10: {"id": 10, "name": "traffic light"},
-    11: {"id": 11, "name": "fire hydrant"},
-    13: {"id": 13, "name": "stop sign"},
-    14: {"id": 14, "name": "parking meter"},
-    15: {"id": 15, "name": "bench"},
-    16: {"id": 16, "name": "bird"},
-    17: {"id": 17, "name": "cat"},
-    18: {"id": 18, "name": "dog"},
-    19: {"id": 19, "name": "horse"},
-    20: {"id": 20, "name": "sheep"},
-    21: {"id": 21, "name": "cow"},
-    22: {"id": 22, "name": "elephant"},
-    23: {"id": 23, "name": "bear"},
-    24: {"id": 24, "name": "zebra"},
-    25: {"id": 25, "name": "giraffe"},
-    27: {"id": 27, "name": "backpack"},
-    28: {"id": 28, "name": "umbrella"},
-    31: {"id": 31, "name": "handbag"},
-    32: {"id": 32, "name": "tie"},
-    33: {"id": 33, "name": "suitcase"},
-    34: {"id": 34, "name": "frisbee"},
-    35: {"id": 35, "name": "skis"},
-    36: {"id": 36, "name": "snowboard"},
-    37: {"id": 37, "name": "sports ball"},
-    38: {"id": 38, "name": "kite"},
-    39: {"id": 39, "name": "baseball bat"},
-    40: {"id": 40, "name": "baseball glove"},
-    41: {"id": 41, "name": "skateboard"},
-    42: {"id": 42, "name": "surfboard"},
-    43: {"id": 43, "name": "tennis racket"},
-    44: {"id": 44, "name": "bottle"},
-    46: {"id": 46, "name": "wine glass"},
-    47: {"id": 47, "name": "cup"},
-    48: {"id": 48, "name": "fork"},
-    49: {"id": 49, "name": "knife"},
-    50: {"id": 50, "name": "spoon"},
-    51: {"id": 51, "name": "bowl"},
-    52: {"id": 52, "name": "banana"},
-    53: {"id": 53, "name": "apple"},
-    54: {"id": 54, "name": "sandwich"},
-    55: {"id": 55, "name": "orange"},
-    56: {"id": 56, "name": "broccoli"},
-    57: {"id": 57, "name": "carrot"},
-    58: {"id": 58, "name": "hot dog"},
-    59: {"id": 59, "name": "pizza"},
-    60: {"id": 60, "name": "donut"},
-    61: {"id": 61, "name": "cake"},
-    62: {"id": 62, "name": "chair"},
-    63: {"id": 63, "name": "couch"},
-    64: {"id": 64, "name": "potted plant"},
-    65: {"id": 65, "name": "bed"},
-    67: {"id": 67, "name": "dining table"},
-    70: {"id": 70, "name": "toilet"},
-    72: {"id": 72, "name": "tv"},
-    73: {"id": 73, "name": "laptop"},
-    74: {"id": 74, "name": "mouse"},
-    75: {"id": 75, "name": "remote"},
-    76: {"id": 76, "name": "keyboard"},
-    77: {"id": 77, "name": "cell phone"},
-    78: {"id": 78, "name": "microwave"},
-    79: {"id": 79, "name": "oven"},
-    80: {"id": 80, "name": "toaster"},
-    81: {"id": 81, "name": "sink"},
-    82: {"id": 82, "name": "refrigerator"},
-    84: {"id": 84, "name": "book"},
-    85: {"id": 85, "name": "clock"},
-    86: {"id": 86, "name": "vase"},
-    87: {"id": 87, "name": "scissors"},
-    88: {"id": 88, "name": "teddy bear"},
-    89: {"id": 89, "name": "hair drier"},
-    90: {"id": 90, "name": "toothbrush"},
-}
-
-security_cam_category_index = {
-    1: {"id": 1, "name": "cat"},
-    2: {"id": 2, "name": "dog"},
-    3: {"id": 3, "name": "monkey"},
-    4: {"id": 4, "name": "person"},
-}
-
-
 def read_label_pbtxt(label_path: str) -> dict:
     with open(label_path, "r") as label_file:
         lines = label_file.readlines()
         labels = {}
         for row, content in enumerate(lines):
-            labels[row + 1] = {"id": row + 1, "name": content.strip()}
+            labels[row] = {"id": row, "name": content.strip()}
     return labels
diff --git a/src/parse_args.py b/src/parse_args.py
@@ -1,6 +1,6 @@
 import json
 from absl.flags import FLAGS
-from category import read_label_pbtxt
+from src.category import read_label_pbtxt
 
 
 def get_security_cam_arguments(flag: FLAGS) -> tuple:

diff --git a/src/utils.py b/src/utils.py
@@ -72,7 +72,6 @@ def preprocess_input(image: np.array) -> np.array:
         np.array: preprocesses image as numpy array
     """
     processed = (2.0 / 255.0) * image - 1.0
-    processed = np.expand_dims(processed, axis=0)
     return processed.astype(np.float32)