diff --git a/README.md b/README.md
index 56343d6..b147d33 100755
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ Security camera application powered by AI. It uses live video stream from camera

 ### System

-System requirements depends on complexity or size of object detection model, larger model will require more compute power and will be good at detection. I have used this in a raspberry pi 3b with a pi camera using mobilenet backbone and it gave around 1-2 fps. For final implementation tflite model was used with pi which boost fps of application. With my Nvidia Geforce 940 MX GPU based system it is giving around 30 fps with mobilenet backbone. So here is tradeoff between compute power and accuracy.
+System requirements depend on the complexity and size of the object detection model: a larger model needs more compute but detects better. On a Raspberry Pi 3B with a Pi Camera and a MobileNet backbone this gave around 1-2 fps; for the final implementation a TFLite model was used on the Pi, which boosted the application's fps. On my Nvidia GeForce 940 MX GPU system it gives around 20-30 fps with the MobileNet backbone, so there is a tradeoff between compute power and accuracy.

 ### Python 3
diff --git a/app.py b/app.py
index 5692bd5..f0f545a 100755
--- a/app.py
+++ b/app.py
@@ -48,6 +48,7 @@ def main(_argv):
     record_path = args.get("recording_save_path")
     recipients = args.get("send_mail_to")
     neglect_categories = args.get("neglect_categories")
+    tf.keras.backend.clear_session()
     model = tf.saved_model.load(args.get("model"))
     logging.info("...model loaded...")

diff --git a/configs/security_cam.cfg b/configs/security_cam.cfg
index 9ee9ee5..6307efc 100644
--- a/configs/security_cam.cfg
+++ b/configs/security_cam.cfg
@@ -15,7 +15,7 @@
     "recording_save_path": "data/outputs",
     "temp_dir": "data/temp",
     "detector_model_path": "data/models/ssd_mobilenet_v2_320x320_coco17_tpu-8-theft-inf/saved_model",
-    "labels_path": "data/labels/security_cam.txt"
+    "labels_path": "data/labels/security_cam.txt",
     "neglect_categories": ["dog", "cat"],
     "min_threshold": 0.65,
     "min_recording_time": 10
diff --git a/data/labels/coco.txt b/data/labels/coco.txt
index 1f42c8e..63355c8 100644
--- a/data/labels/coco.txt
+++ b/data/labels/coco.txt
@@ -1,3 +1,4 @@
+???
 person
 bicycle
 car
diff --git a/data/labels/security_cam.txt b/data/labels/security_cam.txt
index 06f8660..c745f91 100755
--- a/data/labels/security_cam.txt
+++ b/data/labels/security_cam.txt
@@ -1,3 +1,4 @@
+???
 cat
 dog
 monkey
diff --git a/data/outputs/detection_output.jpg b/data/outputs/detection_output.jpg
index 0f9b1ae..39650df 100755
Binary files a/data/outputs/detection_output.jpg and b/data/outputs/detection_output.jpg differ
diff --git a/examples/detect_cam.py b/examples/detect_cam.py
index 69803c2..32f5cb2 100755
--- a/examples/detect_cam.py
+++ b/examples/detect_cam.py
@@ -3,12 +3,13 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from utils.utility import draw_boxes
-from utils.category import theft_category_index
+from src.utils import draw_boxes
+from src.category import read_label_pbtxt
 from absl import app, flags, logging
 from absl.flags import FLAGS

 flags.DEFINE_string("model", None, "path to model inference graph")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string("output", "data/outputs/cam_output.avi", "path to output video")
 flags.DEFINE_integer("cam", 0, "camera number or id to access")
 flags.DEFINE_float("threshold", 0.5, "detection threshold")
@@ -16,10 +17,9 @@

 def main(_argv):
     flags.mark_flag_as_required("model")
+    flags.mark_flag_as_required("labels")

-    physical_devices = tf.config.experimental.list_physical_devices("GPU")
-    for physical_device in physical_devices:
-        tf.config.experimental.set_memory_growth(physical_device, True)
+    labels = read_label_pbtxt(FLAGS.labels)

     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
@@ -52,7 +52,7 @@ def main(_argv):
         boxes,
         classes,
         scores,
-        theft_category_index,
+        labels,
         height,
         width,
         min_threshold=FLAGS.threshold,
diff --git a/examples/detect_image.py b/examples/detect_image.py
index d96921e..37426c8 100755
--- a/examples/detect_image.py
+++ b/examples/detect_image.py
@@ -2,15 +2,16 @@
 import time
 import tensorflow as tf
 import numpy as np
-from PIL import Image
+import cv2
 from absl import app, flags, logging
 from absl.flags import FLAGS
 from src.utils import draw_boxes
-from src.category import theft_category_index
-from src.utils import load_image
+from src.category import read_label_pbtxt
+from src.utils import load_image, preprocess_input

 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("image", None, "path to input image")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string(
     "output", "data/outputs/detection_output.jpg", "path to output image"
 )
@@ -20,6 +21,9 @@
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("image")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)

     start_time = time.time()
     tf.keras.backend.clear_session()
@@ -30,6 +34,7 @@
     image_np = load_image(FLAGS.image)
     image_tensor = np.expand_dims(image_np, axis=0)
+    image_tensor = preprocess_input(image_tensor)
     height, width, _ = image_np.shape

     start_time = time.time()
     detections = model(image_tensor)
@@ -44,15 +49,16 @@
         boxes,
         classes,
         scores,
-        theft_category_index,
+        labels,
         height,
         width,
         min_threshold=FLAGS.threshold,
     )

-    output = Image.fromarray(output_image)
-    output.save(FLAGS.output)
-    output.show()
+    output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
+    cv2.imwrite(FLAGS.output, output_image)
+    cv2.imshow("Object Detection", output_image)
+    cv2.waitKey(0)

     logging.info(f"Elapsed time: {str(end_time - start_time)}sec")

diff --git a/examples/detect_ipcam.py b/examples/detect_ipcam.py
index 69c7931..bedc5f4 100755
--- a/examples/detect_ipcam.py
+++ b/examples/detect_ipcam.py
@@ -3,11 +3,11 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from object_detection.utils import visualization_utils
-from utils.category import theft_category_index
+from src.category import read_label_pbtxt
+from src.utils import draw_boxes
 from absl import app, flags, logging
 from absl.flags import FLAGS
-from utils.utility import VideoStream
+from src.utility import VideoStream

 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("output", "data/outputs/ipcam_output.avi", "path to output video")
@@ -20,15 +20,14 @@
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("ip")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)

     stream_url = f"rtsp://{FLAGS.ip}:{FLAGS.port}/h264_ulaw.sdp"
     if FLAGS.username and FLAGS.password:
         stream_url = f"rtsp://{FLAGS.username}:{FLAGS.password}@{FLAGS.ip}:{FLAGS.port}/h264_ulaw.sdp"

-    physical_devices = tf.config.experimental.list_physical_devices("GPU")
-    for physical_device in physical_devices:
-        tf.config.experimental.set_memory_growth(physical_device, True)
-
     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
     end_time = time.time()
@@ -55,16 +54,15 @@
     classes = detections["detection_classes"][0].numpy().astype(np.int32)
     scores = detections["detection_scores"][0].numpy()

-    output_image = visualization_utils.visualize_boxes_and_labels_on_image_array(
+    output_image = draw_boxes(
         img.copy(),
         boxes,
         classes,
         scores,
-        theft_category_index,
-        use_normalized_coordinates=True,
-        max_boxes_to_draw=200,
-        min_score_thresh=0.6,
-        agnostic_mode=False,
+        labels,
+        height,
+        width,
+        min_threshold=FLAGS.threshold,
     )
     cv2.imshow("Object Detection", cv2.resize(output_image, (800, 600)))
     if out:
diff --git a/examples/detect_video.py b/examples/detect_video.py
index d57d309..9f4ef52 100755
--- a/examples/detect_video.py
+++ b/examples/detect_video.py
@@ -3,13 +3,14 @@
 import cv2
 import tensorflow as tf
 import numpy as np
-from utils.utility import draw_boxes
-from utils.category import theft_category_index
+from src.utils import draw_boxes
+from src.category import read_label_pbtxt
 from absl import app, flags, logging
 from absl.flags import FLAGS

 flags.DEFINE_string("model", None, "path to model inference graph")
 flags.DEFINE_string("video", None, "path to input video")
+flags.DEFINE_string("labels", None, "path to labels.txt file with detection classes")
 flags.DEFINE_string("output", "data/outputs/video_output.avi", "path to output video")
 flags.DEFINE_float("threshold", 0.5, "detection threshold")

@@ -17,6 +18,9 @@
 def main(_argv):
     flags.mark_flag_as_required("model")
     flags.mark_flag_as_required("video")
+    flags.mark_flag_as_required("labels")
+
+    labels = read_label_pbtxt(FLAGS.labels)

     start_time = time.time()
     model = tf.saved_model.load(FLAGS.model)
@@ -55,7 +59,7 @@ def main(_argv):
         boxes,
         classes,
         scores,
-        theft_category_index,
+        labels,
         height,
         width,
         min_threshold=FLAGS.threshold,
diff --git a/src/category.py b/src/category.py
index 6dea7ac..4fd4d5c 100755
--- a/src/category.py
+++ b/src/category.py
@@ -1,98 +1,7 @@
-coco_category_index = {
-    1: {"id": 1, "name": "person"},
-    2: {"id": 2, "name": "bicycle"},
-    3: {"id": 3, "name": "car"},
-    4: {"id": 4, "name": "motorcycle"},
-    5: {"id": 5, "name": "airplane"},
-    6: {"id": 6, "name": "bus"},
-    7: {"id": 7, "name": "train"},
-    8: {"id": 8, "name": "truck"},
-    9: {"id": 9, "name": "boat"},
-    10: {"id": 10, "name": "traffic light"},
-    11: {"id": 11, "name": "fire hydrant"},
13: {"id": 13, "name": "stop sign"}, - 14: {"id": 14, "name": "parking meter"}, - 15: {"id": 15, "name": "bench"}, - 16: {"id": 16, "name": "bird"}, - 17: {"id": 17, "name": "cat"}, - 18: {"id": 18, "name": "dog"}, - 19: {"id": 19, "name": "horse"}, - 20: {"id": 20, "name": "sheep"}, - 21: {"id": 21, "name": "cow"}, - 22: {"id": 22, "name": "elephant"}, - 23: {"id": 23, "name": "bear"}, - 24: {"id": 24, "name": "zebra"}, - 25: {"id": 25, "name": "giraffe"}, - 27: {"id": 27, "name": "backpack"}, - 28: {"id": 28, "name": "umbrella"}, - 31: {"id": 31, "name": "handbag"}, - 32: {"id": 32, "name": "tie"}, - 33: {"id": 33, "name": "suitcase"}, - 34: {"id": 34, "name": "frisbee"}, - 35: {"id": 35, "name": "skis"}, - 36: {"id": 36, "name": "snowboard"}, - 37: {"id": 37, "name": "sports ball"}, - 38: {"id": 38, "name": "kite"}, - 39: {"id": 39, "name": "baseball bat"}, - 40: {"id": 40, "name": "baseball glove"}, - 41: {"id": 41, "name": "skateboard"}, - 42: {"id": 42, "name": "surfboard"}, - 43: {"id": 43, "name": "tennis racket"}, - 44: {"id": 44, "name": "bottle"}, - 46: {"id": 46, "name": "wine glass"}, - 47: {"id": 47, "name": "cup"}, - 48: {"id": 48, "name": "fork"}, - 49: {"id": 49, "name": "knife"}, - 50: {"id": 50, "name": "spoon"}, - 51: {"id": 51, "name": "bowl"}, - 52: {"id": 52, "name": "banana"}, - 53: {"id": 53, "name": "apple"}, - 54: {"id": 54, "name": "sandwich"}, - 55: {"id": 55, "name": "orange"}, - 56: {"id": 56, "name": "broccoli"}, - 57: {"id": 57, "name": "carrot"}, - 58: {"id": 58, "name": "hot dog"}, - 59: {"id": 59, "name": "pizza"}, - 60: {"id": 60, "name": "donut"}, - 61: {"id": 61, "name": "cake"}, - 62: {"id": 62, "name": "chair"}, - 63: {"id": 63, "name": "couch"}, - 64: {"id": 64, "name": "potted plant"}, - 65: {"id": 65, "name": "bed"}, - 67: {"id": 67, "name": "dining table"}, - 70: {"id": 70, "name": "toilet"}, - 72: {"id": 72, "name": "tv"}, - 73: {"id": 73, "name": "laptop"}, - 74: {"id": 74, "name": "mouse"}, - 75: {"id": 75, "name": "remote"}, - 76: {"id": 76, "name": "keyboard"}, - 77: {"id": 77, "name": "cell phone"}, - 78: {"id": 78, "name": "microwave"}, - 79: {"id": 79, "name": "oven"}, - 80: {"id": 80, "name": "toaster"}, - 81: {"id": 81, "name": "sink"}, - 82: {"id": 82, "name": "refrigerator"}, - 84: {"id": 84, "name": "book"}, - 85: {"id": 85, "name": "clock"}, - 86: {"id": 86, "name": "vase"}, - 87: {"id": 87, "name": "scissors"}, - 88: {"id": 88, "name": "teddy bear"}, - 89: {"id": 89, "name": "hair drier"}, - 90: {"id": 90, "name": "toothbrush"}, -} - -security_cam_category_index = { - 1: {"id": 1, "name": "cat"}, - 2: {"id": 2, "name": "dog"}, - 3: {"id": 3, "name": "monkey"}, - 4: {"id": 4, "name": "person"}, -} - - def read_label_pbtxt(label_path: str) -> dict: with open(label_path, "r") as label_file: lines = label_file.readlines() labels = {} for row, content in enumerate(lines): - labels[row + 1] = {"id": row + 1, "name": content.strip()} + labels[row] = {"id": row, "name": content.strip()} return labels diff --git a/src/parse_args.py b/src/parse_args.py index 785846c..fe7ea5b 100755 --- a/src/parse_args.py +++ b/src/parse_args.py @@ -1,6 +1,6 @@ import json from absl.flags import FLAGS -from category import read_label_pbtxt +from src.category import read_label_pbtxt def get_security_cam_arguments(flag: FLAGS) -> tuple: diff --git a/src/utils.py b/src/utils.py index b99bd2c..1cbc1b0 100755 --- a/src/utils.py +++ b/src/utils.py @@ -72,7 +72,6 @@ def preprocess_input(image: np.array) -> np.array: np.array: preprocesses image as numpy array """ 
     processed = (2.0 / 255.0) * image - 1.0
-    processed = np.expand_dims(processed, axis=0)
     return processed.astype(np.float32)
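
Taken together, the hunks above replace the hard-coded category dictionaries with read_label_pbtxt(), prepend a "???" placeholder to the label files so that real classes keep ids starting at 1 under the new 0-based indexing, and move the batch dimension out of preprocess_input() and into the callers. Below is a minimal usage sketch, not part of the patch, of how the updated helpers are expected to fit together, modelled on examples/detect_image.py; the input image path is a hypothetical placeholder and the model path is taken from configs/security_cam.cfg.

# Usage sketch only, based on the call sites shown in the diff.
import numpy as np
import tensorflow as tf

from src.category import read_label_pbtxt
from src.utils import draw_boxes, load_image, preprocess_input

# Label files now start with a "???" line and read_label_pbtxt() indexes from 0,
# so the placeholder takes id 0 and the real classes keep detector ids 1..N.
labels = read_label_pbtxt("data/labels/security_cam.txt")

tf.keras.backend.clear_session()
model = tf.saved_model.load(
    "data/models/ssd_mobilenet_v2_320x320_coco17_tpu-8-theft-inf/saved_model"
)

image_np = load_image("data/inputs/example.jpg")  # placeholder path
image_tensor = np.expand_dims(image_np, axis=0)   # batch dim is added by the caller now
image_tensor = preprocess_input(image_tensor)     # scales to [-1, 1] float32; no expand_dims inside

detections = model(image_tensor)
boxes = detections["detection_boxes"][0].numpy()
classes = detections["detection_classes"][0].numpy().astype(np.int32)
scores = detections["detection_scores"][0].numpy()

height, width, _ = image_np.shape
output_image = draw_boxes(
    image_np.copy(), boxes, classes, scores, labels, height, width, min_threshold=0.5
)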