Skip to content

Commit 99eb33b

Browse files
committed
updated logic for handling inputs and refactored
Signed-off-by: Mpho Mphego <mpho112@gmail.com>
1 parent 78a3393 commit 99eb33b

File tree

3 files changed

+61
-46
lines changed

3 files changed

+61
-46
lines changed

main.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,15 @@ def arg_parser():
106106
default="fast",
107107
help="The speed (how fast it moves) by changing",
108108
)
109+
parser.add_argument(
110+
"--enable-mouse", action="store_true", help="Enable Mouse Movement",
111+
)
109112
parser.add_argument(
110113
"--debug", action="store_true", help="Show output on screen [debugging].",
111114
)
115+
parser.add_argument(
116+
"--show-bbox", action="store_true", help="Show bounding box and stats on screen [debugging].",
117+
)
112118

113119
return parser.parse_args()
114120

@@ -142,7 +148,7 @@ def main(args):
142148

143149
for frame in video_feed.next_frame():
144150

145-
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=True)
151+
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=args.show_bbox)
146152

147153
if face_bboxes:
148154
for face_bbox in face_bboxes:
@@ -164,16 +170,16 @@ def main(args):
164170
continue
165171

166172
facial_landmarks_pred_time, eyes_coords = facial_landmarks.predict(
167-
face, show_bbox=True
173+
face, show_bbox=args.show_bbox
168174
)
169175

170176
hp_est_pred_time, head_pose_angles = head_pose_estimation.predict(
171-
face, show_bbox=True
177+
face, show_bbox=args.show_bbox
172178
)
173179

174-
gaze_pred_time, gaze_vector, coords_xy = gaze_estimation.predict(
180+
gaze_pred_time, gaze_vector = gaze_estimation.predict(
175181
frame,
176-
show_bbox=True,
182+
show_bbox=args.show_bbox,
177183
face=face,
178184
eyes_coords=eyes_coords,
179185
head_pose_angles=head_pose_angles,
@@ -182,9 +188,12 @@ def main(args):
182188
head_pose_estimation.show_text(frame, head_pose_angles)
183189
gaze_estimation.show_text(frame, gaze_vector)
184190

185-
mouse_controller.move(coords_xy['x'],coords_xy['y'])
191+
print(f"gaze_vector: {gaze_vector}")
192+
if args.enable_mouse:
193+
mouse_controller.move(gaze_vector['x'], gaze_vector['y'])
186194

187195
if args.debug:
196+
video_feed.show(video_feed.resize(frame))
188197
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
189198
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
190199
text = f"Facial Landmarks Est. Inference time: {facial_landmarks_pred_time:.3f} s"
@@ -195,7 +204,6 @@ def main(args):
195204
)
196205
text = f"Gaze Est. Inference time: {gaze_pred_time:.3f} s"
197206
gaze_estimation.add_text(text, frame, (15, video_feed.source_height - 20))
198-
video_feed.show(video_feed.resize(frame))
199207

200208
video_feed.close()
201209

src/model.py

Lines changed: 32 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,11 @@ def add_text(self, text, image, position, font_size=0.75, color=(255, 255, 255))
138138
image, text, position, cv2.FONT_HERSHEY_COMPLEX, font_size, color, 1,
139139
)
140140

141-
def preprocess_input(self, image):
141+
def preprocess_input(self, image, height=None, width=None):
142142
"""Helper function for processing frame"""
143-
p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
143+
if (height and width) is None:
144+
height, width = self.input_shape[2:]
145+
p_frame = cv2.resize(image, (width, height))
144146
# Change data layout from HWC to CHW
145147
p_frame = p_frame.transpose((2, 0, 1))
146148
p_frame = p_frame.reshape(1, *p_frame.shape)
@@ -427,42 +429,44 @@ def __init__(
427429
def preprocess_output(self, inference_results, image, show_bbox, **kwargs):
428430
gaze_vector = dict(zip(["x", "y", "z"], np.vstack(inference_results).ravel()))
429431

430-
roll_val = kwargs["head_pose_angles"]["roll"]
432+
# roll_val = kwargs["head_pose_angles"]["roll"]
431433

432-
cos_theta = math.cos(roll_val * math.pi / 180)
433-
sin_theta = math.sin(roll_val * math.pi / 180)
434+
# cos_theta = math.cos(roll_val * math.pi / 180)
435+
# sin_theta = math.sin(roll_val * math.pi / 180)
434436

435-
coords = {"x": None, "y": None}
436-
coords["x"] = gaze_vector["x"] * cos_theta + gaze_vector["y"] * sin_theta
437-
coords["y"] = gaze_vector["y"] * cos_theta - gaze_vector["x"] * sin_theta
437+
# coords = {"x": None, "y": None}
438+
# coords["x"] = gaze_vector["x"] * cos_theta + gaze_vector["y"] * sin_theta
439+
# coords["y"] = gaze_vector["y"] * cos_theta - gaze_vector["x"] * sin_theta
438440
if show_bbox:
439441
self.draw_output(gaze_vector, image, **kwargs)
440-
return gaze_vector, coords, image
442+
return gaze_vector, image
441443

442444
@staticmethod
443445
def draw_output(coords, image, **kwargs):
444446
left_eye_point = kwargs["eyes_coords"]["left_eye_point"]
445447
right_eye_point = kwargs["eyes_coords"]["right_eye_point"]
446-
print('here')
448+
print(left_eye_point)
447449
cv2.arrowedLine(
448450
image,
449-
(left_eye_point[0], left_eye_point[1]),
450451
(
451-
left_eye_point[0] + int(coords["x"] * 100),
452-
left_eye_point[1] + int(-coords["y"] * 100),
452+
left_eye_point[0] + int(coords["x"] * 500),
453+
left_eye_point[1] + int(-coords["y"] * 500),
453454
),
454-
(0, 0, 255),
455-
5,
455+
(left_eye_point[0], left_eye_point[1]),
456+
color=(0, 0, 255),
457+
thickness=2,
458+
tipLength=0.2,
456459
)
457460
cv2.arrowedLine(
458461
image,
459-
(right_eye_point[0], right_eye_point[1]),
460462
(
461-
right_eye_point[0] + int(coords["x"] * 100),
462-
right_eye_point[1] + int(-coords["y"] * 100),
463+
right_eye_point[0] + int(coords["x"] * 500),
464+
right_eye_point[1] + int(-coords["y"] * 500),
463465
),
464-
(0, 0, 255),
465-
5,
466+
(right_eye_point[0], right_eye_point[1]),
467+
color=(0, 0, 255),
468+
thickness=2,
469+
tipLength=0.2,
466470
)
467471

468472
@staticmethod
@@ -472,8 +476,7 @@ def show_text(
472476
"""Helper function for showing the text on frame."""
473477
height, _ = image.shape[:2]
474478
ypos = abs(height - pos)
475-
text = ", ".join(f"{x}: {y:.2f}" for x, y in coords.items())
476-
479+
text = "Gaze Vector: " + ", ".join(f"{x}: {y:.2f}" for x, y in coords.items())
477480
cv2.putText(
478481
image,
479482
text,
@@ -485,19 +488,13 @@ def show_text(
485488
)
486489

487490
def preprocess_input(self, image, **kwargs):
488-
def p_eye_image(which_eye, input_shape):
489-
490-
p_image = cv2.resize(image, (input_shape.shape[3], input_shape.shape[2]))
491-
p_image = p_image.transpose((2, 0, 1))
492-
p_image = p_image.reshape(1, *input_shape.shape)
493-
return p_image
491+
width, height = self.model.inputs["left_eye_image"].shape[2:]
494492

495-
p_left_eye_image = p_eye_image(
496-
kwargs["eyes_coords"]["left_eye_image"], self.model.inputs["left_eye_image"]
493+
p_left_eye_image = Base.preprocess_input(
494+
Base, kwargs["eyes_coords"]["left_eye_image"], width, height
497495
)
498-
p_right_eye_image = p_eye_image(
499-
kwargs["eyes_coords"]["right_eye_image"],
500-
self.model.inputs["right_eye_image"],
496+
p_right_eye_image = Base.preprocess_input(
497+
Base, kwargs["eyes_coords"]["right_eye_image"], width, height
501498
)
502499

503500
return p_left_eye_image, p_right_eye_image
@@ -524,7 +521,7 @@ def predict(self, image, request_id=0, show_bbox=False, **kwargs):
524521
self.exec_network.requests[request_id].outputs[output_name]
525522
)
526523
predict_end_time = float(time.time() - predict_start_time) * 1000
527-
gaze_vector, coords, _ = self.preprocess_output(
524+
gaze_vector, _ = self.preprocess_output(
528525
pred_result, image, show_bbox=show_bbox, **kwargs
529526
)
530-
return (predict_end_time, gaze_vector, coords)
527+
return (predict_end_time, gaze_vector)

src/mouse_controller.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import Xlib.display
22
import pyautogui
33

4+
from loguru import logger
5+
46

57
class MouseController:
68
"""
@@ -20,10 +22,18 @@ def __init__(self, precision, speed):
2022

2123
def move(self, x, y):
2224
"""Move mouse pointer to position the x and y."""
23-
pyautogui.moveRel(
24-
-x * self.precision, 1 * y * self.precision, duration=self.speed
25-
)
26-
25+
try:
26+
start_pos = pyautogui.position()
27+
pyautogui.moveRel(
28+
x * self.precision, -1 * y * self.precision, duration=self.speed
29+
)
30+
end_pos = pyautogui.position()
31+
logger.info(f"Mouse -> start_pos: {start_pos}, end_pos: {end_pos}")
32+
except pyautogui.FailSafeException:
33+
logger.exception(f"Position: {x}, {y} are out of the screen")
34+
pyautogui.moveRel(
35+
-x * self.precision, 1 * y * self.precision, duration=self.speed
36+
)
2737
def left_click(self):
2838
pass
2939

0 commit comments

Comments (0)