Skip to content

Commit 99eb33b

Browse files
committed
updated logic for handling inputs and refactored
Signed-off-by: Mpho Mphego <mpho112@gmail.com>
1 parent 78a3393 commit 99eb33b

File tree

3 files changed

+61
-46
lines changed

3 files changed

+61
-46
lines changed

main.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -106,9 +106,15 @@ def arg_parser():
106106
default="fast",
107107
help="The speed (how fast it moves) by changing",
108108
)
109+
parser.add_argument(
110+
"--enable-mouse", action="store_true", help="Enable Mouse Movement",
111+
)
109112
parser.add_argument(
110113
"--debug", action="store_true", help="Show output on screen [debugging].",
111114
)
115+
parser.add_argument(
116+
"--show-bbox", action="store_true", help="Show bounding box and stats on screen [debugging].",
117+
)
112118

113119
return parser.parse_args()
114120

@@ -142,7 +148,7 @@ def main(args):
142148

143149
for frame in video_feed.next_frame():
144150

145-
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=True)
151+
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=args.show_bbox)
146152

147153
if face_bboxes:
148154
for face_bbox in face_bboxes:
@@ -164,16 +170,16 @@ def main(args):
164170
continue
165171

166172
facial_landmarks_pred_time, eyes_coords = facial_landmarks.predict(
167-
face, show_bbox=True
173+
face, show_bbox=args.show_bbox
168174
)
169175

170176
hp_est_pred_time, head_pose_angles = head_pose_estimation.predict(
171-
face, show_bbox=True
177+
face, show_bbox=args.show_bbox
172178
)
173179

174-
gaze_pred_time, gaze_vector, coords_xy = gaze_estimation.predict(
180+
gaze_pred_time, gaze_vector = gaze_estimation.predict(
175181
frame,
176-
show_bbox=True,
182+
show_bbox=args.show_bbox,
177183
face=face,
178184
eyes_coords=eyes_coords,
179185
head_pose_angles=head_pose_angles,
@@ -182,9 +188,12 @@ def main(args):
182188
head_pose_estimation.show_text(frame, head_pose_angles)
183189
gaze_estimation.show_text(frame, gaze_vector)
184190

185-
mouse_controller.move(coords_xy['x'],coords_xy['y'])
191+
print(f"gaze_vector: {gaze_vector}")
192+
if args.enable_mouse:
193+
mouse_controller.move(gaze_vector['x'], gaze_vector['y'])
186194

187195
if args.debug:
196+
video_feed.show(video_feed.resize(frame))
188197
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
189198
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
190199
text = f"Facial Landmarks Est. Inference time: {facial_landmarks_pred_time:.3f} s"
@@ -195,7 +204,6 @@ def main(args):
195204
)
196205
text = f"Gaze Est. Inference time: {gaze_pred_time:.3f} s"
197206
gaze_estimation.add_text(text, frame, (15, video_feed.source_height - 20))
198-
video_feed.show(video_feed.resize(frame))
199207

200208
video_feed.close()
201209

src/model.py

Lines changed: 32 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -138,9 +138,11 @@ def add_text(self, text, image, position, font_size=0.75, color=(255, 255, 255))
138138
image, text, position, cv2.FONT_HERSHEY_COMPLEX, font_size, color, 1,
139139
)
140140

141-
def preprocess_input(self, image):
141+
def preprocess_input(self, image, height=None, width=None):
142142
"""Helper function for processing frame"""
143-
p_frame = cv2.resize(image, (self.input_shape[3], self.input_shape[2]))
143+
if (height and width) is None:
144+
height, width = self.input_shape[2:]
145+
p_frame = cv2.resize(image, (width, height))
144146
# Change data layout from HWC to CHW
145147
p_frame = p_frame.transpose((2, 0, 1))
146148
p_frame = p_frame.reshape(1, *p_frame.shape)
@@ -427,42 +429,44 @@ def __init__(
427429
def preprocess_output(self, inference_results, image, show_bbox, **kwargs):
428430
gaze_vector = dict(zip(["x", "y", "z"], np.vstack(inference_results).ravel()))
429431

430-
roll_val = kwargs["head_pose_angles"]["roll"]
432+
# roll_val = kwargs["head_pose_angles"]["roll"]
431433

432-
cos_theta = math.cos(roll_val * math.pi / 180)
433-
sin_theta = math.sin(roll_val * math.pi / 180)
434+
# cos_theta = math.cos(roll_val * math.pi / 180)
435+
# sin_theta = math.sin(roll_val * math.pi / 180)
434436

435-
coords = {"x": None, "y": None}
436-
coords["x"] = gaze_vector["x"] * cos_theta + gaze_vector["y"] * sin_theta
437-
coords["y"] = gaze_vector["y"] * cos_theta - gaze_vector["x"] * sin_theta
437+
# coords = {"x": None, "y": None}
438+
# coords["x"] = gaze_vector["x"] * cos_theta + gaze_vector["y"] * sin_theta
439+
# coords["y"] = gaze_vector["y"] * cos_theta - gaze_vector["x"] * sin_theta
438440
if show_bbox:
439441
self.draw_output(gaze_vector, image, **kwargs)
440-
return gaze_vector, coords, image
442+
return gaze_vector, image
441443

442444
@staticmethod
443445
def draw_output(coords, image, **kwargs):
444446
left_eye_point = kwargs["eyes_coords"]["left_eye_point"]
445447
right_eye_point = kwargs["eyes_coords"]["right_eye_point"]
446-
print('here')
448+
print(left_eye_point)
447449
cv2.arrowedLine(
448450
image,
449-
(left_eye_point[0], left_eye_point[1]),
450451
(
451-
left_eye_point[0] + int(coords["x"] * 100),
452-
left_eye_point[1] + int(-coords["y"] * 100),
452+
left_eye_point[0] + int(coords["x"] * 500),
453+
left_eye_point[1] + int(-coords["y"] * 500),
453454
),
454-
(0, 0, 255),
455-
5,
455+
(left_eye_point[0], left_eye_point[1]),
456+
color=(0, 0, 255),
457+
thickness=2,
458+
tipLength=0.2,
456459
)
457460
cv2.arrowedLine(
458461
image,
459-
(right_eye_point[0], right_eye_point[1]),
460462
(
461-
right_eye_point[0] + int(coords["x"] * 100),
462-
right_eye_point[1] + int(-coords["y"] * 100),
463+
right_eye_point[0] + int(coords["x"] * 500),
464+
right_eye_point[1] + int(-coords["y"] * 500),
463465
),
464-
(0, 0, 255),
465-
5,
466+
(right_eye_point[0], right_eye_point[1]),
467+
color=(0, 0, 255),
468+
thickness=2,
469+
tipLength=0.2,
466470
)
467471

468472
@staticmethod
@@ -472,8 +476,7 @@ def show_text(
472476
"""Helper function for showing the text on frame."""
473477
height, _ = image.shape[:2]
474478
ypos = abs(height - pos)
475-
text = ", ".join(f"{x}: {y:.2f}" for x, y in coords.items())
476-
479+
text = "Gaze Vector: " + ", ".join(f"{x}: {y:.2f}" for x, y in coords.items())
477480
cv2.putText(
478481
image,
479482
text,
@@ -485,19 +488,13 @@ def show_text(
485488
)
486489

487490
def preprocess_input(self, image, **kwargs):
488-
def p_eye_image(which_eye, input_shape):
489-
490-
p_image = cv2.resize(image, (input_shape.shape[3], input_shape.shape[2]))
491-
p_image = p_image.transpose((2, 0, 1))
492-
p_image = p_image.reshape(1, *input_shape.shape)
493-
return p_image
491+
width, height = self.model.inputs["left_eye_image"].shape[2:]
494492

495-
p_left_eye_image = p_eye_image(
496-
kwargs["eyes_coords"]["left_eye_image"], self.model.inputs["left_eye_image"]
493+
p_left_eye_image = Base.preprocess_input(
494+
Base, kwargs["eyes_coords"]["left_eye_image"], width, height
497495
)
498-
p_right_eye_image = p_eye_image(
499-
kwargs["eyes_coords"]["right_eye_image"],
500-
self.model.inputs["right_eye_image"],
496+
p_right_eye_image = Base.preprocess_input(
497+
Base, kwargs["eyes_coords"]["right_eye_image"], width, height
501498
)
502499

503500
return p_left_eye_image, p_right_eye_image
@@ -524,7 +521,7 @@ def predict(self, image, request_id=0, show_bbox=False, **kwargs):
524521
self.exec_network.requests[request_id].outputs[output_name]
525522
)
526523
predict_end_time = float(time.time() - predict_start_time) * 1000
527-
gaze_vector, coords, _ = self.preprocess_output(
524+
gaze_vector, _ = self.preprocess_output(
528525
pred_result, image, show_bbox=show_bbox, **kwargs
529526
)
530-
return (predict_end_time, gaze_vector, coords)
527+
return (predict_end_time, gaze_vector)

src/mouse_controller.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import Xlib.display
22
import pyautogui
33

4+
from loguru import logger
5+
46

57
class MouseController:
68
"""
@@ -20,10 +22,18 @@ def __init__(self, precision, speed):
2022

2123
def move(self, x, y):
2224
"""Move mouse pointer to position the x and y."""
23-
pyautogui.moveRel(
24-
-x * self.precision, 1 * y * self.precision, duration=self.speed
25-
)
26-
25+
try:
26+
start_pos = pyautogui.position()
27+
pyautogui.moveRel(
28+
x * self.precision, -1 * y * self.precision, duration=self.speed
29+
)
30+
end_pos = pyautogui.position()
31+
logger.info(f"Mouse -> start_pos: {start_pos}, end_pos: {end_pos}")
32+
except pyautogui.FailSafeException:
33+
logger.exception(f"Position: {x}, {y} are out of the screen")
34+
pyautogui.moveRel(
35+
-x * self.precision, 1 * y * self.precision, duration=self.speed
36+
)
2737
def left_click(self):
2838
pass
2939

0 commit comments

Comments (0)