Skip to content

Commit 78a3393

Browse files
committed
added gaze est
Signed-off-by: Mpho Mphego <mpho112@gmail.com>
1 parent aaf5d94 commit 78a3393

File tree

3 files changed

+150
-30
lines changed

3 files changed

+150
-30
lines changed

main.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -143,8 +143,6 @@ def main(args):
143143
for frame in video_feed.next_frame():
144144

145145
predict_end_time, face_bboxes = face_detection.predict(frame, show_bbox=True)
146-
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
147-
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
148146

149147
if face_bboxes:
150148
for face_bbox in face_bboxes:
@@ -165,25 +163,38 @@ def main(args):
165163
if face_height < 20 or face_width < 20:
166164
continue
167165

168-
predict_end_time, eyes_coords = facial_landmarks.predict(
166+
facial_landmarks_pred_time, eyes_coords = facial_landmarks.predict(
169167
face, show_bbox=True
170168
)
171-
text = f"Facial Landmarks Est. Inference time: {predict_end_time:.3f} s"
172-
facial_landmarks.add_text(
173-
text, frame, (15, video_feed.source_height - 60)
174-
)
175169

176-
predict_end_time, head_pose_angles = head_pose_estimation.predict(
170+
hp_est_pred_time, head_pose_angles = head_pose_estimation.predict(
177171
face, show_bbox=True
178172
)
179-
head_pose_estimation.show_text(frame, head_pose_angles)
180-
text = f"Head Pose Est. Inference time: {predict_end_time:.3f} s"
181-
head_pose_estimation.add_text(
182-
text, frame, (15, video_feed.source_height - 40)
173+
174+
gaze_pred_time, gaze_vector, coords_xy = gaze_estimation.predict(
175+
frame,
176+
show_bbox=True,
177+
face=face,
178+
eyes_coords=eyes_coords,
179+
head_pose_angles=head_pose_angles,
183180
)
184-
# print (f"head pose: {head_pose_angles}")
181+
if args.debug:
182+
head_pose_estimation.show_text(frame, head_pose_angles)
183+
gaze_estimation.show_text(frame, gaze_vector)
184+
185+
mouse_controller.move(coords_xy['x'],coords_xy['y'])
185186

186187
if args.debug:
188+
text = f"Face Detection Inference time: {predict_end_time:.3f} s"
189+
face_detection.add_text(text, frame, (15, video_feed.source_height - 80))
190+
text = f"Facial Landmarks Est. Inference time: {facial_landmarks_pred_time:.3f} s"
191+
facial_landmarks.add_text(text, frame, (15, video_feed.source_height - 60))
192+
text = f"Head Pose Est. Inference time: {hp_est_pred_time:.3f} s"
193+
head_pose_estimation.add_text(
194+
text, frame, (15, video_feed.source_height - 40)
195+
)
196+
text = f"Gaze Est. Inference time: {gaze_pred_time:.3f} s"
197+
gaze_estimation.add_text(text, frame, (15, video_feed.source_height - 20))
187198
video_feed.show(video_feed.resize(frame))
188199

189200
video_feed.close()

src/model.py

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,19 +58,12 @@ def __init__(
5858
self.input_name = next(iter(self.model.inputs))
5959
self.input_shape = self.model.inputs[self.input_name].shape
6060
self.output_name = next(iter(self.model.outputs))
61-
self._output_shape = None
6261
self.output_shape = self.model.outputs[self.output_name].shape
6362
self._init_image_w = source_width
6463
self._init_image_h = source_height
6564
self.exec_network = None
6665
self.load_model()
6766

68-
# @property
69-
# def output_shape(self):
70-
# if not self._output_shape:
71-
# self._output_shape =
72-
# return self._output_shape
73-
7467
def _get_model(self):
7568
"""Helper function for reading the network."""
7669
try:
@@ -103,17 +96,17 @@ def load_model(self):
10396
f"Model: {self.model_structure} took {self._model_load_time:.3f} ms to load."
10497
)
10598

106-
def predict(self, image, request_id=0, show_bbox=False):
99+
def predict(self, image, request_id=0, show_bbox=False, **kwargs):
107100
if not isinstance(image, np.ndarray):
108101
raise IOError("Image not parsed correctly.")
109102

110-
p_image = self.preprocess_input(image)
103+
p_image = self.preprocess_input(image, **kwargs)
104+
predict_start_time = time.time()
111105
self.exec_network.start_async(
112106
request_id=request_id, inputs={self.input_name: p_image}
113107
)
114108
status = self.exec_network.requests[request_id].wait(-1)
115109
if status == 0:
116-
predict_start_time = time.time()
117110
pred_result = []
118111
for output_name, data_ptr in self.model.outputs.items():
119112
pred_result.append(
@@ -124,7 +117,7 @@ def predict(self, image, request_id=0, show_bbox=False):
124117
return (predict_end_time, bbox)
125118

126119
@abstractmethod
def preprocess_output(self, inference_results, image, show_bbox=False, **kwargs):
    """Subclass hook for post-processing inference results.

    This base version does no work; every concrete model is expected to
    override it.

    Args:
        inference_results: raw outputs collected from the inference request.
        image: the image the inference was run on.
        show_bbox: whether the override should draw its result on ``image``.
        **kwargs: extra, model-specific inputs for the override.

    Raises:
        NotImplementedError: always, when the base version is called.
    """
    message = "Please Implement this method"
    raise NotImplementedError(message)
130123

@@ -261,13 +254,30 @@ def preprocess_output(self, inference_results, image, show_bbox=False):
261254
h, w = image.shape[:2]
262255

263256
left_eye_x_coord = int(eyes_coords[0] * w)
257+
left_eye_xmin = left_eye_x_coord - 10
258+
left_eye_xmax = left_eye_x_coord + 10
259+
264260
left_eye_y_coord = int(eyes_coords[1] * h)
261+
left_eye_ymin = left_eye_y_coord - 10
262+
left_eye_ymax = left_eye_y_coord + 10
263+
265264
right_eye_x_coord = int(eyes_coords[2] * w)
265+
right_eye_xmin = right_eye_x_coord - 10
266+
right_eye_xmax = right_eye_x_coord + 10
267+
266268
right_eye_y_coord = int(eyes_coords[3] * h)
269+
right_eye_ymin = right_eye_y_coord - 10
270+
right_eye_ymax = right_eye_y_coord + 10
267271

268272
eyes_coords = {
269273
"left_eye_point": (left_eye_x_coord, left_eye_y_coord),
270274
"right_eye_point": (right_eye_x_coord, right_eye_y_coord),
275+
"left_eye_image": image[
276+
left_eye_ymin:left_eye_ymax, left_eye_xmin:left_eye_xmax
277+
],
278+
"right_eye_image": image[
279+
right_eye_ymin:right_eye_ymax, right_eye_xmin:right_eye_xmax,
280+
],
271281
}
272282
if show_bbox:
273283
self.draw_output(image, eyes_coords)
@@ -277,7 +287,8 @@ def preprocess_output(self, inference_results, image, show_bbox=False):
277287
def draw_output(image, eyes_coords, radius=10, color=(0, 0, 255), thickness=2):
    """Circle every eye landmark point stored in ``eyes_coords``.

    Only entries whose key contains ``"point"`` are drawn; other entries
    (such as the cropped eye images kept in the same dict) are ignored.

    Args:
        image: image that ``cv2.circle`` draws on in place.
        eyes_coords: mapping of entry name to value; point entries are
            indexable as ``(x, y)``.
        radius: circle radius passed to ``cv2.circle``.
        color: colour tuple passed to ``cv2.circle``.
        thickness: line thickness passed to ``cv2.circle``.
    """
    for label in eyes_coords:
        if "point" not in label:
            continue
        centre = eyes_coords[label]
        cv2.circle(image, (centre[0], centre[1]), radius, color, thickness)
281292

282293

283294
class Head_Pose_Estimation(Base):
@@ -413,9 +424,107 @@ def __init__(
413424
model_name, source_width, source_height, device, threshold, extensions,
414425
)
415426

416-
def preprocess_output(self, inference_results, image, show_bbox):
417-
pass
427+
def preprocess_output(self, inference_results, image, show_bbox=False, **kwargs):
    """Turn raw gaze inference output into a gaze vector and 2-D offsets.

    The inference output is stacked, flattened and labelled ``x``, ``y``,
    ``z``. The x/y components are then rotated by the head roll angle to
    produce the ``coords`` offsets.

    Args:
        inference_results: sequence of arrays read from the inference
            request; only the first three flattened values are used.
        image: frame to annotate when ``show_bbox`` is true. It is returned
            as the third tuple element.
        show_bbox: when true, draw the gaze direction through
            ``self.draw_output``. Defaults to ``False`` so the signature
            agrees with the abstract base declaration.
        **kwargs: must contain ``head_pose_angles``, a mapping with a
            ``"roll"`` entry in degrees. All kwargs are forwarded to
            ``draw_output``, which additionally reads ``eyes_coords``.

    Returns:
        tuple: ``(gaze_vector, coords, image)`` where ``gaze_vector`` maps
        ``x``/``y``/``z`` to the network output and ``coords`` maps
        ``x``/``y`` to the roll-compensated offsets.

    Raises:
        KeyError: if ``head_pose_angles`` or its ``"roll"`` entry is missing.
    """
    components = np.vstack(inference_results).ravel()
    gaze_vector = dict(zip(("x", "y", "z"), components))

    # Convert the roll angle from degrees to radians once and reuse it.
    roll_rad = kwargs["head_pose_angles"]["roll"] * math.pi / 180
    cos_theta = math.cos(roll_rad)
    sin_theta = math.sin(roll_rad)

    # Rotate the x/y gaze components by the roll angle.
    coords = {
        "x": gaze_vector["x"] * cos_theta + gaze_vector["y"] * sin_theta,
        "y": gaze_vector["y"] * cos_theta - gaze_vector["x"] * sin_theta,
    }

    if show_bbox:
        self.draw_output(gaze_vector, image, **kwargs)
    return gaze_vector, coords, image
418441

419442
@staticmethod
def draw_output(coords, image, **kwargs):
    """Draw one gaze arrow starting at each eye point.

    Both arrows share the same displacement: the ``x`` and negated ``y``
    entries of ``coords`` scaled by 100 and truncated to integers.

    Args:
        coords: mapping with ``"x"`` and ``"y"`` gaze components.
        image: image that ``cv2.arrowedLine`` draws on in place.
        **kwargs: must contain ``eyes_coords``, a mapping with
            ``"left_eye_point"`` and ``"right_eye_point"`` entries
            indexable as ``(x, y)``.

    Raises:
        KeyError: if ``eyes_coords`` or one of the eye points is missing.
    """
    eyes_coords = kwargs["eyes_coords"]
    # Look both points up before drawing so a missing key leaves the image
    # untouched.
    eye_points = (eyes_coords["left_eye_point"], eyes_coords["right_eye_point"])

    # NOTE(review): the eye points appear to be computed relative to the face
    # crop, while callers may pass the full frame here -- confirm the two
    # coordinate systems agree.
    shift_x = int(coords["x"] * 100)
    shift_y = int(-coords["y"] * 100)

    for point in eye_points:
        cv2.arrowedLine(
            image,
            (point[0], point[1]),
            (point[0] + shift_x, point[1] + shift_y),
            (0, 0, 255),
            5,
        )
467+
468+
@staticmethod
def show_text(
    image, coords, pos=550, font_scale=1.5, color=(255, 255, 255), thickness=1
):
    """Write the entries of ``coords`` onto ``image`` as one line of text.

    Each entry is rendered as ``name: value`` with two decimals and the
    entries are joined with commas. The text is placed at x=15 and at
    ``abs(image_height - pos)`` on the y axis.

    Args:
        image: image that ``cv2.putText`` draws on in place.
        coords: mapping of label to numeric value.
        pos: offset subtracted from the image height to get the text row.
        font_scale: scale passed to ``cv2.putText``.
        color: colour tuple passed to ``cv2.putText``.
        thickness: line thickness passed to ``cv2.putText``.
    """
    frame_height, _ = image.shape[:2]
    text_row = abs(frame_height - pos)

    parts = []
    for label, value in coords.items():
        parts.append(f"{label}: {value:.2f}")
    caption = ", ".join(parts)

    cv2.putText(
        image,
        caption,
        (15, text_row),
        fontFace=cv2.FONT_HERSHEY_PLAIN,
        fontScale=font_scale,
        color=color,
        thickness=thickness,
    )
486+
487+
def preprocess_input(self, image, **kwargs):
    """Prepare the left and right eye crops as inputs for the gaze network.

    Args:
        image: accepted for signature compatibility with the other models;
            the eye crops are taken from ``kwargs``, not from this frame.
        **kwargs: must contain ``eyes_coords``, a mapping with
            ``"left_eye_image"`` and ``"right_eye_image"`` image crops.

    Returns:
        tuple: ``(left_eye_input, right_eye_input)``, each resized to the
        width/height of the matching network input, transposed to
        channels-first and reshaped to that input's declared shape.

    Raises:
        KeyError: if ``eyes_coords`` or one of the eye crops is missing.
    """

    def p_eye_image(which_eye, input_shape):
        """Resize one eye crop and lay it out like the network input."""
        target_shape = tuple(input_shape.shape)
        # Fix: resize the eye crop itself. The previous code resized the
        # enclosing ``image``, so both inputs were the whole frame.
        resized = cv2.resize(which_eye, (target_shape[3], target_shape[2]))
        channels_first = resized.transpose((2, 0, 1))
        # Reshape to exactly the declared input shape instead of prepending
        # an extra leading axis to it.
        return channels_first.reshape(target_shape)

    eyes_coords = kwargs["eyes_coords"]
    model_inputs = self.model.inputs

    p_left_eye_image = p_eye_image(
        eyes_coords["left_eye_image"], model_inputs["left_eye_image"]
    )
    p_right_eye_image = p_eye_image(
        eyes_coords["right_eye_image"], model_inputs["right_eye_image"]
    )
    return p_left_eye_image, p_right_eye_image
504+
505+
def predict(self, image, request_id=0, show_bbox=False, **kwargs):
    """Run gaze estimation for one frame.

    Args:
        image: frame forwarded to ``preprocess_input`` and
            ``preprocess_output``.
        request_id: index of the inference request to use.
        show_bbox: forwarded to ``preprocess_output`` to enable drawing.
        **kwargs: must contain ``head_pose_angles`` (a mapping whose values
            are sent to the network in mapping order) and whatever
            ``preprocess_input`` / ``preprocess_output`` read from kwargs.

    Returns:
        tuple: ``(predict_end_time, gaze_vector, coords)``.
        ``predict_end_time`` is the elapsed time in milliseconds from just
        before the request is started until its outputs are collected.

    Raises:
        KeyError: if ``head_pose_angles`` is missing.
        RuntimeError: if the inference request finishes with a non-zero
            status.
    """
    p_left_eye_image, p_right_eye_image = self.preprocess_input(image, **kwargs)
    # NOTE(review): the order of these values follows the mapping's own
    # order -- confirm it matches what the network expects.
    head_pose_angles = list(kwargs["head_pose_angles"].values())

    predict_start_time = time.time()
    # The return value of start_async is not used; the status that matters
    # comes from wait() below.
    self.exec_network.start_async(
        request_id=request_id,
        inputs={
            "left_eye_image": p_left_eye_image,
            "right_eye_image": p_right_eye_image,
            "head_pose_angles": head_pose_angles,
        },
    )
    request = self.exec_network.requests[request_id]
    status = request.wait(-1)
    if status != 0:
        # Fail loudly here rather than let the caller's tuple unpacking
        # break on a missing result.
        raise RuntimeError(
            f"Gaze estimation inference request {request_id} "
            f"finished with status {status}."
        )

    pred_result = [request.outputs[output_name] for output_name in self.model.outputs]
    predict_end_time = float(time.time() - predict_start_time) * 1000
    gaze_vector, coords, _ = self.preprocess_output(
        pred_result, image, show_bbox=show_bbox, **kwargs
    )
    return (predict_end_time, gaze_vector, coords)

src/mouse_controller.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def __init__(self, precision, speed):
2121
def move(self, x, y):
    """Move the mouse pointer relative to its current position.

    The horizontal step is ``-x`` and the vertical step is ``y``, each
    multiplied by ``self.precision``. ``self.speed`` is passed to
    ``pyautogui.moveRel`` as the move duration.

    Args:
        x: horizontal offset before scaling; its sign is inverted.
        y: vertical offset before scaling.
    """
    horizontal_step = -x * self.precision
    vertical_step = y * self.precision
    pyautogui.moveRel(horizontal_step, vertical_step, duration=self.speed)
2626

2727
def left_click(self):

0 commit comments

Comments
 (0)