Skip to content

Commit 5b2d154

Browse files
committed
feat: better support BGR, BGRA and uint8, uint16, uint32
1 parent 50d781a commit 5b2d154

File tree

4 files changed

+633
-410
lines changed

4 files changed

+633
-410
lines changed

src/arduino/app_utils/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
"PipeableFunction",
1212
"letterboxed",
1313
"resized",
14+
"adjusted",
1415
"greyscaled",
1516
"compressed_to_jpeg",
1617
"compressed_to_png",

src/arduino/app_utils/image/image_editor.py

Lines changed: 230 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -9,75 +9,79 @@
99

1010
from arduino.app_utils.image.pipeable import PipeableFunction
1111

12+
# NOTE: we use the following formats for image shapes (H = height, W = width, C = channels):
13+
# - When receiving a resolution as argument we expect (W, H) format which is more user-friendly
14+
# - When receiving images we expect (H, W, C) format with C = BGR, BGRA or greyscale
15+
# - When returning images we use (H, W, C) format with C = BGR, BGRA or greyscale (depending on input)
16+
# Keep in mind OpenCV functions take sizes as (W, H), whereas image arrays (numpy, and therefore OpenCV's Python API) are shaped (H, W, C), with C = BGR by OpenCV convention.
17+
# The below functions all support unsigned integer types used by OpenCV (uint8, uint16 and uint32).
18+
1219

1320
class ImageEditor:
1421
"""
15-
Image processing utilities for camera frames.
16-
17-
Handles common image operations like compression, letterboxing, resizing, and format conversions.
18-
19-
This class provides traditional static methods for image processing operations.
20-
For functional composition with pipe operators, use the standalone functions below the class.
21-
22-
Examples:
23-
Traditional API:
24-
result = ImageEditor.letterbox(frame, target_size=(640, 640))
25-
26-
Functional API:
27-
result = frame | letterboxed(target_size=(640, 640))
28-
29-
Chained operations:
30-
result = frame | letterboxed(target_size=(640, 640)) | greyscaled()
22+
Image processing utilities handling common image operations like letterboxing, resizing,
23+
adjusting, compressing and format conversions.
24+
Frames are expected to be in BGR, BGRA or greyscale format.
3125
"""
3226

3327
@staticmethod
3428
def letterbox(frame: np.ndarray,
              target_size: Optional[Tuple[int, int]] = None,
              color: int | Tuple[int, int, int] = (114, 114, 114),
              interpolation: int = cv2.INTER_LINEAR) -> np.ndarray:
    """
    Add letterboxing to frame to achieve target size while maintaining aspect ratio.

    The frame is scaled to fit inside the target canvas, centred, and the
    remaining area is filled with ``color``. The output keeps the dtype and
    channel count of the input.

    Args:
        frame (np.ndarray): Input frame, (H, W) greyscale or (H, W, C) colour.
        target_size (tuple, optional): Target size as (width, height). If None, makes frame square
            using the longest side of the input.
        color (int or tuple, optional): BGR color for padding borders, can be a scalar or a tuple
            matching the frame's channel count. A 3-element colour given for a 4-channel
            (BGRA) frame is extended with a fully opaque alpha. Default: (114, 114, 114)
        interpolation (int, optional): OpenCV interpolation method. Default: cv2.INTER_LINEAR

    Returns:
        np.ndarray: Letterboxed frame of shape (target_h, target_w[, C])

    Raises:
        ValueError: If the frame is empty, the target size is not positive, or the
            colour length does not match the frame's channel count.
    """
    orig_h, orig_w = frame.shape[:2]

    # Guard against division by zero when computing the scale factor
    if orig_h == 0 or orig_w == 0:
        raise ValueError("Cannot letterbox empty frame")

    if target_size is None:
        # Default to a square canvas based on the longest side
        target_w = target_h = int(max(orig_h, orig_w))
    else:
        target_w, target_h = int(target_size[0]), int(target_size[1])

    if target_w <= 0 or target_h <= 0:
        raise ValueError(f"target_size must be positive, got ({target_w}, {target_h}).")

    scale = min(target_w / orig_w, target_h / orig_h)
    # int() truncation can yield 0 for extreme aspect ratios, which cv2.resize
    # rejects; keep at least one pixel and never exceed the canvas.
    new_w = min(target_w, max(1, int(orig_w * scale)))
    new_h = min(target_h, max(1, int(orig_h * scale)))

    resized_frame = cv2.resize(frame, (new_w, new_h), interpolation=interpolation)

    color_is_scalar = np.ndim(color) == 0
    if frame.ndim == 2:
        # Greyscale: a single fill value is needed, take the first component
        fill = color if color_is_scalar else color[0]
        canvas_shape = (target_h, target_w)
    else:
        # Colored (BGR/BGRA)
        channels = frame.shape[2]
        if color_is_scalar:
            fill = (color,) * channels
        elif len(color) == 3 and channels == 4:
            # Let the default BGR colour work on BGRA frames by padding with
            # a fully opaque alpha value for the frame's dtype.
            if np.issubdtype(frame.dtype, np.integer):
                opaque = np.iinfo(frame.dtype).max
            else:
                opaque = 1.0
            fill = tuple(color) + (opaque,)
        elif len(color) != channels:
            raise ValueError(
                f"color length ({len(color)}) must match frame channels ({channels})."
            )
        else:
            fill = color
        canvas_shape = (target_h, target_w, channels)

    canvas = np.full(canvas_shape, fill, dtype=frame.dtype)

    # Calculate offsets to center the image
    y_offset = (target_h - new_h) // 2
    x_offset = (target_w - new_w) // 2

    # Paste the resized image onto the canvas
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_frame

    return canvas
8185

8286
@staticmethod
8387
def resize(frame: np.ndarray,
@@ -99,23 +103,162 @@ def resize(frame: np.ndarray,
99103
if maintain_ratio:
100104
return ImageEditor.letterbox(frame, target_size)
101105
else:
102-
return cv2.resize(frame, (target_size[1], target_size[0]), interpolation=interpolation)
106+
return cv2.resize(frame, (target_size[0], target_size[1]), interpolation=interpolation)
107+
108+
@staticmethod
109+
def adjust(frame: np.ndarray,
           brightness: float = 0.0,
           contrast: float = 1.0,
           saturation: float = 1.0,
           gamma: float = 1.0) -> np.ndarray:
    """
    Apply image adjustments to a BGR or BGRA frame, preserving channel count
    and data type.

    Adjustments are applied in this order: saturation, brightness, contrast,
    gamma. The alpha channel of a BGRA frame is passed through untouched.
    Saturation is skipped for frames without a channel axis (2-D).

    Args:
        frame (np.ndarray): Input frame (uint8, uint16, uint32).
        brightness (float): -1.0 to 1.0 (default: 0.0).
        contrast (float): 0.0 to N (default: 1.0).
        saturation (float): 0.0 to N (default: 1.0).
        gamma (float): > 0 (default: 1.0).

    Returns:
        np.ndarray: The adjusted input with same dtype as frame.

    Raises:
        ValueError: If gamma is not greater than 0, or if the frame dtype is
            not an integer type (raised by np.iinfo).
    """
    # Validate first so an invalid gamma fails before any processing is done.
    # This check is critical to prevent math errors (NaN/Inf).
    if gamma <= 0:
        raise ValueError("Gamma value must be greater than 0.")

    original_dtype = frame.dtype
    dtype_info = np.iinfo(original_dtype)
    max_val = dtype_info.max

    # Use float64 for int types with > 24 bits of precision (e.g., uint32)
    processing_dtype = np.float64 if dtype_info.bits > 24 else np.float32

    # Only the colour planes are adjusted; alpha is reattached unchanged at the end
    has_alpha = frame.ndim == 3 and frame.shape[2] == 4
    color_planes = frame[:, :, :3] if has_alpha else frame

    # Apply the adjustments in normalised [0, 1] float space to reduce clipping and data loss
    frame_float = color_planes.astype(processing_dtype) / max_val

    # Saturation
    if saturation != 1.0 and frame.ndim == 3:  # Ensure frame has color channels
        # cvtColor is fed float32, so this step is lossy only for uint32
        hsv = cv2.cvtColor(frame_float.astype(np.float32), cv2.COLOR_BGR2HSV)
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * saturation, 0.0, 1.0)
        frame_float = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR).astype(processing_dtype)

    # Brightness
    if brightness != 0.0:
        frame_float = frame_float + brightness

    # Contrast (pivot around mid-grey)
    if contrast != 1.0:
        frame_float = (frame_float - 0.5) * contrast + 0.5

    # We need to clip before reaching gamma correction
    # Clipping to 0 is mandatory to avoid handling complex numbers
    # Clipping to 1 is handy to avoid clipping again after gamma correction
    frame_float = np.clip(frame_float, 0.0, 1.0)

    # Gamma
    if gamma != 1.0:
        frame_float = np.power(frame_float, gamma)

    # Convert back to original dtype. Round to nearest instead of truncating
    # so float round-off (e.g. 199.99999) does not drop a whole level.
    final_frame = np.rint(frame_float * max_val).astype(original_dtype)

    # If present, reattach the original alpha channel
    if has_alpha:
        final_frame = np.concatenate([final_frame, frame[:, :, 3:]], axis=2)

    return final_frame
186+
187+
@staticmethod
188+
def split_channels(frame: np.ndarray) -> tuple:
    """
    Return the individual channel planes of a 3- or 4-channel frame.

    Plain numpy indexing is used rather than cv2.split, which gives better
    data type compatibility, especially for uint32 data which OpenCV doesn't
    fully support.

    Args:
        frame (np.ndarray): Input frame of shape (H, W, C) with C = 3 or 4

    Returns:
        tuple: One (H, W) array per channel, in channel order.
            BGR -> (b, g, r), BGRA -> (b, g, r, a), HSV -> (h, s, v).

    Raises:
        ValueError: If the frame is not 3-dimensional or has a channel
            count other than 3 or 4.
    """
    if frame.ndim != 3:
        raise ValueError("Frame must be 3-dimensional (H, W, C)")

    channels = frame.shape[2]
    if channels not in (3, 4):
        raise ValueError(f"Unsupported number of channels: {channels}. Expected 3 or 4.")

    # Slice along the last axis, one plane per channel index
    return tuple(frame[:, :, index] for index in range(channels))
103211

104212
@staticmethod
105213
def greyscale(frame: np.ndarray) -> np.ndarray:
    """
    Converts a BGR or BGRA frame to greyscale, preserving channel count and
    data type. A greyscale frame is returned unmodified.

    The grey level is computed with BT.709 weights and replicated into the
    B, G and R planes. The alpha channel of a BGRA frame is passed through
    untouched. Frames that are not 3-dimensional, or whose channel count is
    neither 3 nor 4, are returned as-is.

    Args:
        frame (np.ndarray): Input frame (uint8, uint16, uint32).

    Returns:
        np.ndarray: The greyscaled frame with same dtype and channel count as frame.

    Raises:
        ValueError: If a BGR/BGRA frame has a non-integer dtype (raised by np.iinfo).
    """
    # If already greyscale or unknown format, return the original frame
    if frame.ndim != 3 or frame.shape[2] not in (3, 4):
        return frame

    original_dtype = frame.dtype
    dtype_info = np.iinfo(original_dtype)

    # Use float64 for int types with > 24 bits of precision (e.g., uint32)
    processing_dtype = np.float64 if dtype_info.bits > 24 else np.float32

    # Work in float space on the colour planes only
    bgr = frame[:, :, :3].astype(processing_dtype)

    # Convert to greyscale using standard BT.709 weights
    # GREY = 0.0722 * B + 0.7152 * G + 0.2126 * R
    grey_float = (0.0722 * bgr[:, :, 0] +
                  0.7152 * bgr[:, :, 1] +
                  0.2126 * bgr[:, :, 2])

    # Round to nearest (not truncate) so float round-off cannot drop a level,
    # and clip so round-off cannot exceed the dtype range before the cast.
    final_grey = np.clip(np.rint(grey_float), 0, dtype_info.max).astype(original_dtype)

    planes = [final_grey, final_grey, final_grey]
    if frame.shape[2] == 4:
        # Reattach the original alpha channel unchanged
        planes.append(frame[:, :, 3])

    return np.stack(planes, axis=2)
261+
119262
@staticmethod
120263
def compress_to_jpeg(frame: np.ndarray, quality: int = 80) -> Optional[np.ndarray]:
121264
"""
@@ -168,7 +311,7 @@ def numpy_to_pil(frame: np.ndarray) -> Image.Image:
168311
Convert numpy array to PIL Image.
169312
170313
Args:
171-
frame (np.ndarray): Input frame in BGR format (OpenCV default)
314+
frame (np.ndarray): Input frame in BGR format
172315
173316
Returns:
174317
PIL.Image.Image: PIL Image in RGB format
@@ -186,9 +329,8 @@ def pil_to_numpy(image: Image.Image) -> np.ndarray:
186329
image (PIL.Image.Image): PIL Image
187330
188331
Returns:
189-
np.ndarray: Numpy array in BGR format (OpenCV default)
332+
np.ndarray: Numpy array in BGR format
190333
"""
191-
# Convert to RGB if not already
192334
if image.mode != 'RGB':
193335
image = image.convert('RGB')
194336

@@ -202,7 +344,8 @@ def pil_to_numpy(image: Image.Image) -> np.ndarray:
202344
# =============================================================================
203345

204346
def letterboxed(target_size: Optional[Tuple[int, int]] = None,
205-
color: Tuple[int, int, int] = (114, 114, 114)):
347+
color: Tuple[int, int, int] = (114, 114, 114),
348+
interpolation: int = cv2.INTER_LINEAR):
206349
"""
207350
Pipeable letterbox function - apply letterboxing with pipe operator support.
208351
@@ -217,7 +360,7 @@ def letterboxed(target_size: Optional[Tuple[int, int]] = None,
217360
pipe = letterboxed(target_size=(640, 640))
218361
pipe = letterboxed() | greyscaled()
219362
"""
220-
return PipeableFunction(ImageEditor.letterbox, target_size=target_size, color=color)
363+
return PipeableFunction(ImageEditor.letterbox, target_size=target_size, color=color, interpolation=interpolation)
221364

222365

223366
def resized(target_size: Tuple[int, int],
@@ -241,6 +384,29 @@ def resized(target_size: Tuple[int, int],
241384
return PipeableFunction(ImageEditor.resize, target_size=target_size, maintain_ratio=maintain_ratio, interpolation=interpolation)
242385

243386

387+
def adjusted(brightness: float = 0.0,
             contrast: float = 1.0,
             saturation: float = 1.0,
             gamma: float = 1.0):
    """
    Pipeable counterpart of ImageEditor.adjust, for use with the pipe operator.

    Args:
        brightness (float): -1.0 to 1.0 (default: 0.0).
        contrast (float): 0.0 to N (default: 1.0).
        saturation (float): 0.0 to N (default: 1.0).
        gamma (float): > 0 (default: 1.0).

    Returns:
        PipeableFunction wrapping ImageEditor.adjust with the given settings
        bound as keyword arguments.

    Examples:
        pipe = adjusted(brightness=0.1, contrast=1.2)
        pipe = letterboxed() | adjusted(saturation=0.8)
    """
    # Collect the settings once and forward them unchanged as keyword arguments
    settings = {
        "brightness": brightness,
        "contrast": contrast,
        "saturation": saturation,
        "gamma": gamma,
    }
    return PipeableFunction(ImageEditor.adjust, **settings)
408+
409+
244410
def greyscaled():
245411
"""
246412
Pipeable greyscale function - convert frame to greyscale with pipe operator support.
@@ -250,7 +416,7 @@ def greyscaled():
250416
251417
Examples:
252418
pipe = greyscaled()
253-
pipe = letterboxed() | greyscaled() | greyscaled()
419+
pipe = letterboxed() | greyscaled()
254420
"""
255421
return PipeableFunction(ImageEditor.greyscale)
256422

0 commit comments

Comments
 (0)