99
1010from arduino .app_utils .image .pipeable import PipeableFunction
1111
12+ # NOTE: we use the following formats for image shapes (H = height, W = width, C = channels):
13+ # - When receiving a resolution as argument we expect (W, H) format which is more user-friendly
14+ # - When receiving images we expect (H, W, C) format with C = BGR, BGRA or greyscale
15+ # - When returning images we use (H, W, C) format with C = BGR, BGRA or greyscale (depending on input)
16+ # Keep in mind OpenCV takes size arguments (e.g. the dsize of cv2.resize) in (W, H) order, whereas image arrays are numpy arrays shaped (H, W, C), with C = BGR by OpenCV convention.
17+ # The below functions all support unsigned integer types used by OpenCV (uint8, uint16 and uint32).
18+
1219
1320class ImageEditor :
1421 """
15- Image processing utilities for camera frames.
16-
17- Handles common image operations like compression, letterboxing, resizing, and format conversions.
18-
19- This class provides traditional static methods for image processing operations.
20- For functional composition with pipe operators, use the standalone functions below the class.
21-
22- Examples:
23- Traditional API:
24- result = ImageEditor.letterbox(frame, target_size=(640, 640))
25-
26- Functional API:
27- result = frame | letterboxed(target_size=(640, 640))
28-
29- Chained operations:
30- result = frame | letterboxed(target_size=(640, 640)) | greyscaled()
22+ Image processing utilities handling common image operations like letterboxing, resizing,
23+ adjusting, compressing and format conversions.
24+ Frames are expected to be in BGR, BGRA or greyscale format.
3125 """
3226
3327 @staticmethod
def letterbox(frame: np.ndarray,
              target_size: Optional[Tuple[int, int]] = None,
              color: int | Tuple[int, int, int] = (114, 114, 114),
              interpolation: int = cv2.INTER_LINEAR) -> np.ndarray:
    """
    Add letterboxing to frame to achieve target size while maintaining aspect ratio.

    The frame is scaled to fit inside the target canvas, centered on it, and the
    remaining area is filled with `color`. The output keeps the dtype and channel
    count of the input.

    Args:
        frame (np.ndarray): Input frame, shaped (H, W) or (H, W, C).
        target_size (tuple, optional): Target size as (width, height). If None, makes frame square
            using the longest side of the frame.
        color (int or tuple, optional): BGR color for padding borders, can be a scalar or a tuple
            matching the frame's channel count. A 3-component color applied to a 4-channel
            frame gets a fully opaque alpha appended. For 2-D frames only the first
            component of a tuple is used. Default: (114, 114, 114)
        interpolation (int, optional): OpenCV interpolation method. Default: cv2.INTER_LINEAR

    Returns:
        np.ndarray: Letterboxed frame of shape (target_h, target_w[, C]).

    Raises:
        ValueError: If the frame is empty, the target size is not positive, or the
            color length cannot be matched to the frame's channel count.
    """
    orig_h, orig_w = frame.shape[:2]

    # Guard against division by zero when computing the scale factor
    if orig_h == 0 or orig_w == 0:
        raise ValueError("Cannot letterbox empty frame")

    if target_size is None:
        # Default to a square canvas based on the longest side
        target_w = target_h = int(max(orig_h, orig_w))
    else:
        target_w, target_h = int(target_size[0]), int(target_size[1])

    if target_w <= 0 or target_h <= 0:
        raise ValueError(f"target_size must be positive, got ({target_w}, {target_h}).")

    # Largest scale at which the whole frame still fits inside the canvas
    scale = min(target_w / orig_w, target_h / orig_h)

    # Clamp to at least 1 pixel: extreme aspect ratios would otherwise truncate
    # to 0 and make cv2.resize fail. Clamp to the canvas to absorb float error.
    new_w = min(target_w, max(1, int(orig_w * scale)))
    new_h = min(target_h, max(1, int(orig_h * scale)))

    resized_frame = cv2.resize(frame, (new_w, new_h), interpolation=interpolation)

    if frame.ndim == 2:
        # Greyscale: a single fill value is enough
        fill = color[0] if hasattr(color, '__len__') else color
        canvas = np.full((target_h, target_w), fill, dtype=frame.dtype)
    else:
        # Colored (BGR/BGRA)
        channels = frame.shape[2]
        if not hasattr(color, '__len__'):
            fill = (color,) * channels
        else:
            fill = tuple(color)
            if len(fill) == 3 and channels == 4:
                # Let a BGR color (including the default) work on BGRA frames
                # by padding it with a fully opaque alpha value.
                is_integer = np.issubdtype(frame.dtype, np.integer)
                opaque = np.iinfo(frame.dtype).max if is_integer else 1
                fill = fill + (opaque,)
            elif len(fill) != channels:
                raise ValueError(
                    f"color length ({len(fill)}) must match frame channels ({channels})."
                )
        canvas = np.full((target_h, target_w, channels), fill, dtype=frame.dtype)

        # cv2.resize drops a trailing singleton channel axis; restore it so the
        # paste below matches the canvas slice.
        if resized_frame.ndim == 2:
            resized_frame = resized_frame[:, :, np.newaxis]

    # Calculate offsets to center the image
    y_offset = (target_h - new_h) // 2
    x_offset = (target_w - new_w) // 2

    # Paste the resized image onto the canvas
    canvas[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized_frame

    return canvas
8185
8286 @staticmethod
8387 def resize (frame : np .ndarray ,
@@ -99,23 +103,162 @@ def resize(frame: np.ndarray,
99103 if maintain_ratio :
100104 return ImageEditor .letterbox (frame , target_size )
101105 else :
102- return cv2 .resize (frame , (target_size [1 ], target_size [0 ]), interpolation = interpolation )
106+ return cv2 .resize (frame , (target_size [0 ], target_size [1 ]), interpolation = interpolation )
107+
108+ @staticmethod
def adjust(frame: np.ndarray,
           brightness: float = 0.0,
           contrast: float = 1.0,
           saturation: float = 1.0,
           gamma: float = 1.0) -> np.ndarray:
    """
    Apply image adjustments to a BGR or BGRA frame, preserving channel count
    and data type.

    Adjustments are applied in this order: saturation, brightness, contrast,
    gamma. The color channels are processed in normalized float space and
    rounded back to the original integer dtype. An alpha channel, if present,
    is copied through unchanged.

    Args:
        frame (np.ndarray): Input frame with an integer dtype (uint8, uint16, uint32).
        brightness (float): -1.0 to 1.0 (default: 0.0). Added to the normalized values.
        contrast (float): 0.0 to N (default: 1.0). Scales values around mid-grey (0.5).
        saturation (float): 0.0 to N (default: 1.0). Only applied to frames with
            color channels.
        gamma (float): > 0 (default: 1.0). Exponent applied to the normalized values.

    Returns:
        np.ndarray: The adjusted input with same dtype as frame.

    Raises:
        ValueError: If gamma is not greater than 0.
    """
    # Validate up front so no work is done on invalid input.
    # This check is critical to prevent math errors (NaN/Inf).
    if gamma <= 0:
        raise ValueError("Gamma value must be greater than 0.")

    original_dtype = frame.dtype
    dtype_info = np.iinfo(original_dtype)
    max_val = dtype_info.max

    # Use float64 for int types with > 24 bits of precision (e.g., uint32)
    processing_dtype = np.float64 if dtype_info.bits > 24 else np.float32

    # Only the color channels are adjusted; alpha is reattached untouched at the end
    has_alpha = frame.ndim == 3 and frame.shape[2] == 4
    color_part = frame[:, :, :3] if has_alpha else frame

    # Apply the adjustments in float space to reduce clipping and data loss
    frame_float = color_part.astype(processing_dtype) / max_val

    # Saturation
    if saturation != 1.0 and frame.ndim == 3:  # Ensure frame has color channels
        # This must be done with float32 so it's lossy only for uint32
        bgr_32 = np.ascontiguousarray(frame_float, dtype=np.float32)
        hsv = cv2.cvtColor(bgr_32, cv2.COLOR_BGR2HSV)
        hsv[:, :, 1] = np.clip(hsv[:, :, 1] * saturation, 0.0, 1.0)
        frame_float = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR).astype(processing_dtype)

    # Brightness
    if brightness != 0.0:
        frame_float = frame_float + brightness

    # Contrast
    if contrast != 1.0:
        frame_float = (frame_float - 0.5) * contrast + 0.5

    # We need to clip before reaching gamma correction
    # Clipping to 0 is mandatory to avoid handling complex numbers
    # Clipping to 1 is handy to avoid clipping again after gamma correction
    frame_float = np.clip(frame_float, 0.0, 1.0)

    # Gamma
    if gamma != 1.0:
        frame_float = np.power(frame_float, gamma)

    # Round (rather than truncate) when converting back, otherwise float error
    # in the normalize/denormalize round trip can lower values by one step.
    adjusted_color = np.rint(frame_float * max_val).astype(original_dtype)

    if has_alpha:
        return np.concatenate([adjusted_color, frame[:, :, 3:4]], axis=2)
    return adjusted_color
186+
187+ @staticmethod
def split_channels(frame: np.ndarray) -> tuple:
    """
    Break a 3- or 4-channel frame into its per-channel 2-D planes.

    Plain numpy indexing is used instead of cv2.split, which gives better
    data type compatibility, especially for uint32 data that OpenCV doesn't
    fully support.

    Args:
        frame (np.ndarray): Input frame shaped (H, W, C) with C equal to 3 or 4.

    Returns:
        tuple: One array per channel, in channel order. For BGR: (b, g, r).
               For BGRA: (b, g, r, a). For HSV: (h, s, v).

    Raises:
        ValueError: If the frame is not 3-dimensional or has a channel count
            other than 3 or 4.
    """
    if frame.ndim != 3:
        raise ValueError("Frame must be 3-dimensional (H, W, C)")

    channels = frame.shape[2]
    if channels not in (3, 4):
        raise ValueError(f"Unsupported number of channels: {channels}. Expected 3 or 4.")

    # Slice along the last axis once per channel
    return tuple(frame[:, :, index] for index in range(channels))
103211
104212 @staticmethod
def greyscale(frame: np.ndarray) -> np.ndarray:
    """
    Converts a BGR or BGRA frame to greyscale, preserving channel count and
    data type. A greyscale frame is returned unmodified.

    The grey value is replicated into the B, G and R channels. An alpha
    channel, if present, is copied through unchanged.

    Args:
        frame (np.ndarray): Input frame with an integer dtype (uint8, uint16, uint32).

    Returns:
        np.ndarray: The greyscaled frame with same dtype and channel count as frame.
            Frames that are not (H, W, 3) or (H, W, 4) are returned as-is.
    """
    # If already greyscale or unknown format, return the original frame
    if frame.ndim != 3 or frame.shape[2] not in (3, 4):
        return frame

    original_dtype = frame.dtype
    dtype_info = np.iinfo(original_dtype)

    # Use float64 for int types with > 24 bits of precision (e.g., uint32)
    processing_dtype = np.float64 if dtype_info.bits > 24 else np.float32

    bgr = frame[:, :, :3].astype(processing_dtype)

    # Convert to greyscale using standard BT.709 weights
    # GREY = 0.0722 * B + 0.7152 * G + 0.2126 * R
    grey_float = (0.0722 * bgr[:, :, 0] +
                  0.7152 * bgr[:, :, 1] +
                  0.2126 * bgr[:, :, 2])

    # Round (rather than truncate) so float error cannot turn e.g. pure white
    # into max-1, and clip to stay inside the integer range before casting.
    final_grey = np.clip(np.rint(grey_float), 0, dtype_info.max).astype(original_dtype)

    planes = [final_grey, final_grey, final_grey]
    if frame.shape[2] == 4:
        # Alpha is not part of the conversion, so reuse the original values
        planes.append(frame[:, :, 3])

    return np.stack(planes, axis=2)
261+
119262 @staticmethod
120263 def compress_to_jpeg (frame : np .ndarray , quality : int = 80 ) -> Optional [np .ndarray ]:
121264 """
@@ -168,7 +311,7 @@ def numpy_to_pil(frame: np.ndarray) -> Image.Image:
168311 Convert numpy array to PIL Image.
169312
170313 Args:
171- frame (np.ndarray): Input frame in BGR format (OpenCV default)
314+ frame (np.ndarray): Input frame in BGR format
172315
173316 Returns:
174317 PIL.Image.Image: PIL Image in RGB format
@@ -186,9 +329,8 @@ def pil_to_numpy(image: Image.Image) -> np.ndarray:
186329 image (PIL.Image.Image): PIL Image
187330
188331 Returns:
189- np.ndarray: Numpy array in BGR format (OpenCV default)
332+ np.ndarray: Numpy array in BGR format
190333 """
191- # Convert to RGB if not already
192334 if image .mode != 'RGB' :
193335 image = image .convert ('RGB' )
194336
@@ -202,7 +344,8 @@ def pil_to_numpy(image: Image.Image) -> np.ndarray:
202344# =============================================================================
203345
204346def letterboxed (target_size : Optional [Tuple [int , int ]] = None ,
205- color : Tuple [int , int , int ] = (114 , 114 , 114 )):
347+ color : Tuple [int , int , int ] = (114 , 114 , 114 ),
348+ interpolation : int = cv2 .INTER_LINEAR ):
206349 """
207350 Pipeable letterbox function - apply letterboxing with pipe operator support.
208351
@@ -217,7 +360,7 @@ def letterboxed(target_size: Optional[Tuple[int, int]] = None,
217360 pipe = letterboxed(target_size=(640, 640))
218361 pipe = letterboxed() | greyscaled()
219362 """
220- return PipeableFunction (ImageEditor .letterbox , target_size = target_size , color = color )
363+ return PipeableFunction (ImageEditor .letterbox , target_size = target_size , color = color , interpolation = interpolation )
221364
222365
223366def resized (target_size : Tuple [int , int ],
@@ -241,6 +384,29 @@ def resized(target_size: Tuple[int, int],
241384 return PipeableFunction (ImageEditor .resize , target_size = target_size , maintain_ratio = maintain_ratio , interpolation = interpolation )
242385
243386
def adjusted(brightness: float = 0.0,
             contrast: float = 1.0,
             saturation: float = 1.0,
             gamma: float = 1.0):
    """
    Pipeable counterpart of ImageEditor.adjust, usable with the pipe operator.

    Args:
        brightness (float): -1.0 to 1.0 (default: 0.0).
        contrast (float): 0.0 to N (default: 1.0).
        saturation (float): 0.0 to N (default: 1.0).
        gamma (float): > 0 (default: 1.0).

    Returns:
        PipeableFunction wrapping ImageEditor.adjust with the given settings
        bound as keyword arguments.

    Examples:
        pipe = adjusted(brightness=0.1, contrast=1.2)
        pipe = letterboxed() | adjusted(saturation=0.8)
    """
    # Collect the settings once, then forward them unchanged to the wrapper
    settings = {
        "brightness": brightness,
        "contrast": contrast,
        "saturation": saturation,
        "gamma": gamma,
    }
    return PipeableFunction(ImageEditor.adjust, **settings)
408+
409+
244410def greyscaled ():
245411 """
246412 Pipeable greyscale function - convert frame to greyscale with pipe operator support.
@@ -250,7 +416,7 @@ def greyscaled():
250416
251417 Examples:
252418 pipe = greyscaled()
253- pipe = letterboxed() | greyscaled() | greyscaled()
419+ pipe = letterboxed() | greyscaled()
254420 """
255421 return PipeableFunction (ImageEditor .greyscale )
256422
0 commit comments