Merge pull request #1680 from laurigates/pr/perf-float32-buffer-reuse

perf(processing): optimize post-processing with float32 and buffer reuse
This commit is contained in:
Kenneth Estanislao
2026-02-23 15:13:03 +08:00
committed by GitHub
2 changed files with 48 additions and 41 deletions
+39 -32
View File
@@ -6,24 +6,31 @@ from modules.gpu_processing import gpu_gaussian_blur, gpu_resize, gpu_cvt_color
def apply_color_transfer(source, target): def apply_color_transfer(source, target):
""" """
Apply color transfer from target to source image Apply color transfer from target to source image using LAB color space.
Uses float32 throughout for performance (sufficient precision for 8-bit images).
""" """
source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32") # Convert to float32 [0,1] range for proper LAB conversion
target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32") source_f32 = source.astype(np.float32) / 255.0
target_f32 = target.astype(np.float32) / 255.0
source_mean, source_std = cv2.meanStdDev(source) source_lab = cv2.cvtColor(source_f32, cv2.COLOR_BGR2LAB)
target_mean, target_std = cv2.meanStdDev(target) target_lab = cv2.cvtColor(target_f32, cv2.COLOR_BGR2LAB)
# Reshape mean and std to be broadcastable source_mean, source_std = cv2.meanStdDev(source_lab)
source_mean = source_mean.reshape(1, 1, 3) target_mean, target_std = cv2.meanStdDev(target_lab)
source_std = source_std.reshape(1, 1, 3)
target_mean = target_mean.reshape(1, 1, 3)
target_std = target_std.reshape(1, 1, 3)
# Perform the color transfer # Reshape mean and std to be broadcastable (already float64 from meanStdDev, cast to f32)
source = (source - source_mean) * (target_std / source_std) + target_mean source_mean = source_mean.reshape(1, 1, 3).astype(np.float32)
source_std = np.maximum(source_std.reshape(1, 1, 3), 1e-6).astype(np.float32)
target_mean = target_mean.reshape(1, 1, 3).astype(np.float32)
target_std = target_std.reshape(1, 1, 3).astype(np.float32)
return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR) # Perform the color transfer in LAB space
result_lab = (source_lab - source_mean) * (target_std / source_std) + target_mean
# Convert back to BGR and uint8
result_bgr = cv2.cvtColor(result_lab, cv2.COLOR_LAB2BGR)
return np.clip(result_bgr * 255.0, 0, 255).astype(np.uint8)
def create_face_mask(face: Face, frame: Frame) -> np.ndarray: def create_face_mask(face: Face, frame: Frame) -> np.ndarray:
mask = np.zeros(frame.shape[:2], dtype=np.uint8) mask = np.zeros(frame.shape[:2], dtype=np.uint8)
@@ -48,16 +55,14 @@ def create_face_mask(face: Face, frame: Frame) -> np.ndarray:
# Create a slightly larger convex hull for padding # Create a slightly larger convex hull for padding
face_outline = landmarks[0:33] face_outline = landmarks[0:33]
hull = cv2.convexHull(face_outline) hull = cv2.convexHull(face_outline)
hull_padded = [] # Vectorized hull padding — expand each point outward from center
for point in hull: center = np.mean(face_outline, axis=0, dtype=np.float32)
x, y = point[0] hull_pts = hull.reshape(-1, 2).astype(np.float32)
center = np.mean(face_outline, axis=0) directions = hull_pts - center
direction = np.array([x, y]) - center norms = np.linalg.norm(directions, axis=1, keepdims=True)
direction = direction / np.linalg.norm(direction) norms = np.maximum(norms, 1e-6) # avoid division by zero
padded_point = np.array([x, y]) + direction * padding directions /= norms
hull_padded.append(padded_point) hull_padded = (hull_pts + directions * padding).astype(np.int32)
hull_padded = np.array(hull_padded, dtype=np.int32)
# Fill the padded convex hull # Fill the padded convex hull
cv2.fillConvexPoly(mask, hull_padded, 255) cv2.fillConvexPoly(mask, hull_padded, 255)
@@ -468,26 +473,28 @@ def apply_mask_area(
box_height // modules.globals.mask_feather_ratio, box_height // modules.globals.mask_feather_ratio,
) )
feathered_mask = cv2.GaussianBlur( feathered_mask = cv2.GaussianBlur(
polygon_mask.astype(float), (0, 0), feather_amount polygon_mask.astype(np.float32), (0, 0), feather_amount
) )
feathered_mask = feathered_mask / feathered_mask.max() max_val = feathered_mask.max()
if max_val > 1e-6:
feathered_mask *= np.float32(1.0 / max_val)
# Apply additional smoothing to the mask edges # Apply additional smoothing to the mask edges
feathered_mask = cv2.GaussianBlur(feathered_mask, (5, 5), 1) feathered_mask = cv2.GaussianBlur(feathered_mask, (5, 5), 1)
face_mask_roi = face_mask[min_y:max_y, min_x:max_x] face_mask_roi = face_mask[min_y:max_y, min_x:max_x]
combined_mask = feathered_mask * (face_mask_roi / 255.0) combined_mask = feathered_mask * (face_mask_roi.astype(np.float32) * np.float32(1.0 / 255.0))
combined_mask = combined_mask[:, :, np.newaxis] combined_mask_3ch = combined_mask[:, :, np.newaxis]
inv_mask = np.float32(1.0) - combined_mask_3ch
blended = ( blended = (
color_corrected_area * combined_mask + roi * (1 - combined_mask) color_corrected_area * combined_mask_3ch + roi * inv_mask
).astype(np.uint8) ).astype(np.uint8)
# Apply face mask to blended result # Apply face mask to blended result
face_mask_3channel = ( face_mask_f32 = face_mask_roi[:, :, np.newaxis].astype(np.float32) * np.float32(1.0 / 255.0)
np.repeat(face_mask_roi[:, :, np.newaxis], 3, axis=2) / 255.0 face_mask_3channel = np.broadcast_to(face_mask_f32, blended.shape)
) final_blend = blended * face_mask_3channel + roi * (np.float32(1.0) - face_mask_3channel)
final_blend = blended * face_mask_3channel + roi * (1 - face_mask_3channel)
frame[min_y:max_y, min_x:max_x] = final_blend.astype(np.uint8) frame[min_y:max_y, min_x:max_x] = final_blend.astype(np.uint8)
except Exception as e: except Exception as e:
+9 -9
View File
@@ -1004,7 +1004,7 @@ def apply_mouth_area(
feather_amount = max(1, min(30, feather_base_dim // max(1, mask_feather_ratio))) # Avoid div by zero feather_amount = max(1, min(30, feather_base_dim // max(1, mask_feather_ratio))) # Avoid div by zero
# Ensure kernel size is odd and positive # Ensure kernel size is odd and positive
kernel_size = 2 * feather_amount + 1 kernel_size = 2 * feather_amount + 1
feathered_polygon_mask = cv2.GaussianBlur(polygon_mask_roi.astype(float), (kernel_size, kernel_size), 0) feathered_polygon_mask = cv2.GaussianBlur(polygon_mask_roi.astype(np.float32), (kernel_size, kernel_size), 0)
# Normalize feathered mask to [0.0, 1.0] range # Normalize feathered mask to [0.0, 1.0] range
max_val = feathered_polygon_mask.max() max_val = feathered_polygon_mask.max()
@@ -1019,9 +1019,9 @@ def apply_mouth_area(
# Get the corresponding ROI from the *full face mask* (already blurred) # Get the corresponding ROI from the *full face mask* (already blurred)
# Ensure face_mask is float and normalized [0.0, 1.0] # Ensure face_mask is float and normalized [0.0, 1.0]
if face_mask.dtype != np.float64 and face_mask.dtype != np.float32: if face_mask.dtype != np.float64 and face_mask.dtype != np.float32:
face_mask_float = face_mask.astype(float) / 255.0 face_mask_float = face_mask.astype(np.float32) / 255.0
else: # Assume already float [0,1] if type is float else: # Assume already float [0,1] if type is float
face_mask_float = face_mask face_mask_float = face_mask.astype(np.float32) if face_mask.dtype == np.float64 else face_mask
face_mask_roi = face_mask_float[min_y:max_y, min_x:max_x] face_mask_roi = face_mask_float[min_y:max_y, min_x:max_x]
# Combine the feathered mouth polygon mask with the face mask ROI # Combine the feathered mouth polygon mask with the face mask ROI
@@ -1033,14 +1033,14 @@ def apply_mouth_area(
if len(frame.shape) == 3 and frame.shape[2] == 3: if len(frame.shape) == 3 and frame.shape[2] == 3:
combined_mask_3channel = combined_mask[:, :, np.newaxis] combined_mask_3channel = combined_mask[:, :, np.newaxis]
# Ensure data types are compatible for blending (float or double for mask, uint8 for images) # Ensure data types are compatible for blending
color_corrected_mouth_uint8 = color_corrected_mouth.astype(np.uint8) # float32 provides sufficient precision for 8-bit image blending
roi_uint8 = roi.astype(np.uint8) combined_mask_f32 = combined_mask_3channel.astype(np.float32)
combined_mask_float = combined_mask_3channel.astype(np.float64) # Use float64 for precision in mask inv_mask = np.float32(1.0) - combined_mask_f32
# Blend: (original_mouth * combined_mask) + (swapped_face_roi * (1 - combined_mask)) # Blend: (original_mouth * combined_mask) + (swapped_face_roi * (1 - combined_mask))
blended_roi = (color_corrected_mouth_uint8 * combined_mask_float + blended_roi = (color_corrected_mouth * combined_mask_f32 +
roi_uint8 * (1.0 - combined_mask_float)) roi * inv_mask)
# Place the blended ROI back into the frame # Place the blended ROI back into the frame
frame[min_y:max_y, min_x:max_x] = blended_roi.astype(np.uint8) frame[min_y:max_y, min_x:max_x] = blended_roi.astype(np.uint8)