diff --git a/modules/processors/frame/face_masking.py b/modules/processors/frame/face_masking.py
index 2329818..3435b7e 100644
--- a/modules/processors/frame/face_masking.py
+++ b/modules/processors/frame/face_masking.py
@@ -6,24 +6,31 @@ from modules.gpu_processing import gpu_gaussian_blur, gpu_resize, gpu_cvt_color
 
 def apply_color_transfer(source, target):
     """
-    Apply color transfer from target to source image
+    Apply color transfer from target to source image using LAB color space.
+    Uses float32 throughout for performance (sufficient precision for 8-bit images).
     """
-    source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype("float32")
-    target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype("float32")
+    # Convert to float32 [0,1] range for proper LAB conversion
+    source_f32 = source.astype(np.float32) / 255.0
+    target_f32 = target.astype(np.float32) / 255.0
 
-    source_mean, source_std = cv2.meanStdDev(source)
-    target_mean, target_std = cv2.meanStdDev(target)
+    source_lab = cv2.cvtColor(source_f32, cv2.COLOR_BGR2LAB)
+    target_lab = cv2.cvtColor(target_f32, cv2.COLOR_BGR2LAB)
 
-    # Reshape mean and std to be broadcastable
-    source_mean = source_mean.reshape(1, 1, 3)
-    source_std = source_std.reshape(1, 1, 3)
-    target_mean = target_mean.reshape(1, 1, 3)
-    target_std = target_std.reshape(1, 1, 3)
+    source_mean, source_std = cv2.meanStdDev(source_lab)
+    target_mean, target_std = cv2.meanStdDev(target_lab)
 
-    # Perform the color transfer
-    source = (source - source_mean) * (target_std / source_std) + target_mean
+    # Reshape mean and std to be broadcastable (already float64 from meanStdDev, cast to f32)
+    source_mean = source_mean.reshape(1, 1, 3).astype(np.float32)
+    source_std = np.maximum(source_std.reshape(1, 1, 3), 1e-6).astype(np.float32)
+    target_mean = target_mean.reshape(1, 1, 3).astype(np.float32)
+    target_std = target_std.reshape(1, 1, 3).astype(np.float32)
 
-    return cv2.cvtColor(np.clip(source, 0, 255).astype("uint8"), cv2.COLOR_LAB2BGR)
+    # Perform the color transfer in LAB space
+    result_lab = (source_lab - source_mean) * (target_std / source_std) + target_mean
+
+    # Convert back to BGR and uint8
+    result_bgr = cv2.cvtColor(result_lab, cv2.COLOR_LAB2BGR)
+    return np.clip(result_bgr * 255.0, 0, 255).astype(np.uint8)
 
 def create_face_mask(face: Face, frame: Frame) -> np.ndarray:
     mask = np.zeros(frame.shape[:2], dtype=np.uint8)
@@ -48,16 +55,14 @@ def create_face_mask(face: Face, frame: Frame) -> np.ndarray:
         # Create a slightly larger convex hull for padding
         face_outline = landmarks[0:33]
         hull = cv2.convexHull(face_outline)
-        hull_padded = []
-        for point in hull:
-            x, y = point[0]
-            center = np.mean(face_outline, axis=0)
-            direction = np.array([x, y]) - center
-            direction = direction / np.linalg.norm(direction)
-            padded_point = np.array([x, y]) + direction * padding
-            hull_padded.append(padded_point)
-
-        hull_padded = np.array(hull_padded, dtype=np.int32)
+        # Vectorized hull padding — expand each point outward from center
+        center = np.mean(face_outline, axis=0, dtype=np.float32)
+        hull_pts = hull.reshape(-1, 2).astype(np.float32)
+        directions = hull_pts - center
+        norms = np.linalg.norm(directions, axis=1, keepdims=True)
+        norms = np.maximum(norms, 1e-6)  # avoid division by zero
+        directions /= norms
+        hull_padded = (hull_pts + directions * padding).astype(np.int32)
 
         # Fill the padded convex hull
         cv2.fillConvexPoly(mask, hull_padded, 255)
@@ -468,26 +473,28 @@ def apply_mask_area(
             box_height // modules.globals.mask_feather_ratio,
         )
         feathered_mask = cv2.GaussianBlur(
-            polygon_mask.astype(float), (0, 0), feather_amount
+            polygon_mask.astype(np.float32), (0, 0), feather_amount
         )
-        feathered_mask = feathered_mask / feathered_mask.max()
+        max_val = feathered_mask.max()
+        if max_val > 1e-6:
+            feathered_mask *= np.float32(1.0 / max_val)
 
         # Apply additional smoothing to the mask edges
         feathered_mask = cv2.GaussianBlur(feathered_mask, (5, 5), 1)
 
         face_mask_roi = face_mask[min_y:max_y, min_x:max_x]
-        combined_mask = feathered_mask * (face_mask_roi / 255.0)
+        combined_mask = feathered_mask * (face_mask_roi.astype(np.float32) * np.float32(1.0 / 255.0))
 
-        combined_mask = combined_mask[:, :, np.newaxis]
+        combined_mask_3ch = combined_mask[:, :, np.newaxis]
+        inv_mask = np.float32(1.0) - combined_mask_3ch
         blended = (
-            color_corrected_area * combined_mask + roi * (1 - combined_mask)
+            color_corrected_area * combined_mask_3ch + roi * inv_mask
         ).astype(np.uint8)
 
         # Apply face mask to blended result
-        face_mask_3channel = (
-            np.repeat(face_mask_roi[:, :, np.newaxis], 3, axis=2) / 255.0
-        )
-        final_blend = blended * face_mask_3channel + roi * (1 - face_mask_3channel)
+        face_mask_f32 = face_mask_roi[:, :, np.newaxis].astype(np.float32) * np.float32(1.0 / 255.0)
+        face_mask_3channel = np.broadcast_to(face_mask_f32, blended.shape)
+        final_blend = blended * face_mask_3channel + roi * (np.float32(1.0) - face_mask_3channel)
 
         frame[min_y:max_y, min_x:max_x] = final_blend.astype(np.uint8)
     except Exception as e:
diff --git a/modules/processors/frame/face_swapper.py b/modules/processors/frame/face_swapper.py
index 0f27386..193d693 100644
--- a/modules/processors/frame/face_swapper.py
+++ b/modules/processors/frame/face_swapper.py
@@ -1004,7 +1004,7 @@ def apply_mouth_area(
         feather_amount = max(1, min(30, feather_base_dim // max(1, mask_feather_ratio))) # Avoid div by zero
         # Ensure kernel size is odd and positive
         kernel_size = 2 * feather_amount + 1
-        feathered_polygon_mask = cv2.GaussianBlur(polygon_mask_roi.astype(float), (kernel_size, kernel_size), 0)
+        feathered_polygon_mask = cv2.GaussianBlur(polygon_mask_roi.astype(np.float32), (kernel_size, kernel_size), 0)
 
         # Normalize feathered mask to [0.0, 1.0] range
         max_val = feathered_polygon_mask.max()
@@ -1019,9 +1019,9 @@ def apply_mouth_area(
         # Get the corresponding ROI from the *full face mask* (already blurred)
         # Ensure face_mask is float and normalized [0.0, 1.0]
         if face_mask.dtype != np.float64 and face_mask.dtype != np.float32:
-            face_mask_float = face_mask.astype(float) / 255.0
+            face_mask_float = face_mask.astype(np.float32) / 255.0
         else: # Assume already float [0,1] if type is float
-            face_mask_float = face_mask
+            face_mask_float = face_mask.astype(np.float32) if face_mask.dtype == np.float64 else face_mask
         face_mask_roi = face_mask_float[min_y:max_y, min_x:max_x]
 
         # Combine the feathered mouth polygon mask with the face mask ROI
@@ -1033,14 +1033,14 @@ def apply_mouth_area(
         if len(frame.shape) == 3 and frame.shape[2] == 3:
             combined_mask_3channel = combined_mask[:, :, np.newaxis]
 
-            # Ensure data types are compatible for blending (float or double for mask, uint8 for images)
-            color_corrected_mouth_uint8 = color_corrected_mouth.astype(np.uint8)
-            roi_uint8 = roi.astype(np.uint8)
-            combined_mask_float = combined_mask_3channel.astype(np.float64) # Use float64 for precision in mask
+            # Ensure data types are compatible for blending
+            # float32 provides sufficient precision for 8-bit image blending
+            combined_mask_f32 = combined_mask_3channel.astype(np.float32)
+            inv_mask = np.float32(1.0) - combined_mask_f32
 
             # Blend: (original_mouth * combined_mask) + (swapped_face_roi * (1 - combined_mask))
-            blended_roi = (color_corrected_mouth_uint8 * combined_mask_float +
-                           roi_uint8 * (1.0 - combined_mask_float))
+            blended_roi = (color_corrected_mouth * combined_mask_f32 +
+                           roi * inv_mask)
 
             # Place the blended ROI back into the frame
             frame[min_y:max_y, min_x:max_x] = blended_roi.astype(np.uint8)