Merge pull request #1665 from laurigates/pr/perf-pipeline-threading

perf(ui): decouple face detection from swap in live webcam pipeline
2026-02-23 00:59:22 +08:00
parent d89385457e ca6cba9311
commit 7ec3a4be29
1 changed files with 86 additions and 41 deletions
@@ -997,28 +997,48 @@ def _capture_thread_func(cap, capture_queue, stop_event):
                pass
-# How often to run full face detection. On intermediate frames the last
+def _detection_thread_func(latest_frame_holder, detection_result, detection_lock, stop_event):
-# detected face positions are reused, which significantly reduces the
+    """Detection thread: continuously runs face detection on the latest
-# per-frame cost of the processing thread.
+    captured frame and stores results in detection_result under detection_lock.
-DETECT_EVERY_N = 2
+
    This decouples face detection (~15-30ms) from face swapping (~5-10ms)
    so the swap loop never blocks on detection, significantly improving
    live mode FPS."""
    while not stop_event.is_set():
        with detection_lock:
            frame = latest_frame_holder[0]
        if frame is None:
            time.sleep(0.005)
            continue
        if modules.globals.many_faces:
            many = get_many_faces(frame)
            with detection_lock:
                detection_result['target_face'] = None
                detection_result['many_faces'] = many
        else:
            face = get_one_face(frame)
            with detection_lock:
                detection_result['target_face'] = face
                detection_result['many_faces'] = None
-def _processing_thread_func(capture_queue, processed_queue, stop_event):
+def _processing_thread_func(capture_queue, processed_queue, stop_event,
-    """Processing thread: takes raw frames from capture_queue, applies face
+                             latest_frame_holder, detection_result, detection_lock):
-    processing, and puts results into processed_queue. Drops processed frames
+    """Processing thread: takes raw frames from capture_queue, reads the
-    when the output queue is full so the UI always gets the latest result.
+    latest detection result from the shared detection_result dict, applies
    face swap/enhancement, and puts results into processed_queue.
-    Uses DETECT_EVERY_N to skip expensive face detection on intermediate
+    Face detection runs concurrently in _detection_thread_func — this thread
-    frames, reusing cached face positions instead."""
+    only reads cached results so it never blocks on detection."""
    frame_processors = get_frame_processors_modules(modules.globals.frame_processors)
    source_image = None
    last_source_path = None
    prev_time = time.time()
    fps_update_interval = 0.5
    frame_count = 0
    fps = 0
    proc_frame_index = 0
    cached_target_face = None  # cached single-face result
    cached_many_faces = None   # cached many-faces result
    while not stop_event.is_set():
        try:
@@ -1026,32 +1046,31 @@ def _processing_thread_func(capture_queue, processed_queue, stop_event):
        except queue.Empty:
            continue
-        temp_frame = frame.copy()
+        temp_frame = frame
        run_detection = (proc_frame_index % DETECT_EVERY_N == 0)
        proc_frame_index += 1
        if modules.globals.live_mirror:
            temp_frame = gpu_flip(temp_frame, 1)
        # Publish the mirrored frame for the detection thread to pick up
        with detection_lock:
            latest_frame_holder[0] = temp_frame
        if not modules.globals.map_faces:
-            if source_image is None and modules.globals.source_path:
+            if modules.globals.source_path and modules.globals.source_path != last_source_path:
                last_source_path = modules.globals.source_path
                source_image = get_one_face(cv2.imread(modules.globals.source_path))
-            # Update face detection cache on detection frames
+            # Read latest detection results (brief lock to avoid blocking detection thread)
-            if run_detection or (cached_target_face is None and cached_many_faces is None):
+            with detection_lock:
-                if modules.globals.many_faces:
+                cached_target_face = detection_result.get('target_face')
-                    cached_many_faces = get_many_faces(temp_frame)
+                cached_many_faces = detection_result.get('many_faces')
                    cached_target_face = None
                else:
                    cached_target_face = get_one_face(temp_frame)
                    cached_many_faces = None
            for frame_processor in frame_processors:
                if frame_processor.NAME == "DLC.FACE-ENHANCER":
                    if modules.globals.fp_ui["face_enhancer"]:
                        temp_frame = frame_processor.process_frame(None, temp_frame)
                elif frame_processor.NAME == "DLC.FACE-SWAPPER":
-                    # Use cached face positions to skip redundant detection
+                    # Use cached face positions from detection thread
                    swapped_bboxes = []
                    if modules.globals.many_faces and cached_many_faces:
                        result = temp_frame.copy()
@@ -1127,6 +1146,14 @@ def create_webcam_preview(camera_index: int):
    processed_queue = queue.Queue(maxsize=2)
    stop_event = threading.Event()
    # Shared state for the detection pipeline.
    # latest_frame_holder[0] is the newest frame (mirrored when live_mirror is
    # on) for the detection thread; detection_result holds the last detected
    # faces for the processing thread to read.  Both are guarded by detection_lock.
    detection_lock = threading.Lock()
    latest_frame_holder = [None]
    detection_result = {'target_face': None, 'many_faces': None}
    # Start capture thread
    cap_thread = threading.Thread(
        target=_capture_thread_func,
@@ -1135,21 +1162,45 @@ def create_webcam_preview(camera_index: int):
    )
    cap_thread.start()
    # Start detection thread — runs face detection asynchronously so the
    # processing/swap thread never blocks on it
    det_thread = threading.Thread(
        target=_detection_thread_func,
        args=(latest_frame_holder, detection_result, detection_lock, stop_event),
        daemon=True,
    )
    det_thread.start()
    # Start processing thread
    proc_thread = threading.Thread(
        target=_processing_thread_func,
-        args=(capture_queue, processed_queue, stop_event),
+        args=(capture_queue, processed_queue, stop_event,
              latest_frame_holder, detection_result, detection_lock),
        daemon=True,
    )
    proc_thread.start()
-    # Main (UI) thread: pull processed frames and update the display
+    # Cleanup helper called from the display loop when preview closes
-    while not stop_event.is_set():
+    def _cleanup():
        stop_event.set()
        cap_thread.join(timeout=2.0)
        det_thread.join(timeout=2.0)
        proc_thread.join(timeout=2.0)
        cap.release()
        PREVIEW.withdraw()
    # Non-blocking display loop using ROOT.after() — avoids blocking the
    # Tk event loop which could cause UI freezes or re-entrancy issues
    def _display_next_frame():
        if stop_event.is_set() or PREVIEW.state() == "withdrawn":
            _cleanup()
            return
        try:
-            temp_frame = processed_queue.get(timeout=0.03)
+            temp_frame = processed_queue.get_nowait()
        except queue.Empty:
-            ROOT.update()
+            ROOT.after(16, _display_next_frame)
-            continue
+            return
        if modules.globals.live_resizable:
            temp_frame = fit_image_to_size(
@@ -1167,17 +1218,11 @@ def create_webcam_preview(camera_index: int):
        )
        image = ctk.CTkImage(image, size=image.size)
        preview_label.configure(image=image)
        ROOT.update()
-        if PREVIEW.state() == "withdrawn":
+        ROOT.after(16, _display_next_frame)
            break
-    # Signal threads to stop and wait for them
+    # Kick off the non-blocking display loop
-    stop_event.set()
+    ROOT.after(0, _display_next_frame)
    cap_thread.join(timeout=2.0)
    proc_thread.join(timeout=2.0)
    cap.release()
    PREVIEW.withdraw()
 def create_source_target_popup_for_webcam(