Merge pull request #1665 from laurigates/pr/perf-pipeline-threading
perf(ui): decouple face detection from swap in live webcam pipeline
This commit is contained in:
+86
-41
@@ -997,28 +997,48 @@ def _capture_thread_func(cap, capture_queue, stop_event):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
# How often to run full face detection. On intermediate frames the last
|
def _detection_thread_func(latest_frame_holder, detection_result, detection_lock, stop_event):
|
||||||
# detected face positions are reused, which significantly reduces the
|
"""Detection thread: continuously runs face detection on the latest
|
||||||
# per-frame cost of the processing thread.
|
captured frame and stores results in detection_result under detection_lock.
|
||||||
DETECT_EVERY_N = 2
|
|
||||||
|
This decouples face detection (~15-30ms) from face swapping (~5-10ms)
|
||||||
|
so the swap loop never blocks on detection, significantly improving
|
||||||
|
live mode FPS."""
|
||||||
|
while not stop_event.is_set():
|
||||||
|
with detection_lock:
|
||||||
|
frame = latest_frame_holder[0]
|
||||||
|
|
||||||
|
if frame is None:
|
||||||
|
time.sleep(0.005)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if modules.globals.many_faces:
|
||||||
|
many = get_many_faces(frame)
|
||||||
|
with detection_lock:
|
||||||
|
detection_result['target_face'] = None
|
||||||
|
detection_result['many_faces'] = many
|
||||||
|
else:
|
||||||
|
face = get_one_face(frame)
|
||||||
|
with detection_lock:
|
||||||
|
detection_result['target_face'] = face
|
||||||
|
detection_result['many_faces'] = None
|
||||||
|
|
||||||
|
|
||||||
def _processing_thread_func(capture_queue, processed_queue, stop_event):
|
def _processing_thread_func(capture_queue, processed_queue, stop_event,
|
||||||
"""Processing thread: takes raw frames from capture_queue, applies face
|
latest_frame_holder, detection_result, detection_lock):
|
||||||
processing, and puts results into processed_queue. Drops processed frames
|
"""Processing thread: takes raw frames from capture_queue, reads the
|
||||||
when the output queue is full so the UI always gets the latest result.
|
latest detection result from the shared detection_result dict, applies
|
||||||
|
face swap/enhancement, and puts results into processed_queue.
|
||||||
|
|
||||||
Uses DETECT_EVERY_N to skip expensive face detection on intermediate
|
Face detection runs concurrently in _detection_thread_func — this thread
|
||||||
frames, reusing cached face positions instead."""
|
only reads cached results so it never blocks on detection."""
|
||||||
frame_processors = get_frame_processors_modules(modules.globals.frame_processors)
|
frame_processors = get_frame_processors_modules(modules.globals.frame_processors)
|
||||||
source_image = None
|
source_image = None
|
||||||
|
last_source_path = None
|
||||||
prev_time = time.time()
|
prev_time = time.time()
|
||||||
fps_update_interval = 0.5
|
fps_update_interval = 0.5
|
||||||
frame_count = 0
|
frame_count = 0
|
||||||
fps = 0
|
fps = 0
|
||||||
proc_frame_index = 0
|
|
||||||
cached_target_face = None # cached single-face result
|
|
||||||
cached_many_faces = None # cached many-faces result
|
|
||||||
|
|
||||||
while not stop_event.is_set():
|
while not stop_event.is_set():
|
||||||
try:
|
try:
|
||||||
@@ -1026,32 +1046,31 @@ def _processing_thread_func(capture_queue, processed_queue, stop_event):
|
|||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
temp_frame = frame.copy()
|
temp_frame = frame
|
||||||
run_detection = (proc_frame_index % DETECT_EVERY_N == 0)
|
|
||||||
proc_frame_index += 1
|
|
||||||
|
|
||||||
if modules.globals.live_mirror:
|
if modules.globals.live_mirror:
|
||||||
temp_frame = gpu_flip(temp_frame, 1)
|
temp_frame = gpu_flip(temp_frame, 1)
|
||||||
|
|
||||||
|
# Publish the mirrored frame for the detection thread to pick up
|
||||||
|
with detection_lock:
|
||||||
|
latest_frame_holder[0] = temp_frame
|
||||||
|
|
||||||
if not modules.globals.map_faces:
|
if not modules.globals.map_faces:
|
||||||
if source_image is None and modules.globals.source_path:
|
if modules.globals.source_path and modules.globals.source_path != last_source_path:
|
||||||
|
last_source_path = modules.globals.source_path
|
||||||
source_image = get_one_face(cv2.imread(modules.globals.source_path))
|
source_image = get_one_face(cv2.imread(modules.globals.source_path))
|
||||||
|
|
||||||
# Update face detection cache on detection frames
|
# Read latest detection results (brief lock to avoid blocking detection thread)
|
||||||
if run_detection or (cached_target_face is None and cached_many_faces is None):
|
with detection_lock:
|
||||||
if modules.globals.many_faces:
|
cached_target_face = detection_result.get('target_face')
|
||||||
cached_many_faces = get_many_faces(temp_frame)
|
cached_many_faces = detection_result.get('many_faces')
|
||||||
cached_target_face = None
|
|
||||||
else:
|
|
||||||
cached_target_face = get_one_face(temp_frame)
|
|
||||||
cached_many_faces = None
|
|
||||||
|
|
||||||
for frame_processor in frame_processors:
|
for frame_processor in frame_processors:
|
||||||
if frame_processor.NAME == "DLC.FACE-ENHANCER":
|
if frame_processor.NAME == "DLC.FACE-ENHANCER":
|
||||||
if modules.globals.fp_ui["face_enhancer"]:
|
if modules.globals.fp_ui["face_enhancer"]:
|
||||||
temp_frame = frame_processor.process_frame(None, temp_frame)
|
temp_frame = frame_processor.process_frame(None, temp_frame)
|
||||||
elif frame_processor.NAME == "DLC.FACE-SWAPPER":
|
elif frame_processor.NAME == "DLC.FACE-SWAPPER":
|
||||||
# Use cached face positions to skip redundant detection
|
# Use cached face positions from detection thread
|
||||||
swapped_bboxes = []
|
swapped_bboxes = []
|
||||||
if modules.globals.many_faces and cached_many_faces:
|
if modules.globals.many_faces and cached_many_faces:
|
||||||
result = temp_frame.copy()
|
result = temp_frame.copy()
|
||||||
@@ -1127,6 +1146,14 @@ def create_webcam_preview(camera_index: int):
|
|||||||
processed_queue = queue.Queue(maxsize=2)
|
processed_queue = queue.Queue(maxsize=2)
|
||||||
stop_event = threading.Event()
|
stop_event = threading.Event()
|
||||||
|
|
||||||
|
# Shared state for the detection pipeline.
|
||||||
|
# latest_frame_holder[0] is the most recent (mirrored, if enabled) frame for the detection
|
||||||
|
# thread; detection_result holds the last detected faces for the
|
||||||
|
# processing thread to read. Both are guarded by detection_lock.
|
||||||
|
detection_lock = threading.Lock()
|
||||||
|
latest_frame_holder = [None]
|
||||||
|
detection_result = {'target_face': None, 'many_faces': None}
|
||||||
|
|
||||||
# Start capture thread
|
# Start capture thread
|
||||||
cap_thread = threading.Thread(
|
cap_thread = threading.Thread(
|
||||||
target=_capture_thread_func,
|
target=_capture_thread_func,
|
||||||
@@ -1135,21 +1162,45 @@ def create_webcam_preview(camera_index: int):
|
|||||||
)
|
)
|
||||||
cap_thread.start()
|
cap_thread.start()
|
||||||
|
|
||||||
|
# Start detection thread — runs face detection asynchronously so the
|
||||||
|
# processing/swap thread never blocks on it
|
||||||
|
det_thread = threading.Thread(
|
||||||
|
target=_detection_thread_func,
|
||||||
|
args=(latest_frame_holder, detection_result, detection_lock, stop_event),
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
det_thread.start()
|
||||||
|
|
||||||
# Start processing thread
|
# Start processing thread
|
||||||
proc_thread = threading.Thread(
|
proc_thread = threading.Thread(
|
||||||
target=_processing_thread_func,
|
target=_processing_thread_func,
|
||||||
args=(capture_queue, processed_queue, stop_event),
|
args=(capture_queue, processed_queue, stop_event,
|
||||||
|
latest_frame_holder, detection_result, detection_lock),
|
||||||
daemon=True,
|
daemon=True,
|
||||||
)
|
)
|
||||||
proc_thread.start()
|
proc_thread.start()
|
||||||
|
|
||||||
# Main (UI) thread: pull processed frames and update the display
|
# Cleanup helper called from the display loop when preview closes
|
||||||
while not stop_event.is_set():
|
def _cleanup():
|
||||||
|
stop_event.set()
|
||||||
|
cap_thread.join(timeout=2.0)
|
||||||
|
det_thread.join(timeout=2.0)
|
||||||
|
proc_thread.join(timeout=2.0)
|
||||||
|
cap.release()
|
||||||
|
PREVIEW.withdraw()
|
||||||
|
|
||||||
|
# Non-blocking display loop using ROOT.after() — avoids blocking the
|
||||||
|
# Tk event loop which could cause UI freezes or re-entrancy issues
|
||||||
|
def _display_next_frame():
|
||||||
|
if stop_event.is_set() or PREVIEW.state() == "withdrawn":
|
||||||
|
_cleanup()
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
temp_frame = processed_queue.get(timeout=0.03)
|
temp_frame = processed_queue.get_nowait()
|
||||||
except queue.Empty:
|
except queue.Empty:
|
||||||
ROOT.update()
|
ROOT.after(16, _display_next_frame)
|
||||||
continue
|
return
|
||||||
|
|
||||||
if modules.globals.live_resizable:
|
if modules.globals.live_resizable:
|
||||||
temp_frame = fit_image_to_size(
|
temp_frame = fit_image_to_size(
|
||||||
@@ -1167,17 +1218,11 @@ def create_webcam_preview(camera_index: int):
|
|||||||
)
|
)
|
||||||
image = ctk.CTkImage(image, size=image.size)
|
image = ctk.CTkImage(image, size=image.size)
|
||||||
preview_label.configure(image=image)
|
preview_label.configure(image=image)
|
||||||
ROOT.update()
|
|
||||||
|
|
||||||
if PREVIEW.state() == "withdrawn":
|
ROOT.after(16, _display_next_frame)
|
||||||
break
|
|
||||||
|
|
||||||
# Signal threads to stop and wait for them
|
# Kick off the non-blocking display loop
|
||||||
stop_event.set()
|
ROOT.after(0, _display_next_frame)
|
||||||
cap_thread.join(timeout=2.0)
|
|
||||||
proc_thread.join(timeout=2.0)
|
|
||||||
cap.release()
|
|
||||||
PREVIEW.withdraw()
|
|
||||||
|
|
||||||
|
|
||||||
def create_source_target_popup_for_webcam(
|
def create_source_target_popup_for_webcam(
|
||||||
|
|||||||
Reference in New Issue
Block a user