Merge pull request #566 from pereiraroland26/main

Added support for multiple faces
2024-09-10 23:35:19 +08:00
parent a11ccf9c49
commit 5d450b4352
8 changed files with 974 additions and 324 deletions
@@ -19,7 +19,7 @@ if platform.system() == 'Windows' or platform.system() == 'Linux':  # Windows or

 import modules.globals
 import modules.metadata
-from modules.face_analyser import get_one_face
+from modules.face_analyser import get_one_face, get_unique_faces_from_target_image, get_unique_faces_from_target_video, add_blank_map, has_valid_map, simplify_maps
 from modules.capturer import get_video_frame, get_video_frame_total
 from modules.processors.frame.core import get_frame_processors_modules
 from modules.utilities import is_image, is_video, resolve_relative_path
@@ -34,6 +34,22 @@ PREVIEW_MAX_WIDTH  = 1200
 PREVIEW_DEFAULT_WIDTH  = 960
 PREVIEW_DEFAULT_HEIGHT = 540

+POPUP_WIDTH = 750
+POPUP_HEIGHT = 810
+POPUP_SCROLL_WIDTH = 740, 
+POPUP_SCROLL_HEIGHT = 700
+
+POPUP_LIVE_WIDTH = 900
+POPUP_LIVE_HEIGHT = 820
+POPUP_LIVE_SCROLL_WIDTH = 890, 
+POPUP_LIVE_SCROLL_HEIGHT = 700
+
+MAPPER_PREVIEW_MAX_HEIGHT = 100
+MAPPER_PREVIEW_MAX_WIDTH = 100
+
+DEFAULT_BUTTON_WIDTH = 200
+DEFAULT_BUTTON_HEIGHT = 40
+
 RECENT_DIRECTORY_SOURCE = None
 RECENT_DIRECTORY_TARGET = None
 RECENT_DIRECTORY_OUTPUT = None
@@ -43,6 +59,11 @@ preview_slider = None
 source_label = None
 target_label = None
 status_label = None
+popup_status_label = None
+popup_status_label_live = None
+source_label_dict = {}
+source_label_dict_live = {}
+target_label_dict_live = {}

 img_ft, vid_ft = modules.globals.file_types

@@ -148,8 +169,12 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
    nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
    nsfw_switch.place(relx=0.6, rely=0.6125)

-    start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: select_output_path(start))
-    start_button.place(relx=0.15, rely=0.7, relwidth=0.2, relheight=0.05)
+    map_faces = ctk.BooleanVar(value=modules.globals.map_faces)
+    map_faces_switch = ctk.CTkSwitch(root, text='Map faces', variable=map_faces, cursor='hand2', command=lambda: setattr(modules.globals, 'map_faces', map_faces.get()))
+    map_faces_switch.place(relx=0.1, rely=0.75)
+
+    start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: analyze_target(start, root))
+    start_button.place(relx=0.15, rely=0.80, relwidth=0.2, relheight=0.05)

    stop_button = ctk.CTkButton(root, text='Destroy', cursor='hand2', command=destroy)
    stop_button.place(relx=0.4, rely=0.7, relwidth=0.2, relheight=0.05)
@@ -157,22 +182,8 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
    preview_button = ctk.CTkButton(root, text='Preview', cursor='hand2', command=toggle_preview)
    preview_button.place(relx=0.65, rely=0.7, relwidth=0.2, relheight=0.05)

-    camera_label = ctk.CTkLabel(root, text="Select Camera:")
-    camera_label.place(relx=0.4, rely=0.7525, relwidth=0.2, relheight=0.05)
-
-    available_cameras = get_available_cameras()
-    available_camera_strings = [str(cam) for cam in available_cameras]
-
-    camera_variable = ctk.StringVar(value=available_camera_strings[0] if available_camera_strings else "No cameras found")
-    camera_optionmenu = ctk.CTkOptionMenu(root, variable=camera_variable, values=available_camera_strings)
-    camera_optionmenu.place(relx=0.65, rely=0.7525, relwidth=0.2, relheight=0.05)
-
-    virtual_cam_out_value = ctk.BooleanVar(value=False)
-    virtual_cam_out_switch = ctk.CTkSwitch(root, text='Virtual Cam Output (OBS)', variable=virtual_cam_out_value, cursor='hand2')
-    virtual_cam_out_switch.place(relx=0.4, rely=0.805)
-
-    live_button = ctk.CTkButton(root, text='Live', cursor='hand2', command=lambda: webcam_preview(camera_variable.get(), virtual_cam_out_value.get()))
-    live_button.place(relx=0.15, rely=0.7525, relwidth=0.2, relheight=0.05)
+    live_button = ctk.CTkButton(root, text='Live', cursor='hand2', command=lambda: webcam_preview(root))
+    live_button.place(relx=0.40, rely=0.86, relwidth=0.2, relheight=0.05)

    status_label = ctk.CTkLabel(root, text=None, justify='center')
    status_label.place(relx=0.1, relwidth=0.8, rely=0.875)
@@ -184,6 +195,109 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C

    return root

+def analyze_target(start: Callable[[], None], root: ctk.CTk):
+    if POPUP != None and POPUP.winfo_exists():
+        update_status("Please complete pop-up or close it.")
+        return
+
+    if modules.globals.map_faces:
+        modules.globals.souce_target_map = []
+
+        if is_image(modules.globals.target_path):
+            update_status('Getting unique faces')
+            get_unique_faces_from_target_image()
+        elif is_video(modules.globals.target_path):
+            update_status('Getting unique faces')
+            get_unique_faces_from_target_video()
+
+        if len(modules.globals.souce_target_map) > 0:
+            create_source_target_popup(start, root, modules.globals.souce_target_map)
+        else:
+            update_status("No faces found in target")
+    else:
+        select_output_path(start)
+
+def create_source_target_popup(start: Callable[[], None], root: ctk.CTk, map: list) -> None:
+    global POPUP, popup_status_label
+
+    POPUP = ctk.CTkToplevel(root)
+    POPUP.title("Source x Target Mapper")
+    POPUP.geometry(f"{POPUP_WIDTH}x{POPUP_HEIGHT}")
+    POPUP.focus()
+
+    def on_submit_click(start):
+        if has_valid_map():
+            POPUP.destroy()
+            select_output_path(start)
+        else:
+            update_pop_status("Atleast 1 source with target is required!")
+
+    scrollable_frame = ctk.CTkScrollableFrame(POPUP, width=POPUP_SCROLL_WIDTH, height=POPUP_SCROLL_HEIGHT)
+    scrollable_frame.grid(row=0, column=0, padx=0, pady=0, sticky='nsew')
+
+    def on_button_click(map, button_num):
+        map = update_popup_source(scrollable_frame, map, button_num)
+
+    for item in map:
+        id = item['id']
+
+        button = ctk.CTkButton(scrollable_frame, text="Select source image", command=lambda id=id: on_button_click(map, id), width=DEFAULT_BUTTON_WIDTH, height=DEFAULT_BUTTON_HEIGHT)
+        button.grid(row=id, column=0, padx=50, pady=10)
+
+        x_label = ctk.CTkLabel(scrollable_frame, text=f"X", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+        x_label.grid(row=id, column=2, padx=10, pady=10)
+
+        image = Image.fromarray(cv2.cvtColor(item['target']['cv2'], cv2.COLOR_BGR2RGB))
+        image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+        tk_image = ctk.CTkImage(image, size=image.size)
+
+        target_image = ctk.CTkLabel(scrollable_frame, text=f"T-{id}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+        target_image.grid(row=id, column=3, padx=10, pady=10)
+        target_image.configure(image=tk_image)
+
+    popup_status_label = ctk.CTkLabel(POPUP, text=None, justify='center')
+    popup_status_label.grid(row=1, column=0, pady=15)
+
+    close_button = ctk.CTkButton(POPUP, text="Submit", command=lambda: on_submit_click(start))
+    close_button.grid(row=2, column=0, pady=10)
+
+
+def update_popup_source(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
+    global source_label_dict
+
+    source_path = ctk.filedialog.askopenfilename(title='select an source image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])
+
+    if "source" in map[button_num]:
+        map[button_num].pop("source")
+        source_label_dict[button_num].destroy()
+        del source_label_dict[button_num]
+        
+    if source_path == "":
+        return map
+    else:
+        cv2_img = cv2.imread(source_path)
+        face = get_one_face(cv2_img)
+
+        if face:
+            x_min, y_min, x_max, y_max = face['bbox']
+
+            map[button_num]['source'] = {
+                'cv2' : cv2_img[int(y_min):int(y_max), int(x_min):int(x_max)],
+                'face' : face
+                }
+            
+            image = Image.fromarray(cv2.cvtColor(map[button_num]['source']['cv2'], cv2.COLOR_BGR2RGB))
+            image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+            tk_image = ctk.CTkImage(image, size=image.size)
+            
+            source_image = ctk.CTkLabel(scrollable_frame, text=f"S-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+            source_image.grid(row=button_num, column=1, padx=10, pady=10)
+            source_image.configure(image=tk_image)
+            source_label_dict[button_num] = source_image
+        else:
+            update_pop_status("Face could not be detected in last upload!")
+        return map
+

 def create_preview(parent: ctk.CTk) -> ctk.CTkToplevel:
    global preview_label, preview_slider
@@ -206,6 +320,11 @@ def update_status(text: str) -> None:
    status_label.configure(text=text)
    ROOT.update()

+def update_pop_status(text: str) -> None:
+    popup_status_label.configure(text=text)
+
+def update_pop_live_status(text: str) -> None:
+    popup_status_label_live.configure(text=text)

 def update_tumbler(var: str, value: bool) -> None:
    modules.globals.fp_ui[var] = value
@@ -394,10 +513,75 @@ def fit_image_to_size(image, width: int, height: int):
    new_size = (int(ratio * w), int(ratio * h))
    return cv2.resize(image, dsize=new_size)

-def webcam_preview(camera_name: str, virtual_cam_output: bool):
-    if modules.globals.source_path is None:
-        return

+def render_image_preview(image_path: str, size: Tuple[int, int]) -> ctk.CTkImage:
+    image = Image.open(image_path)
+    if size:
+        image = ImageOps.fit(image, size, Image.LANCZOS)
+    return ctk.CTkImage(image, size=image.size)
+
+
+def render_video_preview(video_path: str, size: Tuple[int, int], frame_number: int = 0) -> ctk.CTkImage:
+    capture = cv2.VideoCapture(video_path)
+    if frame_number:
+        capture.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
+    has_frame, frame = capture.read()
+    if has_frame:
+        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+        if size:
+            image = ImageOps.fit(image, size, Image.LANCZOS)
+        return ctk.CTkImage(image, size=image.size)
+    capture.release()
+    cv2.destroyAllWindows()
+
+
+def toggle_preview() -> None:
+    if PREVIEW.state() == 'normal':
+        PREVIEW.withdraw()
+    elif modules.globals.source_path and modules.globals.target_path:
+        init_preview()
+        update_preview()
+
+
+def init_preview() -> None:
+    if is_image(modules.globals.target_path):
+        preview_slider.pack_forget()
+    if is_video(modules.globals.target_path):
+        video_frame_total = get_video_frame_total(modules.globals.target_path)
+        preview_slider.configure(to=video_frame_total)
+        preview_slider.pack(fill='x')
+        preview_slider.set(0)
+
+
+def update_preview(frame_number: int = 0) -> None:
+    if modules.globals.source_path and modules.globals.target_path:
+        update_status('Processing...')
+        temp_frame = get_video_frame(modules.globals.target_path, frame_number)
+        if modules.globals.nsfw_filter and check_and_ignore_nsfw(temp_frame):
+            return
+        for frame_processor in get_frame_processors_modules(modules.globals.frame_processors):
+            temp_frame = frame_processor.process_frame(
+                get_one_face(cv2.imread(modules.globals.source_path)),
+                temp_frame
+            )
+        image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
+        image = ImageOps.contain(image, (PREVIEW_MAX_WIDTH, PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+        image = ctk.CTkImage(image, size=image.size)
+        preview_label.configure(image=image)
+        update_status('Processing succeed!')
+        PREVIEW.deiconify()
+
+def webcam_preview(root: ctk.CTk):
+    if not modules.globals.map_faces:
+        if modules.globals.source_path is None:
+            # No image selected
+            return
+        create_webcam_preview()
+    else:
+        modules.globals.souce_target_map = []
+        create_source_target_popup_for_webcam(root, modules.globals.souce_target_map)
+
+def create_webcam_preview():
    global preview_label, PREVIEW

    WIDTH = 960
@@ -440,11 +624,7 @@ def webcam_preview(camera_name: str, virtual_cam_output: bool):
            while preview_running:
                preview_running = webcam_preview_loop(camera, source_image, frame_processors, virtual_cam)

-    while preview_running:
-        preview_running = webcam_preview_loop(camera, source_image, frame_processors)
-
-    if camera: camera.release()
-    PREVIEW.withdraw()
+        temp_frame = frame.copy()  #Create a copy of the frame


 def get_camera_index_by_name(camera_name: str) -> int:
@@ -459,6 +639,18 @@ def get_camera_index_by_name(camera_name: str) -> int:
        return get_available_cameras().index(camera_name)
    return -1

+        if not modules.globals.map_faces:
+            # Select and save face image only once
+            if source_image is None and modules.globals.source_path:
+                source_image = get_one_face(cv2.imread(modules.globals.source_path))
+
+            for frame_processor in frame_processors:
+                    temp_frame = frame_processor.process_frame(source_image, temp_frame)
+        else:
+            modules.globals.target_path = None
+
+            for frame_processor in frame_processors:
+                temp_frame = frame_processor.process_frame_v2(temp_frame)

 def get_available_cameras():
    """Get available camera names (cross-platform)."""
@@ -490,5 +682,156 @@ def get_available_cameras():
                cap.release()
                index += 1

-        available_cameras = devices
-    return available_cameras
+        if PREVIEW.state() == 'withdrawn':
+            break
+
+    camera.release()
+    PREVIEW.withdraw()  # Close preview window when loop is finished
+
+
+def create_source_target_popup_for_webcam(root: ctk.CTk, map: list) -> None:
+    global POPUP_LIVE, popup_status_label_live
+
+    POPUP_LIVE = ctk.CTkToplevel(root)
+    POPUP_LIVE.title("Source x Target Mapper")
+    POPUP_LIVE.geometry(f"{POPUP_LIVE_WIDTH}x{POPUP_LIVE_HEIGHT}")
+    POPUP_LIVE.focus()
+
+    def on_submit_click():
+        if has_valid_map():
+            POPUP_LIVE.destroy()
+            simplify_maps()
+            create_webcam_preview()
+        else:
+            update_pop_live_status("Atleast 1 source with target is required!")
+
+    def on_add_click():
+        add_blank_map()
+        refresh_data(map)
+        update_pop_live_status("Please provide mapping!")
+
+    popup_status_label_live = ctk.CTkLabel(POPUP_LIVE, text=None, justify='center')
+    popup_status_label_live.grid(row=1, column=0, pady=15)
+
+    add_button = ctk.CTkButton(POPUP_LIVE, text="Add", command=lambda: on_add_click())
+    add_button.place(relx=0.2, rely=0.92, relwidth=0.2, relheight=0.05)
+
+    close_button = ctk.CTkButton(POPUP_LIVE, text="Submit", command=lambda: on_submit_click())
+    close_button.place(relx=0.6, rely=0.92, relwidth=0.2, relheight=0.05)
+
+
+def refresh_data(map: list):
+    global POPUP_LIVE
+
+    scrollable_frame = ctk.CTkScrollableFrame(POPUP_LIVE, width=POPUP_LIVE_SCROLL_WIDTH, height=POPUP_LIVE_SCROLL_HEIGHT)
+    scrollable_frame.grid(row=0, column=0, padx=0, pady=0, sticky='nsew')
+
+    def on_sbutton_click(map, button_num):
+        map = update_webcam_source(scrollable_frame, map, button_num)
+
+    def on_tbutton_click(map, button_num):
+        map = update_webcam_target(scrollable_frame, map, button_num)
+
+    for item in map:
+        id = item['id']
+
+        button = ctk.CTkButton(scrollable_frame, text="Select source image", command=lambda id=id: on_sbutton_click(map, id), width=DEFAULT_BUTTON_WIDTH, height=DEFAULT_BUTTON_HEIGHT)
+        button.grid(row=id, column=0, padx=30, pady=10)
+
+        x_label = ctk.CTkLabel(scrollable_frame, text=f"X", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+        x_label.grid(row=id, column=2, padx=10, pady=10)
+
+        button = ctk.CTkButton(scrollable_frame, text="Select target image", command=lambda id=id: on_tbutton_click(map, id), width=DEFAULT_BUTTON_WIDTH, height=DEFAULT_BUTTON_HEIGHT)
+        button.grid(row=id, column=3, padx=20, pady=10)
+
+        if "source" in item:
+            image = Image.fromarray(cv2.cvtColor(item['source']['cv2'], cv2.COLOR_BGR2RGB))
+            image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+            tk_image = ctk.CTkImage(image, size=image.size)
+
+            source_image = ctk.CTkLabel(scrollable_frame, text=f"S-{id}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+            source_image.grid(row=id, column=1, padx=10, pady=10)
+            source_image.configure(image=tk_image)
+
+        if "target" in item:
+            image = Image.fromarray(cv2.cvtColor(item['target']['cv2'], cv2.COLOR_BGR2RGB))
+            image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+            tk_image = ctk.CTkImage(image, size=image.size)
+
+            target_image = ctk.CTkLabel(scrollable_frame, text=f"T-{id}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+            target_image.grid(row=id, column=4, padx=20, pady=10)
+            target_image.configure(image=tk_image)
+
+
+def update_webcam_source(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
+    global source_label_dict_live
+
+    source_path = ctk.filedialog.askopenfilename(title='select an source image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])
+
+    if "source" in map[button_num]:
+        map[button_num].pop("source")
+        source_label_dict_live[button_num].destroy()
+        del source_label_dict_live[button_num]
+        
+    if source_path == "":
+        return map
+    else:
+        cv2_img = cv2.imread(source_path)
+        face = get_one_face(cv2_img)
+
+        if face:
+            x_min, y_min, x_max, y_max = face['bbox']
+
+            map[button_num]['source'] = {
+                'cv2' : cv2_img[int(y_min):int(y_max), int(x_min):int(x_max)],
+                'face' : face
+                }
+            
+            image = Image.fromarray(cv2.cvtColor(map[button_num]['source']['cv2'], cv2.COLOR_BGR2RGB))
+            image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+            tk_image = ctk.CTkImage(image, size=image.size)
+            
+            source_image = ctk.CTkLabel(scrollable_frame, text=f"S-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+            source_image.grid(row=button_num, column=1, padx=10, pady=10)
+            source_image.configure(image=tk_image)
+            source_label_dict_live[button_num] = source_image
+        else:
+            update_pop_live_status("Face could not be detected in last upload!")
+        return map
+    
+def update_webcam_target(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
+    global target_label_dict_live
+
+    target_path = ctk.filedialog.askopenfilename(title='select an target image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])
+
+    if "target" in map[button_num]:
+        map[button_num].pop("target")
+        target_label_dict_live[button_num].destroy()
+        del target_label_dict_live[button_num]
+        
+    if target_path == "":
+        return map
+    else:
+        cv2_img = cv2.imread(target_path)
+        face = get_one_face(cv2_img)
+
+        if face:
+            x_min, y_min, x_max, y_max = face['bbox']
+
+            map[button_num]['target'] = {
+                'cv2' : cv2_img[int(y_min):int(y_max), int(x_min):int(x_max)],
+                'face' : face
+                }
+            
+            image = Image.fromarray(cv2.cvtColor(map[button_num]['target']['cv2'], cv2.COLOR_BGR2RGB))
+            image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
+            tk_image = ctk.CTkImage(image, size=image.size)
+            
+            target_image = ctk.CTkLabel(scrollable_frame, text=f"T-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
+            target_image.grid(row=button_num, column=4, padx=20, pady=10)
+            target_image.configure(image=tk_image)
+            target_label_dict_live[button_num] = target_image
+        else:
+            update_pop_live_status("Face could not be detected in last upload!")
+        return map
+