Merge pull request #566 from pereiraroland26/main

Added support for multiple faces
This commit is contained in:
Kenneth Estanislao
2024-09-10 23:35:19 +08:00
parent a11ccf9c49
commit 5d450b4352
8 changed files with 974 additions and 324 deletions
+372 -29
View File
@@ -19,7 +19,7 @@ if platform.system() == 'Windows' or platform.system() == 'Linux': # Windows or
import modules.globals
import modules.metadata
from modules.face_analyser import get_one_face
from modules.face_analyser import get_one_face, get_unique_faces_from_target_image, get_unique_faces_from_target_video, add_blank_map, has_valid_map, simplify_maps
from modules.capturer import get_video_frame, get_video_frame_total
from modules.processors.frame.core import get_frame_processors_modules
from modules.utilities import is_image, is_video, resolve_relative_path
@@ -34,6 +34,22 @@ PREVIEW_MAX_WIDTH = 1200
PREVIEW_DEFAULT_WIDTH = 960
PREVIEW_DEFAULT_HEIGHT = 540

# Geometry (pixels) of the "Source x Target Mapper" pop-up and of the
# scrollable frame inside it.
# NOTE: every value here must be a plain int. A trailing comma after the
# number would silently create a 1-tuple, which is not a valid widget size.
POPUP_WIDTH = 750
POPUP_HEIGHT = 810
POPUP_SCROLL_WIDTH = 740
POPUP_SCROLL_HEIGHT = 700

# Geometry (pixels) of the live/webcam variant of the mapper pop-up.
POPUP_LIVE_WIDTH = 900
POPUP_LIVE_HEIGHT = 820
POPUP_LIVE_SCROLL_WIDTH = 890
POPUP_LIVE_SCROLL_HEIGHT = 700

# Size (pixels) that face thumbnails are resized to inside the mapper pop-ups.
MAPPER_PREVIEW_MAX_HEIGHT = 100
MAPPER_PREVIEW_MAX_WIDTH = 100

# Size of the "Select source/target image" buttons in the mapper pop-ups.
DEFAULT_BUTTON_WIDTH = 200
DEFAULT_BUTTON_HEIGHT = 40

# RECENT_DIRECTORY_SOURCE is passed as `initialdir` to the file dialogs.
# NOTE(review): the other two are presumably used the same way for target
# and output dialogs - confirm against the rest of the module.
RECENT_DIRECTORY_SOURCE = None
RECENT_DIRECTORY_TARGET = None
RECENT_DIRECTORY_OUTPUT = None
@@ -43,6 +59,11 @@ preview_slider = None
source_label = None
target_label = None
status_label = None
# Status labels of the two mapper pop-ups; each is assigned when its pop-up is built.
popup_status_label = None
popup_status_label_live = None
# Thumbnail labels created by the mapper pop-ups, keyed by the map row (button number).
source_label_dict = {}
source_label_dict_live = {}
target_label_dict_live = {}
# File-dialog filter entries; img_ft is passed as `filetypes=[img_ft]` by the image pickers.
# NOTE(review): vid_ft is presumably the matching video filter - confirm in modules.globals.
img_ft, vid_ft = modules.globals.file_types
@@ -148,8 +169,12 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
nsfw_switch = ctk.CTkSwitch(root, text='NSFW', variable=nsfw_value, cursor='hand2', command=lambda: setattr(modules.globals, 'nsfw', nsfw_value.get()))
nsfw_switch.place(relx=0.6, rely=0.6125)
start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: select_output_path(start))
start_button.place(relx=0.15, rely=0.7, relwidth=0.2, relheight=0.05)
map_faces = ctk.BooleanVar(value=modules.globals.map_faces)
map_faces_switch = ctk.CTkSwitch(root, text='Map faces', variable=map_faces, cursor='hand2', command=lambda: setattr(modules.globals, 'map_faces', map_faces.get()))
map_faces_switch.place(relx=0.1, rely=0.75)
start_button = ctk.CTkButton(root, text='Start', cursor='hand2', command=lambda: analyze_target(start, root))
start_button.place(relx=0.15, rely=0.80, relwidth=0.2, relheight=0.05)
stop_button = ctk.CTkButton(root, text='Destroy', cursor='hand2', command=destroy)
stop_button.place(relx=0.4, rely=0.7, relwidth=0.2, relheight=0.05)
@@ -157,22 +182,8 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
preview_button = ctk.CTkButton(root, text='Preview', cursor='hand2', command=toggle_preview)
preview_button.place(relx=0.65, rely=0.7, relwidth=0.2, relheight=0.05)
camera_label = ctk.CTkLabel(root, text="Select Camera:")
camera_label.place(relx=0.4, rely=0.7525, relwidth=0.2, relheight=0.05)
available_cameras = get_available_cameras()
available_camera_strings = [str(cam) for cam in available_cameras]
camera_variable = ctk.StringVar(value=available_camera_strings[0] if available_camera_strings else "No cameras found")
camera_optionmenu = ctk.CTkOptionMenu(root, variable=camera_variable, values=available_camera_strings)
camera_optionmenu.place(relx=0.65, rely=0.7525, relwidth=0.2, relheight=0.05)
virtual_cam_out_value = ctk.BooleanVar(value=False)
virtual_cam_out_switch = ctk.CTkSwitch(root, text='Virtual Cam Output (OBS)', variable=virtual_cam_out_value, cursor='hand2')
virtual_cam_out_switch.place(relx=0.4, rely=0.805)
live_button = ctk.CTkButton(root, text='Live', cursor='hand2', command=lambda: webcam_preview(camera_variable.get(), virtual_cam_out_value.get()))
live_button.place(relx=0.15, rely=0.7525, relwidth=0.2, relheight=0.05)
live_button = ctk.CTkButton(root, text='Live', cursor='hand2', command=lambda: webcam_preview(root))
live_button.place(relx=0.40, rely=0.86, relwidth=0.2, relheight=0.05)
status_label = ctk.CTkLabel(root, text=None, justify='center')
status_label.place(relx=0.1, relwidth=0.8, rely=0.875)
@@ -184,6 +195,109 @@ def create_root(start: Callable[[], None], destroy: Callable[[], None]) -> ctk.C
return root
def analyze_target(start: Callable[[], None], root: ctk.CTk):
    """Handle the Start button: map faces first if requested, then pick output.

    When ``modules.globals.map_faces`` is off, this goes straight to
    ``select_output_path(start)``. When it is on, the source/target map is
    reset, the unique-face helper matching the target type is run, and the
    mapper pop-up is opened if the map ended up non-empty.

    Args:
        start: Callback forwarded to ``select_output_path``.
        root: Main window, used as parent for the mapper pop-up.
    """
    # Only one mapper pop-up may be open at a time.
    if POPUP is not None and POPUP.winfo_exists():
        update_status("Please complete pop-up or close it.")
        return

    if not modules.globals.map_faces:
        select_output_path(start)
        return

    # Start from an empty map; it is presumably filled by the
    # get_unique_faces_* helpers below (they take no arguments).
    modules.globals.souce_target_map = []

    target_path = modules.globals.target_path
    if is_image(target_path):
        update_status('Getting unique faces')
        get_unique_faces_from_target_image()
    elif is_video(target_path):
        update_status('Getting unique faces')
        get_unique_faces_from_target_video()

    if modules.globals.souce_target_map:
        create_source_target_popup(start, root, modules.globals.souce_target_map)
    else:
        update_status("No faces found in target")
def create_source_target_popup(start: Callable[[], None], root: ctk.CTk, map: list) -> None:
    """Open the "Source x Target Mapper" pop-up for an image/video target.

    One row is built per entry of ``map``: a "Select source image" button, an
    "X" separator and a thumbnail of the entry's target face. Below the rows
    sit a status label and a Submit button.

    Args:
        start: Callback forwarded to ``select_output_path`` on a valid submit.
        root: Parent window of the pop-up.
        map: List of mapping entries; each must provide ``'id'`` and
            ``['target']['cv2']`` (a BGR image array).
    """
    global POPUP, popup_status_label

    POPUP = ctk.CTkToplevel(root)
    POPUP.title("Source x Target Mapper")
    POPUP.geometry(f"{POPUP_WIDTH}x{POPUP_HEIGHT}")
    POPUP.focus()

    def on_submit_click(start):
        # Refuse to continue until the mapping is valid.
        if not has_valid_map():
            update_pop_status("Atleast 1 source with target is required!")
            return
        POPUP.destroy()
        select_output_path(start)

    scrollable_frame = ctk.CTkScrollableFrame(
        POPUP,
        width=POPUP_SCROLL_WIDTH,
        height=POPUP_SCROLL_HEIGHT,
    )
    scrollable_frame.grid(row=0, column=0, padx=0, pady=0, sticky='nsew')

    def on_button_click(map, button_num):
        update_popup_source(scrollable_frame, map, button_num)

    thumb_size = (MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT)

    for entry in map:
        row = entry['id']

        # Bind the row as a default so each button keeps its own row number.
        select_button = ctk.CTkButton(
            scrollable_frame,
            text="Select source image",
            command=lambda row=row: on_button_click(map, row),
            width=DEFAULT_BUTTON_WIDTH,
            height=DEFAULT_BUTTON_HEIGHT,
        )
        select_button.grid(row=row, column=0, padx=50, pady=10)

        separator = ctk.CTkLabel(
            scrollable_frame,
            text="X",
            width=MAPPER_PREVIEW_MAX_WIDTH,
            height=MAPPER_PREVIEW_MAX_HEIGHT,
        )
        separator.grid(row=row, column=2, padx=10, pady=10)

        # OpenCV images are BGR; convert before handing them to PIL.
        rgb_pixels = cv2.cvtColor(entry['target']['cv2'], cv2.COLOR_BGR2RGB)
        thumbnail = Image.fromarray(rgb_pixels).resize(thumb_size, Image.LANCZOS)
        tk_thumbnail = ctk.CTkImage(thumbnail, size=thumbnail.size)

        target_label_widget = ctk.CTkLabel(
            scrollable_frame,
            text=f"T-{row}",
            width=MAPPER_PREVIEW_MAX_WIDTH,
            height=MAPPER_PREVIEW_MAX_HEIGHT,
        )
        target_label_widget.grid(row=row, column=3, padx=10, pady=10)
        target_label_widget.configure(image=tk_thumbnail)

    popup_status_label = ctk.CTkLabel(POPUP, text=None, justify='center')
    popup_status_label.grid(row=1, column=0, pady=15)

    submit_button = ctk.CTkButton(POPUP, text="Submit", command=lambda: on_submit_click(start))
    submit_button.grid(row=2, column=0, pady=10)
def update_popup_source(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
    """Ask for a source image for one mapper row and show its face thumbnail.

    Any source already stored for the row is removed first, even when the
    dialog is then cancelled. If a face is found in the chosen image, the
    cropped face and the face object are stored under
    ``map[button_num]['source']`` and a thumbnail label is placed in column 1
    of the row.

    Args:
        scrollable_frame: Frame that holds the mapper rows.
        map: List of mapping entries, indexed by ``button_num``.
        button_num: Index of the row being edited.

    Returns:
        The same ``map`` list, mutated in place.
    """
    source_path = ctk.filedialog.askopenfilename(title='select an source image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])

    entry = map[button_num]
    if "source" in entry:
        entry.pop("source")
        # Tolerate a missing label so a stale dict cannot raise KeyError.
        stale_label = source_label_dict.pop(button_num, None)
        if stale_label is not None:
            stale_label.destroy()

    # A cancelled dialog yields an empty value ("" or an empty tuple).
    if not source_path:
        return map

    cv2_img = cv2.imread(source_path)
    # cv2.imread returns None (it does not raise) for unreadable files.
    face = get_one_face(cv2_img) if cv2_img is not None else None
    if not face:
        update_pop_status("Face could not be detected in last upload!")
        return map

    x_min, y_min, x_max, y_max = (int(value) for value in face['bbox'])
    # Clamp to the image origin: a negative start index would be interpreted
    # as "from the end" by the slice and yield a wrong or empty crop.
    crop = cv2_img[max(y_min, 0):y_max, max(x_min, 0):x_max]
    if crop.size == 0:
        update_pop_status("Face could not be detected in last upload!")
        return map

    entry['source'] = {
        'cv2': crop,
        'face': face,
    }

    image = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
    tk_image = ctk.CTkImage(image, size=image.size)

    source_image = ctk.CTkLabel(scrollable_frame, text=f"S-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
    source_image.grid(row=button_num, column=1, padx=10, pady=10)
    source_image.configure(image=tk_image)
    # Remember the label so it can be destroyed when the row is re-selected.
    source_label_dict[button_num] = source_image
    return map
def create_preview(parent: ctk.CTk) -> ctk.CTkToplevel:
global preview_label, preview_slider
@@ -206,6 +320,11 @@ def update_status(text: str) -> None:
status_label.configure(text=text)
ROOT.update()
def update_pop_status(text: str) -> None:
    """Show ``text`` in the status label of the image/video mapper pop-up."""
    label = popup_status_label
    label.configure(text=text)
def update_pop_live_status(text: str) -> None:
    """Show ``text`` in the status label of the live (webcam) mapper pop-up."""
    label = popup_status_label_live
    label.configure(text=text)
def update_tumbler(var: str, value: bool) -> None:
modules.globals.fp_ui[var] = value
@@ -394,10 +513,75 @@ def fit_image_to_size(image, width: int, height: int):
new_size = (int(ratio * w), int(ratio * h))
return cv2.resize(image, dsize=new_size)
def webcam_preview(camera_name: str, virtual_cam_output: bool):
if modules.globals.source_path is None:
return
def render_image_preview(image_path: str, size: Tuple[int, int]) -> ctk.CTkImage:
    """Load an image file and wrap it in a ``CTkImage``.

    Args:
        image_path: Path of the image to open.
        size: Target ``(width, height)``; when falsy the image keeps its
            original size, otherwise it is fitted to ``size`` with LANCZOS.

    Returns:
        A ``CTkImage`` whose size equals the (possibly fitted) image size.
    """
    preview = Image.open(image_path)
    if not size:
        return ctk.CTkImage(preview, size=preview.size)
    fitted = ImageOps.fit(preview, size, Image.LANCZOS)
    return ctk.CTkImage(fitted, size=fitted.size)
def render_video_preview(video_path: str, size: Tuple[int, int], frame_number: int = 0) -> ctk.CTkImage:
    """Read one frame of a video and wrap it in a ``CTkImage``.

    Args:
        video_path: Path of the video to open.
        size: Target ``(width, height)``; when falsy the frame keeps its
            original size, otherwise it is fitted to ``size`` with LANCZOS.
        frame_number: Frame to seek to before reading; ``0`` reads from the
            start without seeking.

    Returns:
        The frame as a ``CTkImage``, or ``None`` when no frame could be read.
    """
    capture = cv2.VideoCapture(video_path)
    try:
        if frame_number:
            capture.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
        has_frame, frame = capture.read()
    finally:
        # Release on every path; previously the capture leaked whenever a
        # frame was read, because the function returned before the release.
        capture.release()

    if not has_frame:
        cv2.destroyAllWindows()
        return None

    image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    if size:
        image = ImageOps.fit(image, size, Image.LANCZOS)
    return ctk.CTkImage(image, size=image.size)
def toggle_preview() -> None:
    """Hide the preview window if it is shown, otherwise build and show it.

    The preview is only (re)built when both a source and a target path are
    set in ``modules.globals``.
    """
    if PREVIEW.state() == 'normal':
        PREVIEW.withdraw()
        return

    have_both_paths = modules.globals.source_path and modules.globals.target_path
    if have_both_paths:
        init_preview()
        update_preview()
def init_preview() -> None:
    """Configure the preview slider for the current target.

    For an image target the slider is hidden. For a video target its upper
    bound is set to the video's frame total, it is packed across the window
    width, and it is reset to 0.
    """
    target = modules.globals.target_path

    if is_image(target):
        preview_slider.pack_forget()

    # Deliberately a second `if`, not `elif`: both checks are always evaluated.
    if is_video(target):
        frame_total = get_video_frame_total(target)
        preview_slider.configure(to=frame_total)
        preview_slider.pack(fill='x')
        preview_slider.set(0)
def update_preview(frame_number: int = 0) -> None:
    """Render one processed frame of the target into the preview window.

    Does nothing unless both a source and a target path are set. The frame is
    fetched with ``get_video_frame``, optionally rejected by the NSFW check,
    run through every active frame processor, and shown in ``preview_label``.

    Args:
        frame_number: Index of the target frame to preview.
    """
    if not (modules.globals.source_path and modules.globals.target_path):
        return

    update_status('Processing...')
    temp_frame = get_video_frame(modules.globals.target_path, frame_number)
    if modules.globals.nsfw_filter and check_and_ignore_nsfw(temp_frame):
        return

    frame_processors = get_frame_processors_modules(modules.globals.frame_processors)
    if frame_processors:
        # Read the source image and detect its face once, not once per
        # processor: the result does not depend on the processor.
        source_face = get_one_face(cv2.imread(modules.globals.source_path))
        for frame_processor in frame_processors:
            temp_frame = frame_processor.process_frame(source_face, temp_frame)

    # OpenCV frames are BGR; convert before handing them to PIL.
    image = Image.fromarray(cv2.cvtColor(temp_frame, cv2.COLOR_BGR2RGB))
    image = ImageOps.contain(image, (PREVIEW_MAX_WIDTH, PREVIEW_MAX_HEIGHT), Image.LANCZOS)
    image = ctk.CTkImage(image, size=image.size)
    preview_label.configure(image=image)
    update_status('Processing succeed!')
    PREVIEW.deiconify()
def webcam_preview(root: ctk.CTk):
    """Handle the Live button.

    With face mapping enabled, the source/target map is reset and the live
    mapper pop-up is opened. Otherwise the webcam preview is started
    directly, provided a source image has been selected.

    Args:
        root: Main window, used as parent for the live mapper pop-up.
    """
    if modules.globals.map_faces:
        modules.globals.souce_target_map = []
        create_source_target_popup_for_webcam(root, modules.globals.souce_target_map)
        return

    if modules.globals.source_path is None:
        # No image selected
        return

    create_webcam_preview()
def create_webcam_preview():
global preview_label, PREVIEW
WIDTH = 960
@@ -440,11 +624,7 @@ def webcam_preview(camera_name: str, virtual_cam_output: bool):
while preview_running:
preview_running = webcam_preview_loop(camera, source_image, frame_processors, virtual_cam)
while preview_running:
preview_running = webcam_preview_loop(camera, source_image, frame_processors)
if camera: camera.release()
PREVIEW.withdraw()
temp_frame = frame.copy() #Create a copy of the frame
def get_camera_index_by_name(camera_name: str) -> int:
@@ -459,6 +639,18 @@ def get_camera_index_by_name(camera_name: str) -> int:
return get_available_cameras().index(camera_name)
return -1
if not modules.globals.map_faces:
# Select and save face image only once
if source_image is None and modules.globals.source_path:
source_image = get_one_face(cv2.imread(modules.globals.source_path))
for frame_processor in frame_processors:
temp_frame = frame_processor.process_frame(source_image, temp_frame)
else:
modules.globals.target_path = None
for frame_processor in frame_processors:
temp_frame = frame_processor.process_frame_v2(temp_frame)
def get_available_cameras():
"""Get available camera names (cross-platform)."""
@@ -490,5 +682,156 @@ def get_available_cameras():
cap.release()
index += 1
available_cameras = devices
return available_cameras
if PREVIEW.state() == 'withdrawn':
break
camera.release()
PREVIEW.withdraw() # Close preview window when loop is finished
def create_source_target_popup_for_webcam(root: ctk.CTk, map: list) -> None:
    """Open the live (webcam) "Source x Target Mapper" pop-up.

    The pop-up starts with a status label plus "Add" and "Submit" buttons.
    "Add" appends a blank mapping and redraws the rows. "Submit" closes the
    pop-up, simplifies the maps and starts the webcam preview once the
    mapping is valid.

    Args:
        root: Parent window of the pop-up.
        map: List of mapping entries handed to ``refresh_data`` on "Add".
    """
    global POPUP_LIVE, popup_status_label_live

    POPUP_LIVE = ctk.CTkToplevel(root)
    POPUP_LIVE.title("Source x Target Mapper")
    POPUP_LIVE.geometry(f"{POPUP_LIVE_WIDTH}x{POPUP_LIVE_HEIGHT}")
    POPUP_LIVE.focus()

    def on_submit_click():
        # Refuse to continue until the mapping is valid.
        if not has_valid_map():
            update_pop_live_status("Atleast 1 source with target is required!")
            return
        POPUP_LIVE.destroy()
        simplify_maps()
        create_webcam_preview()

    def on_add_click():
        add_blank_map()
        refresh_data(map)
        update_pop_live_status("Please provide mapping!")

    popup_status_label_live = ctk.CTkLabel(POPUP_LIVE, text=None, justify='center')
    popup_status_label_live.grid(row=1, column=0, pady=15)

    # The two action buttons sit side by side near the bottom edge.
    add_button = ctk.CTkButton(POPUP_LIVE, text="Add", command=on_add_click)
    add_button.place(relx=0.2, rely=0.92, relwidth=0.2, relheight=0.05)

    submit_button = ctk.CTkButton(POPUP_LIVE, text="Submit", command=on_submit_click)
    submit_button.place(relx=0.6, rely=0.92, relwidth=0.2, relheight=0.05)
def refresh_data(map: list):
    """Rebuild the rows of the live mapper pop-up from ``map``.

    A new scrollable frame is gridded into row 0 of ``POPUP_LIVE``. Each
    entry gets "Select source image" and "Select target image" buttons
    around an "X" separator, plus thumbnails for whichever of ``'source'``
    and ``'target'`` the entry already contains.

    Args:
        map: List of mapping entries; each must provide ``'id'``.
    """
    scrollable_frame = ctk.CTkScrollableFrame(
        POPUP_LIVE,
        width=POPUP_LIVE_SCROLL_WIDTH,
        height=POPUP_LIVE_SCROLL_HEIGHT,
    )
    scrollable_frame.grid(row=0, column=0, padx=0, pady=0, sticky='nsew')

    def on_sbutton_click(map, button_num):
        update_webcam_source(scrollable_frame, map, button_num)

    def on_tbutton_click(map, button_num):
        update_webcam_target(scrollable_frame, map, button_num)

    def show_thumbnail(face_entry, caption, row, column, padx):
        # OpenCV images are BGR; convert before handing them to PIL.
        rgb_pixels = cv2.cvtColor(face_entry['cv2'], cv2.COLOR_BGR2RGB)
        thumbnail = Image.fromarray(rgb_pixels).resize(
            (MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS
        )
        tk_thumbnail = ctk.CTkImage(thumbnail, size=thumbnail.size)
        label = ctk.CTkLabel(
            scrollable_frame,
            text=caption,
            width=MAPPER_PREVIEW_MAX_WIDTH,
            height=MAPPER_PREVIEW_MAX_HEIGHT,
        )
        label.grid(row=row, column=column, padx=padx, pady=10)
        label.configure(image=tk_thumbnail)

    for entry in map:
        row = entry['id']

        # Bind the row as a default so each button keeps its own row number.
        source_button = ctk.CTkButton(
            scrollable_frame,
            text="Select source image",
            command=lambda row=row: on_sbutton_click(map, row),
            width=DEFAULT_BUTTON_WIDTH,
            height=DEFAULT_BUTTON_HEIGHT,
        )
        source_button.grid(row=row, column=0, padx=30, pady=10)

        separator = ctk.CTkLabel(
            scrollable_frame,
            text="X",
            width=MAPPER_PREVIEW_MAX_WIDTH,
            height=MAPPER_PREVIEW_MAX_HEIGHT,
        )
        separator.grid(row=row, column=2, padx=10, pady=10)

        target_button = ctk.CTkButton(
            scrollable_frame,
            text="Select target image",
            command=lambda row=row: on_tbutton_click(map, row),
            width=DEFAULT_BUTTON_WIDTH,
            height=DEFAULT_BUTTON_HEIGHT,
        )
        target_button.grid(row=row, column=3, padx=20, pady=10)

        if "source" in entry:
            show_thumbnail(entry['source'], f"S-{row}", row, 1, 10)

        if "target" in entry:
            show_thumbnail(entry['target'], f"T-{row}", row, 4, 20)
def update_webcam_source(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
    """Ask for a source image for one live-mapper row and show its thumbnail.

    Any source already stored for the row is removed first, even when the
    dialog is then cancelled. If a face is found in the chosen image, the
    cropped face and the face object are stored under
    ``map[button_num]['source']`` and a thumbnail label is placed in column 1
    of the row.

    Args:
        scrollable_frame: Frame that holds the mapper rows.
        map: List of mapping entries, indexed by ``button_num``.
        button_num: Index of the row being edited.

    Returns:
        The same ``map`` list, mutated in place.
    """
    source_path = ctk.filedialog.askopenfilename(title='select an source image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])

    entry = map[button_num]
    if "source" in entry:
        entry.pop("source")
        # Tolerate a missing label so a stale dict cannot raise KeyError.
        stale_label = source_label_dict_live.pop(button_num, None)
        if stale_label is not None:
            stale_label.destroy()

    # A cancelled dialog yields an empty value ("" or an empty tuple).
    if not source_path:
        return map

    cv2_img = cv2.imread(source_path)
    # cv2.imread returns None (it does not raise) for unreadable files.
    face = get_one_face(cv2_img) if cv2_img is not None else None
    if not face:
        update_pop_live_status("Face could not be detected in last upload!")
        return map

    x_min, y_min, x_max, y_max = (int(value) for value in face['bbox'])
    # Clamp to the image origin: a negative start index would be interpreted
    # as "from the end" by the slice and yield a wrong or empty crop.
    crop = cv2_img[max(y_min, 0):y_max, max(x_min, 0):x_max]
    if crop.size == 0:
        update_pop_live_status("Face could not be detected in last upload!")
        return map

    entry['source'] = {
        'cv2': crop,
        'face': face,
    }

    image = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
    tk_image = ctk.CTkImage(image, size=image.size)

    source_image = ctk.CTkLabel(scrollable_frame, text=f"S-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
    source_image.grid(row=button_num, column=1, padx=10, pady=10)
    source_image.configure(image=tk_image)
    # Remember the label so it can be destroyed when the row is re-selected.
    source_label_dict_live[button_num] = source_image
    return map
def update_webcam_target(scrollable_frame: ctk.CTkScrollableFrame, map: list, button_num: int) -> list:
    """Ask for a target image for one live-mapper row and show its thumbnail.

    Any target already stored for the row is removed first, even when the
    dialog is then cancelled. If a face is found in the chosen image, the
    cropped face and the face object are stored under
    ``map[button_num]['target']`` and a thumbnail label is placed in column 4
    of the row.

    Args:
        scrollable_frame: Frame that holds the mapper rows.
        map: List of mapping entries, indexed by ``button_num``.
        button_num: Index of the row being edited.

    Returns:
        The same ``map`` list, mutated in place.
    """
    # NOTE(review): the dialog opens in RECENT_DIRECTORY_SOURCE although it
    # picks a target image - confirm whether RECENT_DIRECTORY_TARGET was meant.
    target_path = ctk.filedialog.askopenfilename(title='select an target image', initialdir=RECENT_DIRECTORY_SOURCE, filetypes=[img_ft])

    entry = map[button_num]
    if "target" in entry:
        entry.pop("target")
        # Tolerate a missing label so a stale dict cannot raise KeyError.
        stale_label = target_label_dict_live.pop(button_num, None)
        if stale_label is not None:
            stale_label.destroy()

    # A cancelled dialog yields an empty value ("" or an empty tuple).
    if not target_path:
        return map

    cv2_img = cv2.imread(target_path)
    # cv2.imread returns None (it does not raise) for unreadable files.
    face = get_one_face(cv2_img) if cv2_img is not None else None
    if not face:
        update_pop_live_status("Face could not be detected in last upload!")
        return map

    x_min, y_min, x_max, y_max = (int(value) for value in face['bbox'])
    # Clamp to the image origin: a negative start index would be interpreted
    # as "from the end" by the slice and yield a wrong or empty crop.
    crop = cv2_img[max(y_min, 0):y_max, max(x_min, 0):x_max]
    if crop.size == 0:
        update_pop_live_status("Face could not be detected in last upload!")
        return map

    entry['target'] = {
        'cv2': crop,
        'face': face,
    }

    image = Image.fromarray(cv2.cvtColor(crop, cv2.COLOR_BGR2RGB))
    image = image.resize((MAPPER_PREVIEW_MAX_WIDTH, MAPPER_PREVIEW_MAX_HEIGHT), Image.LANCZOS)
    tk_image = ctk.CTkImage(image, size=image.size)

    target_image = ctk.CTkLabel(scrollable_frame, text=f"T-{button_num}", width=MAPPER_PREVIEW_MAX_WIDTH, height=MAPPER_PREVIEW_MAX_HEIGHT)
    target_image.grid(row=button_num, column=4, padx=20, pady=10)
    target_image.configure(image=tk_image)
    # Remember the label so it can be destroyed when the row is re-selected.
    target_label_dict_live[button_num] = target_image
    return map