diff --git a/.github/preview.png b/.github/preview.png index b0c29e5..1ee2d02 100644 Binary files a/.github/preview.png and b/.github/preview.png differ diff --git a/README.md b/README.md index 45bb953..1f7b530 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,10 @@ execution: --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] choose from the available execution providers (choices: cpu, ...) --execution-thread-count [1-128] specify the number of execution threads --execution-queue-count [1-32] specify the number of execution queries - --max-memory [0-128] specify the maximum amount of ram to be used (in gb) + +memory: + --video-memory-strategy {strict,moderate,tolerant} specify strategy to handle the video memory + --system-memory-limit [0-128] specify the amount (gb) of system memory to be used face analyser: --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order used for the face analyser @@ -70,15 +73,17 @@ face mask: frame extraction: --trim-frame-start TRIM_FRAME_START specify the start frame for extraction --trim-frame-end TRIM_FRAME_END specify the end frame for extraction - --temp-frame-format {jpg,png} specify the image format used for frame extraction + --temp-frame-format {jpg,png,bmp} specify the image format used for frame extraction --temp-frame-quality [0-100] specify the image quality used for frame extraction --keep-temp retain temporary frames after processing output creation: --output-image-quality [0-100] specify the quality used for the output image --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder used for the output video + --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} specify the preset used for the output video --output-video-quality [0-100] specify the quality used for the output video - --keep-fps preserve the frames per second (fps) of the target + 
--output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the resolution used for the output video + --output-video-fps OUTPUT_VIDEO_FPS specify the frames per second (fps) used for the output video --skip-audio omit audio from the target frame processors: diff --git a/facefusion.ini b/facefusion.ini new file mode 100644 index 0000000..2c7be36 --- /dev/null +++ b/facefusion.ini @@ -0,0 +1,66 @@ +[general] +source_paths = +target_path = +output_path = + +[misc] +skip_download = +headless = +log_level = + +[execution] +execution_providers = +execution_thread_count = +execution_queue_count = + +[memory] +video_memory_strategy = +system_memory_limit = + +[face_analyser] +face_analyser_order = +face_analyser_age = +face_analyser_gender = +face_detector_model = +face_detector_size = +face_detector_score = + +[face_selector] +face_selector_mode = +reference_face_position = +reference_face_distance = +reference_frame_number = + +[face_mask] +face_mask_types = +face_mask_blur = +face_mask_padding = +face_mask_regions = + +[frame_extraction] +trim_frame_start = +trim_frame_end = +temp_frame_format = +temp_frame_quality = +keep_temp = + +[output_creation] +output_image_quality = +output_video_encoder = +output_video_preset = +output_video_quality = +output_video_resolution = +output_video_fps = +skip_audio = + +[frame_processors] +frame_processors = +face_debugger_items = +face_enhancer_model = +face_enhancer_blend = +face_swapper_model = +frame_enhancer_model = +frame_enhancer_blend = + +[uis] +ui_layouts = diff --git a/facefusion/choices.py b/facefusion/choices.py index 9808aa5..b2fb133 100755 --- a/facefusion/choices.py +++ b/facefusion/choices.py @@ -1,8 +1,9 @@ from typing import List -from facefusion.typing import FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder -from facefusion.common_helper import create_range +from facefusion.typing import VideoMemoryStrategy, FaceSelectorMode, 
FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder, OutputVideoPreset +from facefusion.common_helper import create_int_range, create_float_range +video_memory_strategies : List[VideoMemoryStrategy] = [ 'strict', 'moderate', 'tolerant' ] face_analyser_orders : List[FaceAnalyserOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ] face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior' ] face_analyser_genders : List[FaceAnalyserGender] = [ 'male', 'female' ] @@ -11,16 +12,19 @@ face_detector_sizes : List[str] = [ '160x160', '320x320', '480x480', '512x512', face_selector_modes : List[FaceSelectorMode] = [ 'reference', 'one', 'many' ] face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ] face_mask_regions : List[FaceMaskRegion] = [ 'skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip' ] -temp_frame_formats : List[TempFrameFormat] = [ 'jpg', 'png' ] +temp_frame_formats : List[TempFrameFormat] = [ 'jpg', 'png', 'bmp' ] output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc' ] +output_video_presets : List[OutputVideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ] -execution_thread_count_range : List[float] = create_range(1, 128, 1) -execution_queue_count_range : List[float] = create_range(1, 32, 1) -max_memory_range : List[float] = create_range(0, 128, 1) -face_detector_score_range : List[float] = create_range(0.0, 1.0, 0.05) -face_mask_blur_range : List[float] = create_range(0.0, 1.0, 0.05) -face_mask_padding_range : List[float] = create_range(0, 100, 1) -reference_face_distance_range : List[float] = create_range(0.0, 1.5, 0.05) -temp_frame_quality_range : List[float] = create_range(0, 100, 1) 
-output_image_quality_range : List[float] = create_range(0, 100, 1) -output_video_quality_range : List[float] = create_range(0, 100, 1) +video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160 ] + +execution_thread_count_range : List[int] = create_int_range(1, 128, 1) +execution_queue_count_range : List[int] = create_int_range(1, 32, 1) +system_memory_limit_range : List[int] = create_int_range(0, 128, 1) +face_detector_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_blur_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_mask_padding_range : List[int] = create_int_range(0, 100, 1) +reference_face_distance_range : List[float] = create_float_range(0.0, 1.5, 0.05) +temp_frame_quality_range : List[int] = create_int_range(0, 100, 1) +output_image_quality_range : List[int] = create_int_range(0, 100, 1) +output_video_quality_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py index 8ddcad8..5e25851 100644 --- a/facefusion/common_helper.py +++ b/facefusion/common_helper.py @@ -6,5 +6,9 @@ def create_metavar(ranges : List[Any]) -> str: return '[' + str(ranges[0]) + '-' + str(ranges[-1]) + ']' -def create_range(start : float, stop : float, step : float) -> List[float]: +def create_int_range(start : int, stop : int, step : int) -> List[int]: + return (numpy.arange(start, stop + step, step)).tolist() + + +def create_float_range(start : float, stop : float, step : float) -> List[float]: return (numpy.around(numpy.arange(start, stop + step, step), decimals = 2)).tolist() diff --git a/facefusion/config.py b/facefusion/config.py new file mode 100644 index 0000000..7eb00d7 --- /dev/null +++ b/facefusion/config.py @@ -0,0 +1,80 @@ +from configparser import ConfigParser +from typing import Optional, List + +from facefusion.filesystem import resolve_relative_path + +CONFIG = None + + +def get_config() -> ConfigParser: + global CONFIG + + if CONFIG is None: + 
config_path = resolve_relative_path('../facefusion.ini') + CONFIG = ConfigParser() + CONFIG.read(config_path) + return CONFIG + + +def clear_config() -> None: + global CONFIG + + CONFIG = None + + +def get_str_value(key : str, fallback : Optional[str] = None) -> Optional[str]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return str(value or fallback) + return None + + +def get_int_value(key : str, fallback : Optional[str] = None) -> Optional[int]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return int(value or fallback) + return None + + +def get_float_value(key : str, fallback : Optional[str] = None) -> Optional[float]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return float(value or fallback) + return None + + +def get_bool_value(key : str, fallback : Optional[str] = None) -> Optional[bool]: + section, option = key.split('.') + value = get_config()[section].get(option, fallback) + if value == 'True' or (value != 'False' and fallback == 'True'): + return True + if value == 'False' or fallback == 'False': + return False + return None + + +def get_str_list(key : str, fallback : Optional[str] = None) -> Optional[List[str]]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return [ str(value) for value in (value or fallback).split(' ') ] + return None + + +def get_int_list(key : str, fallback : Optional[str] = None) -> Optional[List[int]]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return [ int(value) for value in (value or fallback).split(' ') ] + return None + + +def get_float_list(key : str, fallback : Optional[str] = None) -> Optional[List[float]]: + section, option = key.split('.') + value = get_config()[section].get(option) + if value or fallback: + return [ float(value) for value in 
(value or fallback).split(' ') ] + return None diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py index daa276e..4c8b980 100644 --- a/facefusion/content_analyser.py +++ b/facefusion/content_analyser.py @@ -8,8 +8,9 @@ from tqdm import tqdm import facefusion.globals from facefusion import wording -from facefusion.typing import Frame, ModelValue -from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_fps +from facefusion.typing import Frame, ModelValue, Fps +from facefusion.execution_helper import apply_execution_provider_options +from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps from facefusion.filesystem import resolve_relative_path from facefusion.download import conditional_download @@ -23,8 +24,8 @@ MODELS : Dict[str, ModelValue] =\ 'path': resolve_relative_path('../.assets/models/open_nsfw.onnx') } } -MAX_PROBABILITY = 0.80 -MAX_RATE = 5 +PROBABILITY_LIMIT = 0.80 +RATE_LIMIT = 5 STREAM_COUNTER = 0 @@ -34,7 +35,7 @@ def get_content_analyser() -> Any: with THREAD_LOCK: if CONTENT_ANALYSER is None: model_path = MODELS.get('open_nsfw').get('path') - CONTENT_ANALYSER = onnxruntime.InferenceSession(model_path, providers = facefusion.globals.execution_providers) + CONTENT_ANALYSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return CONTENT_ANALYSER @@ -52,11 +53,11 @@ def pre_check() -> bool: return True -def analyse_stream(frame : Frame, fps : float) -> bool: +def analyse_stream(frame : Frame, video_fps : Fps) -> bool: global STREAM_COUNTER STREAM_COUNTER = STREAM_COUNTER + 1 - if STREAM_COUNTER % int(fps) == 0: + if STREAM_COUNTER % int(video_fps) == 0: return analyse_frame(frame) return False @@ -75,7 +76,7 @@ def analyse_frame(frame : Frame) -> bool: { 'input:0': frame })[0][0][1] - return probability > MAX_PROBABILITY + return probability > PROBABILITY_LIMIT 
@lru_cache(maxsize = None) @@ -87,17 +88,17 @@ def analyse_image(image_path : str) -> bool: @lru_cache(maxsize = None) def analyse_video(video_path : str, start_frame : int, end_frame : int) -> bool: video_frame_total = count_video_frame_total(video_path) - fps = detect_fps(video_path) + video_fps = detect_video_fps(video_path) frame_range = range(start_frame or 0, end_frame or video_frame_total) rate = 0.0 counter = 0 with tqdm(total = len(frame_range), desc = wording.get('analysing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: for frame_number in frame_range: - if frame_number % int(fps) == 0: + if frame_number % int(video_fps) == 0: frame = get_video_frame(video_path, frame_number) if analyse_frame(frame): counter += 1 - rate = counter * int(fps) / len(frame_range) * 100 + rate = counter * int(video_fps) / len(frame_range) * 100 progress.update() progress.set_postfix(rate = rate) - return rate > MAX_RATE + return rate > RATE_LIMIT diff --git a/facefusion/core.py b/facefusion/core.py index 3009124..5ac4c8f 100755 --- a/facefusion/core.py +++ b/facefusion/core.py @@ -3,101 +3,106 @@ import os os.environ['OMP_NUM_THREADS'] = '1' import signal -import ssl import sys +import time import warnings -import platform import shutil +import numpy import onnxruntime +from time import sleep from argparse import ArgumentParser, HelpFormatter import facefusion.choices import facefusion.globals from facefusion.face_analyser import get_one_face, get_average_face from facefusion.face_store import get_reference_faces, append_reference_face -from facefusion.vision import get_video_frame, detect_fps, read_image, read_static_images -from facefusion import face_analyser, face_masker, content_analyser, metadata, logger, wording +from facefusion import face_analyser, face_masker, content_analyser, config, metadata, logger, wording from facefusion.content_analyser import analyse_image, analyse_video from 
facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.common_helper import create_metavar from facefusion.execution_helper import encode_execution_providers, decode_execution_providers -from facefusion.normalizer import normalize_output_path, normalize_padding -from facefusion.filesystem import is_image, is_video, list_module_names, get_temp_frame_paths, create_temp, move_temp, clear_temp +from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps +from facefusion.memory import limit_system_memory +from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video from facefusion.ffmpeg import extract_frames, compress_image, merge_video, restore_audio +from facefusion.vision import get_video_frame, read_image, read_static_images, pack_resolution, detect_video_resolution, detect_video_fps, create_video_resolutions onnxruntime.set_default_logger_severity(3) warnings.filterwarnings('ignore', category = UserWarning, module = 'gradio') warnings.filterwarnings('ignore', category = UserWarning, module = 'torchvision') -if platform.system().lower() == 'darwin': - ssl._create_default_https_context = ssl._create_unverified_context - def cli() -> None: signal.signal(signal.SIGINT, lambda signal_number, frame: destroy()) program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 120), add_help = False) # general - program.add_argument('-s', '--source', action = 'append', help = wording.get('source_help'), dest = 'source_paths') - program.add_argument('-t', '--target', help = wording.get('target_help'), dest = 'target_path') - program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path') + program.add_argument('-s', '--source', help = wording.get('source_help'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) + 
program.add_argument('-t', '--target', help = wording.get('target_help'), dest = 'target_path', default = config.get_str_value('general.target_path')) + program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path', default = config.get_str_value('general.output_path')) program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') # misc group_misc = program.add_argument_group('misc') - group_misc.add_argument('--skip-download', help = wording.get('skip_download_help'), action = 'store_true') - group_misc.add_argument('--headless', help = wording.get('headless_help'), action = 'store_true') - group_misc.add_argument('--log-level', help = wording.get('log_level_help'), default = 'info', choices = logger.get_log_levels()) + group_misc.add_argument('--skip-download', help = wording.get('skip_download_help'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) + group_misc.add_argument('--headless', help = wording.get('headless_help'), action = 'store_true', default = config.get_bool_value('misc.headless')) + group_misc.add_argument('--log-level', help = wording.get('log_level_help'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) # execution execution_providers = encode_execution_providers(onnxruntime.get_available_providers()) group_execution = program.add_argument_group('execution') - group_execution.add_argument('--execution-providers', help = wording.get('execution_providers_help').format(choices = ', '.join(execution_providers)), default = [ 'cpu' ], choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') - group_execution.add_argument('--execution-thread-count', help = wording.get('execution_thread_count_help'), type = int, default = 4, choices = facefusion.choices.execution_thread_count_range, metavar = create_metavar(facefusion.choices.execution_thread_count_range)) - 
group_execution.add_argument('--execution-queue-count', help = wording.get('execution_queue_count_help'), type = int, default = 1, choices = facefusion.choices.execution_queue_count_range, metavar = create_metavar(facefusion.choices.execution_queue_count_range)) - group_execution.add_argument('--max-memory', help = wording.get('max_memory_help'), type = int, choices = facefusion.choices.max_memory_range, metavar = create_metavar(facefusion.choices.max_memory_range)) + group_execution.add_argument('--execution-providers', help = wording.get('execution_providers_help').format(choices = ', '.join(execution_providers)), default = config.get_str_list('execution.execution_providers', 'cpu'), choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') + group_execution.add_argument('--execution-thread-count', help = wording.get('execution_thread_count_help'), type = int, default = config.get_int_value('execution.execution_thread_count', '4'), choices = facefusion.choices.execution_thread_count_range, metavar = create_metavar(facefusion.choices.execution_thread_count_range)) + group_execution.add_argument('--execution-queue-count', help = wording.get('execution_queue_count_help'), type = int, default = config.get_int_value('execution.execution_queue_count', '1'), choices = facefusion.choices.execution_queue_count_range, metavar = create_metavar(facefusion.choices.execution_queue_count_range)) + # memory + group_memory = program.add_argument_group('memory') + group_memory.add_argument('--video-memory-strategy', help = wording.get('video_memory_strategy_help'), default = config.get_str_value('memory.video_memory_strategy', 'strict'), choices = facefusion.choices.video_memory_strategies) + group_memory.add_argument('--system-memory-limit', help = wording.get('system_memory_limit_help'), type = int, default = config.get_int_value('memory.system_memory_limit', '0'), choices = facefusion.choices.system_memory_limit_range, metavar = 
create_metavar(facefusion.choices.system_memory_limit_range)) # face analyser group_face_analyser = program.add_argument_group('face analyser') - group_face_analyser.add_argument('--face-analyser-order', help = wording.get('face_analyser_order_help'), default = 'left-right', choices = facefusion.choices.face_analyser_orders) - group_face_analyser.add_argument('--face-analyser-age', help = wording.get('face_analyser_age_help'), choices = facefusion.choices.face_analyser_ages) - group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('face_analyser_gender_help'), choices = facefusion.choices.face_analyser_genders) - group_face_analyser.add_argument('--face-detector-model', help = wording.get('face_detector_model_help'), default = 'retinaface', choices = facefusion.choices.face_detector_models) - group_face_analyser.add_argument('--face-detector-size', help = wording.get('face_detector_size_help'), default = '640x640', choices = facefusion.choices.face_detector_sizes) - group_face_analyser.add_argument('--face-detector-score', help = wording.get('face_detector_score_help'), type = float, default = 0.5, choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) + group_face_analyser.add_argument('--face-analyser-order', help = wording.get('face_analyser_order_help'), default = config.get_str_value('face_analyser.face_analyser_order', 'left-right'), choices = facefusion.choices.face_analyser_orders) + group_face_analyser.add_argument('--face-analyser-age', help = wording.get('face_analyser_age_help'), default = config.get_str_value('face_analyser.face_analyser_age'), choices = facefusion.choices.face_analyser_ages) + group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('face_analyser_gender_help'), default = config.get_str_value('face_analyser.face_analyser_gender'), choices = facefusion.choices.face_analyser_genders) + 
group_face_analyser.add_argument('--face-detector-model', help = wording.get('face_detector_model_help'), default = config.get_str_value('face_analyser.face_detector_model', 'retinaface'), choices = facefusion.choices.face_detector_models) + group_face_analyser.add_argument('--face-detector-size', help = wording.get('face_detector_size_help'), default = config.get_str_value('face_analyser.face_detector_size', '640x640'), choices = facefusion.choices.face_detector_sizes) + group_face_analyser.add_argument('--face-detector-score', help = wording.get('face_detector_score_help'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) # face selector group_face_selector = program.add_argument_group('face selector') - group_face_selector.add_argument('--face-selector-mode', help = wording.get('face_selector_mode_help'), default = 'reference', choices = facefusion.choices.face_selector_modes) - group_face_selector.add_argument('--reference-face-position', help = wording.get('reference_face_position_help'), type = int, default = 0) - group_face_selector.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), type = float, default = 0.6, choices = facefusion.choices.reference_face_distance_range, metavar = create_metavar(facefusion.choices.reference_face_distance_range)) - group_face_selector.add_argument('--reference-frame-number', help = wording.get('reference_frame_number_help'), type = int, default = 0) + group_face_selector.add_argument('--face-selector-mode', help = wording.get('face_selector_mode_help'), default = config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = facefusion.choices.face_selector_modes) + group_face_selector.add_argument('--reference-face-position', help = wording.get('reference_face_position_help'), type = int, default = 
config.get_int_value('face_selector.reference_face_position', '0')) + group_face_selector.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), type = float, default = config.get_float_value('face_selector.reference_face_distance', '0.6'), choices = facefusion.choices.reference_face_distance_range, metavar = create_metavar(facefusion.choices.reference_face_distance_range)) + group_face_selector.add_argument('--reference-frame-number', help = wording.get('reference_frame_number_help'), type = int, default = config.get_int_value('face_selector.reference_frame_number', '0')) # face mask group_face_mask = program.add_argument_group('face mask') - group_face_mask.add_argument('--face-mask-types', help = wording.get('face_mask_types_help').format(choices = ', '.join(facefusion.choices.face_mask_types)), default = [ 'box' ], choices = facefusion.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') - group_face_mask.add_argument('--face-mask-blur', help = wording.get('face_mask_blur_help'), type = float, default = 0.3, choices = facefusion.choices.face_mask_blur_range, metavar = create_metavar(facefusion.choices.face_mask_blur_range)) - group_face_mask.add_argument('--face-mask-padding', help = wording.get('face_mask_padding_help'), type = int, default = [ 0, 0, 0, 0 ], nargs = '+') - group_face_mask.add_argument('--face-mask-regions', help = wording.get('face_mask_regions_help').format(choices = ', '.join(facefusion.choices.face_mask_regions)), default = facefusion.choices.face_mask_regions, choices = facefusion.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') + group_face_mask.add_argument('--face-mask-types', help = wording.get('face_mask_types_help').format(choices = ', '.join(facefusion.choices.face_mask_types)), default = config.get_str_list('face_mask.face_mask_types', 'box'), choices = facefusion.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') + 
group_face_mask.add_argument('--face-mask-blur', help = wording.get('face_mask_blur_help'), type = float, default = config.get_float_value('face_mask.face_mask_blur', '0.3'), choices = facefusion.choices.face_mask_blur_range, metavar = create_metavar(facefusion.choices.face_mask_blur_range)) + group_face_mask.add_argument('--face-mask-padding', help = wording.get('face_mask_padding_help'), type = int, default = config.get_int_list('face_mask.face_mask_padding', '0 0 0 0'), nargs = '+') + group_face_mask.add_argument('--face-mask-regions', help = wording.get('face_mask_regions_help').format(choices = ', '.join(facefusion.choices.face_mask_regions)), default = config.get_str_list('face_mask.face_mask_regions', ' '.join(facefusion.choices.face_mask_regions)), choices = facefusion.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') # frame extraction group_frame_extraction = program.add_argument_group('frame extraction') - group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), type = int) - group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), type = int) - group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), default = 'jpg', choices = facefusion.choices.temp_frame_formats) - group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), type = int, default = 100, choices = facefusion.choices.temp_frame_quality_range, metavar = create_metavar(facefusion.choices.temp_frame_quality_range)) - group_frame_extraction.add_argument('--keep-temp', help = wording.get('keep_temp_help'), action = 'store_true') + group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_start')) + group_frame_extraction.add_argument('--trim-frame-end', help = 
wording.get('trim_frame_end_help'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_end')) + group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), default = config.get_str_value('frame_extraction.temp_frame_format', 'jpg'), choices = facefusion.choices.temp_frame_formats) + group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), type = int, default = config.get_int_value('frame_extraction.temp_frame_quality', '100'), choices = facefusion.choices.temp_frame_quality_range, metavar = create_metavar(facefusion.choices.temp_frame_quality_range)) + group_frame_extraction.add_argument('--keep-temp', help = wording.get('keep_temp_help'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) # output creation group_output_creation = program.add_argument_group('output creation') - group_output_creation.add_argument('--output-image-quality', help = wording.get('output_image_quality_help'), type = int, default = 80, choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) - group_output_creation.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), default = 'libx264', choices = facefusion.choices.output_video_encoders) - group_output_creation.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), type = int, default = 80, choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) - group_output_creation.add_argument('--keep-fps', help = wording.get('keep_fps_help'), action = 'store_true') - group_output_creation.add_argument('--skip-audio', help = wording.get('skip_audio_help'), action = 'store_true') + group_output_creation.add_argument('--output-image-quality', help = 
wording.get('output_image_quality_help'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) + group_output_creation.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) + group_output_creation.add_argument('--output-video-preset', help = wording.get('output_video_preset_help'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) + group_output_creation.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) + group_output_creation.add_argument('--output-video-resolution', help = wording.get('output_video_resolution_help'), default = config.get_str_value('output_creation.output_video_resolution')) + group_output_creation.add_argument('--output-video-fps', help = wording.get('output_video_fps_help'), type = float, default = config.get_float_value('output_creation.output_video_fps')) + group_output_creation.add_argument('--skip-audio', help = wording.get('skip_audio_help'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) # frame processors - available_frame_processors = list_module_names('facefusion/processors/frame/modules') + available_frame_processors = list_directory('facefusion/processors/frame/modules') program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True) group_frame_processors = program.add_argument_group('frame processors') - 
group_frame_processors.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(available_frame_processors)), default = [ 'face_swapper' ], nargs = '+') + group_frame_processors.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(available_frame_processors)), default = config.get_str_list('frame_processors.frame_processors', 'face_swapper'), nargs = '+') for frame_processor in available_frame_processors: frame_processor_module = load_frame_processor_module(frame_processor) frame_processor_module.register_args(group_frame_processors) # uis + available_ui_layouts = list_directory('facefusion/uis/layouts') group_uis = program.add_argument_group('uis') - group_uis.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(list_module_names('facefusion/uis/layouts'))), default = [ 'default' ], nargs = '+') + group_uis.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(available_ui_layouts)), default = config.get_str_list('uis.ui_layouts', 'default'), nargs = '+') run(program) @@ -115,7 +120,9 @@ def apply_args(program : ArgumentParser) -> None: facefusion.globals.execution_providers = decode_execution_providers(args.execution_providers) facefusion.globals.execution_thread_count = args.execution_thread_count facefusion.globals.execution_queue_count = args.execution_queue_count - facefusion.globals.max_memory = args.max_memory + # memory + facefusion.globals.video_memory_strategy = args.video_memory_strategy + facefusion.globals.system_memory_limit = args.system_memory_limit # face analyser facefusion.globals.face_analyser_order = args.face_analyser_order facefusion.globals.face_analyser_age = args.face_analyser_age @@ -142,11 +149,20 @@ def apply_args(program : ArgumentParser) -> None: # output creation facefusion.globals.output_image_quality = args.output_image_quality 
facefusion.globals.output_video_encoder = args.output_video_encoder + facefusion.globals.output_video_preset = args.output_video_preset facefusion.globals.output_video_quality = args.output_video_quality - facefusion.globals.keep_fps = args.keep_fps + if is_video(args.target_path): + target_video_resolutions = create_video_resolutions(args.target_path) + if args.output_video_resolution in target_video_resolutions: + facefusion.globals.output_video_resolution = args.output_video_resolution + else: + target_video_resolution = detect_video_resolution(args.target_path) + facefusion.globals.output_video_resolution = pack_resolution(target_video_resolution) + if args.output_video_fps or is_video(args.target_path): + facefusion.globals.output_video_fps = normalize_fps(args.output_video_fps) or detect_video_fps(args.target_path) facefusion.globals.skip_audio = args.skip_audio # frame processors - available_frame_processors = list_module_names('facefusion/processors/frame/modules') + available_frame_processors = list_directory('facefusion/processors/frame/modules') facefusion.globals.frame_processors = args.frame_processors for frame_processor in available_frame_processors: frame_processor_module = load_frame_processor_module(frame_processor) @@ -158,7 +174,8 @@ def apply_args(program : ArgumentParser) -> None: def run(program : ArgumentParser) -> None: apply_args(program) logger.init(facefusion.globals.log_level) - limit_resources() + if facefusion.globals.system_memory_limit > 0: + limit_system_memory(facefusion.globals.system_memory_limit) if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check(): return for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): @@ -178,23 +195,7 @@ def run(program : ArgumentParser) -> None: def destroy() -> None: if facefusion.globals.target_path: clear_temp(facefusion.globals.target_path) - sys.exit() - - -def limit_resources() -> None: - 
if facefusion.globals.max_memory: - memory = facefusion.globals.max_memory * 1024 ** 3 - if platform.system().lower() == 'darwin': - memory = facefusion.globals.max_memory * 1024 ** 6 - if platform.system().lower() == 'windows': - import ctypes - - kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] - kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(memory), ctypes.c_size_t(memory)) - else: - import resource - - resource.setrlimit(resource.RLIMIT_DATA, (memory, memory)) + sys.exit(0) def pre_check() -> bool: @@ -208,14 +209,19 @@ def pre_check() -> bool: def conditional_process() -> None: - conditional_append_reference_faces() + start_time = time.time() for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() if not frame_processor_module.pre_process('output'): return + conditional_append_reference_faces() if is_image(facefusion.globals.target_path): - process_image() + process_image(start_time) if is_video(facefusion.globals.target_path): - process_video() + process_video(start_time) def conditional_append_reference_faces() -> None: @@ -230,12 +236,14 @@ def conditional_append_reference_faces() -> None: append_reference_face('origin', reference_face) if source_face and reference_face: for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): - reference_frame = frame_processor_module.get_reference_frame(source_face, reference_face, reference_frame) - reference_face = get_one_face(reference_frame, facefusion.globals.reference_face_position) - append_reference_face(frame_processor_module.__name__, reference_face) + abstract_reference_frame = frame_processor_module.get_reference_frame(source_face, reference_face, reference_frame) + if numpy.any(abstract_reference_frame): + reference_frame = abstract_reference_frame + reference_face = get_one_face(reference_frame, 
facefusion.globals.reference_face_position) + append_reference_face(frame_processor_module.__name__, reference_face) -def process_image() -> None: +def process_image(start_time : float) -> None: if analyse_image(facefusion.globals.target_path): return shutil.copy2(facefusion.globals.target_path, facefusion.globals.output_path) @@ -250,21 +258,21 @@ def process_image() -> None: logger.error(wording.get('compressing_image_failed'), __name__.upper()) # validate image if is_image(facefusion.globals.output_path): - logger.info(wording.get('processing_image_succeed'), __name__.upper()) + seconds = '{:.2f}'.format((time.time() - start_time) % 60) + logger.info(wording.get('processing_image_succeed').format(seconds = seconds), __name__.upper()) else: logger.error(wording.get('processing_image_failed'), __name__.upper()) -def process_video() -> None: +def process_video(start_time : float) -> None: if analyse_video(facefusion.globals.target_path, facefusion.globals.trim_frame_start, facefusion.globals.trim_frame_end): return - fps = detect_fps(facefusion.globals.target_path) if facefusion.globals.keep_fps else 25.0 # create temp logger.info(wording.get('creating_temp'), __name__.upper()) create_temp(facefusion.globals.target_path) # extract frames - logger.info(wording.get('extracting_frames_fps').format(fps = fps), __name__.upper()) - extract_frames(facefusion.globals.target_path, fps) + logger.info(wording.get('extracting_frames_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) + extract_frames(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps) # process frame temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path) if temp_frame_paths: @@ -276,8 +284,8 @@ def process_video() -> None: logger.error(wording.get('temp_frames_not_found'), __name__.upper()) return # merge video - logger.info(wording.get('merging_video_fps').format(fps = fps), __name__.upper()) - if not 
merge_video(facefusion.globals.target_path, fps): + logger.info(wording.get('merging_video_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) + if not merge_video(facefusion.globals.target_path, facefusion.globals.output_video_fps): logger.error(wording.get('merging_video_failed'), __name__.upper()) return # handle audio @@ -286,7 +294,7 @@ def process_video() -> None: move_temp(facefusion.globals.target_path, facefusion.globals.output_path) else: logger.info(wording.get('restoring_audio'), __name__.upper()) - if not restore_audio(facefusion.globals.target_path, facefusion.globals.output_path): + if not restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, facefusion.globals.output_video_fps): logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) move_temp(facefusion.globals.target_path, facefusion.globals.output_path) # clear temp @@ -294,6 +302,7 @@ def process_video() -> None: clear_temp(facefusion.globals.target_path) # validate video if is_video(facefusion.globals.output_path): - logger.info(wording.get('processing_video_succeed'), __name__.upper()) + seconds = '{:.2f}'.format((time.time() - start_time)) + logger.info(wording.get('processing_video_succeed').format(seconds = seconds), __name__.upper()) else: logger.error(wording.get('processing_video_failed'), __name__.upper()) diff --git a/facefusion/download.py b/facefusion/download.py index d50935f..c26764f 100644 --- a/facefusion/download.py +++ b/facefusion/download.py @@ -1,5 +1,7 @@ import os import subprocess +import platform +import ssl import urllib.request from typing import List from concurrent.futures import ThreadPoolExecutor @@ -10,6 +12,9 @@ import facefusion.globals from facefusion import wording from facefusion.filesystem import is_file +if platform.system().lower() == 'darwin': + ssl._create_default_https_context = ssl._create_unverified_context + def conditional_download(download_directory_path : str, urls : List[str]) -> 
None: with ThreadPoolExecutor() as executor: diff --git a/facefusion/execution_helper.py b/facefusion/execution_helper.py index 9c66865..9bdea33 100644 --- a/facefusion/execution_helper.py +++ b/facefusion/execution_helper.py @@ -1,4 +1,4 @@ -from typing import List +from typing import Any, List import onnxruntime @@ -9,10 +9,25 @@ def encode_execution_providers(execution_providers : List[str]) -> List[str]: def decode_execution_providers(execution_providers: List[str]) -> List[str]: available_execution_providers = onnxruntime.get_available_providers() encoded_execution_providers = encode_execution_providers(available_execution_providers) + return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ] -def map_device(execution_providers : List[str]) -> str: +def apply_execution_provider_options(execution_providers: List[str]) -> List[Any]: + execution_providers_with_options : List[Any] = [] + + for execution_provider in execution_providers: + if execution_provider == 'CUDAExecutionProvider': + execution_providers_with_options.append((execution_provider, + { + 'cudnn_conv_algo_search': 'DEFAULT' + })) + else: + execution_providers_with_options.append(execution_provider) + return execution_providers_with_options + + +def map_torch_backend(execution_providers : List[str]) -> str: if 'CoreMLExecutionProvider' in execution_providers: return 'mps' if 'CUDAExecutionProvider' in execution_providers or 'ROCMExecutionProvider' in execution_providers : diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py index 06960e4..ffb0741 100644 --- a/facefusion/face_analyser.py +++ b/facefusion/face_analyser.py @@ -7,10 +7,11 @@ import onnxruntime import facefusion.globals from facefusion.download import conditional_download from facefusion.face_store import get_static_faces, 
set_static_faces -from facefusion.face_helper import warp_face, create_static_anchors, distance_to_kps, distance_to_bbox, apply_nms +from facefusion.execution_helper import apply_execution_provider_options +from facefusion.face_helper import warp_face_by_kps, create_static_anchors, distance_to_kps, distance_to_bbox, apply_nms from facefusion.filesystem import resolve_relative_path from facefusion.typing import Frame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, Bbox, Kps, Score, Embedding -from facefusion.vision import resize_frame_dimension +from facefusion.vision import resize_frame_resolution, unpack_resolution FACE_ANALYSER = None THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() @@ -56,16 +57,16 @@ def get_face_analyser() -> Any: with THREAD_LOCK: if FACE_ANALYSER is None: if facefusion.globals.face_detector_model == 'retinaface': - face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = facefusion.globals.execution_providers) + face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_detector_model == 'yunet': face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) if facefusion.globals.face_recognizer_model == 'arcface_blendswap': - face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = facefusion.globals.execution_providers) + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_inswapper': - face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_inswapper').get('path'), 
providers = facefusion.globals.execution_providers) + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_inswapper').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_simswap': - face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = facefusion.globals.execution_providers) - gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = facefusion.globals.execution_providers) + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) FACE_ANALYSER =\ { 'face_detector': face_detector, @@ -96,10 +97,10 @@ def pre_check() -> bool: return True -def extract_faces(frame: Frame) -> List[Face]: - face_detector_width, face_detector_height = map(int, facefusion.globals.face_detector_size.split('x')) +def extract_faces(frame : Frame) -> List[Face]: + face_detector_width, face_detector_height = unpack_resolution(facefusion.globals.face_detector_size) frame_height, frame_width, _ = frame.shape - temp_frame = resize_frame_dimension(frame, face_detector_width, face_detector_height) + temp_frame = resize_frame_resolution(frame, face_detector_width, face_detector_height) temp_frame_height, temp_frame_width, _ = temp_frame.shape ratio_height = frame_height / temp_frame_height ratio_width = frame_width / temp_frame_width @@ -135,7 +136,7 @@ def detect_with_retinaface(temp_frame : Frame, temp_frame_height : int, temp_fra stride_height = face_detector_height // feature_stride stride_width = face_detector_width // feature_stride anchors = 
create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) - bbox_raw = (detections[index + feature_map_channel] * feature_stride) + bbox_raw = detections[index + feature_map_channel] * feature_stride kps_raw = detections[index + feature_map_channel * 2] * feature_stride for bbox in distance_to_bbox(anchors, bbox_raw)[keep_indices]: bbox_list.append(numpy.array( @@ -188,7 +189,7 @@ def create_faces(frame : Frame, bbox_list : List[Bbox], kps_list : List[Kps], sc kps = kps_list[index] score = score_list[index] embedding, normed_embedding = calc_embedding(frame, kps) - gender, age = detect_gender_age(frame, kps) + gender, age = detect_gender_age(frame, bbox) faces.append(Face( bbox = bbox, kps = kps, @@ -203,7 +204,7 @@ def create_faces(frame : Frame, bbox_list : List[Bbox], kps_list : List[Kps], sc def calc_embedding(temp_frame : Frame, kps : Kps) -> Tuple[Embedding, Embedding]: face_recognizer = get_face_analyser().get('face_recognizer') - crop_frame, matrix = warp_face(temp_frame, kps, 'arcface_112_v2', (112, 112)) + crop_frame, matrix = warp_face_by_kps(temp_frame, kps, 'arcface_112_v2', (112, 112)) crop_frame = crop_frame.astype(numpy.float32) / 127.5 - 1 crop_frame = crop_frame[:, :, ::-1].transpose(2, 0, 1) crop_frame = numpy.expand_dims(crop_frame, axis = 0) @@ -216,10 +217,15 @@ def calc_embedding(temp_frame : Frame, kps : Kps) -> Tuple[Embedding, Embedding] return embedding, normed_embedding -def detect_gender_age(frame : Frame, kps : Kps) -> Tuple[int, int]: +def detect_gender_age(frame : Frame, bbox : Bbox) -> Tuple[int, int]: gender_age = get_face_analyser().get('gender_age') - crop_frame, affine_matrix = warp_face(frame, kps, 'arcface_112_v2', (96, 96)) - crop_frame = numpy.expand_dims(crop_frame, axis = 0).transpose(0, 3, 1, 2).astype(numpy.float32) + bbox = bbox.reshape(2, -1) + scale = 64 / numpy.subtract(*bbox[::-1]).max() + translation = 48 - bbox.sum(axis = 0) * 0.5 * scale + affine_matrix = numpy.array([[ scale, 0, 
translation[0] ], [ 0, scale, translation[1] ]]) + crop_frame = cv2.warpAffine(frame, affine_matrix, (96, 96)) + crop_frame = crop_frame.astype(numpy.float32)[:, :, ::-1].transpose(2, 0, 1) + crop_frame = numpy.expand_dims(crop_frame, axis = 0) prediction = gender_age.run(None, { gender_age.get_inputs()[0].name: crop_frame @@ -297,10 +303,14 @@ def find_similar_faces(frame : Frame, reference_faces : FaceSet, face_distance : def compare_faces(face : Face, reference_face : Face, face_distance : float) -> bool: + current_face_distance = calc_face_distance(face, reference_face) + return current_face_distance < face_distance + + +def calc_face_distance(face : Face, reference_face : Face) -> float: if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'): - current_face_distance = 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding) - return current_face_distance < face_distance - return False + return 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding) + return 0 def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py index ce7940f..d49a2e6 100644 --- a/facefusion/face_helper.py +++ b/facefusion/face_helper.py @@ -10,47 +10,59 @@ TEMPLATES : Dict[Template, numpy.ndarray[Any, Any]] =\ { 'arcface_112_v1': numpy.array( [ - [ 39.7300, 51.1380 ], - [ 72.2700, 51.1380 ], - [ 56.0000, 68.4930 ], - [ 42.4630, 87.0100 ], - [ 69.5370, 87.0100 ] + [ 0.35473214, 0.45658929 ], + [ 0.64526786, 0.45658929 ], + [ 0.50000000, 0.61154464 ], + [ 0.37913393, 0.77687500 ], + [ 0.62086607, 0.77687500 ] ]), 'arcface_112_v2': numpy.array( [ - [ 38.2946, 51.6963 ], - [ 73.5318, 51.5014 ], - [ 56.0252, 71.7366 ], - [ 41.5493, 92.3655 ], - [ 70.7299, 92.2041 ] + [ 0.34191607, 0.46157411 ], + [ 0.65653393, 0.45983393 ], + [ 0.50022500, 0.64050536 ], + [ 0.37097589, 0.82469196 ], + [ 0.63151696, 0.82325089 ] ]), 'arcface_128_v2': numpy.array( [ - 
[ 46.2946, 51.6963 ], - [ 81.5318, 51.5014 ], - [ 64.0252, 71.7366 ], - [ 49.5493, 92.3655 ], - [ 78.7299, 92.2041 ] + [ 0.36167656, 0.40387734 ], + [ 0.63696719, 0.40235469 ], + [ 0.50019687, 0.56044219 ], + [ 0.38710391, 0.72160547 ], + [ 0.61507734, 0.72034453 ] ]), 'ffhq_512': numpy.array( [ - [ 192.98138, 239.94708 ], - [ 318.90277, 240.1936 ], - [ 256.63416, 314.01935 ], - [ 201.26117, 371.41043 ], - [ 313.08905, 371.15118 ] + [ 0.37691676, 0.46864664 ], + [ 0.62285697, 0.46912813 ], + [ 0.50123859, 0.61331904 ], + [ 0.39308822, 0.72541100 ], + [ 0.61150205, 0.72490465 ] ]) } -def warp_face(temp_frame : Frame, kps : Kps, template : Template, size : Size) -> Tuple[Frame, Matrix]: - normed_template = TEMPLATES.get(template) * size[1] / size[0] +def warp_face_by_kps(temp_frame : Frame, kps : Kps, template : Template, crop_size : Size) -> Tuple[Frame, Matrix]: + normed_template = TEMPLATES.get(template) * crop_size affine_matrix = cv2.estimateAffinePartial2D(kps, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] - crop_frame = cv2.warpAffine(temp_frame, affine_matrix, (size[1], size[1]), borderMode = cv2.BORDER_REPLICATE) + crop_frame = cv2.warpAffine(temp_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) return crop_frame, affine_matrix -def paste_back(temp_frame : Frame, crop_frame: Frame, crop_mask : Mask, affine_matrix : Matrix) -> Frame: +def warp_face_by_bbox(temp_frame : Frame, bbox : Bbox, crop_size : Size) -> Tuple[Frame, Matrix]: + source_kps = numpy.array([[ bbox[0], bbox[1] ], [bbox[2], bbox[1] ], [bbox[0], bbox[3] ]], dtype = numpy.float32) + target_kps = numpy.array([[ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ]], dtype = numpy.float32) + affine_matrix = cv2.getAffineTransform(source_kps, target_kps) + if bbox[2] - bbox[0] > crop_size[0] or bbox[3] - bbox[1] > crop_size[1]: + interpolation_method = cv2.INTER_AREA + else: + interpolation_method = cv2.INTER_LINEAR + crop_frame = 
cv2.warpAffine(temp_frame, affine_matrix, crop_size, flags = interpolation_method) + return crop_frame, affine_matrix + + +def paste_back(temp_frame : Frame, crop_frame : Frame, crop_mask : Mask, affine_matrix : Matrix) -> Frame: inverse_matrix = cv2.invertAffineTransform(affine_matrix) temp_frame_size = temp_frame.shape[:2][::-1] inverse_crop_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_frame_size).clip(0, 1) diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py index 96d877b..9884654 100755 --- a/facefusion/face_masker.py +++ b/facefusion/face_masker.py @@ -8,6 +8,7 @@ import onnxruntime import facefusion.globals from facefusion.typing import Frame, Mask, Padding, FaceMaskRegion, ModelSet +from facefusion.execution_helper import apply_execution_provider_options from facefusion.filesystem import resolve_relative_path from facefusion.download import conditional_download @@ -48,7 +49,7 @@ def get_face_occluder() -> Any: with THREAD_LOCK: if FACE_OCCLUDER is None: model_path = MODELS.get('face_occluder').get('path') - FACE_OCCLUDER = onnxruntime.InferenceSession(model_path, providers = facefusion.globals.execution_providers) + FACE_OCCLUDER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return FACE_OCCLUDER @@ -58,7 +59,7 @@ def get_face_parser() -> Any: with THREAD_LOCK: if FACE_PARSER is None: model_path = MODELS.get('face_parser').get('path') - FACE_PARSER = onnxruntime.InferenceSession(model_path, providers = facefusion.globals.execution_providers) + FACE_PARSER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return FACE_PARSER diff --git a/facefusion/face_store.py b/facefusion/face_store.py index 1f0dfa4..36c8b51 100644 --- a/facefusion/face_store.py +++ b/facefusion/face_store.py @@ -1,5 +1,6 @@ from typing import Optional, List import hashlib +import numpy from facefusion.typing 
import Frame, Face, FaceStore, FaceSet @@ -27,8 +28,8 @@ def clear_static_faces() -> None: FACE_STORE['static_faces'] = {} -def create_frame_hash(frame: Frame) -> Optional[str]: - return hashlib.sha1(frame.tobytes()).hexdigest() if frame.any() else None +def create_frame_hash(frame : Frame) -> Optional[str]: + return hashlib.sha1(frame.tobytes()).hexdigest() if numpy.any(frame) else None def get_reference_faces() -> Optional[FaceSet]: diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index 4cbb38e..9da029d 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -1,10 +1,10 @@ -from typing import List +from typing import List, Optional import subprocess import facefusion.globals from facefusion import logger +from facefusion.typing import OutputVideoPreset, Fps from facefusion.filesystem import get_temp_frames_pattern, get_temp_output_video_path -from facefusion.vision import detect_fps def run_ffmpeg(args : List[str]) -> bool: @@ -24,20 +24,20 @@ def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: return subprocess.Popen(commands, stdin = subprocess.PIPE) -def extract_frames(target_path : str, fps : float) -> bool: +def extract_frames(target_path : str, video_resolution : str, video_fps : Fps) -> bool: temp_frame_compression = round(31 - (facefusion.globals.temp_frame_quality * 0.31)) trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', str(temp_frame_compression), '-pix_fmt', 'rgb24' ] if trim_frame_start is not None and trim_frame_end is not None: - commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) ]) + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) elif 
trim_frame_start is not None: - commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(fps) ]) + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) elif trim_frame_end is not None: - commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(fps) ]) + commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) else: - commands.extend([ '-vf', 'fps=' + str(fps) ]) + commands.extend([ '-vf', 'scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) commands.extend([ '-vsync', '0', temp_frames_pattern ]) return run_ffmpeg(commands) @@ -48,34 +48,51 @@ def compress_image(output_path : str) -> bool: return run_ffmpeg(commands) -def merge_video(target_path : str, fps : float) -> bool: +def merge_video(target_path : str, video_fps : Fps) -> bool: temp_output_video_path = get_temp_output_video_path(target_path) temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') - commands = [ '-hwaccel', 'auto', '-r', str(fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] + commands = [ '-hwaccel', 'auto', '-r', str(video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) - commands.extend([ '-crf', str(output_video_compression) ]) + commands.extend([ '-crf', str(output_video_compression), '-preset', facefusion.globals.output_video_preset ]) if facefusion.globals.output_video_encoder in [ 'libvpx-vp9' ]: output_video_compression = round(63 - (facefusion.globals.output_video_quality * 0.63)) commands.extend([ '-crf', str(output_video_compression) ]) if facefusion.globals.output_video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: 
output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) - commands.extend([ '-cq', str(output_video_compression) ]) + commands.extend([ '-cq', str(output_video_compression), '-preset', map_nvenc_preset(facefusion.globals.output_video_preset) ]) commands.extend([ '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_output_video_path ]) return run_ffmpeg(commands) -def restore_audio(target_path : str, output_path : str) -> bool: - fps = detect_fps(target_path) +def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool: trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end temp_output_video_path = get_temp_output_video_path(target_path) commands = [ '-hwaccel', 'auto', '-i', temp_output_video_path ] if trim_frame_start is not None: - start_time = trim_frame_start / fps + start_time = trim_frame_start / video_fps commands.extend([ '-ss', str(start_time) ]) if trim_frame_end is not None: - end_time = trim_frame_end / fps + end_time = trim_frame_end / video_fps commands.extend([ '-to', str(end_time) ]) commands.extend([ '-i', target_path, '-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ]) return run_ffmpeg(commands) + + +def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: + return 'p1' + if output_video_preset == 'faster': + return 'p2' + if output_video_preset == 'fast': + return 'p3' + if output_video_preset == 'medium': + return 'p4' + if output_video_preset == 'slow': + return 'p5' + if output_video_preset == 'slower': + return 'p6' + if output_video_preset == 'veryslow': + return 'p7' + return None diff --git a/facefusion/filesystem.py b/facefusion/filesystem.py index ce97281..87923d2 100644 --- a/facefusion/filesystem.py +++ b/facefusion/filesystem.py @@ -84,8 +84,8 @@ def resolve_relative_path(path : str) -> str: return 
os.path.abspath(os.path.join(os.path.dirname(__file__), path)) -def list_module_names(path : str) -> Optional[List[str]]: - if os.path.exists(path): - files = os.listdir(path) +def list_directory(directory_path : str) -> Optional[List[str]]: + if is_directory(directory_path): + files = os.listdir(directory_path) return [ Path(file).stem for file in files if not Path(file).stem.startswith(('.', '__')) ] return None diff --git a/facefusion/globals.py b/facefusion/globals.py index fe7aed3..b632441 100755 --- a/facefusion/globals.py +++ b/facefusion/globals.py @@ -1,6 +1,6 @@ from typing import List, Optional -from facefusion.typing import LogLevel, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, OutputVideoEncoder, FaceDetectorModel, FaceRecognizerModel, TempFrameFormat, Padding +from facefusion.typing import LogLevel, VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, OutputVideoEncoder, OutputVideoPreset, FaceDetectorModel, FaceRecognizerModel, TempFrameFormat, Padding # general source_paths : Optional[List[str]] = None @@ -14,7 +14,9 @@ log_level : Optional[LogLevel] = None execution_providers : List[str] = [] execution_thread_count : Optional[int] = None execution_queue_count : Optional[int] = None -max_memory : Optional[int] = None +# memory +video_memory_strategy : Optional[VideoMemoryStrategy] = None +system_memory_limit : Optional[int] = None # face analyser face_analyser_order : Optional[FaceAnalyserOrder] = None face_analyser_age : Optional[FaceAnalyserAge] = None @@ -42,8 +44,10 @@ keep_temp : Optional[bool] = None # output creation output_image_quality : Optional[int] = None output_video_encoder : Optional[OutputVideoEncoder] = None +output_video_preset : Optional[OutputVideoPreset] = None output_video_quality : Optional[int] = None -keep_fps : Optional[bool] = None +output_video_resolution : Optional[str] = None 
+output_video_fps : Optional[float] = None skip_audio : Optional[bool] = None # frame processors frame_processors : List[str] = [] diff --git a/facefusion/installer.py b/facefusion/installer.py index 1b1d563..cf802ae 100644 --- a/facefusion/installer.py +++ b/facefusion/installer.py @@ -25,7 +25,7 @@ if platform.system().lower() == 'linux' or platform.system().lower() == 'windows TORCH['cuda'] = 'cu118' TORCH['cuda-nightly'] = 'cu121' ONNXRUNTIMES['cuda'] = ('onnxruntime-gpu', '1.16.3') - ONNXRUNTIMES['cuda-nightly'] = ('ort-nightly-gpu', '1.17.0.dev20231205004') + ONNXRUNTIMES['cuda-nightly'] = ('onnxruntime-gpu', '1.17.0') ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.16.0') if platform.system().lower() == 'linux': TORCH['rocm'] = 'rocm5.6' @@ -72,9 +72,9 @@ def run(program : ArgumentParser) -> None: subprocess.call([ 'pip', 'uninstall', 'torch', '-y', '-q' ]) if torch_wheel == 'default': - subprocess.call([ 'pip', 'install', '-r', 'requirements.txt' ]) + subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) else: - subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--extra-index-url', 'https://download.pytorch.org/whl/' + torch_wheel ]) + subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--extra-index-url', 'https://download.pytorch.org/whl/' + torch_wheel, '--force-reinstall' ]) if onnxruntime == 'rocm': if python_id in [ 'cp39', 'cp310', 'cp311' ]: wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+rocm56-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' @@ -82,11 +82,11 @@ def run(program : ArgumentParser) -> None: wheel_url = 'https://download.onnxruntime.ai/' + wheel_name subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) subprocess.call([ 'pip', 'uninstall', wheel_path, '-y', '-q' ]) - subprocess.call([ 'pip', 'install', wheel_path ]) + subprocess.call([ 'pip', 'install', wheel_path, 
'--force-reinstall' ]) os.remove(wheel_path) else: subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) if onnxruntime == 'cuda-nightly': - subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ort-cuda-12-nightly/pypi/simple' ]) + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://pkgs.dev.azure.com/onnxruntime/onnxruntime/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) else: - subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version ]) + subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ]) diff --git a/facefusion/logger.py b/facefusion/logger.py index 56fe367..e6e3ba0 100644 --- a/facefusion/logger.py +++ b/facefusion/logger.py @@ -29,6 +29,14 @@ def error(message : str, scope : str) -> None: get_package_logger().error('[' + scope + '] ' + message) +def enable() -> None: + get_package_logger().disabled = False + + +def disable() -> None: + get_package_logger().disabled = True + + def get_log_levels() -> Dict[LogLevel, int]: return\ { diff --git a/facefusion/memory.py b/facefusion/memory.py new file mode 100644 index 0000000..845c7d3 --- /dev/null +++ b/facefusion/memory.py @@ -0,0 +1,21 @@ +import platform + +if platform.system().lower() == 'windows': + import ctypes +else: + import resource + + +def limit_system_memory(system_memory_limit : int = 1) -> bool: + if platform.system().lower() == 'darwin': + system_memory_limit = system_memory_limit * (1024 ** 6) + else: + system_memory_limit = system_memory_limit * (1024 ** 3) + try: + if platform.system().lower() == 'windows': + ctypes.windll.kernel32.SetProcessWorkingSetSize(-1, ctypes.c_size_t(system_memory_limit), ctypes.c_size_t(system_memory_limit)) # type: ignore[attr-defined] + else: + 
resource.setrlimit(resource.RLIMIT_DATA, (system_memory_limit, system_memory_limit)) + return True + except Exception: + return False diff --git a/facefusion/metadata.py b/facefusion/metadata.py index 181bc41..e63dde7 100644 --- a/facefusion/metadata.py +++ b/facefusion/metadata.py @@ -2,7 +2,7 @@ METADATA =\ { 'name': 'FaceFusion', 'description': 'Next generation face swapper and enhancer', - 'version': '2.1.3', + 'version': '2.2.0', 'license': 'MIT', 'author': 'Henry Ruhs', 'url': 'https://facefusion.io' diff --git a/facefusion/normalizer.py b/facefusion/normalizer.py index eee93e5..354e567 100644 --- a/facefusion/normalizer.py +++ b/facefusion/normalizer.py @@ -2,7 +2,7 @@ from typing import List, Optional import os from facefusion.filesystem import is_file, is_directory -from facefusion.typing import Padding +from facefusion.typing import Padding, Fps def normalize_output_path(source_paths : List[str], target_path : str, output_path : str) -> Optional[str]: @@ -32,3 +32,13 @@ def normalize_padding(padding : Optional[List[int]]) -> Optional[Padding]: if padding and len(padding) == 4: return tuple(padding) # type: ignore[return-value] return None + + +def normalize_fps(fps : Optional[float]) -> Optional[Fps]: + if fps is not None: + if fps < 1.0: + return 1.0 + if fps > 60.0: + return 60.0 + return fps + return None diff --git a/facefusion/processors/frame/choices.py b/facefusion/processors/frame/choices.py index 64e35c4..1894f30 100755 --- a/facefusion/processors/frame/choices.py +++ b/facefusion/processors/frame/choices.py @@ -1,13 +1,13 @@ from typing import List -import numpy +from facefusion.common_helper import create_int_range from facefusion.processors.frame.typings import FaceSwapperModel, FaceEnhancerModel, FrameEnhancerModel, FaceDebuggerItem face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial' ] face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 
'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer' ] frame_enhancer_models : List[FrameEnhancerModel] = [ 'real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus' ] - -face_enhancer_blend_range : List[int] = numpy.arange(0, 101, 1).tolist() -frame_enhancer_blend_range : List[int] = numpy.arange(0, 101, 1).tolist() - face_debugger_items : List[FaceDebuggerItem] = [ 'bbox', 'kps', 'face-mask', 'score' ] + +face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) + diff --git a/facefusion/processors/frame/core.py b/facefusion/processors/frame/core.py index fdbbe88..a236ddb 100644 --- a/facefusion/processors/frame/core.py +++ b/facefusion/processors/frame/core.py @@ -21,13 +21,14 @@ FRAME_PROCESSORS_METHODS =\ 'register_args', 'apply_args', 'pre_check', + 'post_check', 'pre_process', + 'post_process', 'get_reference_frame', 'process_frame', 'process_frames', 'process_image', - 'process_video', - 'post_process' + 'process_video' ] @@ -38,10 +39,12 @@ def load_frame_processor_module(frame_processor : str) -> Any: if not hasattr(frame_processor_module, method_name): raise NotImplementedError except ModuleNotFoundError as exception: + logger.error(wording.get('frame_processor_not_loaded').format(frame_processor = frame_processor), __name__.upper()) logger.debug(exception.msg, __name__.upper()) - sys.exit(wording.get('frame_processor_not_loaded').format(frame_processor = frame_processor)) + sys.exit(1) except NotImplementedError: - sys.exit(wording.get('frame_processor_not_implemented').format(frame_processor = frame_processor)) + logger.error(wording.get('frame_processor_not_implemented').format(frame_processor = frame_processor), __name__.upper()) + sys.exit(1) return frame_processor_module @@ -73,11 +76,11 @@ def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], }) with ThreadPoolExecutor(max_workers = 
facefusion.globals.execution_thread_count) as executor: futures = [] - queue_temp_frame_paths : Queue[str] = create_queue(temp_frame_paths) + queue_frame_paths : Queue[str] = create_queue(temp_frame_paths) queue_per_future = max(len(temp_frame_paths) // facefusion.globals.execution_thread_count * facefusion.globals.execution_queue_count, 1) - while not queue_temp_frame_paths.empty(): - payload_temp_frame_paths = pick_queue(queue_temp_frame_paths, queue_per_future) - future = executor.submit(process_frames, source_paths, payload_temp_frame_paths, progress.update) + while not queue_frame_paths.empty(): + submit_frame_paths = pick_queue(queue_frame_paths, queue_per_future) + future = executor.submit(process_frames, source_paths, submit_frame_paths, progress.update) futures.append(future) for future_done in as_completed(futures): future_done.result() diff --git a/facefusion/processors/frame/modules/face_debugger.py b/facefusion/processors/frame/modules/face_debugger.py index 9ec4e9e..060a2e1 100755 --- a/facefusion/processors/frame/modules/face_debugger.py +++ b/facefusion/processors/frame/modules/face_debugger.py @@ -5,13 +5,13 @@ import numpy import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import wording +from facefusion import config, wording from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode from facefusion.vision import read_image, read_static_image, read_static_images, write_image -from facefusion.face_helper import warp_face +from facefusion.face_helper import warp_face_by_kps from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser from 
facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices @@ -35,7 +35,7 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-debugger-items', help = wording.get('face_debugger_items_help').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = [ 'kps', 'face-mask' ], choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') + program.add_argument('--face-debugger-items', help = wording.get('face_debugger_items_help').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'kps face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') def apply_args(program : ArgumentParser) -> None: @@ -47,27 +47,34 @@ def pre_check() -> bool: return True +def post_check() -> bool: + return True + + def pre_process(mode : ProcessMode) -> bool: return True def post_process() -> None: - clear_frame_processor() - clear_face_analyser() - clear_content_analyser() - clear_face_occluder() - clear_face_parser() - read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() -def debug_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: +def debug_face(source_face : Face, target_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: primary_color = (0, 0, 255) secondary_color = (0, 255, 0) bounding_box = target_face.bbox.astype(numpy.int32) + temp_frame = temp_frame.copy() if 'bbox' in 
frame_processors_globals.face_debugger_items: cv2.rectangle(temp_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), secondary_color, 2) if 'face-mask' in frame_processors_globals.face_debugger_items: - crop_frame, affine_matrix = warp_face(temp_frame, target_face.kps, 'arcface_128_v2', (128, 512)) + crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, 'arcface_128_v2', (512, 512)) inverse_matrix = cv2.invertAffineTransform(affine_matrix) temp_frame_size = temp_frame.shape[:2][::-1] crop_mask_list = [] @@ -80,9 +87,9 @@ def debug_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Fr crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) crop_mask = (crop_mask * 255).astype(numpy.uint8) inverse_mask_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_frame_size) - inverse_mask_frame_edges = cv2.threshold(inverse_mask_frame, 100, 255, cv2.THRESH_BINARY)[1] - inverse_mask_frame_edges[inverse_mask_frame_edges > 0] = 255 - inverse_mask_contours = cv2.findContours(inverse_mask_frame_edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] + inverse_mask_frame = cv2.threshold(inverse_mask_frame, 100, 255, cv2.THRESH_BINARY)[1] + inverse_mask_frame[inverse_mask_frame > 0] = 255 + inverse_mask_contours = cv2.findContours(inverse_mask_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] cv2.drawContours(temp_frame, inverse_mask_contours, -1, primary_color, 2) if bounding_box[3] - bounding_box[1] > 60 and bounding_box[2] - bounding_box[0] > 60: if 'kps' in frame_processors_globals.face_debugger_items: @@ -90,9 +97,9 @@ def debug_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Fr for index in range(kps.shape[0]): cv2.circle(temp_frame, (kps[index][0], kps[index][1]), 3, primary_color, -1) if 'score' in frame_processors_globals.face_debugger_items: - score_text = str(round(target_face.score, 2)) - score_position = (bounding_box[0] + 10, bounding_box[1] + 20) - cv2.putText(temp_frame, score_text, 
score_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + face_score_text = str(round(target_face.score, 2)) + face_score_position = (bounding_box[0] + 10, bounding_box[1] + 20) + cv2.putText(temp_frame, face_score_text, face_score_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) return temp_frame @@ -105,16 +112,16 @@ def process_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Fr similar_faces = find_similar_faces(temp_frame, reference_faces, facefusion.globals.reference_face_distance) if similar_faces: for similar_face in similar_faces: - temp_frame = debug_face(source_face, similar_face, temp_frame) + temp_frame = debug_face(source_face, similar_face, reference_faces, temp_frame) if 'one' in facefusion.globals.face_selector_mode: target_face = get_one_face(temp_frame) if target_face: - temp_frame = debug_face(source_face, target_face, temp_frame) + temp_frame = debug_face(source_face, target_face, None, temp_frame) if 'many' in facefusion.globals.face_selector_mode: many_faces = get_many_faces(temp_frame) if many_faces: for target_face in many_faces: - temp_frame = debug_face(source_face, target_face, temp_frame) + temp_frame = debug_face(source_face, target_face, None, temp_frame) return temp_frame diff --git a/facefusion/processors/frame/modules/face_enhancer.py b/facefusion/processors/frame/modules/face_enhancer.py index f9997b3..397f73c 100755 --- a/facefusion/processors/frame/modules/face_enhancer.py +++ b/facefusion/processors/frame/modules/face_enhancer.py @@ -7,9 +7,10 @@ import onnxruntime import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import logger, wording +from facefusion import config, logger, wording from facefusion.face_analyser import get_many_faces, clear_face_analyser, find_similar_faces, get_one_face -from facefusion.face_helper import warp_face, paste_back +from facefusion.execution_helper import apply_execution_provider_options +from 
facefusion.face_helper import warp_face_by_kps, paste_back from facefusion.content_analyser import clear_content_analyser from facefusion.face_store import get_reference_faces from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel @@ -60,7 +61,7 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_256.onnx', 'path': resolve_relative_path('../.assets/models/gpen_bfr_256.onnx'), 'template': 'arcface_128_v2', - 'size': (128, 256) + 'size': (256, 256) }, 'gpen_bfr_512': { @@ -86,7 +87,7 @@ def get_frame_processor() -> Any: with THREAD_LOCK: if FRAME_PROCESSOR is None: model_path = get_options('model').get('path') - FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = facefusion.globals.execution_providers) + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return FRAME_PROCESSOR @@ -114,8 +115,8 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-enhancer-model', help = wording.get('frame_processor_model_help'), default = 'gfpgan_1.4', choices = frame_processors_choices.face_enhancer_models) - program.add_argument('--face-enhancer-blend', help = wording.get('frame_processor_blend_help'), type = int, default = 80, choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) + program.add_argument('--face-enhancer-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.face_enhancer_model', 'gfpgan_1.4'), choices = frame_processors_choices.face_enhancer_models) + program.add_argument('--face-enhancer-blend', help = wording.get('frame_processor_blend_help'), type = int, default = 
config.get_int_value('frame_processors.face_enhancer_blend', '80'), choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) def apply_args(program : ArgumentParser) -> None: @@ -132,7 +133,7 @@ def pre_check() -> bool: return True -def pre_process(mode : ProcessMode) -> bool: +def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): @@ -141,6 +142,10 @@ def pre_process(mode : ProcessMode) -> bool: elif not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False + return True + + +def pre_process(mode : ProcessMode) -> bool: if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) return False @@ -151,18 +156,19 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: - clear_frame_processor() - clear_face_analyser() - clear_content_analyser() - clear_face_occluder() - read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() -def enhance_face(target_face: Face, temp_frame: Frame) -> Frame: - frame_processor = get_frame_processor() +def enhance_face(target_face: Face, temp_frame : Frame) -> Frame: model_template = get_options('model').get('template') model_size = get_options('model').get('size') - crop_frame, affine_matrix = warp_face(temp_frame, target_face.kps, model_template, model_size) + 
crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, model_template, model_size) crop_mask_list =\ [ create_static_box_mask(crop_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, (0, 0, 0, 0)) @@ -170,14 +176,7 @@ def enhance_face(target_face: Face, temp_frame: Frame) -> Frame: if 'occlusion' in facefusion.globals.face_mask_types: crop_mask_list.append(create_occlusion_mask(crop_frame)) crop_frame = prepare_crop_frame(crop_frame) - frame_processor_inputs = {} - for frame_processor_input in frame_processor.get_inputs(): - if frame_processor_input.name == 'input': - frame_processor_inputs[frame_processor_input.name] = crop_frame - if frame_processor_input.name == 'weight': - frame_processor_inputs[frame_processor_input.name] = numpy.array([ 1 ], dtype = numpy.double) - with THREAD_SEMAPHORE: - crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + crop_frame = apply_enhance(crop_frame) crop_frame = normalize_crop_frame(crop_frame) crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) paste_frame = paste_back(temp_frame, crop_frame, crop_mask, affine_matrix) @@ -185,6 +184,21 @@ def enhance_face(target_face: Face, temp_frame: Frame) -> Frame: return temp_frame +def apply_enhance(crop_frame : Frame) -> Frame: + frame_processor = get_frame_processor() + frame_processor_inputs = {} + + for frame_processor_input in frame_processor.get_inputs(): + if frame_processor_input.name == 'input': + frame_processor_inputs[frame_processor_input.name] = crop_frame + if frame_processor_input.name == 'weight': + weight = numpy.array([ 1 ], dtype = numpy.double) + frame_processor_inputs[frame_processor_input.name] = weight + with THREAD_SEMAPHORE: + crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_frame + + def prepare_crop_frame(crop_frame : Frame) -> Frame: crop_frame = crop_frame[:, :, ::-1] / 255.0 crop_frame = (crop_frame - 0.5) / 0.5 @@ -207,7 +221,7 @@ def blend_frame(temp_frame : Frame, 
paste_frame : Frame) -> Frame: return temp_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Optional[Frame]: +def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: return enhance_face(target_face, temp_frame) diff --git a/facefusion/processors/frame/modules/face_swapper.py b/facefusion/processors/frame/modules/face_swapper.py index 5df0234..59752cc 100755 --- a/facefusion/processors/frame/modules/face_swapper.py +++ b/facefusion/processors/frame/modules/face_swapper.py @@ -1,5 +1,6 @@ from typing import Any, List, Literal, Optional from argparse import ArgumentParser +import platform import threading import numpy import onnx @@ -8,9 +9,10 @@ from onnx import numpy_helper import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import logger, wording +from facefusion import config, logger, wording +from facefusion.execution_helper import apply_execution_provider_options from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser -from facefusion.face_helper import warp_face, paste_back +from facefusion.face_helper import warp_face_by_kps, paste_back from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, Embedding @@ -33,7 +35,7 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/blendswap_256.onnx', 'path': resolve_relative_path('../.assets/models/blendswap_256.onnx'), 'template': 'ffhq_512', - 'size': (512, 256), + 'size': (256, 256), 'mean': [ 0.0, 0.0, 0.0 ], 'standard_deviation': [ 1.0, 1.0, 1.0 ] }, @@ -63,7 +65,7 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_256.onnx', 'path': 
resolve_relative_path('../.assets/models/simswap_256.onnx'), 'template': 'arcface_112_v1', - 'size': (112, 256), + 'size': (256, 256), 'mean': [ 0.485, 0.456, 0.406 ], 'standard_deviation': [ 0.229, 0.224, 0.225 ] }, @@ -73,7 +75,7 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/simswap_512_unofficial.onnx', 'path': resolve_relative_path('../.assets/models/simswap_512_unofficial.onnx'), 'template': 'arcface_112_v1', - 'size': (112, 512), + 'size': (512, 512), 'mean': [ 0.0, 0.0, 0.0 ], 'standard_deviation': [ 1.0, 1.0, 1.0 ] } @@ -87,7 +89,7 @@ def get_frame_processor() -> Any: with THREAD_LOCK: if FRAME_PROCESSOR is None: model_path = get_options('model').get('path') - FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = facefusion.globals.execution_providers) + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return FRAME_PROCESSOR @@ -132,7 +134,11 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-swapper-model', help = wording.get('frame_processor_model_help'), default = 'inswapper_128', choices = frame_processors_choices.face_swapper_models) + if platform.system().lower() == 'darwin': + face_swapper_model_fallback = 'inswapper_128' + else: + face_swapper_model_fallback = 'inswapper_128_fp16' + program.add_argument('--face-swapper-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.face_swapper_model', face_swapper_model_fallback), choices = frame_processors_choices.face_swapper_models) def apply_args(program : ArgumentParser) -> None: @@ -154,7 +160,7 @@ def pre_check() -> bool: return True -def pre_process(mode : ProcessMode) -> bool: +def post_check() -> bool: model_url = get_options('model').get('url') model_path = 
get_options('model').get('path') if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): @@ -163,6 +169,10 @@ def pre_process(mode : ProcessMode) -> bool: elif not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False + return True + + +def pre_process(mode : ProcessMode) -> bool: if not are_images(facefusion.globals.source_paths): logger.error(wording.get('select_image_source') + wording.get('exclamation_mark'), NAME) return False @@ -180,28 +190,42 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: - clear_frame_processor() - clear_model_matrix() - clear_face_analyser() - clear_content_analyser() - clear_face_occluder() - clear_face_parser() - read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + clear_model_matrix() + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() def swap_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: - frame_processor = get_frame_processor() model_template = get_options('model').get('template') model_size = get_options('model').get('size') - model_type = get_options('model').get('type') - crop_frame, affine_matrix = warp_face(temp_frame, target_face.kps, model_template, model_size) + crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, model_template, model_size) crop_mask_list = [] + if 'box' in facefusion.globals.face_mask_types: crop_mask_list.append(create_static_box_mask(crop_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding)) if 'occlusion' in facefusion.globals.face_mask_types: crop_mask_list.append(create_occlusion_mask(crop_frame)) crop_frame = 
prepare_crop_frame(crop_frame) + crop_frame = apply_swap(source_face, crop_frame) + crop_frame = normalize_crop_frame(crop_frame) + if 'region' in facefusion.globals.face_mask_types: + crop_mask_list.append(create_region_mask(crop_frame, facefusion.globals.face_mask_regions)) + crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) + temp_frame = paste_back(temp_frame, crop_frame, crop_mask, affine_matrix) + return temp_frame + + +def apply_swap(source_face : Face, crop_frame : Frame) -> Frame: + frame_processor = get_frame_processor() + model_type = get_options('model').get('type') frame_processor_inputs = {} + for frame_processor_input in frame_processor.get_inputs(): if frame_processor_input.name == 'source': if model_type == 'blendswap': @@ -211,17 +235,12 @@ def swap_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Fra if frame_processor_input.name == 'target': frame_processor_inputs[frame_processor_input.name] = crop_frame crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0] - crop_frame = normalize_crop_frame(crop_frame) - if 'region' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_region_mask(crop_frame, facefusion.globals.face_mask_regions)) - crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) - temp_frame = paste_back(temp_frame, crop_frame, crop_mask, affine_matrix) - return temp_frame + return crop_frame def prepare_source_frame(source_face : Face) -> Frame: source_frame = read_static_image(facefusion.globals.source_paths[0]) - source_frame, _ = warp_face(source_frame, source_face.kps, 'arcface_112_v2', (112, 112)) + source_frame, _ = warp_face_by_kps(source_frame, source_face.kps, 'arcface_112_v2', (112, 112)) source_frame = source_frame[:, :, ::-1] / 255.0 source_frame = source_frame.transpose(2, 0, 1) source_frame = numpy.expand_dims(source_frame, axis = 0).astype(numpy.float32) @@ -252,7 +271,7 @@ def prepare_crop_frame(crop_frame : Frame) -> Frame: def 
normalize_crop_frame(crop_frame : Frame) -> Frame: crop_frame = crop_frame.transpose(1, 2, 0) crop_frame = (crop_frame * 255.0).round() - crop_frame = crop_frame[:, :, ::-1].astype(numpy.uint8) + crop_frame = crop_frame[:, :, ::-1] return crop_frame diff --git a/facefusion/processors/frame/modules/frame_enhancer.py b/facefusion/processors/frame/modules/frame_enhancer.py index 4e5cca5..e23cd56 100644 --- a/facefusion/processors/frame/modules/frame_enhancer.py +++ b/facefusion/processors/frame/modules/frame_enhancer.py @@ -7,12 +7,12 @@ from realesrgan import RealESRGANer import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import logger, wording +from facefusion import config, logger, wording from facefusion.face_analyser import clear_face_analyser from facefusion.content_analyser import clear_content_analyser from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel from facefusion.common_helper import create_metavar -from facefusion.execution_helper import map_device +from facefusion.execution_helper import map_torch_backend from facefusion.filesystem import is_file, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, write_image @@ -61,7 +61,7 @@ def get_frame_processor() -> Any: num_out_ch = 3, scale = model_scale ), - device = map_device(facefusion.globals.execution_providers), + device = map_torch_backend(facefusion.globals.execution_providers), scale = model_scale ) return FRAME_PROCESSOR @@ -91,8 +91,8 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--frame-enhancer-model', help = wording.get('frame_processor_model_help'), default = 'real_esrgan_x2plus', choices = frame_processors_choices.frame_enhancer_models) - program.add_argument('--frame-enhancer-blend', help = 
wording.get('frame_processor_blend_help'), type = int, default = 80, choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) + program.add_argument('--frame-enhancer-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'real_esrgan_x2plus'), choices = frame_processors_choices.frame_enhancer_models) + program.add_argument('--frame-enhancer-blend', help = wording.get('frame_processor_blend_help'), type = int, default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) def apply_args(program : ArgumentParser) -> None: @@ -109,7 +109,7 @@ def pre_check() -> bool: return True -def pre_process(mode : ProcessMode) -> bool: +def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): @@ -118,6 +118,10 @@ def pre_process(mode : ProcessMode) -> bool: elif not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False + return True + + +def pre_process(mode : ProcessMode) -> bool: if mode == 'output' and not facefusion.globals.output_path: logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) return False @@ -125,10 +129,12 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: - clear_frame_processor() - clear_face_analyser() - clear_content_analyser() - read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + read_static_image.cache_clear() + if 
facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() def enhance_frame(temp_frame : Frame) -> Frame: diff --git a/facefusion/processors/frame/typings.py b/facefusion/processors/frame/typings.py index a397eef..074e733 100644 --- a/facefusion/processors/frame/typings.py +++ b/facefusion/processors/frame/typings.py @@ -3,5 +3,4 @@ from typing import Literal FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial'] FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer'] FrameEnhancerModel = Literal['real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus'] - -FaceDebuggerItem = Literal['bbox', 'kps', 'face-mask', 'score'] +FaceDebuggerItem = Literal['bbox', 'kps', 'face-mask', 'score', 'distance'] diff --git a/facefusion/typing.py b/facefusion/typing.py index 2964040..7a2459f 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -25,13 +25,19 @@ FaceStore = TypedDict('FaceStore', Frame = numpy.ndarray[Any, Any] Mask = numpy.ndarray[Any, Any] Matrix = numpy.ndarray[Any, Any] + +Fps = float Padding = Tuple[int, int, int, int] +Resolution = Tuple[int, int] Update_Process = Callable[[], None] Process_Frames = Callable[[List[str], List[str], Update_Process], None] -LogLevel = Literal['error', 'warn', 'info', 'debug'] + Template = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] ProcessMode = Literal['output', 'preview', 'stream'] + +LogLevel = Literal['error', 'warn', 'info', 'debug'] +VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant'] FaceSelectorMode = Literal['reference', 'one', 'many'] FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best'] FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior'] @@ -40,8 +46,9 @@ FaceDetectorModel = 
Literal['retinaface', 'yunet'] FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap'] FaceMaskType = Literal['box', 'occlusion', 'region'] FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip'] -TempFrameFormat = Literal['jpg', 'png'] +TempFrameFormat = Literal['jpg', 'png', 'bmp'] OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc'] +OutputVideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'] ModelValue = Dict[str, Any] ModelSet = Dict[str, ModelValue] diff --git a/facefusion/uis/choices.py b/facefusion/uis/choices.py index 92ae549..cae0256 100644 --- a/facefusion/uis/choices.py +++ b/facefusion/uis/choices.py @@ -2,6 +2,6 @@ from typing import List from facefusion.uis.typing import WebcamMode -common_options : List[str] = [ 'keep-fps', 'keep-temp', 'skip-audio', 'skip-download' ] +common_options : List[str] = [ 'keep-temp', 'skip-audio', 'skip-download' ] webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ] webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ] diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index bc5c08b..e8280c7 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -6,11 +6,11 @@ import gradio import facefusion.globals from facefusion import wording -from facefusion.face_analyser import get_face_analyser from facefusion.face_store import clear_static_faces from facefusion.processors.frame.core import get_frame_processors_modules -from facefusion.vision import count_video_frame_total -from facefusion.core import limit_resources, conditional_process +from facefusion.vision import count_video_frame_total, detect_video_resolution, 
detect_video_fps, pack_resolution +from facefusion.core import conditional_process +from facefusion.memory import limit_system_memory from facefusion.normalizer import normalize_output_path from facefusion.filesystem import clear_temp from facefusion.uis.core import get_ui_component @@ -77,6 +77,8 @@ def listen() -> None: def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]: facefusion.globals.source_paths = [ '.assets/examples/source.jpg' ] + facefusion.globals.temp_frame_format = 'bmp' + facefusion.globals.output_video_preset = 'ultrafast' target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ] benchmark_results = [] if target_paths: @@ -88,8 +90,8 @@ def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[ def pre_process() -> None: - limit_resources() - get_face_analyser() + if facefusion.globals.system_memory_limit > 0: + limit_system_memory(facefusion.globals.system_memory_limit) for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): frame_processor_module.get_frame_processor() @@ -101,9 +103,12 @@ def post_process() -> None: def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]: process_times = [] total_fps = 0.0 - for i in range(benchmark_cycles): + for index in range(benchmark_cycles): facefusion.globals.target_path = target_path facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_paths, facefusion.globals.target_path, tempfile.gettempdir()) + target_video_resolution = detect_video_resolution(facefusion.globals.target_path) + facefusion.globals.output_video_resolution = pack_resolution(target_video_resolution) + facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) video_frame_total = count_video_frame_total(facefusion.globals.target_path) start_time = time.perf_counter() conditional_process() diff --git 
a/facefusion/uis/components/benchmark_options.py b/facefusion/uis/components/benchmark_options.py index 75767a8..9150061 100644 --- a/facefusion/uis/components/benchmark_options.py +++ b/facefusion/uis/components/benchmark_options.py @@ -20,7 +20,7 @@ def render() -> None: ) BENCHMARK_CYCLES_SLIDER = gradio.Slider( label = wording.get('benchmark_cycles_slider_label'), - value = 3, + value = 5, step = 1, minimum = 1, maximum = 10 diff --git a/facefusion/uis/components/common_options.py b/facefusion/uis/components/common_options.py index 0b3e2d3..17d7364 100644 --- a/facefusion/uis/components/common_options.py +++ b/facefusion/uis/components/common_options.py @@ -12,8 +12,6 @@ def render() -> None: global COMMON_OPTIONS_CHECKBOX_GROUP value = [] - if facefusion.globals.keep_fps: - value.append('keep-fps') if facefusion.globals.keep_temp: value.append('keep-temp') if facefusion.globals.skip_audio: @@ -32,7 +30,6 @@ def listen() -> None: def update(common_options : List[str]) -> None: - facefusion.globals.keep_fps = 'keep-fps' in common_options facefusion.globals.keep_temp = 'keep-temp' in common_options facefusion.globals.skip_audio = 'skip-audio' in common_options facefusion.globals.skip_download = 'skip-download' in common_options diff --git a/facefusion/uis/components/face_analyser.py b/facefusion/uis/components/face_analyser.py index cf0b23b..77bf3a6 100644 --- a/facefusion/uis/components/face_analyser.py +++ b/facefusion/uis/components/face_analyser.py @@ -66,11 +66,11 @@ def render() -> None: def listen() -> None: - FACE_ANALYSER_ORDER_DROPDOWN.select(update_face_analyser_order, inputs = FACE_ANALYSER_ORDER_DROPDOWN) - FACE_ANALYSER_AGE_DROPDOWN.select(update_face_analyser_age, inputs = FACE_ANALYSER_AGE_DROPDOWN) - FACE_ANALYSER_GENDER_DROPDOWN.select(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN) + FACE_ANALYSER_ORDER_DROPDOWN.change(update_face_analyser_order, inputs = FACE_ANALYSER_ORDER_DROPDOWN) + 
FACE_ANALYSER_AGE_DROPDOWN.change(update_face_analyser_age, inputs = FACE_ANALYSER_AGE_DROPDOWN) + FACE_ANALYSER_GENDER_DROPDOWN.change(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN) FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = FACE_DETECTOR_MODEL_DROPDOWN) - FACE_DETECTOR_SIZE_DROPDOWN.select(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN) + FACE_DETECTOR_SIZE_DROPDOWN.change(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN) FACE_DETECTOR_SCORE_SLIDER.change(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER) diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py index 90ebf3d..5d9bb90 100644 --- a/facefusion/uis/components/face_selector.py +++ b/facefusion/uis/components/face_selector.py @@ -7,9 +7,9 @@ import facefusion.choices from facefusion import wording from facefusion.face_store import clear_static_faces, clear_reference_faces from facefusion.vision import get_video_frame, read_static_image, normalize_frame_color +from facefusion.filesystem import is_image, is_video from facefusion.face_analyser import get_many_faces from facefusion.typing import Frame, FaceSelectorMode -from facefusion.filesystem import is_image, is_video from facefusion.uis.core import get_ui_component, register_ui_component from facefusion.uis.typing import ComponentName @@ -57,7 +57,7 @@ def render() -> None: def listen() -> None: - FACE_SELECTOR_MODE_DROPDOWN.select(update_face_selector_mode, inputs = FACE_SELECTOR_MODE_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ]) + FACE_SELECTOR_MODE_DROPDOWN.change(update_face_selector_mode, inputs = FACE_SELECTOR_MODE_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ]) REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_reference_face_position) REFERENCE_FACE_DISTANCE_SLIDER.change(update_reference_face_distance, inputs = 
REFERENCE_FACE_DISTANCE_SLIDER) multi_component_names : List[ComponentName] =\ diff --git a/facefusion/uis/components/frame_processors.py b/facefusion/uis/components/frame_processors.py index ac68734..e86fd44 100644 --- a/facefusion/uis/components/frame_processors.py +++ b/facefusion/uis/components/frame_processors.py @@ -4,7 +4,7 @@ import gradio import facefusion.globals from facefusion import wording from facefusion.processors.frame.core import load_frame_processor_module, clear_frame_processors_modules -from facefusion.filesystem import list_module_names +from facefusion.filesystem import list_directory from facefusion.uis.core import register_ui_component FRAME_PROCESSORS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None @@ -36,5 +36,5 @@ def update_frame_processors(frame_processors : List[str]) -> gradio.CheckboxGrou def sort_frame_processors(frame_processors : List[str]) -> list[str]: - available_frame_processors = list_module_names('facefusion/processors/frame/modules') + available_frame_processors = list_directory('facefusion/processors/frame/modules') return sorted(available_frame_processors, key = lambda frame_processor : frame_processors.index(frame_processor) if frame_processor in frame_processors else len(frame_processors)) diff --git a/facefusion/uis/components/frame_processors_options.py b/facefusion/uis/components/frame_processors_options.py index 40f73ca..89044b5 100755 --- a/facefusion/uis/components/frame_processors_options.py +++ b/facefusion/uis/components/frame_processors_options.py @@ -56,7 +56,7 @@ def render() -> None: step = frame_processors_choices.frame_enhancer_blend_range[1] - frame_processors_choices.frame_enhancer_blend_range[0], minimum = frame_processors_choices.frame_enhancer_blend_range[0], maximum = frame_processors_choices.frame_enhancer_blend_range[-1], - visible = 'face_enhancer' in facefusion.globals.frame_processors + visible = 'frame_enhancer' in facefusion.globals.frame_processors ) 
FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP = gradio.CheckboxGroup( label = wording.get('face_debugger_items_checkbox_group_label'), diff --git a/facefusion/uis/components/limit_resources.py b/facefusion/uis/components/limit_resources.py deleted file mode 100644 index 6703cf1..0000000 --- a/facefusion/uis/components/limit_resources.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import Optional -import gradio - -import facefusion.globals -import facefusion.choices -from facefusion import wording - -MAX_MEMORY_SLIDER : Optional[gradio.Slider] = None - - -def render() -> None: - global MAX_MEMORY_SLIDER - - MAX_MEMORY_SLIDER = gradio.Slider( - label = wording.get('max_memory_slider_label'), - step = facefusion.choices.max_memory_range[1] - facefusion.choices.max_memory_range[0], - minimum = facefusion.choices.max_memory_range[0], - maximum = facefusion.choices.max_memory_range[-1] - ) - - -def listen() -> None: - MAX_MEMORY_SLIDER.change(update_max_memory, inputs = MAX_MEMORY_SLIDER) - - -def update_max_memory(max_memory : int) -> None: - facefusion.globals.max_memory = max_memory if max_memory > 0 else None diff --git a/facefusion/uis/components/memory.py b/facefusion/uis/components/memory.py new file mode 100644 index 0000000..cda4554 --- /dev/null +++ b/facefusion/uis/components/memory.py @@ -0,0 +1,41 @@ +from typing import Optional +import gradio + +import facefusion.globals +import facefusion.choices +from facefusion.typing import VideoMemoryStrategy +from facefusion import wording + +VIDEO_MEMORY_STRATEGY : Optional[gradio.Dropdown] = None +SYSTEM_MEMORY_LIMIT_SLIDER : Optional[gradio.Slider] = None + + +def render() -> None: + global VIDEO_MEMORY_STRATEGY + global SYSTEM_MEMORY_LIMIT_SLIDER + + VIDEO_MEMORY_STRATEGY = gradio.Dropdown( + label = wording.get('video_memory_strategy_dropdown_label'), + choices = facefusion.choices.video_memory_strategies, + value = facefusion.globals.video_memory_strategy + ) + SYSTEM_MEMORY_LIMIT_SLIDER = gradio.Slider( + label = 
wording.get('system_memory_limit_slider_label'), + step =facefusion.choices.system_memory_limit_range[1] - facefusion.choices.system_memory_limit_range[0], + minimum = facefusion.choices.system_memory_limit_range[0], + maximum = facefusion.choices.system_memory_limit_range[-1], + value = facefusion.globals.system_memory_limit + ) + + +def listen() -> None: + VIDEO_MEMORY_STRATEGY.change(update_video_memory_strategy, inputs = VIDEO_MEMORY_STRATEGY) + SYSTEM_MEMORY_LIMIT_SLIDER.change(update_system_memory_limit, inputs = SYSTEM_MEMORY_LIMIT_SLIDER) + + +def update_video_memory_strategy(video_memory_strategy : VideoMemoryStrategy) -> None: + facefusion.globals.video_memory_strategy = video_memory_strategy + + +def update_system_memory_limit(system_memory_limit : int) -> None: + facefusion.globals.system_memory_limit = system_memory_limit diff --git a/facefusion/uis/components/output.py b/facefusion/uis/components/output.py index 5d73885..fb6460e 100644 --- a/facefusion/uis/components/output.py +++ b/facefusion/uis/components/output.py @@ -3,10 +3,11 @@ import gradio import facefusion.globals from facefusion import wording -from facefusion.core import limit_resources, conditional_process +from facefusion.core import conditional_process +from facefusion.memory import limit_system_memory from facefusion.uis.core import get_ui_component from facefusion.normalizer import normalize_output_path -from facefusion.filesystem import is_image, is_video, clear_temp +from facefusion.filesystem import clear_temp, is_image, is_video OUTPUT_IMAGE : Optional[gradio.Image] = None OUTPUT_VIDEO : Optional[gradio.Video] = None @@ -47,7 +48,8 @@ def listen() -> None: def start(output_path : str) -> Tuple[gradio.Image, gradio.Video]: facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_paths, facefusion.globals.target_path, output_path) - limit_resources() + if facefusion.globals.system_memory_limit > 0: + 
limit_system_memory(facefusion.globals.system_memory_limit) conditional_process() if is_image(facefusion.globals.output_path): return gradio.Image(value = facefusion.globals.output_path, visible = True), gradio.Video(value = None, visible = False) diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index 6b32a11..01a406e 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -5,22 +5,29 @@ import gradio import facefusion.globals import facefusion.choices from facefusion import wording -from facefusion.typing import OutputVideoEncoder +from facefusion.typing import OutputVideoEncoder, OutputVideoPreset, Fps from facefusion.filesystem import is_image, is_video from facefusion.uis.typing import ComponentName from facefusion.uis.core import get_ui_component, register_ui_component +from facefusion.vision import detect_video_fps, create_video_resolutions, detect_video_resolution, pack_resolution OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_QUALITY_SLIDER : Optional[gradio.Slider] = None +OUTPUT_VIDEO_FPS_SLIDER : Optional[gradio.Slider] = None def render() -> None: global OUTPUT_PATH_TEXTBOX global OUTPUT_IMAGE_QUALITY_SLIDER global OUTPUT_VIDEO_ENCODER_DROPDOWN + global OUTPUT_VIDEO_PRESET_DROPDOWN + global OUTPUT_VIDEO_RESOLUTION_DROPDOWN global OUTPUT_VIDEO_QUALITY_SLIDER + global OUTPUT_VIDEO_FPS_SLIDER OUTPUT_PATH_TEXTBOX = gradio.Textbox( label = wording.get('output_path_textbox_label'), @@ -41,6 +48,12 @@ def render() -> None: value = facefusion.globals.output_video_encoder, visible = is_video(facefusion.globals.target_path) ) + OUTPUT_VIDEO_PRESET_DROPDOWN = gradio.Dropdown( + label 
= wording.get('output_video_preset_dropdown_label'), + choices = facefusion.choices.output_video_presets, + value = facefusion.globals.output_video_preset, + visible = is_video(facefusion.globals.target_path) + ) OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider( label = wording.get('output_video_quality_slider_label'), value = facefusion.globals.output_video_quality, @@ -49,14 +62,31 @@ def render() -> None: maximum = facefusion.choices.output_video_quality_range[-1], visible = is_video(facefusion.globals.target_path) ) + OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('output_video_resolution_dropdown_label'), + choices = create_video_resolutions(facefusion.globals.target_path), + value = facefusion.globals.output_video_resolution, + visible = is_video(facefusion.globals.target_path) + ) + OUTPUT_VIDEO_FPS_SLIDER = gradio.Slider( + label = wording.get('output_video_fps_slider_label'), + value = facefusion.globals.output_video_fps, + step = 0.01, + minimum = 1, + maximum = 60, + visible = is_video(facefusion.globals.target_path) + ) register_ui_component('output_path_textbox', OUTPUT_PATH_TEXTBOX) def listen() -> None: OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX) OUTPUT_IMAGE_QUALITY_SLIDER.change(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) - OUTPUT_VIDEO_ENCODER_DROPDOWN.select(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) + OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) + OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) OUTPUT_VIDEO_QUALITY_SLIDER.change(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) + OUTPUT_VIDEO_RESOLUTION_DROPDOWN.change(update_output_video_resolution, inputs = OUTPUT_VIDEO_RESOLUTION_DROPDOWN) + OUTPUT_VIDEO_FPS_SLIDER.change(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) multi_component_names : 
List[ComponentName] =\ [ 'source_image', @@ -67,15 +97,19 @@ def listen() -> None: component = get_ui_component(component_name) if component: for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER ]) + getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) -def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Slider]: +def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: if is_image(facefusion.globals.target_path): - return gradio.Slider(visible = True), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Slider(visible = True), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) if is_video(facefusion.globals.target_path): - return gradio.Slider(visible = False), gradio.Dropdown(visible = True), gradio.Slider(visible = True) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + target_video_resolution = detect_video_resolution(facefusion.globals.target_path) + output_video_resolution = pack_resolution(target_video_resolution) + output_video_resolutions = create_video_resolutions(facefusion.globals.target_path) + output_video_fps = detect_video_fps(facefusion.globals.target_path) + return gradio.Slider(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = output_video_resolution, choices = output_video_resolutions), 
gradio.Slider(visible = True, value = output_video_fps) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) def update_output_path(output_path : str) -> None: @@ -90,5 +124,17 @@ def update_output_video_encoder(output_video_encoder: OutputVideoEncoder) -> Non facefusion.globals.output_video_encoder = output_video_encoder +def update_output_video_preset(output_video_preset : OutputVideoPreset) -> None: + facefusion.globals.output_video_preset = output_video_preset + + def update_output_video_quality(output_video_quality : int) -> None: facefusion.globals.output_video_quality = output_video_quality + + +def update_output_video_resolution(output_video_resolution : str) -> None: + facefusion.globals.output_video_resolution = output_video_resolution + + +def update_output_video_fps(output_video_fps : Fps) -> None: + facefusion.globals.output_video_fps = output_video_fps diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py index 6658874..fe01f93 100755 --- a/facefusion/uis/components/preview.py +++ b/facefusion/uis/components/preview.py @@ -1,17 +1,18 @@ from typing import Any, Dict, List, Optional +from time import sleep import cv2 import gradio import facefusion.globals -from facefusion import wording +from facefusion import wording, logger from facefusion.core import conditional_append_reference_faces from facefusion.face_store import clear_static_faces, get_reference_faces, clear_reference_faces from facefusion.typing import Frame, Face, FaceSet -from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_dimension, read_static_image, read_static_images +from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_resolution, read_static_image, 
read_static_images +from facefusion.filesystem import is_image, is_video from facefusion.face_analyser import get_average_face, clear_face_analyser from facefusion.content_analyser import analyse_frame from facefusion.processors.frame.core import load_frame_processor_module -from facefusion.filesystem import is_image, is_video from facefusion.uis.typing import ComponentName from facefusion.uis.core import get_ui_component, register_ui_component @@ -94,9 +95,7 @@ def listen() -> None: change_one_component_names : List[ComponentName] =\ [ 'face_debugger_items_checkbox_group', - 'face_enhancer_model_dropdown', 'face_enhancer_blend_slider', - 'frame_enhancer_model_dropdown', 'frame_enhancer_blend_slider', 'face_selector_mode_dropdown', 'reference_face_distance_slider', @@ -115,7 +114,9 @@ def listen() -> None: change_two_component_names : List[ComponentName] =\ [ 'frame_processors_checkbox_group', + 'face_enhancer_model_dropdown', 'face_swapper_model_dropdown', + 'frame_enhancer_model_dropdown', 'face_detector_model_dropdown', 'face_detector_size_dropdown', 'face_detector_score_slider' @@ -130,22 +131,29 @@ def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image: clear_face_analyser() clear_reference_faces() clear_static_faces() + sleep(0.5) return update_preview_image(frame_number) def update_preview_image(frame_number : int = 0) -> gradio.Image: + for frame_processor in facefusion.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() conditional_append_reference_faces() source_frames = read_static_images(facefusion.globals.source_paths) source_face = get_average_face(source_frames) - reference_face = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None if 
is_image(facefusion.globals.target_path): target_frame = read_static_image(facefusion.globals.target_path) - preview_frame = process_preview_frame(source_face, reference_face, target_frame) + preview_frame = process_preview_frame(source_face, reference_faces, target_frame) preview_frame = normalize_frame_color(preview_frame) return gradio.Image(value = preview_frame) if is_video(facefusion.globals.target_path): temp_frame = get_video_frame(facefusion.globals.target_path, frame_number) - preview_frame = process_preview_frame(source_face, reference_face, temp_frame) + preview_frame = process_preview_frame(source_face, reference_faces, temp_frame) preview_frame = normalize_frame_color(preview_frame) return gradio.Image(value = preview_frame) return gradio.Image(value = None) @@ -159,12 +167,14 @@ def update_preview_frame_slider() -> gradio.Slider: def process_preview_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: - temp_frame = resize_frame_dimension(temp_frame, 640, 640) + temp_frame = resize_frame_resolution(temp_frame, 640, 640) if analyse_frame(temp_frame): return cv2.GaussianBlur(temp_frame, (99, 99), 0) for frame_processor in facefusion.globals.frame_processors: frame_processor_module = load_frame_processor_module(frame_processor) + logger.disable() if frame_processor_module.pre_process('preview'): + logger.enable() temp_frame = frame_processor_module.process_frame( source_face, reference_faces, diff --git a/facefusion/uis/components/temp_frame.py b/facefusion/uis/components/temp_frame.py index d07f836..21d0312 100644 --- a/facefusion/uis/components/temp_frame.py +++ b/facefusion/uis/components/temp_frame.py @@ -33,7 +33,7 @@ def render() -> None: def listen() -> None: - TEMP_FRAME_FORMAT_DROPDOWN.select(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN) + TEMP_FRAME_FORMAT_DROPDOWN.change(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN) TEMP_FRAME_QUALITY_SLIDER.change(update_temp_frame_quality, 
inputs = TEMP_FRAME_QUALITY_SLIDER) target_video = get_ui_component('target_video') if target_video: diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index d1217a6..f3fcbe4 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -1,22 +1,23 @@ -from typing import Optional, Generator, Deque -from concurrent.futures import ThreadPoolExecutor -from collections import deque +from typing import Optional, Generator, Deque, List import os import platform import subprocess import cv2 import gradio +from time import sleep +from concurrent.futures import ThreadPoolExecutor +from collections import deque from tqdm import tqdm import facefusion.globals from facefusion import logger, wording from facefusion.content_analyser import analyse_stream -from facefusion.typing import Frame, Face +from facefusion.typing import Frame, Face, Fps from facefusion.face_analyser import get_average_face -from facefusion.processors.frame.core import get_frame_processors_modules +from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.ffmpeg import open_ffmpeg -from facefusion.vision import normalize_frame_color, read_static_images -from facefusion.uis.typing import StreamMode, WebcamMode +from facefusion.vision import normalize_frame_color, read_static_images, unpack_resolution +from facefusion.uis.typing import StreamMode, WebcamMode, ComponentName from facefusion.uis.core import get_ui_component WEBCAM_CAPTURE : Optional[cv2.VideoCapture] = None @@ -73,28 +74,36 @@ def listen() -> None: if webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) WEBCAM_STOP_BUTTON.click(stop, cancels = start_event) - source_image = get_ui_component('source_image') - if source_image: - for method in [ 'upload', 
'change', 'clear' ]: - getattr(source_image, method)(stop, cancels = start_event) + change_two_component_names : List[ComponentName] =\ + [ + 'frame_processors_checkbox_group', + 'face_swapper_model_dropdown', + 'face_enhancer_model_dropdown', + 'frame_enhancer_model_dropdown', + 'source_image' + ] + for component_name in change_two_component_names: + component = get_ui_component(component_name) + if component: + component.change(update, cancels = start_event) -def start(webcam_mode : WebcamMode, resolution : str, fps : float) -> Generator[Frame, None, None]: +def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[Frame, None, None]: facefusion.globals.face_selector_mode = 'one' facefusion.globals.face_analyser_order = 'large-small' source_frames = read_static_images(facefusion.globals.source_paths) source_face = get_average_face(source_frames) stream = None if webcam_mode in [ 'udp', 'v4l2' ]: - stream = open_stream(webcam_mode, resolution, fps) # type: ignore[arg-type] - webcam_width, webcam_height = map(int, resolution.split('x')) + stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) # type: ignore[arg-type] + webcam_width, webcam_height = unpack_resolution(webcam_resolution) webcam_capture = get_webcam_capture() if webcam_capture and webcam_capture.isOpened(): webcam_capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) # type: ignore[attr-defined] webcam_capture.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width) webcam_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height) - webcam_capture.set(cv2.CAP_PROP_FPS, fps) - for capture_frame in multi_process_capture(source_face, webcam_capture, fps): + webcam_capture.set(cv2.CAP_PROP_FPS, webcam_fps) + for capture_frame in multi_process_capture(source_face, webcam_capture, webcam_fps): if webcam_mode == 'inline': yield normalize_frame_color(capture_frame) else: @@ -105,14 +114,14 @@ def start(webcam_mode : WebcamMode, resolution : str, fps : float) -> Generator[ 
yield None -def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, fps : float) -> Generator[Frame, None, None]: +def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[Frame, None, None]: with tqdm(desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: with ThreadPoolExecutor(max_workers = facefusion.globals.execution_thread_count) as executor: futures = [] deque_capture_frames : Deque[Frame] = deque() while webcam_capture and webcam_capture.isOpened(): _, capture_frame = webcam_capture.read() - if analyse_stream(capture_frame, fps): + if analyse_stream(capture_frame, webcam_fps): return future = executor.submit(process_stream_frame, source_face, capture_frame) futures.append(future) @@ -125,6 +134,15 @@ def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, yield deque_capture_frames.popleft() +def update() -> None: + for frame_processor in facefusion.globals.frame_processors: + frame_processor_module = load_frame_processor_module(frame_processor) + while not frame_processor_module.post_check(): + logger.disable() + sleep(0.5) + logger.enable() + + def stop() -> gradio.Image: clear_webcam_capture() return gradio.Image(value = None) @@ -132,7 +150,9 @@ def stop() -> gradio.Image: def process_stream_frame(source_face : Face, temp_frame : Frame) -> Frame: for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): + logger.disable() if frame_processor_module.pre_process('stream'): + logger.enable() temp_frame = frame_processor_module.process_frame( source_face, None, @@ -141,8 +161,8 @@ def process_stream_frame(source_face : Face, temp_frame : Frame) -> Frame: return temp_frame -def open_stream(stream_mode : StreamMode, resolution : str, fps : float) -> subprocess.Popen[bytes]: - commands = [ '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-s', 
resolution, '-r', str(fps), '-i', '-' ] +def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: + commands = [ '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-s', stream_resolution, '-r', str(stream_fps), '-i', '-'] if stream_mode == 'udp': commands.extend([ '-b:v', '2000k', '-f', 'mpegts', 'udp://localhost:27000?pkt_size=1316' ]) if stream_mode == 'v4l2': diff --git a/facefusion/uis/core.py b/facefusion/uis/core.py index 9f7b6cd..3af6a56 100644 --- a/facefusion/uis/core.py +++ b/facefusion/uis/core.py @@ -28,10 +28,12 @@ def load_ui_layout_module(ui_layout : str) -> Any: if not hasattr(ui_layout_module, method_name): raise NotImplementedError except ModuleNotFoundError as exception: + logger.error(wording.get('ui_layout_not_loaded').format(ui_layout=ui_layout), __name__.upper()) logger.debug(exception.msg, __name__.upper()) - sys.exit(wording.get('ui_layout_not_loaded').format(ui_layout = ui_layout)) + sys.exit(1) except NotImplementedError: - sys.exit(wording.get('ui_layout_not_implemented').format(ui_layout = ui_layout)) + logger.error(wording.get('ui_layout_not_implemented').format(ui_layout = ui_layout), __name__.upper()) + sys.exit(1) return ui_layout_module diff --git a/facefusion/uis/layouts/benchmark.py b/facefusion/uis/layouts/benchmark.py index ae9c320..83cde03 100644 --- a/facefusion/uis/layouts/benchmark.py +++ b/facefusion/uis/layouts/benchmark.py @@ -2,7 +2,7 @@ import gradio import facefusion.globals from facefusion.download import conditional_download -from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, limit_resources, benchmark_options, benchmark +from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, benchmark_options, benchmark def pre_check() -> bool: @@ -40,7 +40,7 @@ def render() -> gradio.Blocks: 
execution_thread_count.render() execution_queue_count.render() with gradio.Blocks(): - limit_resources.render() + memory.render() with gradio.Blocks(): benchmark_options.render() with gradio.Column(scale = 5): @@ -55,7 +55,7 @@ def listen() -> None: execution.listen() execution_thread_count.listen() execution_queue_count.listen() - limit_resources.listen() + memory.listen() benchmark.listen() diff --git a/facefusion/uis/layouts/default.py b/facefusion/uis/layouts/default.py index 4537297..ef8ac40 100755 --- a/facefusion/uis/layouts/default.py +++ b/facefusion/uis/layouts/default.py @@ -1,6 +1,6 @@ import gradio -from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, limit_resources, temp_frame, output_options, common_options, source, target, output, preview, trim_frame, face_analyser, face_selector, face_masker +from facefusion.uis.components import about, frame_processors, frame_processors_options, execution, execution_thread_count, execution_queue_count, memory, temp_frame, output_options, common_options, source, target, output, preview, trim_frame, face_analyser, face_selector, face_masker def pre_check() -> bool: @@ -25,13 +25,11 @@ def render() -> gradio.Blocks: execution_thread_count.render() execution_queue_count.render() with gradio.Blocks(): - limit_resources.render() + memory.render() with gradio.Blocks(): temp_frame.render() with gradio.Blocks(): output_options.render() - with gradio.Blocks(): - common_options.render() with gradio.Column(scale = 2): with gradio.Blocks(): source.render() @@ -50,6 +48,8 @@ def render() -> gradio.Blocks: face_masker.render() with gradio.Blocks(): face_analyser.render() + with gradio.Blocks(): + common_options.render() return layout @@ -59,10 +59,9 @@ def listen() -> None: execution.listen() execution_thread_count.listen() execution_queue_count.listen() - limit_resources.listen() + memory.listen() temp_frame.listen() 
output_options.listen() - common_options.listen() source.listen() target.listen() output.listen() @@ -71,6 +70,7 @@ def listen() -> None: face_selector.listen() face_masker.listen() face_analyser.listen() + common_options.listen() def run(ui : gradio.Blocks) -> None: diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py index b2c57d3..fe65112 100644 --- a/facefusion/uis/typing.py +++ b/facefusion/uis/typing.py @@ -39,5 +39,6 @@ ComponentName = Literal\ 'webcam_resolution_dropdown', 'webcam_fps_slider' ] + WebcamMode = Literal['inline', 'udp', 'v4l2'] StreamMode = Literal['udp', 'v4l2'] diff --git a/facefusion/vision.py b/facefusion/vision.py index 4706bf7..fc5e84a 100644 --- a/facefusion/vision.py +++ b/facefusion/vision.py @@ -1,12 +1,14 @@ -from typing import Optional, List +from typing import Optional, List, Tuple from functools import lru_cache import cv2 -from facefusion.typing import Frame +from facefusion.typing import Frame, Resolution +from facefusion.choices import video_template_sizes +from facefusion.filesystem import is_image, is_video def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[Frame]: - if video_path: + if is_video(video_path): video_capture = cv2.VideoCapture(video_path) if video_capture.isOpened(): frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT) @@ -18,16 +20,8 @@ def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[Frame] return None -def detect_fps(video_path : str) -> Optional[float]: - if video_path: - video_capture = cv2.VideoCapture(video_path) - if video_capture.isOpened(): - return video_capture.get(cv2.CAP_PROP_FPS) - return None - - def count_video_frame_total(video_path : str) -> int: - if video_path: + if is_video(video_path): video_capture = cv2.VideoCapture(video_path) if video_capture.isOpened(): video_frame_total = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) @@ -36,12 +30,70 @@ def count_video_frame_total(video_path : str) -> int: return 0 -def 
normalize_frame_color(frame : Frame) -> Frame: - return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) +def detect_video_fps(video_path : str) -> Optional[float]: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + video_fps = video_capture.get(cv2.CAP_PROP_FPS) + video_capture.release() + return video_fps + return None -def resize_frame_dimension(frame : Frame, max_width : int, max_height : int) -> Frame: +def detect_video_resolution(video_path : str) -> Optional[Tuple[float, float]]: + if is_video(video_path): + video_capture = cv2.VideoCapture(video_path) + if video_capture.isOpened(): + width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) + height = video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) + video_capture.release() + return width, height + return None + + +def create_video_resolutions(video_path : str) -> Optional[List[str]]: + temp_resolutions = [] + video_resolutions = [] + video_resolution = detect_video_resolution(video_path) + + if video_resolution: + width, height = video_resolution + temp_resolutions.append(normalize_resolution(video_resolution)) + for template_size in video_template_sizes: + if width > height: + temp_resolutions.append(normalize_resolution((template_size * width / height, template_size))) + else: + temp_resolutions.append(normalize_resolution((template_size, template_size * height / width))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp in temp_resolutions: + video_resolutions.append(pack_resolution(temp)) + return video_resolutions + return None + + +def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: + width, height = resolution + + if width and height: + normalize_width = round(width / 2) * 2 + normalize_height = round(height / 2) * 2 + return normalize_width, normalize_height + return 0, 0 + + +def pack_resolution(resolution : Tuple[float, float]) -> str: + width, height = normalize_resolution(resolution) + return str(width) + 'x' + str(height) + 
+ +def unpack_resolution(resolution : str) -> Resolution: + width, height = map(int, resolution.split('x')) + return width, height + + +def resize_frame_resolution(frame : Frame, max_width : int, max_height : int) -> Frame: height, width = frame.shape[:2] + if height > max_height or width > max_width: scale = min(max_height / height, max_width / width) new_width = int(width * scale) @@ -50,6 +102,10 @@ def resize_frame_dimension(frame : Frame, max_width : int, max_height : int) -> return frame +def normalize_frame_color(frame : Frame) -> Frame: + return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + + @lru_cache(maxsize = 128) def read_static_image(image_path : str) -> Optional[Frame]: return read_image(image_path) @@ -64,7 +120,7 @@ def read_static_images(image_paths : List[str]) -> Optional[List[Frame]]: def read_image(image_path : str) -> Optional[Frame]: - if image_path: + if is_image(image_path): return cv2.imread(image_path) return None diff --git a/facefusion/wording.py b/facefusion/wording.py index 78f3cd3..a02455a 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -12,7 +12,6 @@ WORDING =\ 'frame_processor_blend_help': 'specify the blend amount for the frame processor', 'face_debugger_items_help': 'specify the face debugger items (choices: {choices})', 'ui_layouts_help': 'choose from the available ui layouts (choices: {choices}, ...)', - 'keep_fps_help': 'preserve the frames per second (fps) of the target', 'keep_temp_help': 'retain temporary frames after processing', 'skip_audio_help': 'omit audio from the target', 'face_analyser_order_help': 'specify the order used for the face analyser', @@ -35,8 +34,12 @@ WORDING =\ 'temp_frame_quality_help': 'specify the image quality used for frame extraction', 'output_image_quality_help': 'specify the quality used for the output image', 'output_video_encoder_help': 'specify the encoder used for the output video', + 'output_video_preset_help': 'specify the preset used for the output video', 
'output_video_quality_help': 'specify the quality used for the output video', - 'max_memory_help': 'specify the maximum amount of ram to be used (in gb)', + 'output_video_resolution_help': 'specify the resolution used for the output video', + 'output_video_fps_help': 'specify the frames per second (fps) used for the output video', + 'video_memory_strategy_help': 'specify strategy to handle the video memory', + 'system_memory_limit_help': 'specify the amount (gb) of system memory to be used', 'execution_providers_help': 'choose from the available execution providers (choices: {choices}, ...)', 'execution_thread_count_help': 'specify the number of execution threads', 'execution_queue_count_help': 'specify the number of execution queries', @@ -44,22 +47,22 @@ WORDING =\ 'headless_help': 'run the program in headless mode', 'log_level_help': 'choose from the available log levels', 'creating_temp': 'Creating temporary resources', - 'extracting_frames_fps': 'Extracting frames with {fps} FPS', + 'extracting_frames_fps': 'Extracting frames with {video_fps} FPS', 'analysing': 'Analysing', 'processing': 'Processing', 'downloading': 'Downloading', 'temp_frames_not_found': 'Temporary frames not found', 'compressing_image': 'Compressing image', 'compressing_image_failed': 'Compressing image failed', - 'merging_video_fps': 'Merging video with {fps} FPS', + 'merging_video_fps': 'Merging video with {video_fps} FPS', 'merging_video_failed': 'Merging video failed', 'skipping_audio': 'Skipping audio', 'restoring_audio': 'Restoring audio', 'restoring_audio_skipped': 'Restoring audio skipped', 'clearing_temp': 'Clearing temporary resources', - 'processing_image_succeed': 'Processing to image succeed', + 'processing_image_succeed': 'Processing to image succeed in {seconds} seconds', 'processing_image_failed': 'Processing to image failed', - 'processing_video_succeed': 'Processing to video succeed', + 'processing_video_succeed': 'Processing to video succeed in {seconds} seconds', 
'processing_video_failed': 'Processing to video failed', 'model_download_not_done': 'Download of the model is not done', 'model_file_not_present': 'File of the model is not present', @@ -98,12 +101,16 @@ WORDING =\ 'face_mask_padding_left_slider_label': 'FACE MASK PADDING LEFT', 'face_mask_padding_right_slider_label': 'FACE MASK PADDING RIGHT', 'face_mask_region_checkbox_group_label': 'FACE MASK REGIONS', - 'max_memory_slider_label': 'MAX MEMORY', + 'video_memory_strategy_dropdown_label': 'VIDEO MEMORY STRATEGY', + 'system_memory_limit_slider_label': 'SYSTEM MEMORY LIMIT', 'output_image_or_video_label': 'OUTPUT', 'output_path_textbox_label': 'OUTPUT PATH', 'output_image_quality_slider_label': 'OUTPUT IMAGE QUALITY', 'output_video_encoder_dropdown_label': 'OUTPUT VIDEO ENCODER', + 'output_video_preset_dropdown_label': 'OUTPUT VIDEO PRESET', 'output_video_quality_slider_label': 'OUTPUT VIDEO QUALITY', + 'output_video_resolution_dropdown_label': 'OUTPUT VIDEO RESOLUTION', + 'output_video_fps_slider_label': 'OUTPUT VIDEO FPS', 'preview_image_label': 'PREVIEW', 'preview_frame_slider_label': 'PREVIEW FRAME', 'frame_processors_checkbox_group_label': 'FRAME PROCESSORS', diff --git a/requirements.txt b/requirements.txt index d692985..d4f0bc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,5 +7,5 @@ onnxruntime==1.16.3 opencv-python==4.8.1.78 psutil==5.9.6 realesrgan==0.3.0 -torch==2.1.1 +torch==2.1.2 tqdm==4.66.1 diff --git a/tests/test_cli.py b/tests/test_cli.py index 0935222..cad4ffb 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,7 +2,6 @@ import subprocess import sys import pytest -from facefusion import wording from facefusion.download import conditional_download @@ -21,7 +20,7 @@ def test_image_to_image() -> None: run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) assert run.returncode == 0 - assert wording.get('processing_image_succeed') in run.stdout.decode() + assert 'image succeed' in run.stdout.decode() 
def test_image_to_video() -> None: @@ -29,4 +28,4 @@ def test_image_to_video() -> None: run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) assert run.returncode == 0 - assert wording.get('processing_video_succeed') in run.stdout.decode() + assert 'video succeed' in run.stdout.decode() diff --git a/tests/test_common_helper.py b/tests/test_common_helper.py index 40ef7f3..339e9a1 100644 --- a/tests/test_common_helper.py +++ b/tests/test_common_helper.py @@ -1,10 +1,15 @@ -from facefusion.common_helper import create_metavar, create_range +from facefusion.common_helper import create_metavar, create_int_range, create_float_range def test_create_metavar() -> None: assert create_metavar([ 1, 2, 3, 4, 5 ]) == '[1-5]' -def test_create_range() -> None: - assert create_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ] - assert create_range(0.0, 0.2, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20 ] +def test_create_int_range() -> None: + assert create_int_range(0, 2, 1) == [ 0, 1, 2 ] + assert create_float_range(0, 1, 1) == [ 0, 1 ] + + +def test_create_float_range() -> None: + assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ] + assert create_float_range(0.0, 0.2, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20 ] diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..8c830fd --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,96 @@ +from configparser import ConfigParser +import pytest + +from facefusion import config + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + config.CONFIG = ConfigParser() + config.CONFIG.read_dict( + { + 'str': + { + 'valid': 'a', + 'unset': '' + }, + 'int': + { + 'valid': '1', + 'unset': '' + }, + 'float': + { + 'valid': '1.0', + 'unset': '' + }, + 'bool': + { + 'valid': 'True', + 'unset': '' + }, + 'str_list': + { + 'valid': 'a b c', + 'unset': '' + }, + 'int_list': + { + 'valid': '1 2 3', + 'unset': '' + }, + 'float_list': + { + 'valid': '1.0 2.0 3.0', + 
'unset': '' + } + }) + + +def test_get_str_value() -> None: + assert config.get_str_value('str.valid') == 'a' + assert config.get_str_value('str.unset', 'b') == 'b' + assert config.get_str_value('str.unset') is None + assert config.get_str_value('str.invalid') is None + + +def test_get_int_value() -> None: + assert config.get_int_value('int.valid') == 1 + assert config.get_int_value('int.unset', '1') == 1 + assert config.get_int_value('int.unset') is None + assert config.get_int_value('int.invalid') is None + + +def test_get_float_value() -> None: + assert config.get_float_value('float.valid') == 1.0 + assert config.get_float_value('float.unset', '1.0') == 1.0 + assert config.get_float_value('float.unset') is None + assert config.get_float_value('float.invalid') is None + + +def test_get_bool_value() -> None: + assert config.get_bool_value('bool.valid') is True + assert config.get_bool_value('bool.unset', 'False') is False + assert config.get_bool_value('bool.unset') is None + assert config.get_bool_value('bool.invalid') is None + + +def test_get_str_list() -> None: + assert config.get_str_list('str_list.valid') == [ 'a', 'b', 'c' ] + assert config.get_str_list('str_list.unset', 'c b a') == [ 'c', 'b', 'a' ] + assert config.get_str_list('str_list.unset') is None + assert config.get_str_list('str_list.invalid') is None + + +def test_get_int_list() -> None: + assert config.get_int_list('int_list.valid') == [ 1, 2, 3 ] + assert config.get_int_list('int_list.unset', '3 2 1') == [ 3, 2, 1 ] + assert config.get_int_list('int_list.unset') is None + assert config.get_int_list('int_list.invalid') is None + + +def test_get_float_list() -> None: + assert config.get_float_list('float_list.valid') == [ 1.0, 2.0, 3.0 ] + assert config.get_float_list('float_list.unset', '3.0 2.0 1.0') == [ 3.0, 2.0, 1.0 ] + assert config.get_float_list('float_list.unset') is None + assert config.get_float_list('float_list.invalid') is None diff --git a/tests/test_execution_helper.py 
b/tests/test_execution_helper.py index 5d19912..bbb69ae 100644 --- a/tests/test_execution_helper.py +++ b/tests/test_execution_helper.py @@ -1,4 +1,4 @@ -from facefusion.execution_helper import encode_execution_providers, decode_execution_providers +from facefusion.execution_helper import encode_execution_providers, decode_execution_providers, apply_execution_provider_options, map_torch_backend def test_encode_execution_providers() -> None: @@ -7,3 +7,20 @@ def test_encode_execution_providers() -> None: def test_decode_execution_providers() -> None: assert decode_execution_providers([ 'cpu' ]) == [ 'CPUExecutionProvider' ] + + +def test_multiple_execution_providers() -> None: + execution_provider_with_options =\ + [ + 'CPUExecutionProvider', + ('CUDAExecutionProvider', + { + 'cudnn_conv_algo_search': 'DEFAULT' + }) + ] + assert apply_execution_provider_options([ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == execution_provider_with_options + + +def test_map_device() -> None: + assert map_torch_backend([ 'CPUExecutionProvider' ]) == 'cpu' + assert map_torch_backend([ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == 'cuda' diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index b67ed75..55a6bcb 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -39,7 +39,7 @@ def test_extract_frames() -> None: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) - assert extract_frames(target_path, 30.0) is True + assert extract_frames(target_path, '452x240', 30.0) is True assert len(glob.glob1(temp_directory_path, '*.jpg')) == 324 clear_temp(target_path) @@ -57,7 +57,7 @@ def test_extract_frames_with_trim_start() -> None: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) - assert extract_frames(target_path, 30.0) is True + assert extract_frames(target_path, '452x240', 30.0) is True assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total clear_temp(target_path) @@ -76,7 +76,7 
@@ def test_extract_frames_with_trim_start_and_trim_end() -> None: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) - assert extract_frames(target_path, 30.0) is True + assert extract_frames(target_path, '452x240', 30.0) is True assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total clear_temp(target_path) @@ -94,7 +94,7 @@ def test_extract_frames_with_trim_end() -> None: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) - assert extract_frames(target_path, 30.0) is True + assert extract_frames(target_path, '426x240', 30.0) is True assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total clear_temp(target_path) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index c504742..6db581d 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -1,4 +1,15 @@ -from facefusion.filesystem import is_file, is_directory, is_image, are_images, is_video +import pytest + +from facefusion.download import conditional_download +from facefusion.filesystem import is_file, is_directory, is_image, are_images, is_video, list_directory + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg' + ]) def test_is_file() -> None: @@ -29,3 +40,9 @@ def test_is_video() -> None: assert is_video('.assets/examples/target-240p.mp4') is True assert is_video('.assets/examples/source.jpg') is False assert is_video('invalid') is False + + +def test_list_directory() -> None: + assert list_directory('.assets/examples') + assert list_directory('.assets/examples/source.jpg') is None + assert list_directory('invalid') is None diff --git a/tests/test_memory.py b/tests/test_memory.py new file mode 100644 index 0000000..8ab07f5 --- /dev/null +++ b/tests/test_memory.py @@ -0,0 +1,9 @@ +import platform + +from facefusion.memory import 
limit_system_memory + + +def test_limit_system_memory() -> None: + assert limit_system_memory(4) is True + if platform.system().lower() == 'darwin' or platform.system().lower() == 'linux': + assert limit_system_memory(1024) is False diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py index 0547e12..0d0512a 100644 --- a/tests/test_normalizer.py +++ b/tests/test_normalizer.py @@ -1,6 +1,6 @@ import platform -from facefusion.normalizer import normalize_output_path, normalize_padding +from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps def test_normalize_output_path() -> None: @@ -23,3 +23,10 @@ def test_normalize_padding() -> None: assert normalize_padding([ 1, 2 ]) == (1, 2, 1, 2) assert normalize_padding([ 1, 2, 3 ]) == (1, 2, 3, 2) assert normalize_padding(None) is None + + +def test_normalize_fps() -> None: + assert normalize_fps(0.0) == 1.0 + assert normalize_fps(25.0) == 25.0 + assert normalize_fps(61.0) == 60.0 + assert normalize_fps(None) is None diff --git a/tests/test_vision.py b/tests/test_vision.py index 5b51e62..3368eef 100644 --- a/tests/test_vision.py +++ b/tests/test_vision.py @@ -2,7 +2,7 @@ import subprocess import pytest from facefusion.download import conditional_download -from facefusion.vision import get_video_frame, detect_fps, count_video_frame_total +from facefusion.vision import get_video_frame, count_video_frame_total, detect_video_fps, detect_video_resolution, pack_resolution, unpack_resolution, create_video_resolutions @pytest.fixture(scope = 'module', autouse = True) @@ -10,11 +10,14 @@ def before_all() -> None: conditional_download('.assets/examples', [ 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', - 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', + 
'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=25', '.assets/examples/target-240p-25fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=30', '.assets/examples/target-240p-30fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=60', '.assets/examples/target-240p-60fps.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'transpose=0', '.assets/examples/target-240p-90deg.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vf', 'transpose=0', '.assets/examples/target-1080p-90deg.mp4' ]) def test_get_video_frame() -> None: @@ -22,15 +25,39 @@ def test_get_video_frame() -> None: assert get_video_frame('invalid') is None -def test_detect_fps() -> None: - assert detect_fps('.assets/examples/target-240p-25fps.mp4') == 25.0 - assert detect_fps('.assets/examples/target-240p-30fps.mp4') == 30.0 - assert detect_fps('.assets/examples/target-240p-60fps.mp4') == 60.0 - assert detect_fps('invalid') is None - - def test_count_video_frame_total() -> None: assert count_video_frame_total('.assets/examples/target-240p-25fps.mp4') == 270 assert count_video_frame_total('.assets/examples/target-240p-30fps.mp4') == 324 assert count_video_frame_total('.assets/examples/target-240p-60fps.mp4') == 648 assert count_video_frame_total('invalid') == 0 + + +def test_detect_video_fps() -> None: + assert detect_video_fps('.assets/examples/target-240p-25fps.mp4') == 25.0 + assert detect_video_fps('.assets/examples/target-240p-30fps.mp4') == 30.0 + assert detect_video_fps('.assets/examples/target-240p-60fps.mp4') == 60.0 + assert detect_video_fps('invalid') is None + + +def test_detect_video_resolution() -> None: + assert detect_video_resolution('.assets/examples/target-240p.mp4') == (426.0, 226.0) + assert 
detect_video_resolution('.assets/examples/target-1080p.mp4') == (2048.0, 1080.0) + assert detect_video_resolution('invalid') is None + + +def test_pack_resolution() -> None: + assert pack_resolution((1.0, 1.0)) == '0x0' + assert pack_resolution((2.0, 2.0)) == '2x2' + + +def test_unpack_resolution() -> None: + assert unpack_resolution('0x0') == (0, 0) + assert unpack_resolution('2x2') == (2, 2) + + +def test_create_video_resolutions() -> None: + assert create_video_resolutions('.assets/examples/target-240p.mp4') == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160' ] + assert create_video_resolutions('.assets/examples/target-240p-90deg.mp4') == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072' ] + assert create_video_resolutions('.assets/examples/target-1080p.mp4') == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160' ] + assert create_video_resolutions('.assets/examples/target-1080p-90deg.mp4') == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096' ] + assert create_video_resolutions('invalid') is None