From da0da3a4b475e28589e7010804796bd2d6dbce09 Mon Sep 17 00:00:00 2001
From: Henry Ruhs
Date: Mon, 8 Sep 2025 10:43:58 +0200
Subject: [PATCH] Next (#945)

* Rename calcXXX to calculateXXX
* Add migraphx support
* Add migraphx support
* Add migraphx support
* Add migraphx support
* Add migraphx support
* Add migraphx support
* Use True for the flags
* Add migraphx support
* add face-swapper-weight
* add face-swapper-weight to facefusion.ini
* changes
* change choice
* Fix typing for xxxWeight
* Feat/log inference session (#906)
* Log inference session, Introduce time helper
* Log inference session, Introduce time helper
* Log inference session, Introduce time helper
* Log inference session, Introduce time helper
* Mark as NEXT
* Follow industry standard x1, x2, y1 and y2
* Follow industry standard x1, x2, y1 and y2
* Follow industry standard in terms of naming (#908)
* Follow industry standard in terms of naming
* Improve xxx_embedding naming
* Fix norm vs. norms
* Reduce timeout to 5
* Sort out voice_extractor once again
* changes
* Introduce many to the occlusion mask (#910)
* Introduce many to the occlusion mask
* Then we use minimum
* Add support for wmv
* Run platform tests before has_execution_provider (#911)
* Add support for wmv
* Introduce benchmark mode (#912)
* Honestly makes no difference to me
* Honestly makes no difference to me
* Fix wording
* Bring back YuNet (#922)
* Reintroduce YuNet without cv2 dependency
* Fix variable naming
* Avoid RGB to YUV colorshift using libx264rgb
* Avoid RGB to YUV colorshift using libx264rgb
* Make libx264 the default again
* Make libx264 the default again
* Fix types in ffmpeg builder
* Fix quality stuff in ffmpeg builder
* Fix quality stuff in ffmpeg builder
* Add libx264rgb to test
* Revamp Processors (#923)
* Introduce new concept of pure target frames
* Radical refactoring of process flow
* Introduce new concept of pure target frames
* Fix webcam
* Minor improvements
* Minor improvements
* Use deque for video processing
* Use deque for video processing
* Extend the video manager
* Polish deque
* Polish deque
* Deque is not even used
* Improve speed with multiple futures
* Fix temp frame mutation and
* Fix RAM usage
* Remove old types and manage method
* Remove execution_queue_count
* Use init_state for benchmarker to avoid issues
* add voice extractor option
* Change the order of voice extractor in code
* Use official download urls
* Use official download urls
* add gui
* fix preview
* Add remote updates for voice extractor
* fix crash on headless-run
* update test_job_helper.py
* Fix it for good
* Remove pointless method
* Fix types and unused imports
* Revamp reference (#925)
* Initial revamp of face references
* Initial revamp of face references
* Initial revamp of face references
* Terminate find_similar_faces
* Improve find mutant faces
* Improve find mutant faces
* Move sort where it belongs
* Forward reference vision frame
* Forward reference vision frame also in preview
* Fix reference selection
* Use static video frame
* Fix CI
* Remove reference type from frame processors
* Improve some naming
* Fix types and unused imports
* Fix find mutant faces
* Fix find mutant faces
* Fix imports
* Correct naming
* Correct naming
* simplify pad
* Improve webcam performance on highres
* Camera manager (#932)
* Introduce webcam manager
* Fix order
* Rename to camera manager, improve video manager
* Fix CI
* Remove optional
* Fix naming in webcam options
* Avoid using temp faces (#933)
* output video scale
* Fix imports
* output image scale
* upscale fix (not limiter)
* add unit test scale_resolution & remove unused methods
* fix and add test
* fix
* change pack_resolution
* fix tests
* Simplify output scale testing
* Fix benchmark UI
* Fix benchmark UI
* Update dependencies
* Introduce REAL multi gpu support using multi dimensional inference pool (#935)
* Introduce REAL multi gpu support using multi dimensional inference pool
* Remove the MULTI:GPU flag
* Restore "processing stop"
* Restore "processing stop"
* Remove old templates
* Go fill in with caching
* add expression restorer areas
* re-arrange
* rename method
* Fix stop for extract frames and merge video
* Replace arcface_converter models with latest crossface models
* Replace arcface_converter models with latest crossface models
* Move module logs to debug mode
* Refactor/streamer (#938)
* Introduce webcam manager
* Fix order
* Rename to camera manager, improve video manager
* Fix CI
* Fix naming in webcam options
* Move logic over to streamer
* Fix streamer, improve webcam experience
* Improve webcam experience
* Revert method
* Revert method
* Improve webcam again
* Use release on capture instead
* Only forward valid frames
* Fix resolution logging
* Add AVIF support
* Add AVIF support
* Limit avif to unix systems
* Drop avif
* Drop avif
* Drop avif
* Default to Documents in the UI if output path is not set
* Update wording.py (#939)

"succeed" is grammatically incorrect in the given context. To succeed is the infinitive form of the verb. Correct would be either "succeeded" or alternatively a form involving the noun "success".

* Fix more grammar issue
* Fix more grammar issue
* Sort out caching
* Move webcam choices back to UI
* Move preview options to own file (#940)
* Fix Migraphx execution provider
* Fix benchmark
* Reuse blend frame method
* Fix CI
* Fix CI
* Fix CI
* Hotfix missing check in face debugger, Enable logger for preview
* Fix reference selection (#942)
* Fix reference selection
* Fix reference selection
* Fix reference selection
* Fix reference selection
* Side by side preview (#941)
* Initial side by side preview
* More work on preview, remove UI only stuff from vision.py
* Improve more
* Use fit frame
* Add different fit methods for vision
* Improve preview part2
* Improve preview part3
* Improve preview part4
* Remove none as choice
* Remove useless methods
* Fix CI
* Fix naming
* use 1024 as preview resolution default
* Fix fit_cover_frame
* Uniform fit_xxx_frame methods
* Add back disabled logger
* Use ui choices alias
* Extract select face logic from processors (#943)
* Extract select face logic from processors to use it for face by face in preview
* Fix order
* Remove old code
* Merge methods
* Refactor face debugger (#944)
* Refactor huge method of face debugger
* Remove text metrics from face debugger
* Remove useless copy of temp frame
* Resort methods
* Fix spacing
* Remove old method
* Fix hard exit to work without signals
* Prevent upscaling for face-by-face
* Switch to version
* Improve exiting

---------

Co-authored-by: harisreedhar
Co-authored-by: Harisreedhar <46858047+harisreedhar@users.noreply.github.com>
Co-authored-by: Rafael Tappe Maestro
---
 facefusion.ini | 13 +-
 facefusion/args.py | 26 +-
 facefusion/audio.py | 6 +-
 facefusion/benchmarker.py | 23 +-
 facefusion/camera_manager.py | 57 ++++
 facefusion/choices.py | 25 +-
 facefusion/common_helper.py | 8 +-
 facefusion/content_analyser.py | 19 +-
 facefusion/core.py | 171 ++++----
 facefusion/download.py | 10 +-
 facefusion/execution.py | 10 +-
 facefusion/exit_helper.py | 10 +-
 facefusion/face_analyser.py | 24 +-
 facefusion/face_classifier.py | 2 +-
 facefusion/face_detector.py | 128 +++++++-
 facefusion/face_helper.py | 72 ++---
 facefusion/face_landmarker.py | 16 +-
 facefusion/face_masker.py | 33 +-
 facefusion/face_recognizer.py | 16 +-
 facefusion/face_selector.py | 52 +++-
 facefusion/face_store.py | 19 +-
 facefusion/ffmpeg.py | 32 +-
 facefusion/ffmpeg_builder.py | 34 +-
 facefusion/inference_manager.py | 54 ++--
 facefusion/jobs/job_helper.py | 10 +-
 facefusion/jobs/job_list.py | 2 +-
 facefusion/jobs/job_manager.py | 2 +-
 facefusion/metadata.py | 2 +-
 facefusion/model_helper.py | 2 +-
 facefusion/process_manager.py | 10 +-
 facefusion/processors/choices.py | 6 +-
 facefusion/processors/core.py | 60 +---
 facefusion/processors/live_portrait.py | 6 +-
 facefusion/processors/modules/age_modifier.py | 79 +----
 facefusion/processors/modules/deep_swapper.py | 72 +----
 .../processors/modules/expression_restorer.py | 142 ++++-----
 .../processors/modules/face_debugger.py | 293 +++++++++---------
 facefusion/processors/modules/face_editor.py | 88 ++----
 .../processors/modules/face_enhancer.py | 80 +----
 facefusion/processors/modules/face_swapper.py | 227 +++++++-------
 .../processors/modules/frame_colorizer.py | 53 +---
 .../processors/modules/frame_enhancer.py | 72 ++---
 facefusion/processors/modules/lip_syncer.py | 119 ++-----
 facefusion/processors/types.py | 95 +++---
 facefusion/program.py | 26 +-
 facefusion/state_manager.py | 4 +
 facefusion/streamer.py | 98 ++++++
 facefusion/thread_helper.py | 3 +-
 facefusion/{date_helper.py => time_helper.py} | 5 +
 facefusion/types.py | 70 +++--
 facefusion/uis/choices.py | 8 +-
 .../uis/components/age_modifier_options.py | 4 +-
 facefusion/uis/components/benchmark.py | 16 +-
 .../uis/components/benchmark_options.py | 44 ++-
 .../uis/components/deep_swapper_options.py | 4 +-
 .../uis/components/execution_queue_count.py | 29 --
 .../uis/components/execution_thread_count.py | 4 +-
 .../components/expression_restorer_options.py | 28 +-
 facefusion/uis/components/face_detector.py | 4 +-
 .../uis/components/face_editor_options.py | 30 +-
 .../uis/components/face_enhancer_options.py | 10 +-
 facefusion/uis/components/face_landmarker.py | 4 +-
 facefusion/uis/components/face_masker.py | 12 +-
 facefusion/uis/components/face_selector.py | 73 +++--
 .../uis/components/face_swapper_options.py | 38 ++-
 .../uis/components/frame_colorizer_options.py | 4 +-
 .../uis/components/frame_enhancer_options.py | 4 +-
 facefusion/uis/components/instant_runner.py | 6 +-
 facefusion/uis/components/job_runner.py | 8 +-
 .../uis/components/lip_syncer_options.py | 8 +-
 facefusion/uis/components/memory.py | 4 +-
 facefusion/uis/components/output.py | 8 +-
 facefusion/uis/components/output_options.py | 79 +++--
 facefusion/uis/components/preview.py | 228 ++++++++------
 facefusion/uis/components/preview_options.py | 61 ++++
 facefusion/uis/components/target.py | 3 +-
 facefusion/uis/components/voice_extractor.py | 50 +++
 facefusion/uis/components/webcam.py | 206 ++++--------
 facefusion/uis/components/webcam_options.py | 22 +-
 facefusion/uis/layouts/benchmark.py | 14 +-
 facefusion/uis/layouts/default.py | 9 +-
 facefusion/uis/layouts/webcam.py | 9 +-
 facefusion/uis/types.py | 10 +-
 facefusion/video_manager.py | 33 +-
 facefusion/vision.py | 109 ++++---
 facefusion/voice_extractor.py | 156 ++++++----
 facefusion/wording.py | 63 ++--
 requirements.txt | 10 +-
 tests/test_cli_output_scale.py | 57 ++++
 tests/test_common_helper.py | 6 +-
 tests/test_face_analyser.py | 38 ++-
 tests/test_ffmpeg.py | 14 +-
 tests/test_ffmpeg_builder.py | 3 +
 tests/test_inference_manager.py | 2 +-
 tests/test_job_helper.py | 1 +
 ...est_date_helper.py => test_time_helper.py} | 2 +-
 tests/test_vision.py | 26 +-
 97 files changed, 2113 insertions(+), 1934 deletions(-)
 create mode 100644 facefusion/camera_manager.py
 create mode 100644 facefusion/streamer.py
 rename facefusion/{date_helper.py => time_helper.py} (89%)
 delete mode 100644 facefusion/uis/components/execution_queue_count.py
 create mode 100644 facefusion/uis/components/preview_options.py
 create mode 100644 facefusion/uis/components/voice_extractor.py
 create mode 100644 tests/test_cli_output_scale.py
 rename tests/{test_date_helper.py => test_time_helper.py} (91%)

diff --git a/facefusion.ini b/facefusion.ini
index 37b772a..a0ba6a7 100644
--- a/facefusion.ini
+++ b/facefusion.ini
@@ -40,6 +40,9 @@ face_mask_regions =
 face_mask_blur =
 face_mask_padding =
 
+[voice_extractor]
+voice_extractor_model =
+
 [frame_extraction]
 trim_frame_start =
 trim_frame_end =
@@ -48,14 +51,14 @@ keep_temp =
 
 [output_creation]
 output_image_quality =
-output_image_resolution =
+output_image_scale =
 output_audio_encoder =
 output_audio_quality =
 output_audio_volume =
 output_video_encoder =
 output_video_preset =
 output_video_quality =
-output_video_resolution =
+output_video_scale =
 output_video_fps =
 
 [processors]
@@ -66,6 +69,7 @@ deep_swapper_model =
 deep_swapper_morph =
 expression_restorer_model =
 expression_restorer_factor =
+expression_restorer_areas =
 face_debugger_items =
 face_editor_model =
 face_editor_eyebrow_direction =
@@ -87,6 +91,7 @@ face_enhancer_blend =
 face_enhancer_weight =
 face_swapper_model =
 face_swapper_pixel_boost =
+face_swapper_weight =
 frame_colorizer_model =
 frame_colorizer_size =
 frame_colorizer_blend =
@@ -105,14 +110,14 @@ download_providers =
 download_scope =
 
 [benchmark]
+benchmark_mode =
 benchmark_resolutions =
 benchmark_cycle_count =
 
 [execution]
-execution_device_id =
+execution_device_ids =
 execution_providers =
 execution_thread_count =
-execution_queue_count =
 
 [memory]
 video_memory_strategy =

diff --git a/facefusion/args.py b/facefusion/args.py
index ba5c511..1f574c9 100644
--- a/facefusion/args.py
+++ b/facefusion/args.py
@@ -1,10 +1,10 @@
 from facefusion import state_manager
-from facefusion.filesystem import get_file_name, is_image, is_video, resolve_file_paths
+from facefusion.filesystem import get_file_name, is_video, resolve_file_paths
 from facefusion.jobs import job_store
 from facefusion.normalizer import normalize_fps, normalize_padding
 from facefusion.processors.core import get_processors_modules
 from facefusion.types import ApplyStateItem, Args
-from facefusion.vision import create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_resolution, pack_resolution
+from facefusion.vision import detect_video_fps
 
 
 def reduce_step_args(args : Args) -> Args:
@@ -78,6 +78,8 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None:
 	apply_state_item('face_mask_regions', args.get('face_mask_regions'))
 	apply_state_item('face_mask_blur', args.get('face_mask_blur'))
 	apply_state_item('face_mask_padding', normalize_padding(args.get('face_mask_padding')))
+	# voice extractor
+	apply_state_item('voice_extractor_model', args.get('voice_extractor_model'))
 	# frame extraction
 	apply_state_item('trim_frame_start', args.get('trim_frame_start'))
 	apply_state_item('trim_frame_end', args.get('trim_frame_end'))
@@ -85,26 +87,14 @@
 	apply_state_item('keep_temp', args.get('keep_temp'))
 	# output creation
 	apply_state_item('output_image_quality', args.get('output_image_quality'))
-	if is_image(args.get('target_path')):
-		output_image_resolution = detect_image_resolution(args.get('target_path'))
-		output_image_resolutions = create_image_resolutions(output_image_resolution)
-		if args.get('output_image_resolution') in output_image_resolutions:
-			apply_state_item('output_image_resolution', args.get('output_image_resolution'))
-		else:
-			apply_state_item('output_image_resolution', pack_resolution(output_image_resolution))
+	apply_state_item('output_image_scale', args.get('output_image_scale'))
 	apply_state_item('output_audio_encoder', args.get('output_audio_encoder'))
 	apply_state_item('output_audio_quality', args.get('output_audio_quality'))
 	apply_state_item('output_audio_volume', args.get('output_audio_volume'))
 	apply_state_item('output_video_encoder', args.get('output_video_encoder'))
 	apply_state_item('output_video_preset', args.get('output_video_preset'))
 	apply_state_item('output_video_quality', args.get('output_video_quality'))
-	if is_video(args.get('target_path')):
-		output_video_resolution = detect_video_resolution(args.get('target_path'))
-		output_video_resolutions = create_video_resolutions(output_video_resolution)
-		if args.get('output_video_resolution') in output_video_resolutions:
-			apply_state_item('output_video_resolution', args.get('output_video_resolution'))
-		else:
-			apply_state_item('output_video_resolution', pack_resolution(output_video_resolution))
+	apply_state_item('output_video_scale', args.get('output_video_scale'))
 	if args.get('output_video_fps') or is_video(args.get('target_path')):
 		output_video_fps = normalize_fps(args.get('output_video_fps')) or detect_video_fps(args.get('target_path'))
 		apply_state_item('output_video_fps', output_video_fps)
@@ -118,14 +108,14 @@
 	apply_state_item('ui_layouts', args.get('ui_layouts'))
 	apply_state_item('ui_workflow', args.get('ui_workflow'))
 	# execution
-	apply_state_item('execution_device_id', args.get('execution_device_id'))
+	apply_state_item('execution_device_ids', args.get('execution_device_ids'))
 	apply_state_item('execution_providers', args.get('execution_providers'))
 	apply_state_item('execution_thread_count', args.get('execution_thread_count'))
-	apply_state_item('execution_queue_count', args.get('execution_queue_count'))
 	# download
 	apply_state_item('download_providers', args.get('download_providers'))
 	apply_state_item('download_scope', args.get('download_scope'))
 	# benchmark
+	apply_state_item('benchmark_mode', args.get('benchmark_mode'))
 	apply_state_item('benchmark_resolutions', args.get('benchmark_resolutions'))
 	apply_state_item('benchmark_cycle_count', args.get('benchmark_cycle_count'))
 	# memory

diff --git a/facefusion/audio.py b/facefusion/audio.py
index 43b72b1..cb12ca9 100644
--- a/facefusion/audio.py
+++ b/facefusion/audio.py
@@ -118,12 +118,12 @@ def convert_mel_to_hertz(mel : Mel) -> NDArray[Any]:
 
 
 def create_mel_filter_bank() -> MelFilterBank:
 	audio_sample_rate = 16000
-	audio_min_frequency = 55.0
-	audio_max_frequency = 7600.0
+	audio_frequency_min = 55.0
+	audio_frequency_max = 7600.0
 	mel_filter_total = 80
 	mel_bin_total = 800
 	mel_filter_bank = numpy.zeros((mel_filter_total, mel_bin_total // 2 + 1))
-	mel_frequency_range = numpy.linspace(convert_hertz_to_mel(audio_min_frequency), convert_hertz_to_mel(audio_max_frequency), mel_filter_total + 2)
+	mel_frequency_range = numpy.linspace(convert_hertz_to_mel(audio_frequency_min), convert_hertz_to_mel(audio_frequency_max), mel_filter_total + 2)
 	indices = numpy.floor((mel_bin_total + 1) * convert_mel_to_hertz(mel_frequency_range) / audio_sample_rate).astype(numpy.int16)
 
 	for index in range(mel_filter_total):

diff --git a/facefusion/benchmarker.py b/facefusion/benchmarker.py
index 762fbb2..ae90f5e 100644
--- a/facefusion/benchmarker.py
+++ b/facefusion/benchmarker.py
@@ -6,12 +6,13 @@ from time import perf_counter
 from typing import Generator, List
 
 import facefusion.choices
-from facefusion import core, state_manager
+from facefusion import content_analyser, core, state_manager
 from facefusion.cli_helper import render_table
 from facefusion.download import conditional_download, resolve_download_url
+from facefusion.face_store import clear_static_faces
 from facefusion.filesystem import get_file_extension
 from facefusion.types import BenchmarkCycleSet
-from facefusion.vision import count_video_frame_total, detect_video_fps, detect_video_resolution, pack_resolution
+from facefusion.vision import count_video_frame_total, detect_video_fps
 
 
 def pre_check() -> bool:
@@ -42,11 +43,11 @@ def run() -> Generator[List[BenchmarkCycleSet], None, None]:
 	state_manager.init_item('video_memory_strategy', 'tolerant')
 	benchmarks = []
-	target_paths = [facefusion.choices.benchmark_set.get(benchmark_resolution) for benchmark_resolution in benchmark_resolutions if benchmark_resolution in facefusion.choices.benchmark_set]
+	target_paths = [ facefusion.choices.benchmark_set.get(benchmark_resolution) for benchmark_resolution in benchmark_resolutions if benchmark_resolution in facefusion.choices.benchmark_set ]
 
 	for target_path in target_paths:
-		state_manager.set_item('target_path', target_path)
-		state_manager.set_item('output_path', suggest_output_path(state_manager.get_item('target_path')))
+		state_manager.init_item('target_path', target_path)
+		state_manager.init_item('output_path', suggest_output_path(state_manager.get_item('target_path')))
 		benchmarks.append(cycle(benchmark_cycle_count))
 	yield benchmarks
@@ -54,13 +55,17 @@ def cycle(cycle_count : int) -> BenchmarkCycleSet:
 	process_times = []
 	video_frame_total = count_video_frame_total(state_manager.get_item('target_path'))
-	output_video_resolution = detect_video_resolution(state_manager.get_item('target_path'))
-	state_manager.set_item('output_video_resolution', pack_resolution(output_video_resolution))
-	state_manager.set_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path')))
+	state_manager.init_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path')))
 
-	core.conditional_process()
+	if state_manager.get_item('benchmark_mode') == 'warm':
+		core.conditional_process()
 
 	for index in range(cycle_count):
+		if state_manager.get_item('benchmark_mode') == 'cold':
+			content_analyser.analyse_image.cache_clear()
+			content_analyser.analyse_video.cache_clear()
+			clear_static_faces()
+
 		start_time = perf_counter()
 		core.conditional_process()
 		end_time = perf_counter()

diff --git a/facefusion/camera_manager.py b/facefusion/camera_manager.py
new file mode 100644
index 0000000..aa3b351
--- /dev/null
+++ b/facefusion/camera_manager.py
@@ -0,0 +1,57 @@
+from typing import List
+
+import cv2
+
+from facefusion.common_helper import is_windows
+from facefusion.types import CameraPoolSet
+
+CAMERA_POOL_SET : CameraPoolSet =\
+{
+	'capture': {}
+}
+
+
+def get_local_camera_capture(camera_id : int) -> cv2.VideoCapture:
+	camera_key = str(camera_id)
+
+	if camera_key not in CAMERA_POOL_SET.get('capture'):
+		if is_windows():
+			camera_capture = cv2.VideoCapture(camera_id, cv2.CAP_DSHOW)
+		else:
+			camera_capture = cv2.VideoCapture(camera_id)
+
+		if camera_capture.isOpened():
+			CAMERA_POOL_SET['capture'][camera_key] = camera_capture
+
+	return CAMERA_POOL_SET.get('capture').get(camera_key)
+
+
+def get_remote_camera_capture(camera_url : str) -> cv2.VideoCapture:
+	if camera_url not in CAMERA_POOL_SET.get('capture'):
+		camera_capture = cv2.VideoCapture(camera_url)
+
+		if camera_capture.isOpened():
+			CAMERA_POOL_SET['capture'][camera_url] = camera_capture
+
+	return CAMERA_POOL_SET.get('capture').get(camera_url)
+
+
+def clear_camera_pool() -> None:
+	for camera_capture in CAMERA_POOL_SET.get('capture').values():
+		camera_capture.release()
+
+	CAMERA_POOL_SET['capture'].clear()
+
+
+def detect_local_camera_ids(id_start : int, id_end : int) -> List[int]:
+	local_camera_ids = []
+
+	for camera_id in range(id_start, id_end):
+		cv2.setLogLevel(0)
+		camera_capture = get_local_camera_capture(camera_id)
+		cv2.setLogLevel(3)
+
+		if camera_capture and camera_capture.isOpened():
+			local_camera_ids.append(camera_id)
+
+	return local_camera_ids

diff --git a/facefusion/choices.py b/facefusion/choices.py
index c51463e..5403ce0 100755
--- a/facefusion/choices.py
+++ b/facefusion/choices.py
@@ -2,14 +2,15 @@ import logging
 from typing import List, Sequence
 
 from facefusion.common_helper import create_float_range, create_int_range
-from facefusion.types import Angle, AudioEncoder, AudioFormat, AudioTypeSet, BenchmarkResolution, BenchmarkSet, DownloadProvider, DownloadProviderSet, DownloadScope, EncoderSet, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskArea, FaceMaskAreaSet, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, TempFrameFormat, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, WebcamMode
+from facefusion.types import Angle, AudioEncoder, AudioFormat, AudioTypeSet, BenchmarkMode, BenchmarkResolution, BenchmarkSet, DownloadProvider, DownloadProviderSet, DownloadScope, EncoderSet, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskArea, FaceMaskAreaSet, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, TempFrameFormat, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, VoiceExtractorModel
 
 
 face_detector_set : FaceDetectorSet =\
 {
 	'many': [ '640x640' ],
 	'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ],
 	'scrfd': [ '160x160', '320x320', '480x480', '512x512', '640x640' ],
-	'yolo_face': [ '640x640' ]
+	'yolo_face': [ '640x640' ],
+	'yunet': [ '640x640' ]
 }
 face_detector_models : List[FaceDetectorModel] = list(face_detector_set.keys())
 face_landmarker_models : List[FaceLandmarkerModel] = [ 'many', '2dfan4', 'peppa_wutz' ]
@@ -17,7 +18,7 @@ face_selector_modes : List[FaceSelectorMode] = [ 'many', 'one', 'reference' ]
 face_selector_orders : List[FaceSelectorOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ]
 face_selector_genders : List[Gender] = [ 'female', 'male' ]
 face_selector_races : List[Race] = [ 'white', 'black', 'latino', 'asian', 'indian', 'arabic' ]
-face_occluder_models : List[FaceOccluderModel] = [ 'xseg_1', 'xseg_2', 'xseg_3' ]
+face_occluder_models : List[FaceOccluderModel] = [ 'many', 'xseg_1', 'xseg_2', 'xseg_3' ]
 face_parser_models : List[FaceParserModel] = [ 'bisenet_resnet_18', 'bisenet_resnet_34' ]
 face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'area', 'region' ]
 face_mask_area_set : FaceMaskAreaSet =\
@@ -42,6 +43,8 @@ face_mask_region_set : FaceMaskRegionSet =\
 face_mask_areas : List[FaceMaskArea] = list(face_mask_area_set.keys())
 face_mask_regions : List[FaceMaskRegion] = list(face_mask_region_set.keys())
 
+voice_extractor_models : List[VoiceExtractorModel] = [ 'kim_vocal_1', 'kim_vocal_2', 'uvr_mdxnet' ]
+
 audio_type_set : AudioTypeSet =\
 {
 	'flac': 'audio/flac',
@@ -66,7 +69,8 @@ video_type_set : VideoTypeSet =\
 	'mkv': 'video/x-matroska',
 	'mp4': 'video/mp4',
 	'mov': 'video/quicktime',
-	'webm': 'video/webm'
+	'webm': 'video/webm',
+	'wmv': 'video/x-ms-wmv'
 }
 audio_formats : List[AudioFormat] = list(audio_type_set.keys())
 image_formats : List[ImageFormat] = list(image_type_set.keys())
@@ -76,15 +80,13 @@ temp_frame_formats : List[TempFrameFormat] = [ 'bmp', 'jpeg', 'png', 'tiff' ]
 output_encoder_set : EncoderSet =\
 {
 	'audio': [ 'flac', 'aac', 'libmp3lame', 'libopus', 'libvorbis', 'pcm_s16le', 'pcm_s32le' ],
-	'video': [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox', 'rawvideo' ]
+	'video': [ 'libx264', 'libx264rgb', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox', 'rawvideo' ]
 }
 output_audio_encoders : List[AudioEncoder] = output_encoder_set.get('audio')
 output_video_encoders : List[VideoEncoder] = output_encoder_set.get('video')
 output_video_presets : List[VideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ]
 
-image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ]
-video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160, 4320 ]
-
+benchmark_modes : List[BenchmarkMode] = [ 'warm', 'cold' ]
 benchmark_set : BenchmarkSet =\
 {
 	'240p': '.assets/examples/target-240p.mp4',
@@ -97,15 +99,13 @@ benchmark_set : BenchmarkSet =\
 }
 benchmark_resolutions : List[BenchmarkResolution] = list(benchmark_set.keys())
 
-webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ]
-webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ]
-
 execution_provider_set : ExecutionProviderSet =\
 {
 	'cuda': 'CUDAExecutionProvider',
 	'tensorrt': 'TensorrtExecutionProvider',
 	'directml': 'DmlExecutionProvider',
 	'rocm': 'ROCMExecutionProvider',
+	'migraphx': 'MIGraphXExecutionProvider',
 	'openvino': 'OpenVINOExecutionProvider',
 	'coreml': 'CoreMLExecutionProvider',
 	'cpu': 'CPUExecutionProvider'
@@ -150,7 +150,6 @@ job_statuses : List[JobStatus] = [ 'drafted', 'queued', 'completed', 'failed' ]
 
 benchmark_cycle_count_range : Sequence[int] = create_int_range(1, 10, 1)
 execution_thread_count_range : Sequence[int] = create_int_range(1, 32, 1)
-execution_queue_count_range : Sequence[int] = create_int_range(1, 4, 1)
 system_memory_limit_range : Sequence[int] = create_int_range(0, 128, 4)
 face_detector_angles : Sequence[Angle] = create_int_range(0, 270, 90)
 face_detector_score_range : Sequence[Score] = create_float_range(0.0, 1.0, 0.05)
@@ -160,6 +159,8 @@ face_mask_padding_range : Sequence[int] = create_int_range(0, 100, 1)
 face_selector_age_range : Sequence[int] = create_int_range(0, 100, 1)
 reference_face_distance_range : Sequence[float] = create_float_range(0.0, 1.0, 0.05)
 output_image_quality_range : Sequence[int] = create_int_range(0, 100, 1)
+output_image_scale_range : Sequence[float] = create_float_range(0.25, 8.0, 0.25)
 output_audio_quality_range : Sequence[int] = create_int_range(0, 100, 1)
 output_audio_volume_range : Sequence[int] = create_int_range(0, 100, 1)
 output_video_quality_range : Sequence[int] = create_int_range(0, 100, 1)
+output_video_scale_range : Sequence[float] = create_float_range(0.25, 8.0, 0.25)

diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py
index b38ceb7..3587784 100644
--- a/facefusion/common_helper.py
+++ b/facefusion/common_helper.py
@@ -15,11 +15,11 @@ def is_windows() -> bool:
 
 
 def create_int_metavar(int_range : Sequence[int]) -> str:
-	return '[' + str(int_range[0]) + '..' + str(int_range[-1]) + ':' + str(calc_int_step(int_range)) + ']'
+	return '[' + str(int_range[0]) + '..' + str(int_range[-1]) + ':' + str(calculate_int_step(int_range)) + ']'
 
 
 def create_float_metavar(float_range : Sequence[float]) -> str:
-	return '[' + str(float_range[0]) + '..' + str(float_range[-1]) + ':' + str(calc_float_step(float_range)) + ']'
+	return '[' + str(float_range[0]) + '..' + str(float_range[-1]) + ':' + str(calculate_float_step(float_range)) + ']'
 
 
 def create_int_range(start : int, end : int, step : int) -> Sequence[int]:
@@ -42,11 +42,11 @@ def create_float_range(start : float, end : float, step : float) -> Sequence[flo
 	return float_range
 
 
-def calc_int_step(int_range : Sequence[int]) -> int:
+def calculate_int_step(int_range : Sequence[int]) -> int:
 	return int_range[1] - int_range[0]
 
 
-def calc_float_step(float_range : Sequence[float]) -> float:
+def calculate_float_step(float_range : Sequence[float]) -> float:
 	return round(float_range[1] - float_range[0], 2)

diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py
index 8c4cfa1..ec46ca8 100644
--- a/facefusion/content_analyser.py
+++ b/facefusion/content_analyser.py
@@ -5,17 +5,18 @@ import numpy
 from tqdm import tqdm
 
 from facefusion import inference_manager, state_manager, wording
+from facefusion.common_helper import is_macos
 from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url
 from facefusion.execution import has_execution_provider
 from facefusion.filesystem import resolve_relative_path
 from facefusion.thread_helper import conditional_thread_semaphore
 from facefusion.types import Detection, DownloadScope, DownloadSet, ExecutionProvider, Fps, InferencePool, ModelSet, VisionFrame
-from facefusion.vision import detect_video_fps, fit_frame, read_image, read_video_frame
+from facefusion.vision import detect_video_fps, fit_contain_frame, read_image, read_video_frame
 
 STREAM_COUNTER = 0
 
 
-@lru_cache(maxsize = None)
+@lru_cache()
 def create_static_model_set(download_scope : DownloadScope) -> ModelSet:
 	return\
 	{
@@ -101,7 +102,7 @@ def clear_inference_pool() -> None:
 
 
 def resolve_execution_providers() -> List[ExecutionProvider]:
-	if has_execution_provider('coreml'):
+	if is_macos() and has_execution_provider('coreml'):
 		return [ 'cpu' ]
 	return state_manager.get_item('execution_providers')
@@ -137,13 +138,13 @@ def analyse_frame(vision_frame : VisionFrame) -> bool:
 	return detect_nsfw(vision_frame)
 
 
-@lru_cache(maxsize = None)
+@lru_cache()
 def analyse_image(image_path : str) -> bool:
 	vision_frame = read_image(image_path)
 	return analyse_frame(vision_frame)
 
 
-@lru_cache(maxsize = None)
+@lru_cache()
 def analyse_video(video_path : str, trim_frame_start : int, trim_frame_end : int) -> bool:
 	video_fps = detect_video_fps(video_path)
 	frame_range = range(trim_frame_start, trim_frame_end)
@@ -196,8 +197,8 @@ def detect_with_nsfw_3(vision_frame : VisionFrame) -> bool:
 	return bool(detection_score > 10.5)
 
 
-def forward_nsfw(vision_frame : VisionFrame, nsfw_model : str) -> Detection:
-	content_analyser = get_inference_pool().get(nsfw_model)
+def forward_nsfw(vision_frame : VisionFrame, model_name : str) -> Detection:
+	content_analyser = get_inference_pool().get(model_name)
 
 	with conditional_thread_semaphore():
 		detection = content_analyser.run(None,
@@ -205,7 +206,7 @@
 			'input': vision_frame
 		})[0]
 
-	if nsfw_model in [ 'nsfw_2', 'nsfw_3' ]:
+	if model_name in [ 'nsfw_2', 'nsfw_3' ]:
 		return detection[0]
 	return detection
@@ -217,7 +218,7 @@ def prepare_detect_frame(temp_vision_frame : VisionFrame, model_name : str) -> V
 	model_mean = model_set.get('mean')
 	model_standard_deviation = model_set.get('standard_deviation')
 
-	detect_vision_frame = fit_frame(temp_vision_frame, model_size)
+	detect_vision_frame = fit_contain_frame(temp_vision_frame, model_size)
 	detect_vision_frame = detect_vision_frame[:, :, ::-1] / 255.0
 	detect_vision_frame -= model_mean
 	detect_vision_frame /= model_standard_deviation

diff --git a/facefusion/core.py b/facefusion/core.py
index abc28ef..c8cd164 100755
--- a/facefusion/core.py
+++ b/facefusion/core.py
@@ -3,19 +3,19 @@ import itertools
 import shutil
 import signal
 import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from time import time
 
 import numpy
+from tqdm import tqdm
 
 from facefusion import benchmarker, cli_helper, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, hash_helper, logger, process_manager, state_manager, video_manager, voice_extractor, wording
 from facefusion.args import apply_args, collect_job_args, reduce_job_args, reduce_step_args
+from facefusion.audio import create_empty_audio_frame, get_audio_frame, get_voice_frame
 from facefusion.common_helper import get_first
 from facefusion.content_analyser import analyse_image, analyse_video
 from facefusion.download import conditional_download_hashes, conditional_download_sources
 from facefusion.exit_helper import hard_exit, signal_exit
-from facefusion.face_analyser import get_average_face, get_many_faces, get_one_face
-from facefusion.face_selector import sort_and_filter_faces
-from facefusion.face_store import append_reference_face, clear_reference_faces, get_reference_faces
 from facefusion.ffmpeg import copy_image, extract_frames, finalize_image, merge_video, replace_audio, restore_audio
 from facefusion.filesystem import filter_audio_paths, get_file_name, is_image, is_video, resolve_file_paths, resolve_file_pattern
 from facefusion.jobs import job_helper, job_manager, job_runner
@@ -25,8 +25,9 @@ from facefusion.processors.core import get_processors_modules
 from facefusion.program import create_program
 from facefusion.program_helper import validate_args
 from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, move_temp_file, resolve_temp_frame_paths
+from facefusion.time_helper import calculate_end_time
 from facefusion.types import Args, ErrorCode
-from facefusion.vision import pack_resolution, read_image, read_static_images, read_video_frame, restrict_image_resolution, restrict_trim_frame, restrict_video_fps, restrict_video_resolution, unpack_resolution
+from facefusion.vision import detect_image_resolution, detect_video_resolution, pack_resolution, read_static_image, read_static_images, read_static_video_frame, restrict_image_resolution, restrict_trim_frame, restrict_video_fps, restrict_video_resolution, scale_resolution, write_image
 
 
 def cli() -> None:
@@ -57,11 +58,11 @@ def route(args : Args) -> None:
 
 	if state_manager.get_item('command') == 'force-download':
 		error_code = force_download()
-		return hard_exit(error_code)
+		hard_exit(error_code)
 
 	if state_manager.get_item('command') == 'benchmark':
 		if not common_pre_check() or not processors_pre_check() or not benchmarker.pre_check():
-			return hard_exit(2)
+			hard_exit(2)
 		benchmarker.render()
 
 	if state_manager.get_item('command') in [ 'job-list', 'job-create', 'job-submit', 'job-submit-all', 'job-delete', 'job-delete-all', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step' ]:
@@ -74,10 +75,10 @@ def route(args : Args) -> None:
 		import facefusion.uis.core as ui
 
 		if not common_pre_check() or not processors_pre_check():
-			return hard_exit(2)
+			hard_exit(2)
 		for ui_layout in ui.get_ui_layouts_modules(state_manager.get_item('ui_layouts')):
 			if not ui_layout.pre_check():
-				return hard_exit(2)
+				hard_exit(2)
 		ui.init()
 		ui.launch()
@@ -128,9 +129,9 @@ def common_pre_check() -> bool:
 	]
 	content_analyser_content = inspect.getsource(content_analyser).encode()
-	is_valid = hash_helper.create_hash(content_analyser_content) == 'b159fd9d'
+	content_analyser_hash = hash_helper.create_hash(content_analyser_content)
 
-	return all(module.pre_check() for module in common_modules) and is_valid
+	return all(module.pre_check() for module in common_modules) and content_analyser_hash == '803b5ec7'
 
 
 def processors_pre_check() -> bool:
@@ -251,7 +252,7 @@ def route_job_runner() -> ErrorCode:
 	if state_manager.get_item('command') == 'job-run':
 		logger.info(wording.get('running_job').format(job_id = state_manager.get_item('job_id')), __name__)
 		if job_runner.run_job(state_manager.get_item('job_id'), process_step):
-			logger.info(wording.get('processing_job_succeed').format(job_id = state_manager.get_item('job_id')), __name__)
+			logger.info(wording.get('processing_job_succeeded').format(job_id = state_manager.get_item('job_id')), __name__)
 			return 0
 		logger.info(wording.get('processing_job_failed').format(job_id = state_manager.get_item('job_id')), __name__)
 		return 1
@@ -259,7 +260,7 @@ def route_job_runner() -> ErrorCode:
 	if state_manager.get_item('command') == 'job-run-all':
 		logger.info(wording.get('running_jobs'), __name__)
 		if job_runner.run_jobs(process_step, state_manager.get_item('halt_on_error')):
-			logger.info(wording.get('processing_jobs_succeed'), __name__)
+			logger.info(wording.get('processing_jobs_succeeded'), __name__)
 			return 0
 		logger.info(wording.get('processing_jobs_failed'), __name__)
 		return 1
@@ -267,7 +268,7 @@ def route_job_runner() -> ErrorCode:
 	if state_manager.get_item('command') == 'job-retry':
 		logger.info(wording.get('retrying_job').format(job_id = state_manager.get_item('job_id')), __name__)
 		if job_runner.retry_job(state_manager.get_item('job_id'), process_step):
-			logger.info(wording.get('processing_job_succeed').format(job_id = state_manager.get_item('job_id')), __name__)
+			logger.info(wording.get('processing_job_succeeded').format(job_id = state_manager.get_item('job_id')), __name__)
 			return 0
 		logger.info(wording.get('processing_job_failed').format(job_id = state_manager.get_item('job_id')), __name__)
 		return 1
@@ -275,7 +276,7 @@ def route_job_runner() -> ErrorCode:
 	if state_manager.get_item('command') == 'job-retry-all':
 		logger.info(wording.get('retrying_jobs'), __name__)
 		if job_runner.retry_jobs(process_step, state_manager.get_item('halt_on_error')):
-			logger.info(wording.get('processing_jobs_succeed'), __name__)
+			logger.info(wording.get('processing_jobs_succeeded'), __name__)
 			return 0
 		logger.info(wording.get('processing_jobs_failed'), __name__)
 		return 1
@@ -321,7 +322,6 @@ def process_batch(args : Args) -> ErrorCode:
 
 
 def process_step(job_id : str, step_index : int, step_args : Args) -> bool:
-	clear_reference_faces()
 	step_total = job_manager.count_step_total(job_id)
 	step_args.update(collect_job_args())
 	apply_args(step_args, state_manager.set_item)
@@ -340,8 +340,6 @@ def conditional_process() -> ErrorCode:
 		if not processor_module.pre_process('output'):
 			return 2
 
-	conditional_append_reference_faces()
-
 	if is_image(state_manager.get_item('target_path')):
 		return process_image(start_time)
 	if is_video(state_manager.get_item('target_path')):
@@ -350,28 +348,6 @@ def conditional_process() -> ErrorCode:
 	return 0
 
 
-def conditional_append_reference_faces() -> None:
-	if 'reference' in state_manager.get_item('face_selector_mode') and not get_reference_faces():
-		source_frames = read_static_images(state_manager.get_item('source_paths'))
-		source_faces = get_many_faces(source_frames)
-		source_face = get_average_face(source_faces)
-		if is_video(state_manager.get_item('target_path')):
-			reference_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number'))
-		else:
-			reference_frame = read_image(state_manager.get_item('target_path'))
-		reference_faces = sort_and_filter_faces(get_many_faces([ reference_frame ]))
-		reference_face = get_one_face(reference_faces, state_manager.get_item('reference_face_position'))
-		append_reference_face('origin', reference_face)
-
-		if source_face and reference_face:
-			for processor_module in get_processors_modules(state_manager.get_item('processors')):
-				abstract_reference_frame = processor_module.get_reference_frame(source_face, reference_face, reference_frame)
-				if numpy.any(abstract_reference_frame):
-					abstract_reference_faces = sort_and_filter_faces(get_many_faces([ abstract_reference_frame ]))
-					abstract_reference_face = get_one_face(abstract_reference_faces, state_manager.get_item('reference_face_position'))
-					append_reference_face(processor_module.__name__, abstract_reference_face)
-
-
 def process_image(start_time : float) -> ErrorCode:
 	if analyse_image(state_manager.get_item('target_path')):
 		return 3
@@ -382,27 +358,47 @@ def process_image(start_time : float) -> ErrorCode:
 	create_temp_directory(state_manager.get_item('target_path'))
 
 	process_manager.start()
-	temp_image_resolution = pack_resolution(restrict_image_resolution(state_manager.get_item('target_path'), unpack_resolution(state_manager.get_item('output_image_resolution'))))
-	logger.info(wording.get('copying_image').format(resolution = temp_image_resolution), __name__)
+
+	output_image_resolution = scale_resolution(detect_image_resolution(state_manager.get_item('target_path')), state_manager.get_item('output_image_scale'))
+	temp_image_resolution = restrict_image_resolution(state_manager.get_item('target_path'), output_image_resolution)
+	logger.info(wording.get('copying_image').format(resolution = pack_resolution(temp_image_resolution)), __name__)
 	if copy_image(state_manager.get_item('target_path'), temp_image_resolution):
-		logger.debug(wording.get('copying_image_succeed'), __name__)
+		logger.debug(wording.get('copying_image_succeeded'), __name__)
 	else:
 		logger.error(wording.get('copying_image_failed'), __name__)
 		process_manager.end()
 		return 1
 
 	temp_image_path = get_temp_file_path(state_manager.get_item('target_path'))
+	reference_vision_frame = read_static_image(temp_image_path)
+	source_vision_frames = read_static_images(state_manager.get_item('source_paths'))
+	source_audio_frame = create_empty_audio_frame()
+	source_voice_frame = create_empty_audio_frame()
+	target_vision_frame = read_static_image(temp_image_path)
+	temp_vision_frame = target_vision_frame.copy()
+
 	for processor_module in get_processors_modules(state_manager.get_item('processors')):
 		logger.info(wording.get('processing'), processor_module.__name__)
-		processor_module.process_image(state_manager.get_item('source_paths'), temp_image_path, temp_image_path)
+
+		temp_vision_frame = processor_module.process_frame(
+		{
+			'reference_vision_frame': reference_vision_frame,
+			'source_vision_frames': source_vision_frames,
+			'source_audio_frame': source_audio_frame,
+			'source_voice_frame': source_voice_frame,
+			'target_vision_frame': target_vision_frame,
+			'temp_vision_frame': temp_vision_frame
+		})
+
 		processor_module.post_process()
+
+	write_image(temp_image_path, temp_vision_frame)
 
 	if is_process_stopping():
-		process_manager.end()
 		return 4
 
-	logger.info(wording.get('finalizing_image').format(resolution = state_manager.get_item('output_image_resolution')), __name__)
-	if finalize_image(state_manager.get_item('target_path'), state_manager.get_item('output_path'), state_manager.get_item('output_image_resolution')):
-		logger.debug(wording.get('finalizing_image_succeed'), __name__)
+	logger.info(wording.get('finalizing_image').format(resolution = pack_resolution(output_image_resolution)), __name__)
+	if finalize_image(state_manager.get_item('target_path'), state_manager.get_item('output_path'), output_image_resolution):
+		logger.debug(wording.get('finalizing_image_succeeded'), __name__)
 	else:
 		logger.warn(wording.get('finalizing_image_skipped'), __name__)
@@ -410,8 +406,7 @@ def process_image(start_time : float) -> ErrorCode:
 	clear_temp_directory(state_manager.get_item('target_path'))
 
 	if is_image(state_manager.get_item('output_path')):
-		seconds = '{:.2f}'.format((time() - start_time) % 60)
-		logger.info(wording.get('processing_image_succeed').format(seconds = seconds), __name__)
+		logger.info(wording.get('processing_image_succeeded').format(seconds = calculate_end_time(start_time)), __name__)
 	else:
 		logger.error(wording.get('processing_image_failed'), __name__)
 	process_manager.end()
@@ -431,25 +426,45 @@ def process_video(start_time : float) -> ErrorCode:
 	create_temp_directory(state_manager.get_item('target_path'))
 
 	process_manager.start()
-	temp_video_resolution = pack_resolution(restrict_video_resolution(state_manager.get_item('target_path'), unpack_resolution(state_manager.get_item('output_video_resolution'))))
+	output_video_resolution = scale_resolution(detect_video_resolution(state_manager.get_item('target_path')), state_manager.get_item('output_video_scale'))
+	temp_video_resolution = restrict_video_resolution(state_manager.get_item('target_path'), output_video_resolution)
 	temp_video_fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps'))
-	logger.info(wording.get('extracting_frames').format(resolution = temp_video_resolution, fps = temp_video_fps), __name__)
+	logger.info(wording.get('extracting_frames').format(resolution = pack_resolution(temp_video_resolution), fps = temp_video_fps), __name__)
+
 	if extract_frames(state_manager.get_item('target_path'), temp_video_resolution, temp_video_fps, trim_frame_start, trim_frame_end):
-		logger.debug(wording.get('extracting_frames_succeed'), __name__)
+		logger.debug(wording.get('extracting_frames_succeeded'), __name__)
 	else:
 		if is_process_stopping():
-			process_manager.end()
 			return 4
 		logger.error(wording.get('extracting_frames_failed'), __name__)
 		process_manager.end()
 		return 1
 
 	temp_frame_paths = resolve_temp_frame_paths(state_manager.get_item('target_path'))
+
 	if temp_frame_paths:
+		with tqdm(total = len(temp_frame_paths), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
+			progress.set_postfix(execution_providers = state_manager.get_item('execution_providers'))
+
+			with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor:
+				futures = []
+
+				for frame_number, temp_frame_path in enumerate(temp_frame_paths):
+					future = executor.submit(process_temp_frame, temp_frame_path, frame_number)
+					futures.append(future)
+
+				for future in as_completed(futures):
+					if is_process_stopping():
+						for __future__ in futures:
+							__future__.cancel()
+
+					if not future.cancelled():
+						future.result()
+						progress.update()
+
 		for processor_module in get_processors_modules(state_manager.get_item('processors')):
-			logger.info(wording.get('processing'), processor_module.__name__)
-			processor_module.process_video(state_manager.get_item('source_paths'), temp_frame_paths)
 			processor_module.post_process()
+
 		if is_process_stopping():
 			return 4
 	else:
@@ -457,12 +472,11 @@ def process_video(start_time : float) -> ErrorCode:
 		process_manager.end()
 		return 1
 
-	logger.info(wording.get('merging_video').format(resolution = state_manager.get_item('output_video_resolution'), fps = state_manager.get_item('output_video_fps')), __name__)
-	if merge_video(state_manager.get_item('target_path'), temp_video_fps, state_manager.get_item('output_video_resolution'), state_manager.get_item('output_video_fps'), trim_frame_start, trim_frame_end):
-		logger.debug(wording.get('merging_video_succeed'), __name__)
+	logger.info(wording.get('merging_video').format(resolution = pack_resolution(output_video_resolution), fps = state_manager.get_item('output_video_fps')), __name__)
+	if merge_video(state_manager.get_item('target_path'), temp_video_fps, output_video_resolution, state_manager.get_item('output_video_fps'), trim_frame_start, trim_frame_end):
+		logger.debug(wording.get('merging_video_succeeded'), __name__)
 	else:
 		if is_process_stopping():
-			process_manager.end()
 			return 4
 		logger.error(wording.get('merging_video_failed'), __name__)
 		process_manager.end()
@@ -476,22 +490,20 @@ def process_video(start_time : float) -> ErrorCode:
 	if source_audio_path:
 		if replace_audio(state_manager.get_item('target_path'), source_audio_path, state_manager.get_item('output_path')):
 			video_manager.clear_video_pool()
-			logger.debug(wording.get('replacing_audio_succeed'), __name__)
+			logger.debug(wording.get('replacing_audio_succeeded'), __name__)
 		else:
 			video_manager.clear_video_pool()
 			if is_process_stopping():
-				process_manager.end()
 				return 4
 			logger.warn(wording.get('replacing_audio_skipped'), __name__)
 			move_temp_file(state_manager.get_item('target_path'), state_manager.get_item('output_path'))
 	else:
 		if restore_audio(state_manager.get_item('target_path'), state_manager.get_item('output_path'), trim_frame_start, trim_frame_end):
 			video_manager.clear_video_pool()
-			logger.debug(wording.get('restoring_audio_succeed'), __name__)
+			logger.debug(wording.get('restoring_audio_succeeded'), __name__)
 		else:
 			video_manager.clear_video_pool()
 			if is_process_stopping():
-				process_manager.end()
 				return 4
 			logger.warn(wording.get('restoring_audio_skipped'), __name__)
 			move_temp_file(state_manager.get_item('target_path'), state_manager.get_item('output_path'))
@@ -500,8 +512,7 @@ def process_video(start_time : float) -> ErrorCode:
 	clear_temp_directory(state_manager.get_item('target_path'))
 
 	if is_video(state_manager.get_item('output_path')):
-		seconds = '{:.2f}'.format((time() - start_time))
-		logger.info(wording.get('processing_video_succeed').format(seconds = seconds), __name__)
+		logger.info(wording.get('processing_video_succeeded').format(seconds = calculate_end_time(start_time)), __name__)
 	else:
 		logger.error(wording.get('processing_video_failed'), __name__)
 	process_manager.end()
@@ -510,6 +521,36 @@ def process_video(start_time : float) -> ErrorCode:
 	return 0
 
 
+def process_temp_frame(temp_frame_path : str, frame_number : int) -> bool:
+	reference_vision_frame = read_static_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number'))
+	source_vision_frames = read_static_images(state_manager.get_item('source_paths'))
+	source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths')))
+	temp_video_fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps'))
+	target_vision_frame = read_static_image(temp_frame_path)
+	temp_vision_frame = target_vision_frame.copy()
+
+	source_audio_frame = get_audio_frame(source_audio_path, temp_video_fps, frame_number)
+	source_voice_frame = get_voice_frame(source_audio_path, temp_video_fps, frame_number)
+
+	if not numpy.any(source_audio_frame):
+		source_audio_frame = create_empty_audio_frame()
+	if not numpy.any(source_voice_frame):
+		source_voice_frame = create_empty_audio_frame()
+
+	for processor_module in get_processors_modules(state_manager.get_item('processors')):
+		temp_vision_frame = processor_module.process_frame(
+		{
+			'reference_vision_frame': reference_vision_frame,
+			'source_vision_frames': source_vision_frames,
+			'source_audio_frame': source_audio_frame,
+			'source_voice_frame': source_voice_frame,
+			'target_vision_frame': target_vision_frame,
+			'temp_vision_frame': temp_vision_frame
+		})
+
+	return write_image(temp_frame_path, temp_vision_frame)
+
+
 def is_process_stopping() -> bool:
 	if process_manager.is_stopping():
 		process_manager.end()

diff --git a/facefusion/download.py b/facefusion/download.py
index f0c92f4..37179e8 100644
--- a/facefusion/download.py
+++ b/facefusion/download.py
@@ -29,7 +29,7 @@ def conditional_download(download_directory_path : str, urls : List[str]) -> Non
 			with tqdm(total = download_size, initial = initial_size, desc = wording.get('downloading'), unit = 'B', unit_scale = True, unit_divisor = 1024, ascii = ' =', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
 				commands = curl_builder.chain(
 					curl_builder.download(url, download_file_path),
-					curl_builder.set_timeout(10)
+					curl_builder.set_timeout(5)
 				)
 				open_curl(commands)
 				current_size = initial_size
@@ -41,7 +41,7 @@ def conditional_download(download_directory_path : str, urls : List[str]) -> Non
 		progress.update(current_size - progress.n)
 
 
-@lru_cache(maxsize = None)
+@lru_cache(maxsize = 1024)
 def get_static_download_size(url : str) -> int:
 	commands = curl_builder.chain(
 		curl_builder.head(url),
@@ -59,7 +59,7 @@ def get_static_download_size(url : str) -> int:
 	return 0
 
 
-@lru_cache(maxsize = None)
+@lru_cache(maxsize = 1024)
 def ping_static_url(url : str) -> bool:
 	commands = curl_builder.chain(
 		curl_builder.head(url),
@@ -87,7 +87,7 @@ def conditional_download_hashes(hash_set : DownloadSet) -> bool:
 
 	for valid_hash_path in valid_hash_paths:
 		valid_hash_file_name = get_file_name(valid_hash_path)
-		logger.debug(wording.get('validating_hash_succeed').format(hash_file_name = valid_hash_file_name), __name__)
+		logger.debug(wording.get('validating_hash_succeeded').format(hash_file_name = valid_hash_file_name), __name__)
 	for invalid_hash_path in invalid_hash_paths:
 		invalid_hash_file_name = get_file_name(invalid_hash_path)
 		logger.error(wording.get('validating_hash_failed').format(hash_file_name = invalid_hash_file_name), __name__)
@@ -114,7 +114,7 @@ def conditional_download_sources(source_set : DownloadSet) -> bool:
 
 	for valid_source_path in valid_source_paths:
 		valid_source_file_name = get_file_name(valid_source_path)
-		logger.debug(wording.get('validating_source_succeed').format(source_file_name = valid_source_file_name), __name__)
+		logger.debug(wording.get('validating_source_succeeded').format(source_file_name = valid_source_file_name), __name__)
 	for invalid_source_path in invalid_source_paths:
 		invalid_source_file_name = get_file_name(invalid_source_path)
 		logger.error(wording.get('validating_source_failed').format(source_file_name = invalid_source_file_name), __name__)

diff --git a/facefusion/execution.py b/facefusion/execution.py
index dbec8bf..d39be91 100644
--- a/facefusion/execution.py
+++ b/facefusion/execution.py
@@ -53,6 +53,12 @@ def create_inference_session_providers(execution_device_id : str, execution_prov
 		{
 			'device_id': execution_device_id
 		}))
+	if execution_provider == 'migraphx':
+		inference_session_providers.append((facefusion.choices.execution_provider_set.get(execution_provider),
+		{
+			'device_id': execution_device_id,
+			'migraphx_model_cache_dir': '.caches'
+		}))
 	if execution_provider == 'openvino':
 		inference_session_providers.append((facefusion.choices.execution_provider_set.get(execution_provider),
 		{
@@ -86,8 +92,6 @@ def resolve_cudnn_conv_algo_search() -> str:
 
 def resolve_openvino_device_type(execution_device_id : str) -> str:
 	if execution_device_id == '0':
 		return 'GPU'
-	if execution_device_id == '∞':
-		return 'MULTI:GPU'
 	return 'GPU.' + execution_device_id
@@ -96,7 +100,7 @@ def run_nvidia_smi() -> subprocess.Popen[bytes]:
 	return subprocess.Popen(commands, stdout = subprocess.PIPE)
 
 
-@lru_cache(maxsize = None)
+@lru_cache()
 def detect_static_execution_devices() -> List[ExecutionDevice]:
 	return detect_execution_devices()

diff --git a/facefusion/exit_helper.py b/facefusion/exit_helper.py
index 23da74a..cdf66cc 100644
--- a/facefusion/exit_helper.py
+++ b/facefusion/exit_helper.py
@@ -1,3 +1,4 @@
+import os
 import signal
 import sys
 from time import sleep
@@ -8,8 +9,11 @@ from facefusion.temp_helper import clear_temp_directory
 from facefusion.types import ErrorCode
 
 
+def fatal_exit(error_code : ErrorCode) -> None:
+	os._exit(error_code)
+
+
 def hard_exit(error_code : ErrorCode) -> None:
-	signal.signal(signal.SIGINT, signal.SIG_IGN)
 	sys.exit(error_code)
 
 
@@ -18,9 +22,13 @@ def signal_exit(signum : int, frame : FrameType) -> None:
 
 
 def graceful_exit(error_code : ErrorCode) -> None:
+	signal.signal(signal.SIGINT, signal.SIG_IGN)
 	process_manager.stop()
+
 	while process_manager.is_processing():
 		sleep(0.5)
+
 	if state_manager.get_item('target_path'):
 		clear_temp_directory(state_manager.get_item('target_path'))
+
 	hard_exit(error_code)

diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py
index 673ecbe..b1dad44 100644
--- a/facefusion/face_analyser.py
+++ b/facefusion/face_analyser.py
@@ -5,10 +5,10 @@ import numpy
 from facefusion import state_manager
 from facefusion.common_helper import get_first
 from facefusion.face_classifier import classify_face
-from facefusion.face_detector import detect_faces, detect_rotated_faces
+from facefusion.face_detector import detect_faces, detect_faces_by_angle
 from facefusion.face_helper import apply_nms, convert_to_face_landmark_5, estimate_face_angle, get_nms_threshold
 from facefusion.face_landmarker import detect_face_landmark, estimate_face_landmark_68_5
-from facefusion.face_recognizer import calc_embedding
+from facefusion.face_recognizer import calculate_face_embedding
 from facefusion.face_store import get_static_faces, set_static_faces
 from facefusion.types import BoundingBox, Face, FaceLandmark5, FaceLandmarkSet, FaceScoreSet, Score, VisionFrame
@@ -45,15 +45,15 @@ def create_faces(vision_frame : VisionFrame, bounding_boxes : List[BoundingBox],
 			'detector': face_score,
 			'landmarker': face_landmark_score_68
 		}
-		embedding, normed_embedding = calc_embedding(vision_frame, face_landmark_set.get('5/68'))
+		face_embedding, face_embedding_norm = calculate_face_embedding(vision_frame, face_landmark_set.get('5/68'))
 		gender, age, race = classify_face(vision_frame, face_landmark_set.get('5/68'))
 		faces.append(Face(
 			bounding_box = bounding_box,
 			score_set = face_score_set,
 			landmark_set = face_landmark_set,
 			angle = face_angle,
-			embedding = embedding,
-			normed_embedding = normed_embedding,
+			embedding = face_embedding,
+			embedding_norm = face_embedding_norm,
 			gender = gender,
 			age = age,
 			race = race
@@ -69,23 +69,23 @@ def get_one_face(faces : List[Face], position : int = 0) -> Optional[Face]:
 
 
 def get_average_face(faces : List[Face]) -> Optional[Face]:
-	embeddings = []
-	normed_embeddings = []
+	face_embeddings = []
+	face_embeddings_norm = []
 
 	if faces:
 		first_face = get_first(faces)
 
 		for face in faces:
-			embeddings.append(face.embedding)
-			normed_embeddings.append(face.normed_embedding)
+			face_embeddings.append(face.embedding)
+			face_embeddings_norm.append(face.embedding_norm)
 
 		return Face(
 			bounding_box = first_face.bounding_box,
 			score_set = first_face.score_set,
first_face.landmark_set, angle = first_face.angle, - embedding = numpy.mean(embeddings, axis = 0), - normed_embedding = numpy.mean(normed_embeddings, axis = 0), + embedding = numpy.mean(face_embeddings, axis = 0), + embedding_norm = numpy.mean(face_embeddings_norm, axis = 0), gender = first_face.gender, age = first_face.age, race = first_face.race @@ -110,7 +110,7 @@ def get_many_faces(vision_frames : List[VisionFrame]) -> List[Face]: if face_detector_angle == 0: bounding_boxes, face_scores, face_landmarks_5 = detect_faces(vision_frame) else: - bounding_boxes, face_scores, face_landmarks_5 = detect_rotated_faces(vision_frame, face_detector_angle) + bounding_boxes, face_scores, face_landmarks_5 = detect_faces_by_angle(vision_frame, face_detector_angle) all_bounding_boxes.extend(bounding_boxes) all_face_scores.extend(face_scores) all_face_landmarks_5.extend(face_landmarks_5) diff --git a/facefusion/face_classifier.py b/facefusion/face_classifier.py index 3b09990..8551a23 100644 --- a/facefusion/face_classifier.py +++ b/facefusion/face_classifier.py @@ -11,7 +11,7 @@ from facefusion.thread_helper import conditional_thread_semaphore from facefusion.types import Age, DownloadScope, FaceLandmark5, Gender, InferencePool, ModelOptions, ModelSet, Race, VisionFrame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { diff --git a/facefusion/face_detector.py b/facefusion/face_detector.py index c3532fd..0d197ae 100644 --- a/facefusion/face_detector.py +++ b/facefusion/face_detector.py @@ -6,14 +6,14 @@ import numpy from facefusion import inference_manager, state_manager from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url -from facefusion.face_helper import create_rotated_matrix_and_size, create_static_anchors, distance_to_bounding_box, distance_to_face_landmark_5, normalize_bounding_box, transform_bounding_box, transform_points +from facefusion.face_helper import create_rotation_matrix_and_size, create_static_anchors, distance_to_bounding_box, distance_to_face_landmark_5, normalize_bounding_box, transform_bounding_box, transform_points from facefusion.filesystem import resolve_relative_path from facefusion.thread_helper import thread_semaphore from facefusion.types import Angle, BoundingBox, Detection, DownloadScope, DownloadSet, FaceLandmark5, InferencePool, ModelSet, Score, VisionFrame from facefusion.vision import restrict_frame, unpack_resolution -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -73,6 +73,25 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: 'path': resolve_relative_path('../.assets/models/yoloface_8n.onnx') } } + }, + 'yunet': + { + 'hashes': + { + 'yunet': + { + 'url': resolve_download_url('models-3.4.0', 'yunet_2023_mar.hash'), + 'path': resolve_relative_path('../.assets/models/yunet_2023_mar.hash') + } + }, + 'sources': + { + 'yunet': + { + 'url': resolve_download_url('models-3.4.0', 'yunet_2023_mar.onnx'), + 'path': resolve_relative_path('../.assets/models/yunet_2023_mar.onnx') + } + } } } @@ -94,7 +113,7 @@ def collect_model_downloads() -> Tuple[DownloadSet, DownloadSet]: model_hash_set = {} model_source_set = {} - for face_detector_model in [ 'retinaface', 'scrfd', 'yolo_face' ]: + for face_detector_model in [ 'retinaface', 'scrfd', 'yolo_face', 'yunet' ]: if state_manager.get_item('face_detector_model') in [ 'many', face_detector_model ]: 
model_hash_set[face_detector_model] = model_set.get(face_detector_model).get('hashes').get(face_detector_model) model_source_set[face_detector_model] = model_set.get(face_detector_model).get('sources').get(face_detector_model) @@ -131,17 +150,23 @@ def detect_faces(vision_frame : VisionFrame) -> Tuple[List[BoundingBox], List[Sc all_face_scores.extend(face_scores) all_face_landmarks_5.extend(face_landmarks_5) + if state_manager.get_item('face_detector_model') == 'yunet': + bounding_boxes, face_scores, face_landmarks_5 = detect_with_yunet(vision_frame, state_manager.get_item('face_detector_size')) + all_bounding_boxes.extend(bounding_boxes) + all_face_scores.extend(face_scores) + all_face_landmarks_5.extend(face_landmarks_5) + all_bounding_boxes = [ normalize_bounding_box(all_bounding_box) for all_bounding_box in all_bounding_boxes ] return all_bounding_boxes, all_face_scores, all_face_landmarks_5 -def detect_rotated_faces(vision_frame : VisionFrame, angle : Angle) -> Tuple[List[BoundingBox], List[Score], List[FaceLandmark5]]: - rotated_matrix, rotated_size = create_rotated_matrix_and_size(angle, vision_frame.shape[:2][::-1]) - rotated_vision_frame = cv2.warpAffine(vision_frame, rotated_matrix, rotated_size) - rotated_inverse_matrix = cv2.invertAffineTransform(rotated_matrix) - bounding_boxes, face_scores, face_landmarks_5 = detect_faces(rotated_vision_frame) - bounding_boxes = [ transform_bounding_box(bounding_box, rotated_inverse_matrix) for bounding_box in bounding_boxes ] - face_landmarks_5 = [ transform_points(face_landmark_5, rotated_inverse_matrix) for face_landmark_5 in face_landmarks_5 ] +def detect_faces_by_angle(vision_frame : VisionFrame, face_angle : Angle) -> Tuple[List[BoundingBox], List[Score], List[FaceLandmark5]]: + rotation_matrix, rotation_size = create_rotation_matrix_and_size(face_angle, vision_frame.shape[:2][::-1]) + rotation_vision_frame = cv2.warpAffine(vision_frame, rotation_matrix, rotation_size) + rotation_inverse_matrix = cv2.invertAffineTransform(rotation_matrix) + bounding_boxes, face_scores, face_landmarks_5 = detect_faces(rotation_vision_frame) + bounding_boxes = [ transform_bounding_box(bounding_box, rotation_inverse_matrix) for bounding_box in bounding_boxes ] + face_landmarks_5 = [ transform_points(face_landmark_5, rotation_inverse_matrix) for face_landmark_5 in face_landmarks_5 ] return bounding_boxes, face_scores, face_landmarks_5 @@ -162,7 +187,8 @@ def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) detection = forward_with_retinaface(detect_vision_frame) for index, feature_stride in enumerate(feature_strides): - keep_indices = numpy.where(detection[index] >= face_detector_score)[0] + face_scores_raw = detection[index] + keep_indices = numpy.where(face_scores_raw >= face_detector_score)[0] if numpy.any(keep_indices): stride_height = face_detector_height // feature_stride @@ -180,7 +206,7 @@ def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) bounding_box_raw[3] * ratio_height ])) - for face_score_raw in detection[index][keep_indices]: + for face_score_raw in face_scores_raw[keep_indices]: face_scores.append(face_score_raw[0]) for face_landmark_raw_5 in distance_to_face_landmark_5(anchors, face_landmarks_5_raw)[keep_indices]: @@ -206,7 +232,8 @@ def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> T detection = forward_with_scrfd(detect_vision_frame) for index, feature_stride in enumerate(feature_strides): - keep_indices = numpy.where(detection[index] >= 
face_detector_score)[0] + face_scores_raw = detection[index] + keep_indices = numpy.where(face_scores_raw >= face_detector_score)[0] if numpy.any(keep_indices): stride_height = face_detector_height // feature_stride @@ -224,7 +251,7 @@ def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> T bounding_box_raw[3] * ratio_height ])) - for face_score_raw in detection[index][keep_indices]: + for face_score_raw in face_scores_raw[keep_indices]: face_scores.append(face_score_raw[0]) for face_landmark_raw_5 in distance_to_face_landmark_5(anchors, face_landmarks_5_raw)[keep_indices]: @@ -271,6 +298,67 @@ def detect_with_yolo_face(vision_frame : VisionFrame, face_detector_size : str) return bounding_boxes, face_scores, face_landmarks_5 +def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[Score], List[FaceLandmark5]]: + bounding_boxes = [] + face_scores = [] + face_landmarks_5 = [] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 1 + face_detector_score = state_manager.get_item('face_detector_score') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = restrict_frame(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + detect_vision_frame = normalize_detect_frame(detect_vision_frame, [ 0, 255 ]) + detection = forward_with_yunet(detect_vision_frame) + + for index, feature_stride in enumerate(feature_strides): + face_scores_raw = (detection[index] * detection[index + feature_map_channel]).reshape(-1) + keep_indices = numpy.where(face_scores_raw >= face_detector_score)[0] + + if numpy.any(keep_indices): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_boxes_center = detection[index + feature_map_channel * 2].squeeze(0)[:, :2] * feature_stride + anchors + bounding_boxes_size = numpy.exp(detection[index + feature_map_channel * 2].squeeze(0)[:, 2:4]) * feature_stride + face_landmarks_5_raw = detection[index + feature_map_channel * 3].squeeze(0) + + bounding_boxes_raw = numpy.stack( + [ + bounding_boxes_center[:, 0] - bounding_boxes_size[:, 0] / 2, + bounding_boxes_center[:, 1] - bounding_boxes_size[:, 1] / 2, + bounding_boxes_center[:, 0] + bounding_boxes_size[:, 0] / 2, + bounding_boxes_center[:, 1] + bounding_boxes_size[:, 1] / 2 + ], axis = -1) + + for bounding_box_raw in bounding_boxes_raw[keep_indices]: + bounding_boxes.append(numpy.array( + [ + bounding_box_raw[0] * ratio_width, + bounding_box_raw[1] * ratio_height, + bounding_box_raw[2] * ratio_width, + bounding_box_raw[3] * ratio_height + ])) + + face_scores.extend(face_scores_raw[keep_indices]) + face_landmarks_5_raw = numpy.concatenate( + [ + face_landmarks_5_raw[:, [0, 1]] * feature_stride + anchors, + face_landmarks_5_raw[:, [2, 3]] * feature_stride + anchors, + face_landmarks_5_raw[:, [4, 5]] * feature_stride + anchors, + face_landmarks_5_raw[:, [6, 7]] * feature_stride + anchors, + face_landmarks_5_raw[:, [8, 9]] * feature_stride + anchors + ], axis = -1).reshape(-1, 5, 2) + + for face_landmark_raw_5 in face_landmarks_5_raw[keep_indices]: + face_landmarks_5.append(face_landmark_raw_5 * [ 
ratio_width, ratio_height ]) + + return bounding_boxes, face_scores, face_landmarks_5 + + def forward_with_retinaface(detect_vision_frame : VisionFrame) -> Detection: face_detector = get_inference_pool().get('retinaface') @@ -307,6 +395,18 @@ def forward_with_yolo_face(detect_vision_frame : VisionFrame) -> Detection: return detection +def forward_with_yunet(detect_vision_frame : VisionFrame) -> Detection: + face_detector = get_inference_pool().get('yunet') + + with thread_semaphore(): + detection = face_detector.run(None, + { + 'input': detect_vision_frame + }) + + return detection + + def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : str) -> VisionFrame: face_detector_width, face_detector_height = unpack_resolution(face_detector_size) detect_vision_frame = numpy.zeros((face_detector_height, face_detector_width, 3)) diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py index cc37fe0..8ca47f8 100644 --- a/facefusion/face_helper.py +++ b/facefusion/face_helper.py @@ -69,8 +69,8 @@ WARP_TEMPLATE_SET : WarpTemplateSet =\ def estimate_matrix_by_face_landmark_5(face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Matrix: - normed_warp_template = WARP_TEMPLATE_SET.get(warp_template) * crop_size - affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] + warp_template_norm = WARP_TEMPLATE_SET.get(warp_template) * crop_size + affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, warp_template_norm, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] return affine_matrix @@ -99,58 +99,58 @@ def warp_face_by_translation(temp_vision_frame : VisionFrame, translation : Tran def paste_back(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, crop_mask : Mask, affine_matrix : Matrix) -> VisionFrame: - paste_bounding_box, paste_matrix = calc_paste_area(temp_vision_frame, crop_vision_frame, affine_matrix) - x_min, y_min, x_max, y_max = paste_bounding_box - paste_width = x_max - x_min - paste_height = y_max - y_min + paste_bounding_box, paste_matrix = calculate_paste_area(temp_vision_frame, crop_vision_frame, affine_matrix) + x1, y1, x2, y2 = paste_bounding_box + paste_width = x2 - x1 + paste_height = y2 - y1 inverse_mask = cv2.warpAffine(crop_mask, paste_matrix, (paste_width, paste_height)).clip(0, 1) inverse_mask = numpy.expand_dims(inverse_mask, axis = -1) inverse_vision_frame = cv2.warpAffine(crop_vision_frame, paste_matrix, (paste_width, paste_height), borderMode = cv2.BORDER_REPLICATE) temp_vision_frame = temp_vision_frame.copy() - paste_vision_frame = temp_vision_frame[y_min:y_max, x_min:x_max] + paste_vision_frame = temp_vision_frame[y1:y2, x1:x2] paste_vision_frame = paste_vision_frame * (1 - inverse_mask) + inverse_vision_frame * inverse_mask - temp_vision_frame[y_min:y_max, x_min:x_max] = paste_vision_frame.astype(temp_vision_frame.dtype) + temp_vision_frame[y1:y2, x1:x2] = paste_vision_frame.astype(temp_vision_frame.dtype) return temp_vision_frame -def calc_paste_area(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, affine_matrix : Matrix) -> Tuple[BoundingBox, Matrix]: +def calculate_paste_area(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, affine_matrix : Matrix) -> Tuple[BoundingBox, Matrix]: temp_height, temp_width = temp_vision_frame.shape[:2] crop_height, crop_width = crop_vision_frame.shape[:2] inverse_matrix = cv2.invertAffineTransform(affine_matrix) crop_points = numpy.array([ [ 0, 
0 ], [ crop_width, 0 ], [ crop_width, crop_height ], [ 0, crop_height ] ]) paste_region_points = transform_points(crop_points, inverse_matrix) - min_point = numpy.floor(paste_region_points.min(axis = 0)).astype(int) - max_point = numpy.ceil(paste_region_points.max(axis = 0)).astype(int) - x_min, y_min = numpy.clip(min_point, 0, [ temp_width, temp_height ]) - x_max, y_max = numpy.clip(max_point, 0, [ temp_width, temp_height ]) - paste_bounding_box = numpy.array([ x_min, y_min, x_max, y_max ]) + paste_region_point_min = numpy.floor(paste_region_points.min(axis = 0)).astype(int) + paste_region_point_max = numpy.ceil(paste_region_points.max(axis = 0)).astype(int) + x1, y1 = numpy.clip(paste_region_point_min, 0, [ temp_width, temp_height ]) + x2, y2 = numpy.clip(paste_region_point_max, 0, [ temp_width, temp_height ]) + paste_bounding_box = numpy.array([ x1, y1, x2, y2 ]) paste_matrix = inverse_matrix.copy() - paste_matrix[0, 2] -= x_min - paste_matrix[1, 2] -= y_min + paste_matrix[0, 2] -= x1 + paste_matrix[1, 2] -= y1 return paste_bounding_box, paste_matrix -@lru_cache(maxsize = None) +@lru_cache() def create_static_anchors(feature_stride : int, anchor_total : int, stride_height : int, stride_width : int) -> Anchors: - y, x = numpy.mgrid[:stride_height, :stride_width][::-1] + x, y = numpy.mgrid[:stride_width, :stride_height] anchors = numpy.stack((y, x), axis = -1) anchors = (anchors * feature_stride).reshape((-1, 2)) anchors = numpy.stack([ anchors ] * anchor_total, axis = 1).reshape((-1, 2)) return anchors -def create_rotated_matrix_and_size(angle : Angle, size : Size) -> Tuple[Matrix, Size]: - rotated_matrix = cv2.getRotationMatrix2D((size[0] / 2, size[1] / 2), angle, 1) - rotated_size = numpy.dot(numpy.abs(rotated_matrix[:, :2]), size) - rotated_matrix[:, -1] += (rotated_size - size) * 0.5 #type:ignore[misc] - rotated_size = int(rotated_size[0]), int(rotated_size[1]) - return rotated_matrix, rotated_size +def create_rotation_matrix_and_size(angle : Angle, size : Size) -> Tuple[Matrix, Size]: + rotation_matrix = cv2.getRotationMatrix2D((size[0] / 2, size[1] / 2), angle, 1) + rotation_size = numpy.dot(numpy.abs(rotation_matrix[:, :2]), size) + rotation_matrix[:, -1] += (rotation_size - size) * 0.5 #type:ignore[misc] + rotation_size = int(rotation_size[0]), int(rotation_size[1]) + return rotation_matrix, rotation_size def create_bounding_box(face_landmark_68 : FaceLandmark68) -> BoundingBox: - min_x, min_y = numpy.min(face_landmark_68, axis = 0) - max_x, max_y = numpy.max(face_landmark_68, axis = 0) - bounding_box = normalize_bounding_box(numpy.array([ min_x, min_y, max_x, max_y ])) + x1, y1 = numpy.min(face_landmark_68, axis = 0) + x2, y2 = numpy.max(face_landmark_68, axis = 0) + bounding_box = normalize_bounding_box(numpy.array([ x1, y1, x2, y2 ])) return bounding_box @@ -229,8 +229,8 @@ def estimate_face_angle(face_landmark_68 : FaceLandmark68) -> Angle: def apply_nms(bounding_boxes : List[BoundingBox], scores : List[Score], score_threshold : float, nms_threshold : float) -> Sequence[int]: - normed_bounding_boxes = [ (x1, y1, x2 - x1, y2 - y1) for (x1, y1, x2, y2) in bounding_boxes ] - keep_indices = cv2.dnn.NMSBoxes(normed_bounding_boxes, scores, score_threshold = score_threshold, nms_threshold = nms_threshold) + bounding_boxes_norm = [ (x1, y1, x2 - x1, y2 - y1) for (x1, y1, x2, y2) in bounding_boxes ] + keep_indices = cv2.dnn.NMSBoxes(bounding_boxes_norm, scores, score_threshold = score_threshold, nms_threshold = nms_threshold) return keep_indices @@ -246,9 +246,11 @@ def 
get_nms_threshold(face_detector_model : FaceDetectorModel, face_detector_ang return 0.4 -def merge_matrix(matrices : List[Matrix]) -> Matrix: - merged_matrix = numpy.vstack([ matrices[0], [ 0, 0, 1 ] ]) - for matrix in matrices[1:]: - matrix = numpy.vstack([ matrix, [ 0, 0, 1 ] ]) - merged_matrix = numpy.dot(merged_matrix, matrix) - return merged_matrix[:2, :] +def merge_matrix(temp_matrices : List[Matrix]) -> Matrix: + matrix = numpy.vstack([temp_matrices[0], [0, 0, 1]]) + + for temp_matrix in temp_matrices[1:]: + temp_matrix = numpy.vstack([ temp_matrix, [ 0, 0, 1 ] ]) + matrix = numpy.dot(temp_matrix, matrix) + + return matrix[:2, :] diff --git a/facefusion/face_landmarker.py b/facefusion/face_landmarker.py index cab9627..6edbd84 100644 --- a/facefusion/face_landmarker.py +++ b/facefusion/face_landmarker.py @@ -6,13 +6,13 @@ import numpy from facefusion import inference_manager, state_manager from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url -from facefusion.face_helper import create_rotated_matrix_and_size, estimate_matrix_by_face_landmark_5, transform_points, warp_face_by_translation +from facefusion.face_helper import create_rotation_matrix_and_size, estimate_matrix_by_face_landmark_5, transform_points, warp_face_by_translation from facefusion.filesystem import resolve_relative_path from facefusion.thread_helper import conditional_thread_semaphore from facefusion.types import Angle, BoundingBox, DownloadScope, DownloadSet, FaceLandmark5, FaceLandmark68, InferencePool, ModelSet, Prediction, Score, VisionFrame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -136,14 +136,14 @@ def detect_with_2dfan4(temp_vision_frame: VisionFrame, bounding_box: BoundingBox model_size = create_static_model_set('full').get('2dfan4').get('size') scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max().clip(1, None) translation = (model_size[0] - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 - rotated_matrix, rotated_size = create_rotated_matrix_and_size(face_angle, model_size) + rotation_matrix, rotation_size = create_rotation_matrix_and_size(face_angle, model_size) crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, model_size) - crop_vision_frame = cv2.warpAffine(crop_vision_frame, rotated_matrix, rotated_size) + crop_vision_frame = cv2.warpAffine(crop_vision_frame, rotation_matrix, rotation_size) crop_vision_frame = conditional_optimize_contrast(crop_vision_frame) crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 face_landmark_68, face_heatmap = forward_with_2dfan4(crop_vision_frame) face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 * 256 - face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(rotated_matrix)) + face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(rotation_matrix)) face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) face_landmark_score_68 = numpy.amax(face_heatmap, axis = (2, 3)) face_landmark_score_68 = numpy.mean(face_landmark_score_68) @@ -155,15 +155,15 @@ def detect_with_peppa_wutz(temp_vision_frame : VisionFrame, bounding_box : Bound model_size = create_static_model_set('full').get('peppa_wutz').get('size') scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max().clip(1, None) translation = (model_size[0] - 
numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 - rotated_matrix, rotated_size = create_rotated_matrix_and_size(face_angle, model_size) + rotation_matrix, rotation_size = create_rotation_matrix_and_size(face_angle, model_size) crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, model_size) - crop_vision_frame = cv2.warpAffine(crop_vision_frame, rotated_matrix, rotated_size) + crop_vision_frame = cv2.warpAffine(crop_vision_frame, rotation_matrix, rotation_size) crop_vision_frame = conditional_optimize_contrast(crop_vision_frame) crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) prediction = forward_with_peppa_wutz(crop_vision_frame) face_landmark_68 = prediction.reshape(-1, 3)[:, :2] / 64 * model_size[0] - face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(rotated_matrix)) + face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(rotation_matrix)) face_landmark_68 = transform_points(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) face_landmark_score_68 = prediction.reshape(-1, 3)[:, 2].mean() face_landmark_score_68 = numpy.interp(face_landmark_score_68, [ 0, 0.95 ], [ 0, 1 ]) diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py index 400838d..e61992e 100755 --- a/facefusion/face_masker.py +++ b/facefusion/face_masker.py @@ -12,7 +12,7 @@ from facefusion.thread_helper import conditional_thread_semaphore from facefusion.types import DownloadScope, DownloadSet, FaceLandmark68, FaceMaskArea, FaceMaskRegion, InferencePool, Mask, ModelSet, Padding, VisionFrame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -137,7 +137,7 @@ def collect_model_downloads() -> Tuple[DownloadSet, DownloadSet]: model_source_set = {} for face_occluder_model in [ 'xseg_1', 'xseg_2', 'xseg_3' ]: - if state_manager.get_item('face_occluder_model') == face_occluder_model: + if state_manager.get_item('face_occluder_model') in [ 'many', face_occluder_model ]: model_hash_set[face_occluder_model] = model_set.get(face_occluder_model).get('hashes').get('face_occluder') model_source_set[face_occluder_model] = model_set.get(face_occluder_model).get('sources').get('face_occluder') @@ -171,14 +171,24 @@ def create_box_mask(crop_vision_frame : VisionFrame, face_mask_blur : float, fac def create_occlusion_mask(crop_vision_frame : VisionFrame) -> Mask: - model_name = state_manager.get_item('face_occluder_model') - model_size = create_static_model_set('full').get(model_name).get('size') - prepare_vision_frame = cv2.resize(crop_vision_frame, model_size) - prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255.0 - prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3) - occlusion_mask = forward_occlude_face(prepare_vision_frame) - occlusion_mask = occlusion_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32) - occlusion_mask = cv2.resize(occlusion_mask, crop_vision_frame.shape[:2][::-1]) + temp_masks = [] + + if state_manager.get_item('face_occluder_model') == 'many': + model_names = [ 'xseg_1', 'xseg_2', 'xseg_3' ] + else: + model_names = [ state_manager.get_item('face_occluder_model') ] + + for model_name in model_names: + model_size = create_static_model_set('full').get(model_name).get('size') + prepare_vision_frame = cv2.resize(crop_vision_frame, model_size) 
+ prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255.0 + prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3) + temp_mask = forward_occlude_face(prepare_vision_frame, model_name) + temp_mask = temp_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32) + temp_mask = cv2.resize(temp_mask, crop_vision_frame.shape[:2][::-1]) + temp_masks.append(temp_mask) + + occlusion_mask = numpy.minimum.reduce(temp_masks) occlusion_mask = (cv2.GaussianBlur(occlusion_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 return occlusion_mask @@ -214,8 +224,7 @@ def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List return region_mask -def forward_occlude_face(prepare_vision_frame : VisionFrame) -> Mask: - model_name = state_manager.get_item('face_occluder_model') +def forward_occlude_face(prepare_vision_frame : VisionFrame, model_name : str) -> Mask: face_occluder = get_inference_pool().get(model_name) with conditional_thread_semaphore(): diff --git a/facefusion/face_recognizer.py b/facefusion/face_recognizer.py index c289026..258d8c9 100644 --- a/facefusion/face_recognizer.py +++ b/facefusion/face_recognizer.py @@ -11,7 +11,7 @@ from facefusion.thread_helper import conditional_thread_semaphore from facefusion.types import DownloadScope, Embedding, FaceLandmark5, InferencePool, ModelOptions, ModelSet, VisionFrame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -62,26 +62,26 @@ def pre_check() -> bool: return conditional_download_hashes(model_hash_set) and conditional_download_sources(model_source_set) -def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5) -> Tuple[Embedding, Embedding]: +def calculate_face_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5) -> Tuple[Embedding, Embedding]: model_template = get_model_options().get('template') model_size = get_model_options().get('size') crop_vision_frame, matrix = warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, model_template, model_size) crop_vision_frame = crop_vision_frame / 127.5 - 1 crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) - embedding = forward(crop_vision_frame) - embedding = embedding.ravel() - normed_embedding = embedding / numpy.linalg.norm(embedding) - return embedding, normed_embedding + face_embedding = forward(crop_vision_frame) + face_embedding = face_embedding.ravel() + face_embedding_norm = face_embedding / numpy.linalg.norm(face_embedding) + return face_embedding, face_embedding_norm def forward(crop_vision_frame : VisionFrame) -> Embedding: face_recognizer = get_inference_pool().get('face_recognizer') with conditional_thread_semaphore(): - embedding = face_recognizer.run(None, + face_embedding = face_recognizer.run(None, { 'input': crop_vision_frame })[0] - return embedding + return face_embedding diff --git a/facefusion/face_selector.py b/facefusion/face_selector.py index 0c0341e..00459e9 100644 --- a/facefusion/face_selector.py +++ b/facefusion/face_selector.py @@ -3,31 +3,53 @@ from typing import List import numpy from facefusion import state_manager -from facefusion.types import Face, FaceSelectorOrder, FaceSet, Gender, Race, Score +from facefusion.face_analyser import get_many_faces, get_one_face +from facefusion.types import Face, FaceSelectorOrder, Gender, Race, Score, VisionFrame 
-def find_similar_faces(faces : List[Face], reference_faces : FaceSet, face_distance : float) -> List[Face]: - similar_faces : List[Face] = [] +def select_faces(reference_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> List[Face]: + target_faces = get_many_faces([ target_vision_frame ]) - if faces and reference_faces: - for reference_set in reference_faces: - if not similar_faces: - for reference_face in reference_faces[reference_set]: - for face in faces: - if compare_faces(face, reference_face, face_distance): - similar_faces.append(face) - return similar_faces + if state_manager.get_item('face_selector_mode') == 'many': + return sort_and_filter_faces(target_faces) + + if state_manager.get_item('face_selector_mode') == 'one': + target_face = get_one_face(sort_and_filter_faces(target_faces)) + if target_face: + return [ target_face ] + + if state_manager.get_item('face_selector_mode') == 'reference': + reference_faces = get_many_faces([ reference_vision_frame ]) + reference_faces = sort_and_filter_faces(reference_faces) + reference_face = get_one_face(reference_faces, state_manager.get_item('reference_face_position')) + if reference_face: + match_faces = find_match_faces([ reference_face ], target_faces, state_manager.get_item('reference_face_distance')) + return match_faces + + return [] + + +def find_match_faces(reference_faces : List[Face], target_faces : List[Face], face_distance : float) -> List[Face]: + match_faces : List[Face] = [] + + for reference_face in reference_faces: + if reference_face: + for index, target_face in enumerate(target_faces): + if compare_faces(target_face, reference_face, face_distance): + match_faces.append(target_faces[index]) + + return match_faces def compare_faces(face : Face, reference_face : Face, face_distance : float) -> bool: - current_face_distance = calc_face_distance(face, reference_face) + current_face_distance = calculate_face_distance(face, reference_face) current_face_distance = float(numpy.interp(current_face_distance, [ 0, 2 ], [ 0, 1 ])) return current_face_distance < face_distance -def calc_face_distance(face : Face, reference_face : Face) -> float: - if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'): - return 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding) +def calculate_face_distance(face : Face, reference_face : Face) -> float: + if hasattr(face, 'embedding_norm') and hasattr(reference_face, 'embedding_norm'): + return 1 - numpy.dot(face.embedding_norm, reference_face.embedding_norm) return 0 diff --git a/facefusion/face_store.py b/facefusion/face_store.py index bd7b2c5..bd14caa 100644 --- a/facefusion/face_store.py +++ b/facefusion/face_store.py @@ -1,12 +1,11 @@ from typing import List, Optional from facefusion.hash_helper import create_hash -from facefusion.types import Face, FaceSet, FaceStore, VisionFrame +from facefusion.types import Face, FaceStore, VisionFrame FACE_STORE : FaceStore =\ { - 'static_faces': {}, - 'reference_faces': {} + 'static_faces': {} } @@ -27,17 +26,3 @@ def set_static_faces(vision_frame : VisionFrame, faces : List[Face]) -> None: def clear_static_faces() -> None: FACE_STORE['static_faces'].clear() - - -def get_reference_faces() -> Optional[FaceSet]: - return FACE_STORE.get('reference_faces') - - -def append_reference_face(name : str, face : Face) -> None: - if name not in FACE_STORE.get('reference_faces'): - FACE_STORE['reference_faces'][name] = [] - FACE_STORE['reference_faces'][name].append(face) - - -def clear_reference_faces() -> 
None: - FACE_STORE['reference_faces'].clear() diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index bbfd026..009abd3 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -10,8 +10,8 @@ import facefusion.choices from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording from facefusion.filesystem import get_file_format, remove_file from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern -from facefusion.types import AudioBuffer, AudioEncoder, Commands, EncoderSet, Fps, UpdateProgress, VideoEncoder, VideoFormat -from facefusion.vision import detect_video_duration, detect_video_fps, predict_video_frame_total +from facefusion.types import AudioBuffer, AudioEncoder, Commands, EncoderSet, Fps, Resolution, UpdateProgress, VideoEncoder, VideoFormat +from facefusion.vision import detect_video_duration, detect_video_fps, pack_resolution, predict_video_frame_total def run_ffmpeg_with_progress(commands : Commands, update_progress : UpdateProgress) -> subprocess.Popen[bytes]: @@ -23,8 +23,10 @@ def run_ffmpeg_with_progress(commands : Commands, update_progress : UpdateProgre while process_manager.is_processing(): try: - while __line__ := process.stdout.readline().decode().lower(): + if process_manager.is_stopping(): + process.terminate() + if 'frame=' in __line__: _, frame_number = __line__.split('frame=') update_progress(int(frame_number)) @@ -36,8 +38,6 @@ def run_ffmpeg_with_progress(commands : Commands, update_progress : UpdateProgre continue return process - if process_manager.is_stopping(): - process.terminate() return process @@ -61,6 +61,7 @@ def run_ffmpeg(commands : Commands) -> subprocess.Popen[bytes]: if process_manager.is_stopping(): process.terminate() + return process @@ -106,12 +107,12 @@ def get_available_encoder_set() -> EncoderSet: return available_encoder_set -def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: +def extract_frames(target_path : str, temp_video_resolution : Resolution, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: extract_frame_total = predict_video_frame_total(target_path, temp_video_fps, trim_frame_start, trim_frame_end) temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d') commands = ffmpeg_builder.chain( ffmpeg_builder.set_input(target_path), - ffmpeg_builder.set_media_resolution(temp_video_resolution), + ffmpeg_builder.set_media_resolution(pack_resolution(temp_video_resolution)), ffmpeg_builder.set_frame_quality(0), ffmpeg_builder.select_frame_range(trim_frame_start, trim_frame_end, temp_video_fps), ffmpeg_builder.prevent_frame_drop(), @@ -123,23 +124,23 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp return process.returncode == 0 -def copy_image(target_path : str, temp_image_resolution : str) -> bool: +def copy_image(target_path : str, temp_image_resolution : Resolution) -> bool: temp_image_path = get_temp_file_path(target_path) commands = ffmpeg_builder.chain( ffmpeg_builder.set_input(target_path), - ffmpeg_builder.set_media_resolution(temp_image_resolution), + ffmpeg_builder.set_media_resolution(pack_resolution(temp_image_resolution)), ffmpeg_builder.set_image_quality(target_path, 100), ffmpeg_builder.force_output(temp_image_path) ) return run_ffmpeg(commands).returncode == 0 -def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool: +def finalize_image(target_path : str, 
output_path : str, output_image_resolution : Resolution) -> bool: output_image_quality = state_manager.get_item('output_image_quality') temp_image_path = get_temp_file_path(target_path) commands = ffmpeg_builder.chain( ffmpeg_builder.set_input(temp_image_path), - ffmpeg_builder.set_media_resolution(output_image_resolution), + ffmpeg_builder.set_media_resolution(pack_resolution(output_image_resolution)), ffmpeg_builder.set_image_quality(target_path, output_image_quality), ffmpeg_builder.force_output(output_path) ) @@ -211,7 +212,7 @@ def replace_audio(target_path : str, audio_path : str, output_path : str) -> boo return run_ffmpeg(commands).returncode == 0 -def merge_video(target_path : str, temp_video_fps : Fps, output_video_resolution : str, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: +def merge_video(target_path : str, temp_video_fps : Fps, output_video_resolution : Resolution, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: output_video_encoder = state_manager.get_item('output_video_encoder') output_video_quality = state_manager.get_item('output_video_quality') output_video_preset = state_manager.get_item('output_video_preset') @@ -224,13 +225,12 @@ def merge_video(target_path : str, temp_video_fps : Fps, output_video_resolution commands = ffmpeg_builder.chain( ffmpeg_builder.set_input_fps(temp_video_fps), ffmpeg_builder.set_input(temp_frames_pattern), - ffmpeg_builder.set_media_resolution(output_video_resolution), + ffmpeg_builder.set_media_resolution(pack_resolution(output_video_resolution)), ffmpeg_builder.set_video_encoder(output_video_encoder), ffmpeg_builder.set_video_quality(output_video_encoder, output_video_quality), ffmpeg_builder.set_video_preset(output_video_encoder, output_video_preset), ffmpeg_builder.set_video_fps(output_video_fps), ffmpeg_builder.set_pixel_format(output_video_encoder), - ffmpeg_builder.set_video_colorspace('bt709'), ffmpeg_builder.force_output(temp_video_path) ) @@ -265,7 +265,7 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool: def fix_audio_encoder(video_format : VideoFormat, audio_encoder : AudioEncoder) -> AudioEncoder: if video_format == 'avi' and audio_encoder == 'libopus': return 'aac' - if video_format == 'm4v': + if video_format in [ 'm4v', 'wmv' ]: return 'aac' if video_format == 'mov' and audio_encoder in [ 'flac', 'libopus' ]: return 'aac' @@ -275,7 +275,7 @@ def fix_audio_encoder(video_format : VideoFormat, audio_encoder : AudioEncoder) def fix_video_encoder(video_format : VideoFormat, video_encoder : VideoEncoder) -> VideoEncoder: - if video_format == 'm4v': + if video_format in [ 'm4v', 'wmv' ]: return 'libx264' if video_format in [ 'mkv', 'mp4' ] and video_encoder == 'rawvideo': return 'libx264' diff --git a/facefusion/ffmpeg_builder.py b/facefusion/ffmpeg_builder.py index 8eb8e91..ba9ca32 100644 --- a/facefusion/ffmpeg_builder.py +++ b/facefusion/ffmpeg_builder.py @@ -108,9 +108,9 @@ def set_media_resolution(video_resolution : str) -> Commands: def set_image_quality(image_path : str, image_quality : int) -> Commands: if get_file_format(image_path) == 'webp': - image_compression = image_quality - else: - image_compression = round(31 - (image_quality * 0.31)) + return [ '-q:v', str(image_quality) ] + + image_compression = round(31 - (image_quality * 0.31)) return [ '-q:v', str(image_compression) ] @@ -140,16 +140,16 @@ def set_audio_channel_total(audio_channel_total : int) -> Commands: def set_audio_quality(audio_encoder : AudioEncoder, 
audio_quality : int) -> Commands: if audio_encoder == 'aac': - audio_compression = round(numpy.interp(audio_quality, [ 0, 100 ], [ 0.1, 2.0 ]), 1) + audio_compression = numpy.round(numpy.interp(audio_quality, [ 0, 100 ], [ 0.1, 2.0 ]), 1).astype(float).item() return [ '-q:a', str(audio_compression) ] if audio_encoder == 'libmp3lame': - audio_compression = round(numpy.interp(audio_quality, [ 0, 100 ], [ 9, 0 ])) + audio_compression = numpy.round(numpy.interp(audio_quality, [ 0, 100 ], [ 9, 0 ])).astype(int).item() return [ '-q:a', str(audio_compression) ] if audio_encoder == 'libopus': - audio_bit_rate = round(numpy.interp(audio_quality, [ 0, 100 ], [ 64, 256 ])) + audio_bit_rate = numpy.round(numpy.interp(audio_quality, [ 0, 100 ], [ 64, 256 ])).astype(int).item() return [ '-b:a', str(audio_bit_rate) + 'k' ] if audio_encoder == 'libvorbis': - audio_compression = round(numpy.interp(audio_quality, [ 0, 100 ], [ -1, 10 ]), 1) + audio_compression = numpy.round(numpy.interp(audio_quality, [ 0, 100 ], [ -1, 10 ]), 1).astype(float).item() return [ '-q:a', str(audio_compression) ] return [] @@ -167,29 +167,29 @@ def copy_video_encoder() -> Commands: def set_video_quality(video_encoder : VideoEncoder, video_quality : int) -> Commands: - if video_encoder in [ 'libx264', 'libx265' ]: - video_compression = round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])) + if video_encoder in [ 'libx264', 'libx264rgb', 'libx265' ]: + video_compression = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])).astype(int).item() return [ '-crf', str(video_compression) ] if video_encoder == 'libvpx-vp9': - video_compression = round(numpy.interp(video_quality, [ 0, 100 ], [ 63, 0 ])) + video_compression = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 63, 0 ])).astype(int).item() return [ '-crf', str(video_compression) ] if video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: - video_compression = round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])) + video_compression = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])).astype(int).item() return [ '-cq', str(video_compression) ] if video_encoder in [ 'h264_amf', 'hevc_amf' ]: - video_compression = round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])) + video_compression = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])).astype(int).item() return [ '-qp_i', str(video_compression), '-qp_p', str(video_compression), '-qp_b', str(video_compression) ] if video_encoder in [ 'h264_qsv', 'hevc_qsv' ]: - video_compression = round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])) + video_compression = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 51, 0 ])).astype(int).item() return [ '-qp', str(video_compression) ] if video_encoder in [ 'h264_videotoolbox', 'hevc_videotoolbox' ]: - video_bit_rate = round(numpy.interp(video_quality, [ 0, 100 ], [ 1024, 50512 ])) + video_bit_rate = numpy.round(numpy.interp(video_quality, [ 0, 100 ], [ 1024, 50512 ])).astype(int).item() return [ '-b:v', str(video_bit_rate) + 'k' ] return [] def set_video_preset(video_encoder : VideoEncoder, video_preset : VideoPreset) -> Commands: - if video_encoder in [ 'libx264', 'libx265' ]: + if video_encoder in [ 'libx264', 'libx264rgb', 'libx265' ]: return [ '-preset', video_preset ] if video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: return [ '-preset', map_nvenc_preset(video_preset) ] @@ -200,10 +200,6 @@ def set_video_preset(video_encoder : VideoEncoder, video_preset : VideoPreset) - return [] -def set_video_colorspace(video_colorspace : str) 
-> Commands: - return [ '-colorspace', video_colorspace ] - - def set_video_fps(video_fps : Fps) -> Commands: return [ '-vf', 'framerate=fps=' + str(video_fps) ] diff --git a/facefusion/inference_manager.py b/facefusion/inference_manager.py index f93d7e8..780ada6 100644 --- a/facefusion/inference_manager.py +++ b/facefusion/inference_manager.py @@ -1,13 +1,16 @@ import importlib -from time import sleep +import random +from time import sleep, time from typing import List from onnxruntime import InferenceSession -from facefusion import process_manager, state_manager +from facefusion import logger, process_manager, state_manager, wording from facefusion.app_context import detect_app_context from facefusion.execution import create_inference_session_providers -from facefusion.filesystem import is_file +from facefusion.exit_helper import fatal_exit +from facefusion.filesystem import get_file_name, is_file +from facefusion.time_helper import calculate_end_time from facefusion.types import DownloadSet, ExecutionProvider, InferencePool, InferencePoolSet INFERENCE_POOL_SET : InferencePoolSet =\ @@ -20,19 +23,22 @@ INFERENCE_POOL_SET : InferencePoolSet =\ def get_inference_pool(module_name : str, model_names : List[str], model_source_set : DownloadSet) -> InferencePool: while process_manager.is_checking(): sleep(0.5) - execution_device_id = state_manager.get_item('execution_device_id') + execution_device_ids = state_manager.get_item('execution_device_ids') execution_providers = resolve_execution_providers(module_name) app_context = detect_app_context() - inference_context = get_inference_context(module_name, model_names, execution_device_id, execution_providers) - if app_context == 'cli' and INFERENCE_POOL_SET.get('ui').get(inference_context): - INFERENCE_POOL_SET['cli'][inference_context] = INFERENCE_POOL_SET.get('ui').get(inference_context) - if app_context == 'ui' and INFERENCE_POOL_SET.get('cli').get(inference_context): - INFERENCE_POOL_SET['ui'][inference_context] = INFERENCE_POOL_SET.get('cli').get(inference_context) - if not INFERENCE_POOL_SET.get(app_context).get(inference_context): - INFERENCE_POOL_SET[app_context][inference_context] = create_inference_pool(model_source_set, execution_device_id, execution_providers) + for execution_device_id in execution_device_ids: + inference_context = get_inference_context(module_name, model_names, execution_device_id, execution_providers) - return INFERENCE_POOL_SET.get(app_context).get(inference_context) + if app_context == 'cli' and INFERENCE_POOL_SET.get('ui').get(inference_context): + INFERENCE_POOL_SET['cli'][inference_context] = INFERENCE_POOL_SET.get('ui').get(inference_context) + if app_context == 'ui' and INFERENCE_POOL_SET.get('cli').get(inference_context): + INFERENCE_POOL_SET['ui'][inference_context] = INFERENCE_POOL_SET.get('cli').get(inference_context) + if not INFERENCE_POOL_SET.get(app_context).get(inference_context): + INFERENCE_POOL_SET[app_context][inference_context] = create_inference_pool(model_source_set, execution_device_id, execution_providers) + + current_inference_context = get_inference_context(module_name, model_names, random.choice(execution_device_ids), execution_providers) + return INFERENCE_POOL_SET.get(app_context).get(current_inference_context) def create_inference_pool(model_source_set : DownloadSet, execution_device_id : str, execution_providers : List[ExecutionProvider]) -> InferencePool: @@ -47,18 +53,30 @@ def create_inference_pool(model_source_set : DownloadSet, execution_device_id : def 
clear_inference_pool(module_name : str, model_names : List[str]) -> None: - execution_device_id = state_manager.get_item('execution_device_id') + execution_device_ids = state_manager.get_item('execution_device_ids') execution_providers = resolve_execution_providers(module_name) app_context = detect_app_context() - inference_context = get_inference_context(module_name, model_names, execution_device_id, execution_providers) - if INFERENCE_POOL_SET.get(app_context).get(inference_context): - del INFERENCE_POOL_SET[app_context][inference_context] + for execution_device_id in execution_device_ids: + inference_context = get_inference_context(module_name, model_names, execution_device_id, execution_providers) + + if INFERENCE_POOL_SET.get(app_context).get(inference_context): + del INFERENCE_POOL_SET[app_context][inference_context] def create_inference_session(model_path : str, execution_device_id : str, execution_providers : List[ExecutionProvider]) -> InferenceSession: - inference_session_providers = create_inference_session_providers(execution_device_id, execution_providers) - return InferenceSession(model_path, providers = inference_session_providers) + model_file_name = get_file_name(model_path) + start_time = time() + + try: + inference_session_providers = create_inference_session_providers(execution_device_id, execution_providers) + inference_session = InferenceSession(model_path, providers = inference_session_providers) + logger.debug(wording.get('loading_model_succeeded').format(model_name = model_file_name, seconds = calculate_end_time(start_time)), __name__) + return inference_session + + except Exception: + logger.error(wording.get('loading_model_failed').format(model_name = model_file_name), __name__) + fatal_exit(1) def get_inference_context(module_name : str, model_names : List[str], execution_device_id : str, execution_providers : List[ExecutionProvider]) -> str: diff --git a/facefusion/jobs/job_helper.py b/facefusion/jobs/job_helper.py index 6e3139b..d7e9021 100644 --- a/facefusion/jobs/job_helper.py +++ b/facefusion/jobs/job_helper.py @@ -7,10 +7,12 @@ from facefusion.filesystem import get_file_extension, get_file_name def get_step_output_path(job_id : str, step_index : int, output_path : str) -> Optional[str]: if output_path: - output_directory_path, _ = os.path.split(output_path) - output_file_name = get_file_name(_) - output_file_extension = get_file_extension(_) - return os.path.join(output_directory_path, output_file_name + '-' + job_id + '-' + str(step_index) + output_file_extension) + output_directory_path, output_file_path = os.path.split(output_path) + output_file_name = get_file_name(output_file_path) + output_file_extension = get_file_extension(output_file_path) + + if output_file_name and output_file_extension: + return os.path.join(output_directory_path, output_file_name + '-' + job_id + '-' + str(step_index) + output_file_extension) return None diff --git a/facefusion/jobs/job_list.py b/facefusion/jobs/job_list.py index 2003b96..4ca34d4 100644 --- a/facefusion/jobs/job_list.py +++ b/facefusion/jobs/job_list.py @@ -1,8 +1,8 @@ from datetime import datetime from typing import Optional, Tuple -from facefusion.date_helper import describe_time_ago from facefusion.jobs import job_manager +from facefusion.time_helper import describe_time_ago from facefusion.types import JobStatus, TableContents, TableHeaders diff --git a/facefusion/jobs/job_manager.py b/facefusion/jobs/job_manager.py index 58f46e5..8abc34b 100644 --- a/facefusion/jobs/job_manager.py +++ 
b/facefusion/jobs/job_manager.py @@ -3,10 +3,10 @@ from copy import copy from typing import List, Optional import facefusion.choices -from facefusion.date_helper import get_current_date_time from facefusion.filesystem import create_directory, get_file_name, is_directory, is_file, move_file, remove_directory, remove_file, resolve_file_pattern from facefusion.jobs.job_helper import get_step_output_path from facefusion.json import read_json, write_json +from facefusion.time_helper import get_current_date_time from facefusion.types import Args, Job, JobSet, JobStatus, JobStep, JobStepStatus JOBS_PATH : Optional[str] = None diff --git a/facefusion/metadata.py b/facefusion/metadata.py index baacd55..a058cf6 100644 --- a/facefusion/metadata.py +++ b/facefusion/metadata.py @@ -4,7 +4,7 @@ METADATA =\ { 'name': 'FaceFusion', 'description': 'Industry leading face manipulation platform', - 'version': '3.3.2', + 'version': '3.4.0', 'license': 'OpenRAIL-AS', 'author': 'Henry Ruhs', 'url': 'https://facefusion.io' diff --git a/facefusion/model_helper.py b/facefusion/model_helper.py index 0646cda..c55e34b 100644 --- a/facefusion/model_helper.py +++ b/facefusion/model_helper.py @@ -5,7 +5,7 @@ import onnx from facefusion.types import ModelInitializer -@lru_cache(maxsize = None) +@lru_cache() def get_static_model_initializer(model_path : str) -> ModelInitializer: model = onnx.load(model_path) return onnx.numpy_helper.to_array(model.graph.initializer[-1]) diff --git a/facefusion/process_manager.py b/facefusion/process_manager.py index ce15014..f5377fa 100644 --- a/facefusion/process_manager.py +++ b/facefusion/process_manager.py @@ -1,6 +1,4 @@ -from typing import Generator, List - -from facefusion.types import ProcessState, QueuePayload +from facefusion.types import ProcessState PROCESS_STATE : ProcessState = 'pending' @@ -45,9 +43,3 @@ def stop() -> None: def end() -> None: set_process_state('pending') - - -def manage(queue_payloads : List[QueuePayload]) -> Generator[QueuePayload, None, None]: - for query_payload in queue_payloads: - if is_processing(): - yield query_payload diff --git a/facefusion/processors/choices.py b/facefusion/processors/choices.py index e33a2b1..ed80d9f 100755 --- a/facefusion/processors/choices.py +++ b/facefusion/processors/choices.py @@ -2,7 +2,7 @@ from typing import List, Sequence from facefusion.common_helper import create_float_range, create_int_range from facefusion.filesystem import get_file_name, resolve_file_paths, resolve_relative_path -from facefusion.processors.types import AgeModifierModel, DeepSwapperModel, ExpressionRestorerModel, FaceDebuggerItem, FaceEditorModel, FaceEnhancerModel, FaceSwapperModel, FaceSwapperSet, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel +from facefusion.processors.types import AgeModifierModel, DeepSwapperModel, ExpressionRestorerArea, ExpressionRestorerModel, FaceDebuggerItem, FaceEditorModel, FaceEnhancerModel, FaceSwapperModel, FaceSwapperSet, FaceSwapperWeight, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel age_modifier_models : List[AgeModifierModel] = [ 'styleganex_age' ] deep_swapper_models : List[DeepSwapperModel] =\ @@ -175,7 +175,8 @@ if custom_model_file_paths: deep_swapper_models.append(model_id) expression_restorer_models : List[ExpressionRestorerModel] = [ 'live_portrait' ] -face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender', 'race' ] 
+expression_restorer_areas : List[ExpressionRestorerArea] = [ 'upper-face', 'lower-face' ] +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask' ] face_editor_models : List[FaceEditorModel] = [ 'live_portrait' ] face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus' ] face_swapper_set : FaceSwapperSet =\ @@ -219,6 +220,7 @@ face_editor_head_yaw_range : Sequence[float] = create_float_range(-1.0, 1.0, 0.0 face_editor_head_roll_range : Sequence[float] = create_float_range(-1.0, 1.0, 0.05) face_enhancer_blend_range : Sequence[int] = create_int_range(0, 100, 1) face_enhancer_weight_range : Sequence[float] = create_float_range(0.0, 1.0, 0.05) +face_swapper_weight_range : Sequence[FaceSwapperWeight] = create_float_range(0.0, 1.0, 0.05) frame_colorizer_blend_range : Sequence[int] = create_int_range(0, 100, 1) frame_enhancer_blend_range : Sequence[int] = create_int_range(0, 100, 1) lip_syncer_weight_range : Sequence[float] = create_float_range(0.0, 1.0, 0.05) diff --git a/facefusion/processors/core.py b/facefusion/processors/core.py index 545370f..143696a 100644 --- a/facefusion/processors/core.py +++ b/facefusion/processors/core.py @@ -1,15 +1,9 @@ import importlib -import os -from concurrent.futures import ThreadPoolExecutor, as_completed -from queue import Queue from types import ModuleType from typing import Any, List -from tqdm import tqdm - -from facefusion import logger, state_manager, wording +from facefusion import logger, wording from facefusion.exit_helper import hard_exit -from facefusion.types import ProcessFrames, QueuePayload PROCESSORS_METHODS =\ [ @@ -20,11 +14,7 @@ PROCESSORS_METHODS =\ 'pre_check', 'pre_process', 'post_process', - 'get_reference_frame', - 'process_frame', - 'process_frames', - 'process_image', - 'process_video' + 'process_frame' ] @@ -51,49 +41,3 @@ def get_processors_modules(processors : List[str]) -> List[ModuleType]: processor_module = load_processor_module(processor) processor_modules.append(processor_module) return processor_modules - - -def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : ProcessFrames) -> None: - queue_payloads = create_queue_payloads(temp_frame_paths) - with tqdm(total = len(queue_payloads), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress: - progress.set_postfix(execution_providers = state_manager.get_item('execution_providers')) - with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor: - futures = [] - queue : Queue[QueuePayload] = create_queue(queue_payloads) - queue_per_future = max(len(queue_payloads) // state_manager.get_item('execution_thread_count') * state_manager.get_item('execution_queue_count'), 1) - - while not queue.empty(): - future = executor.submit(process_frames, source_paths, pick_queue(queue, queue_per_future), progress.update) - futures.append(future) - - for future_done in as_completed(futures): - future_done.result() - - -def create_queue(queue_payloads : List[QueuePayload]) -> Queue[QueuePayload]: - queue : Queue[QueuePayload] = Queue() - for queue_payload in queue_payloads: - queue.put(queue_payload) - return queue - - -def pick_queue(queue : Queue[QueuePayload], queue_per_future : 
int) -> List[QueuePayload]: - queues = [] - for _ in range(queue_per_future): - if not queue.empty(): - queues.append(queue.get()) - return queues - - -def create_queue_payloads(temp_frame_paths : List[str]) -> List[QueuePayload]: - queue_payloads = [] - temp_frame_paths = sorted(temp_frame_paths, key = os.path.basename) - - for frame_number, frame_path in enumerate(temp_frame_paths): - frame_payload : QueuePayload =\ - { - 'frame_number': frame_number, - 'frame_path': frame_path - } - queue_payloads.append(frame_payload) - return queue_payloads diff --git a/facefusion/processors/live_portrait.py b/facefusion/processors/live_portrait.py index 5805bc5..183006e 100644 --- a/facefusion/processors/live_portrait.py +++ b/facefusion/processors/live_portrait.py @@ -63,15 +63,15 @@ def limit_expression(expression : LivePortraitExpression) -> LivePortraitExpress return numpy.clip(expression, EXPRESSION_MIN, EXPRESSION_MAX) -def limit_euler_angles(target_pitch : LivePortraitPitch, target_yaw : LivePortraitYaw, target_roll : LivePortraitRoll, output_pitch : LivePortraitPitch, output_yaw : LivePortraitYaw, output_roll : LivePortraitRoll) -> Tuple[LivePortraitPitch, LivePortraitYaw, LivePortraitRoll]: - pitch_min, pitch_max, yaw_min, yaw_max, roll_min, roll_max = calc_euler_limits(target_pitch, target_yaw, target_roll) +def limit_angle(target_pitch : LivePortraitPitch, target_yaw : LivePortraitYaw, target_roll : LivePortraitRoll, output_pitch : LivePortraitPitch, output_yaw : LivePortraitYaw, output_roll : LivePortraitRoll) -> Tuple[LivePortraitPitch, LivePortraitYaw, LivePortraitRoll]: + pitch_min, pitch_max, yaw_min, yaw_max, roll_min, roll_max = calculate_euler_limits(target_pitch, target_yaw, target_roll) output_pitch = numpy.clip(output_pitch, pitch_min, pitch_max) output_yaw = numpy.clip(output_yaw, yaw_min, yaw_max) output_roll = numpy.clip(output_roll, roll_min, roll_max) return output_pitch, output_yaw, output_roll -def calc_euler_limits(pitch : LivePortraitPitch, yaw : LivePortraitYaw, roll : LivePortraitRoll) -> Tuple[float, float, float, float, float, float]: +def calculate_euler_limits(pitch : LivePortraitPitch, yaw : LivePortraitYaw, roll : LivePortraitRoll) -> Tuple[float, float, float, float, float, float]: pitch_min = -30.0 pitch_max = 30.0 yaw_min = -60.0 diff --git a/facefusion/processors/modules/age_modifier.py b/facefusion/processors/modules/age_modifier.py index 0ed6725..e6233f0 100755 --- a/facefusion/processors/modules/age_modifier.py +++ b/facefusion/processors/modules/age_modifier.py @@ -1,6 +1,5 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List import cv2 import numpy @@ -8,26 +7,23 @@ import numpy import facefusion.choices import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording -from facefusion.common_helper import create_int_metavar +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording +from facefusion.common_helper import create_int_metavar, is_macos from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url from facefusion.execution import has_execution_provider -from 
facefusion.face_analyser import get_many_faces, get_one_face from facefusion.face_helper import merge_matrix, paste_back, scale_face_landmark_5, warp_face_by_face_landmark_5 from facefusion.face_masker import create_box_mask, create_occlusion_mask -from facefusion.face_selector import find_similar_faces, sort_and_filter_faces -from facefusion.face_store import get_reference_faces +from facefusion.face_selector import select_faces from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension from facefusion.processors import choices as processors_choices from facefusion.processors.types import AgeModifierDirection, AgeModifierInputs from facefusion.program_helper import find_argument_group from facefusion.thread_helper import thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import match_frame_color, read_image, read_static_image, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import match_frame_color, read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -115,6 +111,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -143,8 +140,8 @@ def modify_age(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFra if 'occlusion' in state_manager.get_item('face_mask_types'): occlusion_mask = create_occlusion_mask(crop_vision_frame) - combined_matrix = merge_matrix([ extend_affine_matrix, cv2.invertAffineTransform(affine_matrix) ]) - occlusion_mask = cv2.warpAffine(occlusion_mask, combined_matrix, model_sizes.get('target_with_background')) + temp_matrix = merge_matrix([ extend_affine_matrix, cv2.invertAffineTransform(affine_matrix) ]) + occlusion_mask = cv2.warpAffine(occlusion_mask, temp_matrix, model_sizes.get('target_with_background')) crop_masks.append(occlusion_mask) crop_vision_frame = prepare_vision_frame(crop_vision_frame) @@ -164,7 +161,7 @@ def forward(crop_vision_frame : VisionFrame, extend_vision_frame : VisionFrame, age_modifier = get_inference_pool().get('age_modifier') age_modifier_inputs = {} - if has_execution_provider('coreml'): + if is_macos() and has_execution_provider('coreml'): age_modifier.set_providers([ facefusion.choices.execution_provider_set.get('cpu') ]) for age_modifier_input in age_modifier.get_inputs(): @@ -199,56 +196,14 @@ def normalize_extend_frame(extend_vision_frame : VisionFrame) -> VisionFrame: return extend_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - return modify_age(target_face, temp_vision_frame) - - def process_frame(inputs : AgeModifierInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') + reference_vision_frame = inputs.get('reference_vision_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = 
select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = modify_age(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = modify_age(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = modify_age(similar_face, target_vision_frame) - return target_vision_frame + if target_faces: + for target_face in target_faces: + temp_vision_frame = modify_age(target_face, temp_vision_frame) - -def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_path : str, target_path : str, output_path : str) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - processors.multi_process_frames(None, temp_frame_paths, process_frames) + return temp_vision_frame diff --git a/facefusion/processors/modules/deep_swapper.py b/facefusion/processors/modules/deep_swapper.py index 1b1b35e..42619f8 100755 --- a/facefusion/processors/modules/deep_swapper.py +++ b/facefusion/processors/modules/deep_swapper.py @@ -1,6 +1,6 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List, Tuple +from typing import Tuple import cv2 import numpy @@ -8,25 +8,22 @@ from cv2.typing import Size import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording from facefusion.common_helper import create_int_metavar from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url_by_provider -from facefusion.face_analyser import get_many_faces, get_one_face from facefusion.face_helper import paste_back, warp_face_by_face_landmark_5 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask -from 
facefusion.face_selector import find_similar_faces, sort_and_filter_faces -from facefusion.face_store import get_reference_faces +from facefusion.face_selector import select_faces from facefusion.filesystem import get_file_name, in_directory, is_image, is_video, resolve_file_paths, resolve_relative_path, same_file_extension from facefusion.processors import choices as processors_choices from facefusion.processors.types import DeepSwapperInputs, DeepSwapperMorph from facefusion.program_helper import find_argument_group from facefusion.thread_helper import thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, Mask, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import conditional_match_frame_color, read_image, read_static_image, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, Mask, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import conditional_match_frame_color, read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: model_config = [] @@ -311,6 +308,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -409,56 +407,16 @@ def prepare_crop_mask(crop_source_mask : Mask, crop_target_mask : Mask) -> Mask: return crop_mask -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - return swap_face(target_face, temp_vision_frame) - - def process_frame(inputs : DeepSwapperInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') + reference_vision_frame = inputs.get('reference_vision_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = swap_face(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = swap_face(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = swap_face(similar_face, target_vision_frame) - return target_vision_frame + if target_faces: + for target_face in target_faces: + temp_vision_frame = swap_face(target_face, temp_vision_frame) + + return temp_vision_frame -def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = 
process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_path : str, target_path : str, output_path : str) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/facefusion/processors/modules/expression_restorer.py b/facefusion/processors/modules/expression_restorer.py index 12ebc87..83efcf6 100755 --- a/facefusion/processors/modules/expression_restorer.py +++ b/facefusion/processors/modules/expression_restorer.py @@ -1,32 +1,29 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List, Tuple +from typing import Tuple import cv2 import numpy import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording from facefusion.common_helper import create_int_metavar from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url -from facefusion.face_analyser import get_many_faces, get_one_face from facefusion.face_helper import paste_back, warp_face_by_face_landmark_5 from facefusion.face_masker import create_box_mask, create_occlusion_mask -from facefusion.face_selector import find_similar_faces, sort_and_filter_faces -from facefusion.face_store import get_reference_faces +from facefusion.face_selector import select_faces from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension from facefusion.processors import choices as processors_choices from facefusion.processors.live_portrait import create_rotation, limit_expression from facefusion.processors.types import ExpressionRestorerInputs, LivePortraitExpression, LivePortraitFeatureVolume, LivePortraitMotionPoints, LivePortraitPitch, LivePortraitRoll, LivePortraitScale, LivePortraitTranslation, LivePortraitYaw from facefusion.program_helper import find_argument_group from facefusion.thread_helper import conditional_thread_semaphore, thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, read_video_frame, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ 
-96,12 +93,14 @@ def register_args(program : ArgumentParser) -> None: if group_processors: group_processors.add_argument('--expression-restorer-model', help = wording.get('help.expression_restorer_model'), default = config.get_str_value('processors', 'expression_restorer_model', 'live_portrait'), choices = processors_choices.expression_restorer_models) group_processors.add_argument('--expression-restorer-factor', help = wording.get('help.expression_restorer_factor'), type = int, default = config.get_int_value('processors', 'expression_restorer_factor', '80'), choices = processors_choices.expression_restorer_factor_range, metavar = create_int_metavar(processors_choices.expression_restorer_factor_range)) - facefusion.jobs.job_store.register_step_keys([ 'expression_restorer_model', 'expression_restorer_factor' ]) + group_processors.add_argument('--expression-restorer-areas', help = wording.get('help.expression_restorer_areas').format(choices = ', '.join(processors_choices.expression_restorer_areas)), default = config.get_str_list('processors', 'expression_restorer_areas', ' '.join(processors_choices.expression_restorer_areas)), choices = processors_choices.expression_restorer_areas, nargs = '+', metavar = 'EXPRESSION_RESTORER_AREAS') + facefusion.jobs.job_store.register_step_keys([ 'expression_restorer_model', 'expression_restorer_factor', 'expression_restorer_areas' ]) def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None: apply_state_item('expression_restorer_model', args.get('expression_restorer_model')) apply_state_item('expression_restorer_factor', args.get('expression_restorer_factor')) + apply_state_item('expression_restorer_areas', args.get('expression_restorer_areas')) def pre_check() -> bool: @@ -129,6 +128,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -141,46 +141,58 @@ def post_process() -> None: face_recognizer.clear_inference_pool() -def restore_expression(source_vision_frame : VisionFrame, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: +def restore_expression(target_face : Face, target_vision_frame : VisionFrame, temp_vision_frame : VisionFrame) -> VisionFrame: model_template = get_model_options().get('template') model_size = get_model_options().get('size') expression_restorer_factor = float(numpy.interp(float(state_manager.get_item('expression_restorer_factor')), [ 0, 100 ], [ 0, 1.2 ])) - source_vision_frame = cv2.resize(source_vision_frame, temp_vision_frame.shape[:2][::-1]) - source_crop_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, target_face.landmark_set.get('5/68'), model_template, model_size) - target_crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark_set.get('5/68'), model_template, model_size) - box_mask = create_box_mask(target_crop_vision_frame, state_manager.get_item('face_mask_blur'), (0, 0, 0, 0)) + target_crop_vision_frame, _ = warp_face_by_face_landmark_5(target_vision_frame, target_face.landmark_set.get('5/68'), model_template, model_size) + temp_crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark_set.get('5/68'), model_template, model_size) + box_mask = create_box_mask(temp_crop_vision_frame, state_manager.get_item('face_mask_blur'), (0, 0, 0, 0)) 
crop_masks =\ [ box_mask ] if 'occlusion' in state_manager.get_item('face_mask_types'): - occlusion_mask = create_occlusion_mask(target_crop_vision_frame) + occlusion_mask = create_occlusion_mask(temp_crop_vision_frame) crop_masks.append(occlusion_mask) - source_crop_vision_frame = prepare_crop_frame(source_crop_vision_frame) target_crop_vision_frame = prepare_crop_frame(target_crop_vision_frame) - target_crop_vision_frame = apply_restore(source_crop_vision_frame, target_crop_vision_frame, expression_restorer_factor) - target_crop_vision_frame = normalize_crop_frame(target_crop_vision_frame) + temp_crop_vision_frame = prepare_crop_frame(temp_crop_vision_frame) + temp_crop_vision_frame = apply_restore(target_crop_vision_frame, temp_crop_vision_frame, expression_restorer_factor) + temp_crop_vision_frame = normalize_crop_frame(temp_crop_vision_frame) crop_mask = numpy.minimum.reduce(crop_masks).clip(0, 1) - temp_vision_frame = paste_back(temp_vision_frame, target_crop_vision_frame, crop_mask, affine_matrix) - return temp_vision_frame + paste_vision_frame = paste_back(temp_vision_frame, temp_crop_vision_frame, crop_mask, affine_matrix) + return paste_vision_frame -def apply_restore(source_crop_vision_frame : VisionFrame, target_crop_vision_frame : VisionFrame, expression_restorer_factor : float) -> VisionFrame: - feature_volume = forward_extract_feature(target_crop_vision_frame) - source_expression = forward_extract_motion(source_crop_vision_frame)[5] - pitch, yaw, roll, scale, translation, target_expression, motion_points = forward_extract_motion(target_crop_vision_frame) +def apply_restore(target_crop_vision_frame : VisionFrame, temp_crop_vision_frame : VisionFrame, expression_restorer_factor : float) -> VisionFrame: + feature_volume = forward_extract_feature(temp_crop_vision_frame) + target_expression = forward_extract_motion(target_crop_vision_frame)[5] + pitch, yaw, roll, scale, translation, temp_expression, motion_points = forward_extract_motion(temp_crop_vision_frame) rotation = create_rotation(pitch, yaw, roll) - source_expression[:, [ 0, 4, 5, 8, 9 ]] = target_expression[:, [ 0, 4, 5, 8, 9 ]] - source_expression = source_expression * expression_restorer_factor + target_expression * (1 - expression_restorer_factor) - source_expression = limit_expression(source_expression) - source_motion_points = scale * (motion_points @ rotation.T + source_expression) + translation + target_expression = restrict_expression_areas(temp_expression, target_expression) + target_expression = target_expression * expression_restorer_factor + temp_expression * (1 - expression_restorer_factor) + target_expression = limit_expression(target_expression) target_motion_points = scale * (motion_points @ rotation.T + target_expression) + translation - crop_vision_frame = forward_generate_frame(feature_volume, source_motion_points, target_motion_points) + temp_motion_points = scale * (motion_points @ rotation.T + temp_expression) + translation + crop_vision_frame = forward_generate_frame(feature_volume, target_motion_points, temp_motion_points) return crop_vision_frame +def restrict_expression_areas(temp_expression : LivePortraitExpression, target_expression : LivePortraitExpression) -> LivePortraitExpression: + expression_restorer_areas = state_manager.get_item('expression_restorer_areas') + + if 'upper-face' not in expression_restorer_areas: + target_expression[:, [1, 2, 6, 10, 11, 12, 13, 15, 16]] = temp_expression[:, [1, 2, 6, 10, 11, 12, 13, 15, 16]] + + if 'lower-face' not in expression_restorer_areas: + 
target_expression[:, [3, 7, 14, 17, 18, 19, 20]] = temp_expression[:, [3, 7, 14, 17, 18, 19, 20]] + + target_expression[:, [0, 4, 5, 8, 9]] = temp_expression[:, [0, 4, 5, 8, 9]] + return target_expression + + def forward_extract_feature(crop_vision_frame : VisionFrame) -> LivePortraitFeatureVolume: feature_extractor = get_inference_pool().get('feature_extractor') @@ -205,15 +217,15 @@ def forward_extract_motion(crop_vision_frame : VisionFrame) -> Tuple[LivePortrai return pitch, yaw, roll, scale, translation, expression, motion_points -def forward_generate_frame(feature_volume : LivePortraitFeatureVolume, source_motion_points : LivePortraitMotionPoints, target_motion_points : LivePortraitMotionPoints) -> VisionFrame: +def forward_generate_frame(feature_volume : LivePortraitFeatureVolume, target_motion_points : LivePortraitMotionPoints, temp_motion_points : LivePortraitMotionPoints) -> VisionFrame: generator = get_inference_pool().get('generator') with thread_semaphore(): crop_vision_frame = generator.run(None, { 'feature_volume': feature_volume, - 'source': source_motion_points, - 'target': target_motion_points + 'source': target_motion_points, + 'target': temp_motion_points })[0][0] return crop_vision_frame @@ -235,64 +247,14 @@ def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: return crop_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass - - def process_frame(inputs : ExpressionRestorerInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') - source_vision_frame = inputs.get('source_vision_frame') + reference_vision_frame = inputs.get('reference_vision_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = restore_expression(source_vision_frame, target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = restore_expression(source_vision_frame, target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = restore_expression(source_vision_frame, similar_face, target_vision_frame) - return target_vision_frame + if target_faces: + for target_face in target_faces: + temp_vision_frame = restore_expression(target_face, target_vision_frame, temp_vision_frame) - -def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - - for queue_payload in process_manager.manage(queue_payloads): - frame_number = queue_payload.get('frame_number') - if state_manager.get_item('trim_frame_start'): - frame_number += state_manager.get_item('trim_frame_start') - source_vision_frame = read_video_frame(state_manager.get_item('target_path'), frame_number) - target_vision_path = queue_payload.get('frame_path') - 
target_vision_frame = read_image(target_vision_path)
-        output_vision_frame = process_frame(
-        {
-            'reference_faces': reference_faces,
-            'source_vision_frame': source_vision_frame,
-            'target_vision_frame': target_vision_frame
-        })
-        write_image(target_vision_path, output_vision_frame)
-        update_progress(1)
-
-
-def process_image(source_path : str, target_path : str, output_path : str) -> None:
-    reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None
-    source_vision_frame = read_static_image(state_manager.get_item('target_path'))
-    target_vision_frame = read_static_image(target_path)
-    output_vision_frame = process_frame(
-    {
-        'reference_faces': reference_faces,
-        'source_vision_frame': source_vision_frame,
-        'target_vision_frame': target_vision_frame
-    })
-    write_image(output_path, output_vision_frame)
-
-
-def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
-    processors.multi_process_frames(None, temp_frame_paths, process_frames)
+    return temp_vision_frame
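
The new --expression-restorer-areas option gates the blend by index groups on the LivePortrait expression tensor: rows 1, 2, 6, 10 to 13, 15 and 16 carry the upper face, rows 3, 7, 14 and 17 to 20 the lower face, and rows 0, 4, 5, 8 and 9 always keep the expression of the already swapped frame. A small numpy sketch of the gating in restrict_expression_areas; the (1, 21, 3) shape is an assumption based on the indices used above:

import numpy

UPPER_FACE_INDEXES = [ 1, 2, 6, 10, 11, 12, 13, 15, 16 ]
LOWER_FACE_INDEXES = [ 3, 7, 14, 17, 18, 19, 20 ]
PINNED_INDEXES = [ 0, 4, 5, 8, 9 ]

temp_expression = numpy.zeros((1, 21, 3))   # expression of the swapped frame
target_expression = numpy.ones((1, 21, 3))  # expression of the original target

# with only 'upper-face' selected, the lower face falls back to the swapped frame
target_expression[:, LOWER_FACE_INDEXES] = temp_expression[:, LOWER_FACE_INDEXES]
target_expression[:, PINNED_INDEXES] = temp_expression[:, PINNED_INDEXES]

assert target_expression[:, UPPER_FACE_INDEXES].all()      # restored from the target
assert not target_expression[:, LOWER_FACE_INDEXES].any()  # left as swapped

diff --git a/facefusion/processors/modules/face_debugger.py b/facefusion/processors/modules/face_debugger.py
index 2402182..633674a 100755
--- a/facefusion/processors/modules/face_debugger.py
+++ b/facefusion/processors/modules/face_debugger.py
@@ -1,24 +1,20 @@
 from argparse import ArgumentParser
-from typing import List

 import cv2
 import numpy

 import facefusion.jobs.job_manager
 import facefusion.jobs.job_store
-import facefusion.processors.core as processors
-from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, logger, process_manager, state_manager, video_manager, wording
-from facefusion.face_analyser import get_many_faces, get_one_face
+from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, logger, state_manager, video_manager, wording
 from facefusion.face_helper import warp_face_by_face_landmark_5
 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask
-from facefusion.face_selector import find_similar_faces, sort_and_filter_faces
-from facefusion.face_store import get_reference_faces
-from facefusion.filesystem import in_directory, same_file_extension
+from facefusion.face_selector import select_faces
+from facefusion.filesystem import in_directory, is_image, is_video, same_file_extension
 from facefusion.processors import choices as processors_choices
 from facefusion.processors.types import FaceDebuggerInputs
 from facefusion.program_helper import find_argument_group
-from facefusion.types import ApplyStateItem, Args, Face, InferencePool, ProcessMode, QueuePayload, UpdateProgress, VisionFrame
-from facefusion.vision import read_image, read_static_image, write_image
+from facefusion.types import ApplyStateItem, Args, Face, InferencePool, ProcessMode, VisionFrame
+from facefusion.vision import read_static_image, read_static_video_frame


 def get_inference_pool() -> InferencePool:
@@ -45,6 +41,9 @@ def pre_check() -> bool:


 def pre_process(mode : ProcessMode) -> bool:
+    if mode in [ 'output', 'preview' ] and not is_image(state_manager.get_item('target_path')) and not is_video(state_manager.get_item('target_path')):
+        logger.error(wording.get('choose_image_or_video_target') + wording.get('exclamation_mark'), __name__)
+        return False
     if mode == 'output' and not in_directory(state_manager.get_item('output_path')):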
logger.error(wording.get('specify_image_or_video_output') + wording.get('exclamation_mark'), __name__) return False @@ -56,6 +55,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') == 'strict': content_analyser.clear_inference_pool() @@ -67,162 +67,159 @@ def post_process() -> None: def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - primary_color = (0, 0, 255) - primary_light_color = (100, 100, 255) - secondary_color = (0, 255, 0) - tertiary_color = (255, 255, 0) - bounding_box = target_face.bounding_box.astype(numpy.int32) - temp_vision_frame = temp_vision_frame.copy() - has_face_landmark_5_fallback = numpy.array_equal(target_face.landmark_set.get('5'), target_face.landmark_set.get('5/68')) - has_face_landmark_68_fallback = numpy.array_equal(target_face.landmark_set.get('68'), target_face.landmark_set.get('68/5')) face_debugger_items = state_manager.get_item('face_debugger_items') if 'bounding-box' in face_debugger_items: - x1, y1, x2, y2 = bounding_box - cv2.rectangle(temp_vision_frame, (x1, y1), (x2, y2), primary_color, 2) - - if target_face.angle == 0: - cv2.line(temp_vision_frame, (x1, y1), (x2, y1), primary_light_color, 3) - if target_face.angle == 180: - cv2.line(temp_vision_frame, (x1, y2), (x2, y2), primary_light_color, 3) - if target_face.angle == 90: - cv2.line(temp_vision_frame, (x2, y1), (x2, y2), primary_light_color, 3) - if target_face.angle == 270: - cv2.line(temp_vision_frame, (x1, y1), (x1, y2), primary_light_color, 3) + temp_vision_frame = draw_bounding_box(target_face, temp_vision_frame) if 'face-mask' in face_debugger_items: - crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark_set.get('5/68'), 'arcface_128', (512, 512)) - inverse_matrix = cv2.invertAffineTransform(affine_matrix) - temp_size = temp_vision_frame.shape[:2][::-1] - crop_masks = [] + temp_vision_frame = draw_face_mask(target_face, temp_vision_frame) - if 'box' in state_manager.get_item('face_mask_types'): - box_mask = create_box_mask(crop_vision_frame, 0, state_manager.get_item('face_mask_padding')) - crop_masks.append(box_mask) + if 'face-landmark-5' in face_debugger_items: + temp_vision_frame = draw_face_landmark_5(target_face, temp_vision_frame) - if 'occlusion' in state_manager.get_item('face_mask_types'): - occlusion_mask = create_occlusion_mask(crop_vision_frame) - crop_masks.append(occlusion_mask) + if 'face-landmark-5/68' in face_debugger_items: + temp_vision_frame = draw_face_landmark_5_68(target_face, temp_vision_frame) - if 'area' in state_manager.get_item('face_mask_types'): - face_landmark_68 = cv2.transform(target_face.landmark_set.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) - area_mask = create_area_mask(crop_vision_frame, face_landmark_68, state_manager.get_item('face_mask_areas')) - crop_masks.append(area_mask) + if 'face-landmark-68' in face_debugger_items: + temp_vision_frame = draw_face_landmark_68(target_face, temp_vision_frame) - if 'region' in state_manager.get_item('face_mask_types'): - region_mask = create_region_mask(crop_vision_frame, state_manager.get_item('face_mask_regions')) - crop_masks.append(region_mask) - - crop_mask = numpy.minimum.reduce(crop_masks).clip(0, 1) - crop_mask = (crop_mask * 255).astype(numpy.uint8) - inverse_vision_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_size) - 
inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] - inverse_vision_frame[inverse_vision_frame > 0] = 255 #type:ignore[operator] - inverse_contours = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] - cv2.drawContours(temp_vision_frame, inverse_contours, -1, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) - - if 'face-landmark-5' in face_debugger_items and numpy.any(target_face.landmark_set.get('5')): - face_landmark_5 = target_face.landmark_set.get('5').astype(numpy.int32) - for index in range(face_landmark_5.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) - - if 'face-landmark-5/68' in face_debugger_items and numpy.any(target_face.landmark_set.get('5/68')): - face_landmark_5_68 = target_face.landmark_set.get('5/68').astype(numpy.int32) - for index in range(face_landmark_5_68.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_5_68[index][0], face_landmark_5_68[index][1]), 3, tertiary_color if has_face_landmark_5_fallback else secondary_color, -1) - - if 'face-landmark-68' in face_debugger_items and numpy.any(target_face.landmark_set.get('68')): - face_landmark_68 = target_face.landmark_set.get('68').astype(numpy.int32) - for index in range(face_landmark_68.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, tertiary_color if has_face_landmark_68_fallback else secondary_color, -1) - - if 'face-landmark-68/5' in face_debugger_items and numpy.any(target_face.landmark_set.get('68')): - face_landmark_68 = target_face.landmark_set.get('68/5').astype(numpy.int32) - for index in range(face_landmark_68.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, tertiary_color, -1) - - if bounding_box[3] - bounding_box[1] > 50 and bounding_box[2] - bounding_box[0] > 50: - top = bounding_box[1] - left = bounding_box[0] - 20 - - if 'face-detector-score' in face_debugger_items: - face_score_text = str(round(target_face.score_set.get('detector'), 2)) - top = top + 20 - cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) - - if 'face-landmarker-score' in face_debugger_items: - face_score_text = str(round(target_face.score_set.get('landmarker'), 2)) - top = top + 20 - cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) - - if 'age' in face_debugger_items: - face_age_text = str(target_face.age.start) + '-' + str(target_face.age.stop) - top = top + 20 - cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) - - if 'gender' in face_debugger_items: - face_gender_text = target_face.gender - top = top + 20 - cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) - - if 'race' in face_debugger_items: - face_race_text = target_face.race - top = top + 20 - cv2.putText(temp_vision_frame, face_race_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'face-landmark-68/5' in face_debugger_items: + temp_vision_frame = draw_face_landmark_68_5(target_face, temp_vision_frame) return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass +def draw_bounding_box(target_face : Face, 
temp_vision_frame : VisionFrame) -> VisionFrame: + box_color = 0, 0, 255 + border_color = 100, 100, 255 + bounding_box = target_face.bounding_box.astype(numpy.int32) + x1, y1, x2, y2 = bounding_box + + cv2.rectangle(temp_vision_frame, (x1, y1), (x2, y2), box_color, 2) + + if target_face.angle == 0: + cv2.line(temp_vision_frame, (x1, y1), (x2, y1), border_color, 3) + if target_face.angle == 180: + cv2.line(temp_vision_frame, (x1, y2), (x2, y2), border_color, 3) + if target_face.angle == 90: + cv2.line(temp_vision_frame, (x2, y1), (x2, y2), border_color, 3) + if target_face.angle == 270: + cv2.line(temp_vision_frame, (x1, y1), (x1, y2), border_color, 3) + + return temp_vision_frame + + +def draw_face_mask(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + crop_masks = [] + face_landmark_5 = target_face.landmark_set.get('5') + face_landmark_68 = target_face.landmark_set.get('68') + face_landmark_5_68 = target_face.landmark_set.get('5/68') + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5_68, 'arcface_128', (512, 512)) + inverse_matrix = cv2.invertAffineTransform(affine_matrix) + temp_size = temp_vision_frame.shape[:2][::-1] + mask_color = 0, 255, 0 + + if numpy.array_equal(face_landmark_5, face_landmark_5_68): + mask_color = 255, 255, 0 + + if 'box' in state_manager.get_item('face_mask_types'): + box_mask = create_box_mask(crop_vision_frame, 0, state_manager.get_item('face_mask_padding')) + crop_masks.append(box_mask) + + if 'occlusion' in state_manager.get_item('face_mask_types'): + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_masks.append(occlusion_mask) + + if 'area' in state_manager.get_item('face_mask_types'): + face_landmark_68 = cv2.transform(face_landmark_68.reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + area_mask = create_area_mask(crop_vision_frame, face_landmark_68, state_manager.get_item('face_mask_areas')) + crop_masks.append(area_mask) + + if 'region' in state_manager.get_item('face_mask_types'): + region_mask = create_region_mask(crop_vision_frame, state_manager.get_item('face_mask_regions')) + crop_masks.append(region_mask) + + crop_mask = numpy.minimum.reduce(crop_masks).clip(0, 1) + crop_mask = (crop_mask * 255).astype(numpy.uint8) + inverse_vision_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_size) + inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] + inverse_contours, _ = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE) + cv2.drawContours(temp_vision_frame, inverse_contours, -1, mask_color, 2) + + return temp_vision_frame + + +def draw_face_landmark_5(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + face_landmark_5 = target_face.landmark_set.get('5') + point_color = 0, 0, 255 + + if numpy.any(face_landmark_5): + face_landmark_5 = face_landmark_5.astype(numpy.int32) + + for point in face_landmark_5: + cv2.circle(temp_vision_frame, tuple(point), 3, point_color, -1) + + return temp_vision_frame + + +def draw_face_landmark_5_68(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + face_landmark_5 = target_face.landmark_set.get('5') + face_landmark_5_68 = target_face.landmark_set.get('5/68') + point_color = 0, 255, 0 + + if numpy.array_equal(face_landmark_5, face_landmark_5_68): + point_color = 255, 255, 0 + + if numpy.any(face_landmark_5_68): + face_landmark_5_68 = face_landmark_5_68.astype(numpy.int32) + + for point in face_landmark_5_68: + 
cv2.circle(temp_vision_frame, tuple(point), 3, point_color, -1) + + return temp_vision_frame + + +def draw_face_landmark_68(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + face_landmark_68 = target_face.landmark_set.get('68') + face_landmark_68_5 = target_face.landmark_set.get('68/5') + point_color = 0, 255, 0 + + if numpy.array_equal(face_landmark_68, face_landmark_68_5): + point_color = 255, 255, 0 + + if numpy.any(face_landmark_68): + face_landmark_68 = face_landmark_68.astype(numpy.int32) + + for point in face_landmark_68: + cv2.circle(temp_vision_frame, tuple(point), 3, point_color, -1) + + return temp_vision_frame + + +def draw_face_landmark_68_5(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + face_landmark_68_5 = target_face.landmark_set.get('68/5') + point_color = 255, 255, 0 + + if numpy.any(face_landmark_68_5): + face_landmark_68_5 = face_landmark_68_5.astype(numpy.int32) + + for point in face_landmark_68_5: + cv2.circle(temp_vision_frame, tuple(point), 3, point_color, -1) + + return temp_vision_frame def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') + reference_vision_frame = inputs.get('reference_vision_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = debug_face(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = debug_face(target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = debug_face(similar_face, target_vision_frame) - return target_vision_frame + if target_faces: + for target_face in target_faces: + temp_vision_frame = debug_face(target_face, temp_vision_frame) + + return temp_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - 
processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/modules/face_editor.py b/facefusion/processors/modules/face_editor.py index f567b87..fd42f24 100755 --- a/facefusion/processors/modules/face_editor.py +++ b/facefusion/processors/modules/face_editor.py @@ -1,32 +1,29 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List, Tuple +from typing import Tuple import cv2 import numpy import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording from facefusion.common_helper import create_float_metavar from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url -from facefusion.face_analyser import get_many_faces, get_one_face from facefusion.face_helper import paste_back, scale_face_landmark_5, warp_face_by_face_landmark_5 from facefusion.face_masker import create_box_mask -from facefusion.face_selector import find_similar_faces, sort_and_filter_faces -from facefusion.face_store import get_reference_faces +from facefusion.face_selector import select_faces from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension from facefusion.processors import choices as processors_choices -from facefusion.processors.live_portrait import create_rotation, limit_euler_angles, limit_expression +from facefusion.processors.live_portrait import create_rotation, limit_angle, limit_expression from facefusion.processors.types import FaceEditorInputs, LivePortraitExpression, LivePortraitFeatureVolume, LivePortraitMotionPoints, LivePortraitPitch, LivePortraitRoll, LivePortraitRotation, LivePortraitScale, LivePortraitTranslation, LivePortraitYaw from facefusion.program_helper import find_argument_group from facefusion.thread_helper import conditional_thread_semaphore, thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, FaceLandmark68, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, FaceLandmark68, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -182,6 +179,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -203,8 +201,8 @@ def edit_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFram crop_vision_frame = prepare_crop_frame(crop_vision_frame) crop_vision_frame = apply_edit(crop_vision_frame, target_face.landmark_set.get('68')) crop_vision_frame = 
normalize_crop_frame(crop_vision_frame) - temp_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, box_mask, affine_matrix) - return temp_vision_frame + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, box_mask, affine_matrix) + return paste_vision_frame def apply_edit(crop_vision_frame : VisionFrame, face_landmark_68 : FaceLandmark68) -> VisionFrame: @@ -342,8 +340,8 @@ def edit_eye_gaze(expression : LivePortraitExpression) -> LivePortraitExpression def edit_eye_open(motion_points : LivePortraitMotionPoints, face_landmark_68 : FaceLandmark68) -> LivePortraitMotionPoints: face_editor_eye_open_ratio = state_manager.get_item('face_editor_eye_open_ratio') - left_eye_ratio = calc_distance_ratio(face_landmark_68, 37, 40, 39, 36) - right_eye_ratio = calc_distance_ratio(face_landmark_68, 43, 46, 45, 42) + left_eye_ratio = calculate_distance_ratio(face_landmark_68, 37, 40, 39, 36) + right_eye_ratio = calculate_distance_ratio(face_landmark_68, 43, 46, 45, 42) if face_editor_eye_open_ratio < 0: eye_motion_points = numpy.concatenate([ motion_points.ravel(), [ left_eye_ratio, right_eye_ratio, 0.0 ] ]) @@ -357,7 +355,7 @@ def edit_eye_open(motion_points : LivePortraitMotionPoints, face_landmark_68 : F def edit_lip_open(motion_points : LivePortraitMotionPoints, face_landmark_68 : FaceLandmark68) -> LivePortraitMotionPoints: face_editor_lip_open_ratio = state_manager.get_item('face_editor_lip_open_ratio') - lip_ratio = calc_distance_ratio(face_landmark_68, 62, 66, 54, 48) + lip_ratio = calculate_distance_ratio(face_landmark_68, 62, 66, 54, 48) if face_editor_lip_open_ratio < 0: lip_motion_points = numpy.concatenate([ motion_points.ravel(), [ lip_ratio, 0.0 ] ]) @@ -450,12 +448,12 @@ def edit_head_rotation(pitch : LivePortraitPitch, yaw : LivePortraitYaw, roll : edit_pitch = pitch + float(numpy.interp(face_editor_head_pitch, [ -1, 1 ], [ 20, -20 ])) edit_yaw = yaw + float(numpy.interp(face_editor_head_yaw, [ -1, 1 ], [ 60, -60 ])) edit_roll = roll + float(numpy.interp(face_editor_head_roll, [ -1, 1 ], [ -15, 15 ])) - edit_pitch, edit_yaw, edit_roll = limit_euler_angles(pitch, yaw, roll, edit_pitch, edit_yaw, edit_roll) + edit_pitch, edit_yaw, edit_roll = limit_angle(pitch, yaw, roll, edit_pitch, edit_yaw, edit_roll) rotation = create_rotation(edit_pitch, edit_yaw, edit_roll) return rotation -def calc_distance_ratio(face_landmark_68 : FaceLandmark68, top_index : int, bottom_index : int, left_index : int, right_index : int) -> float: +def calculate_distance_ratio(face_landmark_68 : FaceLandmark68, top_index : int, bottom_index : int, left_index : int, right_index : int) -> float: vertical_direction = face_landmark_68[top_index] - face_landmark_68[bottom_index] horizontal_direction = face_landmark_68[left_index] - face_landmark_68[right_index] distance_ratio = float(numpy.linalg.norm(vertical_direction) / (numpy.linalg.norm(horizontal_direction) + 1e-6)) @@ -478,56 +476,14 @@ def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: return crop_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass - - def process_frame(inputs : FaceEditorInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') + reference_vision_frame = inputs.get('reference_vision_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = 
select_faces(reference_vision_frame, target_vision_frame)

-    if state_manager.get_item('face_selector_mode') == 'many':
-        if many_faces:
-            for target_face in many_faces:
-                target_vision_frame = edit_face(target_face, target_vision_frame)
-    if state_manager.get_item('face_selector_mode') == 'one':
-        target_face = get_one_face(many_faces)
-        if target_face:
-            target_vision_frame = edit_face(target_face, target_vision_frame)
-    if state_manager.get_item('face_selector_mode') == 'reference':
-        similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance'))
-        if similar_faces:
-            for similar_face in similar_faces:
-                target_vision_frame = edit_face(similar_face, target_vision_frame)
-    return target_vision_frame
+    if target_faces:
+        for target_face in target_faces:
+            temp_vision_frame = edit_face(target_face, temp_vision_frame)

-
-def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
-    reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None
-
-    for queue_payload in process_manager.manage(queue_payloads):
-        target_vision_path = queue_payload['frame_path']
-        target_vision_frame = read_image(target_vision_path)
-        output_vision_frame = process_frame(
-        {
-            'reference_faces': reference_faces,
-            'target_vision_frame': target_vision_frame
-        })
-        write_image(target_vision_path, output_vision_frame)
-        update_progress(1)
-
-
-def process_image(source_path : str, target_path : str, output_path : str) -> None:
-    reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None
-    target_vision_frame = read_static_image(target_path)
-    output_vision_frame = process_frame(
-    {
-        'reference_faces': reference_faces,
-        'target_vision_frame': target_vision_frame
-    })
-    write_image(output_path, output_vision_frame)
-
-
-def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
-    processors.multi_process_frames(None, temp_frame_paths, process_frames)
+    return temp_vision_frame
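
The renamed calculate_distance_ratio divides a vertical landmark span by a horizontal one, which makes the openness measure independent of face scale; edit_eye_open samples the eyelids against the eye corners (landmarks 37, 40, 39, 36 for one eye) and edit_lip_open does the same for the mouth (62, 66, 54, 48). A worked example with made-up coordinates:

import numpy

def calculate_distance_ratio(face_landmark_68, top_index, bottom_index, left_index, right_index):
    vertical_direction = face_landmark_68[top_index] - face_landmark_68[bottom_index]
    horizontal_direction = face_landmark_68[left_index] - face_landmark_68[right_index]
    return float(numpy.linalg.norm(vertical_direction) / (numpy.linalg.norm(horizontal_direction) + 1e-6))

face_landmark_68 = numpy.zeros((68, 2))
face_landmark_68[37] = (110, 95)   # upper eyelid
face_landmark_68[40] = (110, 105)  # lower eyelid
face_landmark_68[39] = (125, 100)  # inner eye corner
face_landmark_68[36] = (100, 100)  # outer eye corner

print(calculate_distance_ratio(face_landmark_68, 37, 40, 39, 36))  # 10 / 25 = 0.4

diff --git a/facefusion/processors/modules/face_enhancer.py b/facefusion/processors/modules/face_enhancer.py
index dce358e..7cc8b4b 100755
--- a/facefusion/processors/modules/face_enhancer.py
+++ b/facefusion/processors/modules/face_enhancer.py
@@ -1,31 +1,26 @@
 from argparse import ArgumentParser
 from functools import lru_cache
-from typing import List

-import cv2
 import numpy

 import facefusion.jobs.job_manager
 import facefusion.jobs.job_store
-import facefusion.processors.core as processors
-from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording
+from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording
 from facefusion.common_helper import create_float_metavar, create_int_metavar
 from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url
-from facefusion.face_analyser import get_many_faces, get_one_face
 from facefusion.face_helper import paste_back, warp_face_by_face_landmark_5
 from facefusion.face_masker import create_box_mask, create_occlusion_mask
-from facefusion.face_selector import find_similar_faces, sort_and_filter_faces
-from facefusion.face_store import 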
get_reference_faces +from facefusion.face_selector import select_faces from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension from facefusion.processors import choices as processors_choices from facefusion.processors.types import FaceEnhancerInputs, FaceEnhancerWeight from facefusion.program_helper import find_argument_group from facefusion.thread_helper import thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import blend_frame, read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -243,7 +238,7 @@ def register_args(program : ArgumentParser) -> None: if group_processors: group_processors.add_argument('--face-enhancer-model', help = wording.get('help.face_enhancer_model'), default = config.get_str_value('processors', 'face_enhancer_model', 'gfpgan_1.4'), choices = processors_choices.face_enhancer_models) group_processors.add_argument('--face-enhancer-blend', help = wording.get('help.face_enhancer_blend'), type = int, default = config.get_int_value('processors', 'face_enhancer_blend', '80'), choices = processors_choices.face_enhancer_blend_range, metavar = create_int_metavar(processors_choices.face_enhancer_blend_range)) - group_processors.add_argument('--face-enhancer-weight', help = wording.get('help.face_enhancer_weight'), type = float, default = config.get_float_value('processors', 'face_enhancer_weight', '1.0'), choices = processors_choices.face_enhancer_weight_range, metavar = create_float_metavar(processors_choices.face_enhancer_weight_range)) + group_processors.add_argument('--face-enhancer-weight', help = wording.get('help.face_enhancer_weight'), type = float, default = config.get_float_value('processors', 'face_enhancer_weight', '0.5'), choices = processors_choices.face_enhancer_weight_range, metavar = create_float_metavar(processors_choices.face_enhancer_weight_range)) facefusion.jobs.job_store.register_step_keys([ 'face_enhancer_model', 'face_enhancer_blend', 'face_enhancer_weight' ]) @@ -275,6 +270,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -307,7 +303,7 @@ def enhance_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionF crop_vision_frame = normalize_crop_frame(crop_vision_frame) crop_mask = numpy.minimum.reduce(crop_masks).clip(0, 1) paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) - temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + temp_vision_frame = blend_paste_frame(temp_vision_frame, paste_vision_frame) return temp_vision_frame @@ -353,62 +349,20 @@ def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: return crop_vision_frame -def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: +def blend_paste_frame(temp_vision_frame : VisionFrame, 
paste_vision_frame : VisionFrame) -> VisionFrame:
     face_enhancer_blend = 1 - (state_manager.get_item('face_enhancer_blend') / 100)
-    temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0)
+    temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame, 1 - face_enhancer_blend)
     return temp_vision_frame


-def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame:
-    return enhance_face(target_face, temp_vision_frame)
-
-
 def process_frame(inputs : FaceEnhancerInputs) -> VisionFrame:
-    reference_faces = inputs.get('reference_faces')
+    reference_vision_frame = inputs.get('reference_vision_frame')
     target_vision_frame = inputs.get('target_vision_frame')
-    many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ]))
+    temp_vision_frame = inputs.get('temp_vision_frame')
+    target_faces = select_faces(reference_vision_frame, target_vision_frame)

-    if state_manager.get_item('face_selector_mode') == 'many':
-        if many_faces:
-            for target_face in many_faces:
-                target_vision_frame = enhance_face(target_face, target_vision_frame)
-    if state_manager.get_item('face_selector_mode') == 'one':
-        target_face = get_one_face(many_faces)
-        if target_face:
-            target_vision_frame = enhance_face(target_face, target_vision_frame)
-    if state_manager.get_item('face_selector_mode') == 'reference':
-        similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance'))
-        if similar_faces:
-            for similar_face in similar_faces:
-                target_vision_frame = enhance_face(similar_face, target_vision_frame)
-    return target_vision_frame
+    if target_faces:
+        for target_face in target_faces:
+            temp_vision_frame = enhance_face(target_face, temp_vision_frame)

-
-def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
-    reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None
-
-    for queue_payload in process_manager.manage(queue_payloads):
-        target_vision_path = queue_payload['frame_path']
-        target_vision_frame = read_image(target_vision_path)
-        output_vision_frame = process_frame(
-        {
-            'reference_faces': reference_faces,
-            'target_vision_frame': target_vision_frame
-        })
-        write_image(target_vision_path, output_vision_frame)
-        update_progress(1)
-
-
-def process_image(source_path : str, target_path : str, output_path : str) -> None:
-    reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None
-    target_vision_frame = read_static_image(target_path)
-    output_vision_frame = process_frame(
-    {
-        'reference_faces': reference_faces,
-        'target_vision_frame': target_vision_frame
-    })
-    write_image(output_path, output_vision_frame)
-
-
-def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None:
-    processors.multi_process_frames(None, temp_frame_paths, process_frames)
+    return temp_vision_frame
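
The double inversion in blend_paste_frame cancels out: with face_enhancer_blend = 1 - (blend / 100), the weight passed to blend_frame is blend / 100 again. Assuming blend_frame(temp, paste, weight) mixes temp * (1 - weight) + paste * weight, which matches the cv2.addWeighted call it replaces, a --face-enhancer-blend of 80 keeps 20 percent of the untouched frame:

blend = 80                               # --face-enhancer-blend
face_enhancer_blend = 1 - (blend / 100)  # 0.2
weight = 1 - face_enhancer_blend         # 0.8
# blend_frame(temp_vision_frame, paste_vision_frame, 0.8) is then equivalent to
# cv2.addWeighted(temp_vision_frame, 0.2, paste_vision_frame, 0.8, 0)

diff --git a/facefusion/processors/modules/face_swapper.py b/facefusion/processors/modules/face_swapper.py
index 3cd4445..cfa9dd9 100755
--- a/facefusion/processors/modules/face_swapper.py
+++ b/facefusion/processors/modules/face_swapper.py
@@ -1,6 +1,6 @@
 from argparse import ArgumentParser
 from functools import lru_cache
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 import cv2
 import numpy
@@ -8,16 +8,14 @@ import numpy
 import facefusion.choices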
import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, wording -from facefusion.common_helper import get_first +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, wording +from facefusion.common_helper import get_first, is_macos from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url from facefusion.execution import has_execution_provider from facefusion.face_analyser import get_average_face, get_many_faces, get_one_face from facefusion.face_helper import paste_back, warp_face_by_face_landmark_5 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask, create_region_mask -from facefusion.face_selector import find_similar_faces, sort_and_filter_faces, sort_faces_by_order -from facefusion.face_store import get_reference_faces +from facefusion.face_selector import select_faces, sort_faces_by_order from facefusion.filesystem import filter_image_paths, has_image, in_directory, is_image, is_video, resolve_relative_path, same_file_extension from facefusion.model_helper import get_static_model_initializer from facefusion.processors import choices as processors_choices @@ -25,11 +23,11 @@ from facefusion.processors.pixel_boost import explode_pixel_boost, implode_pixel from facefusion.processors.types import FaceSwapperInputs from facefusion.program_helper import find_argument_group from facefusion.thread_helper import conditional_thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Embedding, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, read_static_images, unpack_resolution, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, Embedding, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import read_static_image, read_static_images, read_static_video_frame, unpack_resolution -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -68,8 +66,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_ghost.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.hash') } }, 'sources': @@ -81,8 +79,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_ghost.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.onnx') } }, 'type': 'ghost', @@ -102,8 +100,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': 
resolve_download_url('models-3.0.0', 'arcface_converter_ghost.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.hash') } }, 'sources': @@ -115,8 +113,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_ghost.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.onnx') } }, 'type': 'ghost', @@ -136,8 +134,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_ghost.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.hash') } }, 'sources': @@ -149,8 +147,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_ghost.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_ghost.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_ghost.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_ghost.onnx') } }, 'type': 'ghost', @@ -170,8 +168,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.1.0', 'arcface_converter_hififace.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_hififace.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_hififace.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_hififace.hash') } }, 'sources': @@ -183,8 +181,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.1.0', 'arcface_converter_hififace.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_hififace.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_hififace.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_hififace.onnx') } }, 'type': 'hififace', @@ -324,8 +322,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_simswap.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_simswap.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_simswap.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_simswap.hash') } }, 'sources': @@ -337,8 +335,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_simswap.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_simswap.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_simswap.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_simswap.onnx') } }, 'type': 'simswap', @@ -358,8 +356,8 @@ def create_static_model_set(download_scope : 
DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_simswap.hash'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_simswap.hash') + 'url': resolve_download_url('models-3.4.0', 'crossface_simswap.hash'), + 'path': resolve_relative_path('../.assets/models/crossface_simswap.hash') } }, 'sources': @@ -371,8 +369,8 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: }, 'embedding_converter': { - 'url': resolve_download_url('models-3.0.0', 'arcface_converter_simswap.onnx'), - 'path': resolve_relative_path('../.assets/models/arcface_converter_simswap.onnx') + 'url': resolve_download_url('models-3.4.0', 'crossface_simswap.onnx'), + 'path': resolve_relative_path('../.assets/models/crossface_simswap.onnx') } }, 'type': 'simswap', @@ -428,7 +426,7 @@ def get_model_options() -> ModelOptions: def get_model_name() -> str: model_name = state_manager.get_item('face_swapper_model') - if has_execution_provider('coreml') and model_name == 'inswapper_128_fp16': + if is_macos() and has_execution_provider('coreml') and model_name == 'inswapper_128_fp16': return 'inswapper_128' return model_name @@ -440,12 +438,14 @@ def register_args(program : ArgumentParser) -> None: known_args, _ = program.parse_known_args() face_swapper_pixel_boost_choices = processors_choices.face_swapper_set.get(known_args.face_swapper_model) group_processors.add_argument('--face-swapper-pixel-boost', help = wording.get('help.face_swapper_pixel_boost'), default = config.get_str_value('processors', 'face_swapper_pixel_boost', get_first(face_swapper_pixel_boost_choices)), choices = face_swapper_pixel_boost_choices) - facefusion.jobs.job_store.register_step_keys([ 'face_swapper_model', 'face_swapper_pixel_boost' ]) + group_processors.add_argument('--face-swapper-weight', help = wording.get('help.face_swapper_weight'), type = float, default = config.get_float_value('processors', 'face_swapper_weight', '0.5'), choices = processors_choices.face_swapper_weight_range) + facefusion.jobs.job_store.register_step_keys([ 'face_swapper_model', 'face_swapper_pixel_boost', 'face_swapper_weight' ]) def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None: apply_state_item('face_swapper_model', args.get('face_swapper_model')) apply_state_item('face_swapper_pixel_boost', args.get('face_swapper_pixel_boost')) + apply_state_item('face_swapper_weight', args.get('face_swapper_weight')) def pre_check() -> bool: @@ -459,26 +459,33 @@ def pre_process(mode : ProcessMode) -> bool: if not has_image(state_manager.get_item('source_paths')): logger.error(wording.get('choose_image_source') + wording.get('exclamation_mark'), __name__) return False + source_image_paths = filter_image_paths(state_manager.get_item('source_paths')) source_frames = read_static_images(source_image_paths) source_faces = get_many_faces(source_frames) + if not get_one_face(source_faces): logger.error(wording.get('no_source_face_detected') + wording.get('exclamation_mark'), __name__) return False + if mode in [ 'output', 'preview' ] and not is_image(state_manager.get_item('target_path')) and not is_video(state_manager.get_item('target_path')): logger.error(wording.get('choose_image_or_video_target') + wording.get('exclamation_mark'), __name__) return False + if mode == 'output' and not in_directory(state_manager.get_item('output_path')): logger.error(wording.get('specify_image_or_video_output') + wording.get('exclamation_mark'), __name__) return False + if mode == 'output' 
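get_model_name (and the CoreML branches later in this file) now require an actual macOS host in addition to the coreml execution provider before falling back from inswapper_128_fp16 to inswapper_128. is_macos is imported from facefusion.common_helper and not defined in this diff; a minimal sketch of the assumed helper:

import platform


def is_macos() -> bool:
	# macOS hosts report the Darwin kernel
	return platform.system().lower() == 'darwin'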
and not same_file_extension(state_manager.get_item('target_path'), state_manager.get_item('output_path')): logger.error(wording.get('match_target_and_output_extension') + wording.get('exclamation_mark'), __name__) return False + return True def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: get_static_model_initializer.cache_clear() @@ -512,7 +519,7 @@ def swap_face(source_face : Face, target_face : Face, temp_vision_frame : Vision pixel_boost_vision_frames = implode_pixel_boost(crop_vision_frame, pixel_boost_total, model_size) for pixel_boost_vision_frame in pixel_boost_vision_frames: pixel_boost_vision_frame = prepare_crop_frame(pixel_boost_vision_frame) - pixel_boost_vision_frame = forward_swap_face(source_face, pixel_boost_vision_frame) + pixel_boost_vision_frame = forward_swap_face(source_face, target_face, pixel_boost_vision_frame) pixel_boost_vision_frame = normalize_crop_frame(pixel_boost_vision_frame) temp_vision_frames.append(pixel_boost_vision_frame) crop_vision_frame = explode_pixel_boost(temp_vision_frames, pixel_boost_total, model_size, pixel_boost_size) @@ -527,16 +534,16 @@ def swap_face(source_face : Face, target_face : Face, temp_vision_frame : Vision crop_masks.append(region_mask) crop_mask = numpy.minimum.reduce(crop_masks).clip(0, 1) - temp_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) - return temp_vision_frame + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return paste_vision_frame -def forward_swap_face(source_face : Face, crop_vision_frame : VisionFrame) -> VisionFrame: +def forward_swap_face(source_face : Face, target_face : Face, crop_vision_frame : VisionFrame) -> VisionFrame: face_swapper = get_inference_pool().get('face_swapper') model_type = get_model_options().get('type') face_swapper_inputs = {} - if has_execution_provider('coreml') and model_type in [ 'ghost', 'uniface' ]: + if is_macos() and has_execution_provider('coreml') and model_type in [ 'ghost', 'uniface' ]: face_swapper.set_providers([ facefusion.choices.execution_provider_set.get('cpu') ]) for face_swapper_input in face_swapper.get_inputs(): @@ -544,7 +551,9 @@ def forward_swap_face(source_face : Face, crop_vision_frame : VisionFrame) -> Vi if model_type in [ 'blendswap', 'uniface' ]: face_swapper_inputs[face_swapper_input.name] = prepare_source_frame(source_face) else: - face_swapper_inputs[face_swapper_input.name] = prepare_source_embedding(source_face) + source_embedding = prepare_source_embedding(source_face) + source_embedding = balance_source_embedding(source_embedding, target_face.embedding) + face_swapper_inputs[face_swapper_input.name] = source_embedding if face_swapper_input.name == 'target': face_swapper_inputs[face_swapper_input.name] = crop_vision_frame @@ -554,16 +563,16 @@ def forward_swap_face(source_face : Face, crop_vision_frame : VisionFrame) -> Vi return crop_vision_frame -def forward_convert_embedding(embedding : Embedding) -> Embedding: +def forward_convert_embedding(face_embedding : Embedding) -> Embedding: embedding_converter = get_inference_pool().get('embedding_converter') with conditional_thread_semaphore(): - embedding = embedding_converter.run(None, + face_embedding = embedding_converter.run(None, { - 'input': embedding + 'input': face_embedding })[0] - return embedding + return face_embedding def 
prepare_source_frame(source_face : Face) -> VisionFrame: @@ -572,8 +581,10 @@ def prepare_source_frame(source_face : Face) -> VisionFrame: if model_type == 'blendswap': source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark_set.get('5/68'), 'arcface_112_v2', (112, 112)) + if model_type == 'uniface': source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark_set.get('5/68'), 'ffhq_512', (256, 256)) + source_vision_frame = source_vision_frame[:, :, ::-1] / 255.0 source_vision_frame = source_vision_frame.transpose(2, 0, 1) source_vision_frame = numpy.expand_dims(source_vision_frame, axis = 0).astype(numpy.float32) @@ -584,12 +595,13 @@ def prepare_source_embedding(source_face : Face) -> Embedding: model_type = get_model_options().get('type') if model_type == 'ghost': - source_embedding, _ = convert_embedding(source_face) + source_embedding = source_face.embedding.reshape(-1, 512) + source_embedding, _ = convert_source_embedding(source_embedding) source_embedding = source_embedding.reshape(1, -1) return source_embedding if model_type == 'hyperswap': - source_embedding = source_face.normed_embedding.reshape((1, -1)) + source_embedding = source_face.embedding_norm.reshape((1, -1)) return source_embedding if model_type == 'inswapper': @@ -599,17 +611,31 @@ def prepare_source_embedding(source_face : Face) -> Embedding: source_embedding = numpy.dot(source_embedding, model_initializer) / numpy.linalg.norm(source_embedding) return source_embedding - _, source_normed_embedding = convert_embedding(source_face) - source_embedding = source_normed_embedding.reshape(1, -1) + source_embedding = source_face.embedding.reshape(-1, 512) + _, source_embedding_norm = convert_source_embedding(source_embedding) + source_embedding = source_embedding_norm.reshape(1, -1) return source_embedding -def convert_embedding(source_face : Face) -> Tuple[Embedding, Embedding]: - embedding = source_face.embedding.reshape(-1, 512) - embedding = forward_convert_embedding(embedding) - embedding = embedding.ravel() - normed_embedding = embedding / numpy.linalg.norm(embedding) - return embedding, normed_embedding +def balance_source_embedding(source_embedding : Embedding, target_embedding : Embedding) -> Embedding: + model_type = get_model_options().get('type') + face_swapper_weight = state_manager.get_item('face_swapper_weight') + face_swapper_weight = numpy.interp(face_swapper_weight, [ 0, 1 ], [ 0.35, -0.35 ]).astype(numpy.float32) + + if model_type in [ 'hififace', 'hyperswap', 'inswapper', 'simswap' ]: + target_embedding = target_embedding / numpy.linalg.norm(target_embedding) + + source_embedding = source_embedding.reshape(1, -1) + target_embedding = target_embedding.reshape(1, -1) + source_embedding = source_embedding * (1 - face_swapper_weight) + target_embedding * face_swapper_weight + return source_embedding + + +def convert_source_embedding(source_embedding : Embedding) -> Tuple[Embedding, Embedding]: + source_embedding = forward_convert_embedding(source_embedding) + source_embedding = source_embedding.ravel() + source_embedding_norm = source_embedding / numpy.linalg.norm(source_embedding) + return source_embedding, source_embedding_norm def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: @@ -629,84 +655,39 @@ def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: model_standard_deviation = get_model_options().get('standard_deviation') crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + if model_type 
in [ 'ghost', 'hififace', 'hyperswap', 'uniface' ]: crop_vision_frame = crop_vision_frame * model_standard_deviation + model_mean + crop_vision_frame = crop_vision_frame.clip(0, 1) crop_vision_frame = crop_vision_frame[:, :, ::-1] * 255 return crop_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - return swap_face(source_face, target_face, temp_vision_frame) +def extract_source_face(source_vision_frames : List[VisionFrame]) -> Optional[Face]: + source_faces = [] + + if source_vision_frames: + for source_vision_frame in source_vision_frames: + temp_faces = get_many_faces([ source_vision_frame ]) + temp_faces = sort_faces_by_order(temp_faces, 'large-small') + + if temp_faces: + source_faces.append(get_first(temp_faces)) + + return get_average_face(source_faces) def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') - source_face = inputs.get('source_face') + reference_vision_frame = inputs.get('reference_vision_frame') + source_vision_frames = inputs.get('source_vision_frames') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + source_face = extract_source_face(source_vision_frames) + target_faces = select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = swap_face(source_face, target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = swap_face(source_face, target_face, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) - return target_vision_frame + if source_face and target_faces: + for target_face in target_faces: + temp_vision_frame = swap_face(source_face, target_face, temp_vision_frame) - -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_frames = read_static_images(source_paths) - source_faces = [] - - for source_frame in source_frames: - temp_faces = get_many_faces([ source_frame ]) - temp_faces = sort_faces_by_order(temp_faces, 'large-small') - if temp_faces: - source_faces.append(get_first(temp_faces)) - source_face = get_average_face(source_faces) - - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'source_face': source_face, - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_frames =
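balance_source_embedding backs the new --face-swapper-weight option: numpy.interp maps the user-facing weight from [ 0, 1 ] onto an internal mixing factor in [ 0.35, -0.35 ], so the 0.5 default yields 0 and leaves the source embedding untouched. A standalone illustration with hypothetical embeddings (not part of the patch):

import numpy

face_swapper_weight = 0.75
face_swapper_weight = numpy.interp(face_swapper_weight, [ 0, 1 ], [ 0.35, -0.35 ]).astype(numpy.float32)
# 0.0 maps to 0.35, 0.5 maps to 0.0, 0.75 maps to -0.175, 1.0 maps to -0.35

source_embedding = numpy.ones((1, 512), dtype = numpy.float32)
target_embedding = numpy.zeros((1, 512), dtype = numpy.float32)
source_embedding = source_embedding * (1 - face_swapper_weight) + target_embedding * face_swapper_weight
# a negative factor pushes the source embedding away from the target embedding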
read_static_images(source_paths) - source_faces = [] - - for source_frame in source_frames: - temp_faces = get_many_faces([ source_frame ]) - temp_faces = sort_faces_by_order(temp_faces, 'large-small') - if temp_faces: - source_faces.append(get_first(temp_faces)) - source_face = get_average_face(source_faces) - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'source_face': source_face, - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) + return temp_vision_frame diff --git a/facefusion/processors/modules/frame_colorizer.py b/facefusion/processors/modules/frame_colorizer.py index abf3fc9..70d77f8 100644 --- a/facefusion/processors/modules/frame_colorizer.py +++ b/facefusion/processors/modules/frame_colorizer.py @@ -7,9 +7,8 @@ import numpy import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, inference_manager, logger, process_manager, state_manager, video_manager, wording -from facefusion.common_helper import create_int_metavar +from facefusion import config, content_analyser, inference_manager, logger, state_manager, video_manager, wording +from facefusion.common_helper import create_int_metavar, is_macos from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url from facefusion.execution import has_execution_provider from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension @@ -17,11 +16,11 @@ from facefusion.processors import choices as processors_choices from facefusion.processors.types import FrameColorizerInputs from facefusion.program_helper import find_argument_group from facefusion.thread_helper import thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, ExecutionProvider, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, unpack_resolution, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, ExecutionProvider, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import blend_frame, read_static_image, read_static_video_frame, unpack_resolution -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -141,7 +140,7 @@ def clear_inference_pool() -> None: def resolve_execution_providers() -> List[ExecutionProvider]: - if has_execution_provider('coreml'): + if is_macos() and has_execution_provider('coreml'): return [ 'cpu' ] return state_manager.get_item('execution_providers') @@ -188,6 +187,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -199,7 +199,7 @@ def colorize_frame(temp_vision_frame : VisionFrame) -> VisionFrame: color_vision_frame = prepare_temp_frame(temp_vision_frame) color_vision_frame = forward(color_vision_frame) color_vision_frame = 
merge_color_frame(temp_vision_frame, color_vision_frame) - color_vision_frame = blend_frame(temp_vision_frame, color_vision_frame) + color_vision_frame = blend_color_frame(temp_vision_frame, color_vision_frame) return color_vision_frame @@ -255,41 +255,12 @@ def merge_color_frame(temp_vision_frame : VisionFrame, color_vision_frame : Visi return color_vision_frame -def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: +def blend_color_frame(temp_vision_frame : VisionFrame, color_vision_frame : VisionFrame) -> VisionFrame: frame_colorizer_blend = 1 - (state_manager.get_item('frame_colorizer_blend') / 100) - temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_colorizer_blend, paste_vision_frame, 1 - frame_colorizer_blend, 0) + temp_vision_frame = blend_frame(temp_vision_frame, color_vision_frame, 1 - frame_colorizer_blend) return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass - - def process_frame(inputs : FrameColorizerInputs) -> VisionFrame: - target_vision_frame = inputs.get('target_vision_frame') - return colorize_frame(target_vision_frame) - - -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - processors.multi_process_frames(None, temp_frame_paths, process_frames) + temp_vision_frame = inputs.get('temp_vision_frame') + return colorize_frame(temp_vision_frame) diff --git a/facefusion/processors/modules/frame_enhancer.py b/facefusion/processors/modules/frame_enhancer.py index 833e191..7d9dfff 100644 --- a/facefusion/processors/modules/frame_enhancer.py +++ b/facefusion/processors/modules/frame_enhancer.py @@ -1,15 +1,13 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List import cv2 import numpy import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, inference_manager, logger, process_manager, state_manager, video_manager, wording -from facefusion.common_helper import create_int_metavar +from facefusion import config, content_analyser, inference_manager, logger, state_manager, video_manager, wording +from facefusion.common_helper import create_int_metavar, is_macos from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url from facefusion.execution import has_execution_provider from facefusion.filesystem import in_directory, is_image, is_video, resolve_relative_path, same_file_extension @@ -17,11 +15,11 @@ from facefusion.processors import choices as processors_choices from facefusion.processors.types import FrameEnhancerInputs from facefusion.program_helper 
import find_argument_group from facefusion.thread_helper import conditional_thread_semaphore -from facefusion.types import ApplyStateItem, Args, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import create_tile_frames, merge_tile_frames, read_image, read_static_image, write_image +from facefusion.types import ApplyStateItem, Args, DownloadScope, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import blend_frame, create_tile_frames, merge_tile_frames, read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -426,7 +424,7 @@ def get_model_options() -> ModelOptions: def get_frame_enhancer_model() -> str: frame_enhancer_model = state_manager.get_item('frame_enhancer_model') - if has_execution_provider('coreml'): + if is_macos() and has_execution_provider('coreml'): if frame_enhancer_model == 'real_esrgan_x2_fp16': return 'real_esrgan_x2' if frame_enhancer_model == 'real_esrgan_x4_fp16': @@ -471,6 +469,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: clear_inference_pool() @@ -490,7 +489,7 @@ def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame: tile_vision_frames[index] = normalize_tile_frame(tile_vision_frame) merge_vision_frame = merge_tile_frames(tile_vision_frames, temp_width * model_scale, temp_height * model_scale, pad_width * model_scale, pad_height * model_scale, (model_size[0] * model_scale, model_size[1] * model_scale, model_size[2] * model_scale)) - temp_vision_frame = blend_frame(temp_vision_frame, merge_vision_frame) + temp_vision_frame = blend_merge_frame(temp_vision_frame, merge_vision_frame) return temp_vision_frame @@ -506,55 +505,26 @@ def forward(tile_vision_frame : VisionFrame) -> VisionFrame: return tile_vision_frame -def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: - vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0) - vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2) - vision_tile_frame = vision_tile_frame.astype(numpy.float32) / 255.0 - return vision_tile_frame +def prepare_tile_frame(tile_vision_frame : VisionFrame) -> VisionFrame: + tile_vision_frame = numpy.expand_dims(tile_vision_frame[:, :, ::-1], axis = 0) + tile_vision_frame = tile_vision_frame.transpose(0, 3, 1, 2) + tile_vision_frame = tile_vision_frame.astype(numpy.float32) / 255.0 + return tile_vision_frame -def normalize_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: - vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255 - vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1] - return vision_tile_frame +def normalize_tile_frame(tile_vision_frame : VisionFrame) -> VisionFrame: + tile_vision_frame = tile_vision_frame.transpose(0, 2, 3, 1).squeeze(0) * 255 + tile_vision_frame = tile_vision_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1] + return tile_vision_frame -def blend_frame(temp_vision_frame : VisionFrame, merge_vision_frame : VisionFrame) -> VisionFrame: +def blend_merge_frame(temp_vision_frame : VisionFrame, merge_vision_frame : VisionFrame) -> VisionFrame: frame_enhancer_blend = 1 - 
(state_manager.get_item('frame_enhancer_blend') / 100) temp_vision_frame = cv2.resize(temp_vision_frame, (merge_vision_frame.shape[1], merge_vision_frame.shape[0])) - temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, merge_vision_frame, 1 - frame_enhancer_blend, 0) + temp_vision_frame = blend_frame(temp_vision_frame, merge_vision_frame, 1 - frame_enhancer_blend) return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass - - def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame: - target_vision_frame = inputs.get('target_vision_frame') - return enhance_frame(target_vision_frame) - - -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - for queue_payload in process_manager.manage(queue_payloads): - target_vision_path = queue_payload['frame_path'] - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - processors.multi_process_frames(None, temp_frame_paths, process_frames) + temp_vision_frame = inputs.get('temp_vision_frame') + return enhance_frame(temp_vision_frame) diff --git a/facefusion/processors/modules/lip_syncer.py b/facefusion/processors/modules/lip_syncer.py index 16d4b68..4683eee 100755 --- a/facefusion/processors/modules/lip_syncer.py +++ b/facefusion/processors/modules/lip_syncer.py @@ -1,33 +1,28 @@ from argparse import ArgumentParser from functools import lru_cache -from typing import List import cv2 import numpy import facefusion.jobs.job_manager import facefusion.jobs.job_store -import facefusion.processors.core as processors -from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, process_manager, state_manager, video_manager, voice_extractor, wording -from facefusion.audio import create_empty_audio_frame, get_voice_frame, read_static_voice +from facefusion import config, content_analyser, face_classifier, face_detector, face_landmarker, face_masker, face_recognizer, inference_manager, logger, state_manager, video_manager, voice_extractor, wording +from facefusion.audio import read_static_voice from facefusion.common_helper import create_float_metavar -from facefusion.common_helper import get_first from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url -from facefusion.face_analyser import get_many_faces, get_one_face from facefusion.face_helper import create_bounding_box, paste_back, warp_face_by_bounding_box, warp_face_by_face_landmark_5 from facefusion.face_masker import create_area_mask, create_box_mask, create_occlusion_mask -from facefusion.face_selector import find_similar_faces, sort_and_filter_faces -from facefusion.face_store import get_reference_faces -from facefusion.filesystem import filter_audio_paths, has_audio, in_directory, is_image, is_video, resolve_relative_path, 
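blend_paste_frame, blend_color_frame and blend_merge_frame all replace direct cv2.addWeighted calls with facefusion.vision.blend_frame, whose body is outside this diff. A minimal sketch consistent with the three call sites, assuming a blend factor where 0 keeps the first frame and 1 keeps the second:

import cv2
import numpy
from numpy.typing import NDArray

VisionFrame = NDArray[numpy.uint8]


def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame, blend_factor : float) -> VisionFrame:
	# weighted sum of both frames; both inputs must share shape and dtype
	return cv2.addWeighted(temp_vision_frame, 1 - blend_factor, paste_vision_frame, blend_factor, 0)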
same_file_extension +from facefusion.face_selector import select_faces +from facefusion.filesystem import has_audio, resolve_relative_path from facefusion.processors import choices as processors_choices from facefusion.processors.types import LipSyncerInputs, LipSyncerWeight from facefusion.program_helper import find_argument_group from facefusion.thread_helper import conditional_thread_semaphore -from facefusion.types import ApplyStateItem, Args, AudioFrame, BoundingBox, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame -from facefusion.vision import read_image, read_static_image, restrict_video_fps, write_image +from facefusion.types import ApplyStateItem, Args, AudioFrame, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, VisionFrame +from facefusion.vision import read_static_image, read_static_video_frame -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { @@ -138,20 +133,12 @@ def pre_process(mode : ProcessMode) -> bool: if not has_audio(state_manager.get_item('source_paths')): logger.error(wording.get('choose_audio_source') + wording.get('exclamation_mark'), __name__) return False - if mode in [ 'output', 'preview' ] and not is_image(state_manager.get_item('target_path')) and not is_video(state_manager.get_item('target_path')): - logger.error(wording.get('choose_image_or_video_target') + wording.get('exclamation_mark'), __name__) - return False - if mode == 'output' and not in_directory(state_manager.get_item('output_path')): - logger.error(wording.get('specify_image_or_video_output') + wording.get('exclamation_mark'), __name__) - return False - if mode == 'output' and not same_file_extension(state_manager.get_item('target_path'), state_manager.get_item('output_path')): - logger.error(wording.get('match_target_and_output_extension') + wording.get('exclamation_mark'), __name__) - return False return True def post_process() -> None: read_static_image.cache_clear() + read_static_video_frame.cache_clear() read_static_voice.cache_clear() video_manager.clear_video_pool() if state_manager.get_item('video_memory_strategy') in [ 'strict', 'moderate' ]: @@ -166,10 +153,10 @@ def post_process() -> None: voice_extractor.clear_inference_pool() -def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: +def sync_lip(target_face : Face, source_voice_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: model_type = get_model_options().get('type') model_size = get_model_options().get('size') - temp_audio_frame = prepare_audio_frame(temp_audio_frame) + source_voice_frame = prepare_audio_frame(source_voice_frame) crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark_set.get('5/68'), 'ffhq_512', (512, 512)) crop_masks = [] @@ -182,17 +169,17 @@ def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_fram box_mask = create_box_mask(crop_vision_frame, state_manager.get_item('face_mask_blur'), state_manager.get_item('face_mask_padding')) crop_masks.append(box_mask) crop_vision_frame = prepare_crop_frame(crop_vision_frame) - crop_vision_frame = forward_edtalk(temp_audio_frame, crop_vision_frame, lip_syncer_weight) + crop_vision_frame = forward_edtalk(source_voice_frame, crop_vision_frame, lip_syncer_weight) crop_vision_frame = normalize_crop_frame(crop_vision_frame) + if model_type == 'wav2lip': 
face_landmark_68 = cv2.transform(target_face.landmark_set.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) area_mask = create_area_mask(crop_vision_frame, face_landmark_68, [ 'lower-face' ]) crop_masks.append(area_mask) bounding_box = create_bounding_box(face_landmark_68) - bounding_box = resize_bounding_box(bounding_box, 1 / 8) area_vision_frame, area_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, model_size) area_vision_frame = prepare_crop_frame(area_vision_frame) - area_vision_frame = forward_wav2lip(temp_audio_frame, area_vision_frame) + area_vision_frame = forward_wav2lip(source_voice_frame, area_vision_frame) area_vision_frame = normalize_crop_frame(area_vision_frame) crop_vision_frame = cv2.warpAffine(area_vision_frame, cv2.invertAffineTransform(area_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE) @@ -249,23 +236,17 @@ def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: crop_vision_frame = cv2.resize(crop_vision_frame, model_size, interpolation = cv2.INTER_AREA) crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 crop_vision_frame = numpy.expand_dims(crop_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + if model_type == 'wav2lip': crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) prepare_vision_frame = crop_vision_frame.copy() prepare_vision_frame[:, model_size[0] // 2:] = 0 crop_vision_frame = numpy.concatenate((prepare_vision_frame, crop_vision_frame), axis = 3) - crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0 + crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype(numpy.float32) / 255.0 return crop_vision_frame -def resize_bounding_box(bounding_box : BoundingBox, aspect_ratio : float) -> BoundingBox: - x1, y1, x2, y2 = bounding_box - y1 -= numpy.abs(y2 - y1) * aspect_ratio - bounding_box[1] = max(y1, 0) - return bounding_box - - def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: model_type = get_model_options().get('type') crop_vision_frame = crop_vision_frame[0].transpose(1, 2, 0) @@ -279,70 +260,16 @@ def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: return crop_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: - pass - - def process_frame(inputs : LipSyncerInputs) -> VisionFrame: - reference_faces = inputs.get('reference_faces') - source_audio_frame = inputs.get('source_audio_frame') + reference_vision_frame = inputs.get('reference_vision_frame') + source_voice_frame = inputs.get('source_voice_frame') target_vision_frame = inputs.get('target_vision_frame') - many_faces = sort_and_filter_faces(get_many_faces([ target_vision_frame ])) + temp_vision_frame = inputs.get('temp_vision_frame') + target_faces = select_faces(reference_vision_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'many': - if many_faces: - for target_face in many_faces: - target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'one': - target_face = get_one_face(many_faces) - if target_face: - target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) - if state_manager.get_item('face_selector_mode') == 'reference': - similar_faces = find_similar_faces(many_faces, reference_faces, state_manager.get_item('reference_face_distance')) - if similar_faces: - for similar_face in similar_faces: - 
target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame) - return target_vision_frame + if target_faces: + for target_face in target_faces: + temp_vision_frame = sync_lip(target_face, source_voice_frame, temp_vision_frame) + return temp_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_audio_path = get_first(filter_audio_paths(source_paths)) - temp_video_fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps')) - - for queue_payload in process_manager.manage(queue_payloads): - frame_number = queue_payload.get('frame_number') - target_vision_path = queue_payload.get('frame_path') - source_audio_frame = get_voice_frame(source_audio_path, temp_video_fps, frame_number) - if not numpy.any(source_audio_frame): - source_audio_frame = create_empty_audio_frame() - target_vision_frame = read_image(target_vision_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'source_audio_frame': source_audio_frame, - 'target_vision_frame': target_vision_frame - }) - write_image(target_vision_path, output_vision_frame) - update_progress(1) - - -def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_audio_frame = create_empty_audio_frame() - target_vision_frame = read_static_image(target_path) - output_vision_frame = process_frame( - { - 'reference_faces': reference_faces, - 'source_audio_frame': source_audio_frame, - 'target_vision_frame': target_vision_frame - }) - write_image(output_path, output_vision_frame) - - -def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: - source_audio_paths = filter_audio_paths(state_manager.get_item('source_paths')) - temp_video_fps = restrict_video_fps(state_manager.get_item('target_path'), state_manager.get_item('output_video_fps')) - for source_audio_path in source_audio_paths: - read_static_voice(source_audio_path, temp_video_fps) - processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/types.py b/facefusion/processors/types.py index bf9b9f6..444bd73 100644 --- a/facefusion/processors/types.py +++ b/facefusion/processors/types.py @@ -2,12 +2,13 @@ from typing import Any, Dict, List, Literal, TypeAlias, TypedDict from numpy.typing import NDArray -from facefusion.types import AppContext, AudioFrame, Face, FaceSet, VisionFrame +from facefusion.types import AppContext, AudioFrame, VisionFrame AgeModifierModel = Literal['styleganex_age'] DeepSwapperModel : TypeAlias = str ExpressionRestorerModel = Literal['live_portrait'] -FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender', 'race'] +ExpressionRestorerArea = Literal['upper-face', 'lower-face'] +FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask'] FaceEditorModel = Literal['live_portrait'] FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 
'gpen_bfr_2048', 'restoreformer_plus_plus'] FaceSwapperModel = Literal['blendswap_256', 'ghost_1_256', 'ghost_2_256', 'ghost_3_256', 'hififace_unofficial_256', 'hyperswap_1a_256', 'hyperswap_1b_256', 'hyperswap_1c_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_unofficial_512', 'uniface_256'] @@ -19,56 +20,81 @@ FaceSwapperSet : TypeAlias = Dict[FaceSwapperModel, List[str]] AgeModifierInputs = TypedDict('AgeModifierInputs', { - 'reference_faces' : FaceSet, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) DeepSwapperInputs = TypedDict('DeepSwapperInputs', { - 'reference_faces' : FaceSet, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) ExpressionRestorerInputs = TypedDict('ExpressionRestorerInputs', { - 'reference_faces' : FaceSet, - 'source_vision_frame' : VisionFrame, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'source_vision_frames' : List[VisionFrame], + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', { - 'reference_faces' : FaceSet, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FaceEditorInputs = TypedDict('FaceEditorInputs', { - 'reference_faces' : FaceSet, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FaceEnhancerInputs = TypedDict('FaceEnhancerInputs', { - 'reference_faces' : FaceSet, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FaceSwapperInputs = TypedDict('FaceSwapperInputs', { - 'reference_faces' : FaceSet, - 'source_face' : Face, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'source_vision_frames' : List[VisionFrame], + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FrameColorizerInputs = TypedDict('FrameColorizerInputs', { - 'target_vision_frame' : VisionFrame + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) FrameEnhancerInputs = TypedDict('FrameEnhancerInputs', { - 'target_vision_frame' : VisionFrame + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) LipSyncerInputs = TypedDict('LipSyncerInputs', { - 'reference_faces' : FaceSet, - 'source_audio_frame' : AudioFrame, - 'target_vision_frame' : VisionFrame + 'reference_vision_frame' : VisionFrame, + 'source_voice_frame' : AudioFrame, + 'target_vision_frame' : VisionFrame, + 'temp_vision_frame' : VisionFrame }) +AgeModifierDirection : TypeAlias = NDArray[Any] +DeepSwapperMorph : TypeAlias = NDArray[Any] +FaceEnhancerWeight : TypeAlias = NDArray[Any] +FaceSwapperWeight : TypeAlias = float +LipSyncerWeight : TypeAlias = NDArray[Any] +LivePortraitPitch : TypeAlias = float +LivePortraitYaw : TypeAlias = float +LivePortraitRoll : TypeAlias = float +LivePortraitExpression : TypeAlias = NDArray[Any] +LivePortraitFeatureVolume : TypeAlias = NDArray[Any] +LivePortraitMotionPoints : TypeAlias = NDArray[Any] +LivePortraitRotation : TypeAlias = NDArray[Any] +LivePortraitScale : TypeAlias = NDArray[Any] +LivePortraitTranslation : TypeAlias = 
NDArray[Any] + ProcessorStateKey = Literal\ [ 'age_modifier_model', @@ -77,6 +103,7 @@ ProcessorStateKey = Literal\ 'deep_swapper_morph', 'expression_restorer_model', 'expression_restorer_factor', + 'expression_restorer_areas', 'face_debugger_items', 'face_editor_model', 'face_editor_eyebrow_direction', @@ -98,6 +125,7 @@ ProcessorStateKey = Literal\ 'face_enhancer_weight', 'face_swapper_model', 'face_swapper_pixel_boost', + 'face_swapper_weight', 'frame_colorizer_model', 'frame_colorizer_size', 'frame_colorizer_blend', @@ -114,6 +142,7 @@ ProcessorState = TypedDict('ProcessorState', 'deep_swapper_morph' : int, 'expression_restorer_model' : ExpressionRestorerModel, 'expression_restorer_factor' : int, + 'expression_restorer_areas' : List[ExpressionRestorerArea], 'face_debugger_items' : List[FaceDebuggerItem], 'face_editor_model' : FaceEditorModel, 'face_editor_eyebrow_direction' : float, @@ -132,28 +161,16 @@ ProcessorState = TypedDict('ProcessorState', 'face_editor_head_roll' : float, 'face_enhancer_model' : FaceEnhancerModel, 'face_enhancer_blend' : int, - 'face_enhancer_weight' : float, + 'face_enhancer_weight' : FaceEnhancerWeight, 'face_swapper_model' : FaceSwapperModel, 'face_swapper_pixel_boost' : str, + 'face_swapper_weight' : FaceSwapperWeight, 'frame_colorizer_model' : FrameColorizerModel, 'frame_colorizer_size' : str, 'frame_colorizer_blend' : int, 'frame_enhancer_model' : FrameEnhancerModel, 'frame_enhancer_blend' : int, - 'lip_syncer_model' : LipSyncerModel + 'lip_syncer_model' : LipSyncerModel, + 'lip_syncer_weight' : LipSyncerWeight }) ProcessorStateSet : TypeAlias = Dict[AppContext, ProcessorState] - -AgeModifierDirection : TypeAlias = NDArray[Any] -DeepSwapperMorph : TypeAlias = NDArray[Any] -FaceEnhancerWeight : TypeAlias = NDArray[Any] -LipSyncerWeight : TypeAlias = NDArray[Any] -LivePortraitPitch : TypeAlias = float -LivePortraitYaw : TypeAlias = float -LivePortraitRoll : TypeAlias = float -LivePortraitExpression : TypeAlias = NDArray[Any] -LivePortraitFeatureVolume : TypeAlias = NDArray[Any] -LivePortraitMotionPoints : TypeAlias = NDArray[Any] -LivePortraitRotation : TypeAlias = NDArray[Any] -LivePortraitScale : TypeAlias = NDArray[Any] -LivePortraitTranslation : TypeAlias = NDArray[Any] diff --git a/facefusion/program.py b/facefusion/program.py index 5d3d62d..8bbfdab 100755 --- a/facefusion/program.py +++ b/facefusion/program.py @@ -144,6 +144,14 @@ def create_face_masker_program() -> ArgumentParser: return program +def create_voice_extractor_program() -> ArgumentParser: + program = ArgumentParser(add_help = False) + group_voice_extractor = program.add_argument_group('voice extractor') + group_voice_extractor.add_argument('--voice-extractor-model', help = wording.get('help.voice_extractor_model'), default = config.get_str_value('voice_extractor', 'voice_extractor_model', 'kim_vocal_2'), choices = facefusion.choices.voice_extractor_models) + job_store.register_step_keys([ 'voice_extractor_model' ]) + return program + + def create_frame_extraction_program() -> ArgumentParser: program = ArgumentParser(add_help = False) group_frame_extraction = program.add_argument_group('frame extraction') @@ -160,16 +168,16 @@ def create_output_creation_program() -> ArgumentParser: available_encoder_set = get_available_encoder_set() group_output_creation = program.add_argument_group('output creation') group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation', 
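Every processor input above now carries the reference frame, the untouched target frame and a running temp frame that accumulates the processors' output. A purely illustrative call against the face_swapper module from this patch, seeding temp_vision_frame with a copy of the target the way the new streamer module does below:

from typing import List

from facefusion.processors.modules import face_swapper
from facefusion.types import VisionFrame


def apply_face_swap(reference_vision_frame : VisionFrame, source_vision_frames : List[VisionFrame], target_vision_frame : VisionFrame) -> VisionFrame:
	return face_swapper.process_frame(
	{
		'reference_vision_frame': reference_vision_frame,
		'source_vision_frames': source_vision_frames,
		'target_vision_frame': target_vision_frame,
		'temp_vision_frame': target_vision_frame.copy()
	})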
'output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range)) - group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation', 'output_image_resolution')) + group_output_creation.add_argument('--output-image-scale', help = wording.get('help.output_image_scale'), type = float, default = config.get_float_value('output_creation', 'output_image_scale', '1.0'), choices = facefusion.choices.output_image_scale_range) group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation', 'output_audio_encoder', get_first(available_encoder_set.get('audio'))), choices = available_encoder_set.get('audio')) group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation', 'output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range)) group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation', 'output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range)) group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation', 'output_video_encoder', get_first(available_encoder_set.get('video'))), choices = available_encoder_set.get('video')) group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation', 'output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation', 'output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range)) - group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation', 'output_video_resolution')) - group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation', 'output_video_fps')) - job_store.register_step_keys([ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps' ]) + group_output_creation.add_argument('--output-video-scale', help = wording.get('help.output_video_scale'), type = float, default = config.get_float_value('output_creation', 'output_video_scale', '1.0'), choices = facefusion.choices.output_video_scale_range) + group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_float_value('output_creation', 'output_video_fps')) + 
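--output-image-scale and --output-video-scale replace the old absolute resolution options with a factor from the 1.0-default scale ranges. The scaling helper lives in facefusion.vision and is not shown in this diff; a minimal sketch under the assumption of a simple multiply-and-truncate:

from typing import Tuple


def scale_resolution(resolution : Tuple[int, int], scale : float) -> Tuple[int, int]:
	width, height = resolution
	return int(width * scale), int(height * scale)

# the default scale of 1.0 keeps (1920, 1080) as is, 0.5 gives (960, 540), 2.0 gives (3840, 2160)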
job_store.register_step_keys([ 'output_image_quality', 'output_image_scale', 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_scale', 'output_video_fps' ]) return program @@ -213,6 +221,7 @@ def create_download_scope_program() -> ArgumentParser: def create_benchmark_program() -> ArgumentParser: program = ArgumentParser(add_help = False) group_benchmark = program.add_argument_group('benchmark') + group_benchmark.add_argument('--benchmark-mode', help = wording.get('help.benchmark_mode'), default = config.get_str_value('benchmark', 'benchmark_mode', 'warm'), choices = facefusion.choices.benchmark_modes) group_benchmark.add_argument('--benchmark-resolutions', help = wording.get('help.benchmark_resolutions'), default = config.get_str_list('benchmark', 'benchmark_resolutions', get_first(facefusion.choices.benchmark_resolutions)), choices = facefusion.choices.benchmark_resolutions, nargs = '+') group_benchmark.add_argument('--benchmark-cycle-count', help = wording.get('help.benchmark_cycle_count'), type = int, default = config.get_int_value('benchmark', 'benchmark_cycle_count', '5'), choices = facefusion.choices.benchmark_cycle_count_range) return program @@ -222,11 +231,10 @@ def create_execution_program() -> ArgumentParser: program = ArgumentParser(add_help = False) available_execution_providers = get_available_execution_providers() group_execution = program.add_argument_group('execution') - group_execution.add_argument('--execution-device-id', help = wording.get('help.execution_device_id'), default = config.get_str_value('execution', 'execution_device_id', '0')) + group_execution.add_argument('--execution-device-ids', help = wording.get('help.execution_device_ids'), default = config.get_str_list('execution', 'execution_device_ids', '0'), nargs = '+', metavar = 'EXECUTION_DEVICE_IDS') group_execution.add_argument('--execution-providers', help = wording.get('help.execution_providers').format(choices = ', '.join(available_execution_providers)), default = config.get_str_list('execution', 'execution_providers', get_first(available_execution_providers)), choices = available_execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') group_execution.add_argument('--execution-thread-count', help = wording.get('help.execution_thread_count'), type = int, default = config.get_int_value('execution', 'execution_thread_count', '4'), choices = facefusion.choices.execution_thread_count_range, metavar = create_int_metavar(facefusion.choices.execution_thread_count_range)) - group_execution.add_argument('--execution-queue-count', help = wording.get('help.execution_queue_count'), type = int, default = config.get_int_value('execution', 'execution_queue_count', '1'), choices = facefusion.choices.execution_queue_count_range, metavar = create_int_metavar(facefusion.choices.execution_queue_count_range)) - job_store.register_job_keys([ 'execution_device_id', 'execution_providers', 'execution_thread_count', 'execution_queue_count' ]) + job_store.register_job_keys([ 'execution_device_ids', 'execution_providers', 'execution_thread_count' ]) return program @@ -275,7 +283,7 @@ def create_step_index_program() -> ArgumentParser: def collect_step_program() -> ArgumentParser: - return ArgumentParser(parents = [ create_face_detector_program(), create_face_landmarker_program(), create_face_selector_program(), create_face_masker_program(), create_frame_extraction_program(), create_output_creation_program(), 
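--execution-device-id becomes the plural --execution-device-ids, allowing one entry per GPU. The pooling itself is implemented in inference_manager outside this hunk; a hypothetical sketch of a per-device session fan-out with ONNX Runtime, where the CUDA provider and the helper name are assumptions:

from typing import List

import onnxruntime


def create_inference_sessions(model_path : str, execution_device_ids : List[str]) -> List[onnxruntime.InferenceSession]:
	inference_sessions = []

	for execution_device_id in execution_device_ids:
		# one session per device, each pinned via the provider options
		execution_providers = [ ('CUDAExecutionProvider', { 'device_id': int(execution_device_id) }) ]
		inference_sessions.append(onnxruntime.InferenceSession(model_path, providers = execution_providers))
	return inference_sessions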
create_processors_program() ], add_help = False) + return ArgumentParser(parents = [ create_face_detector_program(), create_face_landmarker_program(), create_face_selector_program(), create_face_masker_program(), create_voice_extractor_program(), create_frame_extraction_program(), create_output_creation_program(), create_processors_program() ], add_help = False) def collect_job_program() -> ArgumentParser: @@ -288,7 +296,7 @@ def create_program() -> ArgumentParser: program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') sub_program = program.add_subparsers(dest = 'command') # general - sub_program.add_parser('run', help = wording.get('help.run'), parents = [ create_config_path_program(), create_temp_path_program(), create_jobs_path_program(), create_source_paths_program(), create_target_path_program(), create_output_path_program(), collect_step_program(), create_uis_program(), collect_job_program() ], formatter_class = create_help_formatter_large) + sub_program.add_parser('run', help = wording.get('help.run'), parents = [ create_config_path_program(), create_temp_path_program(), create_jobs_path_program(), create_source_paths_program(), create_target_path_program(), create_output_path_program(), collect_step_program(), create_uis_program(), create_benchmark_program(), collect_job_program() ], formatter_class = create_help_formatter_large) sub_program.add_parser('headless-run', help = wording.get('help.headless_run'), parents = [ create_config_path_program(), create_temp_path_program(), create_jobs_path_program(), create_source_paths_program(), create_target_path_program(), create_output_path_program(), collect_step_program(), collect_job_program() ], formatter_class = create_help_formatter_large) sub_program.add_parser('batch-run', help = wording.get('help.batch_run'), parents = [ create_config_path_program(), create_temp_path_program(), create_jobs_path_program(), create_source_pattern_program(), create_target_pattern_program(), create_output_pattern_program(), collect_step_program(), collect_job_program() ], formatter_class = create_help_formatter_large) sub_program.add_parser('force-download', help = wording.get('help.force_download'), parents = [ create_download_providers_program(), create_download_scope_program(), create_log_level_program() ], formatter_class = create_help_formatter_large) diff --git a/facefusion/state_manager.py b/facefusion/state_manager.py index aba6c57..3842735 100644 --- a/facefusion/state_manager.py +++ b/facefusion/state_manager.py @@ -16,6 +16,10 @@ def get_state() -> Union[State, ProcessorState]: return STATE_SET.get(app_context) +def sync_state() -> None: + STATE_SET['cli'] = STATE_SET.get('ui') #type:ignore[assignment] + + def init_item(key : Union[StateKey, ProcessorStateKey], value : Any) -> None: STATE_SET['cli'][key] = value #type:ignore[literal-required] STATE_SET['ui'][key] = value #type:ignore[literal-required] diff --git a/facefusion/streamer.py b/facefusion/streamer.py new file mode 100644 index 0000000..b53a811 --- /dev/null +++ b/facefusion/streamer.py @@ -0,0 +1,98 @@ +import os +import subprocess +from collections import deque +from concurrent.futures import ThreadPoolExecutor +from typing import Deque, Generator + +import cv2 +import numpy +from tqdm import tqdm + +from facefusion import ffmpeg_builder, logger, state_manager, wording +from facefusion.audio import create_empty_audio_frame +from facefusion.content_analyser import analyse_stream +from facefusion.ffmpeg import 
open_ffmpeg +from facefusion.filesystem import is_directory +from facefusion.processors.core import get_processors_modules +from facefusion.types import Fps, StreamMode, VisionFrame +from facefusion.vision import read_static_images + + +def multi_process_capture(camera_capture : cv2.VideoCapture, camera_fps : Fps) -> Generator[VisionFrame, None, None]: + capture_deque : Deque[VisionFrame] = deque() + + with tqdm(desc = wording.get('streaming'), unit = 'frame', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress: + with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor: + futures = [] + + while camera_capture and camera_capture.isOpened(): + _, capture_frame = camera_capture.read() + if analyse_stream(capture_frame, camera_fps): + camera_capture.release() + + if numpy.any(capture_frame): + future = executor.submit(process_stream_frame, capture_frame) + futures.append(future) + + for future_done in [ future for future in futures if future.done() ]: + capture_frame = future_done.result() + capture_deque.append(capture_frame) + futures.remove(future_done) + + while capture_deque: + progress.update() + yield capture_deque.popleft() + + +def process_stream_frame(target_vision_frame : VisionFrame) -> VisionFrame: + source_vision_frames = read_static_images(state_manager.get_item('source_paths')) + source_audio_frame = create_empty_audio_frame() + source_voice_frame = create_empty_audio_frame() + temp_vision_frame = target_vision_frame.copy() + + for processor_module in get_processors_modules(state_manager.get_item('processors')): + logger.disable() + if processor_module.pre_process('stream'): + logger.enable() + temp_vision_frame = processor_module.process_frame( + { + 'source_vision_frames': source_vision_frames, + 'source_audio_frame': source_audio_frame, + 'source_voice_frame': source_voice_frame, + 'target_vision_frame': target_vision_frame, + 'temp_vision_frame': temp_vision_frame + }) + logger.enable() + + return temp_vision_frame + + +def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: + commands = ffmpeg_builder.chain( + ffmpeg_builder.capture_video(), + ffmpeg_builder.set_media_resolution(stream_resolution), + ffmpeg_builder.set_input_fps(stream_fps) + ) + + if stream_mode == 'udp': + commands.extend(ffmpeg_builder.set_input('-')) + commands.extend(ffmpeg_builder.set_stream_mode('udp')) + commands.extend(ffmpeg_builder.set_stream_quality(2000)) + commands.extend(ffmpeg_builder.set_output('udp://localhost:27000?pkt_size=1316')) + + if stream_mode == 'v4l2': + device_directory_path = '/sys/devices/virtual/video4linux' + commands.extend(ffmpeg_builder.set_input('-')) + commands.extend(ffmpeg_builder.set_stream_mode('v4l2')) + + if is_directory(device_directory_path): + device_names = os.listdir(device_directory_path) + + for device_name in device_names: + device_path = '/dev/' + device_name + commands.extend(ffmpeg_builder.set_output(device_path)) + + else: + logger.error(wording.get('stream_not_loaded').format(stream_mode = stream_mode), __name__) + + return open_ffmpeg(commands) diff --git a/facefusion/thread_helper.py b/facefusion/thread_helper.py index 84717f9..9bd1b7b 100644 --- a/facefusion/thread_helper.py +++ b/facefusion/thread_helper.py @@ -2,6 +2,7 @@ import threading from contextlib import nullcontext from typing import ContextManager, Union +from facefusion.common_helper import is_linux, is_windows from facefusion.execution import 
has_execution_provider THREAD_LOCK : threading.Lock = threading.Lock() @@ -18,6 +19,6 @@ def thread_semaphore() -> threading.Semaphore: def conditional_thread_semaphore() -> Union[threading.Semaphore, ContextManager[None]]: - if has_execution_provider('directml') or has_execution_provider('rocm'): + if is_windows() and has_execution_provider('directml') or is_linux() and has_execution_provider('migraphx') or is_linux() and has_execution_provider('rocm'): return THREAD_SEMAPHORE return NULL_CONTEXT diff --git a/facefusion/date_helper.py b/facefusion/time_helper.py similarity index 89% rename from facefusion/date_helper.py rename to facefusion/time_helper.py index c60e2f6..17f8d80 100644 --- a/facefusion/date_helper.py +++ b/facefusion/time_helper.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +from time import time from typing import Optional, Tuple from facefusion import wording @@ -8,6 +9,10 @@ def get_current_date_time() -> datetime: return datetime.now().astimezone() +def calculate_end_time(start_time : float) -> float: + return round(time() - start_time, 2) + + def split_time_delta(time_delta : timedelta) -> Tuple[int, int, int, int]: days, hours = divmod(time_delta.total_seconds(), 86400) hours, minutes = divmod(hours, 3600) diff --git a/facefusion/types.py b/facefusion/types.py index 45be13f..6af0bb9 100755 --- a/facefusion/types.py +++ b/facefusion/types.py @@ -39,7 +39,7 @@ Face = namedtuple('Face', 'landmark_set', 'angle', 'embedding', - 'normed_embedding', + 'embedding_norm', 'gender', 'age', 'race' @@ -47,10 +47,21 @@ Face = namedtuple('Face', FaceSet : TypeAlias = Dict[str, List[Face]] FaceStore = TypedDict('FaceStore', { - 'static_faces' : FaceSet, - 'reference_faces' : FaceSet + 'static_faces' : FaceSet +}) + +VideoCaptureSet : TypeAlias = Dict[str, cv2.VideoCapture] +VideoWriterSet : TypeAlias = Dict[str, cv2.VideoWriter] +CameraCaptureSet : TypeAlias = Dict[str, cv2.VideoCapture] +VideoPoolSet = TypedDict('VideoPoolSet', +{ + 'capture': VideoCaptureSet, + 'writer': VideoWriterSet +}) +CameraPoolSet = TypedDict('CameraPoolSet', +{ + 'capture': CameraCaptureSet }) -VideoPoolSet : TypeAlias = Dict[str, cv2.VideoCapture] VisionFrame : TypeAlias = NDArray[Any] Mask : TypeAlias = NDArray[Any] @@ -67,6 +78,8 @@ AudioFrame : TypeAlias = NDArray[Any] Spectrogram : TypeAlias = NDArray[Any] Mel : TypeAlias = NDArray[Any] MelFilterBank : TypeAlias = NDArray[Any] +Voice : TypeAlias = NDArray[Any] +VoiceChunk : TypeAlias = NDArray[Any] Fps : TypeAlias = float Duration : TypeAlias = float @@ -75,14 +88,8 @@ Orientation = Literal['landscape', 'portrait'] Resolution : TypeAlias = Tuple[int, int] ProcessState = Literal['checking', 'processing', 'stopping', 'pending'] -QueuePayload = TypedDict('QueuePayload', -{ - 'frame_number' : int, - 'frame_path' : str -}) Args : TypeAlias = Dict[str, Any] UpdateProgress : TypeAlias = Callable[[int], None] -ProcessFrames : TypeAlias = Callable[[List[str], List[QueuePayload], UpdateProgress], None] ProcessStep : TypeAlias = Callable[[str, int, Args], bool] Content : TypeAlias = Dict[str, Any] @@ -100,12 +107,12 @@ LogLevelSet : TypeAlias = Dict[LogLevel, int] TableHeaders = List[str] TableContents = List[List[Any]] -FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yolo_face'] +FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yolo_face', 'yunet'] FaceLandmarkerModel = Literal['many', '2dfan4', 'peppa_wutz'] FaceDetectorSet : TypeAlias = Dict[FaceDetectorModel, List[str]] FaceSelectorMode = Literal['many', 'one', 
'reference'] FaceSelectorOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best'] -FaceOccluderModel = Literal['xseg_1', 'xseg_2', 'xseg_3'] +FaceOccluderModel = Literal['many', 'xseg_1', 'xseg_2', 'xseg_3'] FaceParserModel = Literal['bisenet_resnet_18', 'bisenet_resnet_34'] FaceMaskType = Literal['box', 'occlusion', 'area', 'region'] FaceMaskArea = Literal['upper-face', 'lower-face', 'mouth'] @@ -113,16 +120,18 @@ FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'r FaceMaskRegionSet : TypeAlias = Dict[FaceMaskRegion, int] FaceMaskAreaSet : TypeAlias = Dict[FaceMaskArea, List[int]] +VoiceExtractorModel = Literal['kim_vocal_1', 'kim_vocal_2', 'uvr_mdxnet'] + AudioFormat = Literal['flac', 'm4a', 'mp3', 'ogg', 'opus', 'wav'] ImageFormat = Literal['bmp', 'jpeg', 'png', 'tiff', 'webp'] -VideoFormat = Literal['avi', 'm4v', 'mkv', 'mov', 'mp4', 'webm'] +VideoFormat = Literal['avi', 'm4v', 'mkv', 'mov', 'mp4', 'webm', 'wmv'] TempFrameFormat = Literal['bmp', 'jpeg', 'png', 'tiff'] AudioTypeSet : TypeAlias = Dict[AudioFormat, str] ImageTypeSet : TypeAlias = Dict[ImageFormat, str] VideoTypeSet : TypeAlias = Dict[VideoFormat, str] AudioEncoder = Literal['flac', 'aac', 'libmp3lame', 'libopus', 'libvorbis', 'pcm_s16le', 'pcm_s32le'] -VideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox', 'rawvideo'] +VideoEncoder = Literal['libx264', 'libx264rgb', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox', 'rawvideo'] EncoderSet = TypedDict('EncoderSet', { 'audio' : List[AudioEncoder], @@ -130,6 +139,7 @@ EncoderSet = TypedDict('EncoderSet', }) VideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'] +BenchmarkMode = Literal['warm', 'cold'] BenchmarkResolution = Literal['240p', '360p', '540p', '720p', '1080p', '1440p', '2160p'] BenchmarkSet : TypeAlias = Dict[BenchmarkResolution, str] BenchmarkCycleSet = TypedDict('BenchmarkCycleSet', @@ -149,8 +159,8 @@ ModelOptions : TypeAlias = Dict[str, Any] ModelSet : TypeAlias = Dict[str, ModelOptions] ModelInitializer : TypeAlias = NDArray[Any] -ExecutionProvider = Literal['cpu', 'coreml', 'cuda', 'directml', 'openvino', 'rocm', 'tensorrt'] -ExecutionProviderValue = Literal['CPUExecutionProvider', 'CoreMLExecutionProvider', 'CUDAExecutionProvider', 'DmlExecutionProvider', 'OpenVINOExecutionProvider', 'ROCMExecutionProvider', 'TensorrtExecutionProvider'] +ExecutionProvider = Literal['cpu', 'coreml', 'cuda', 'directml', 'openvino', 'migraphx', 'rocm', 'tensorrt'] +ExecutionProviderValue = Literal['CPUExecutionProvider', 'CoreMLExecutionProvider', 'CUDAExecutionProvider', 'DmlExecutionProvider', 'OpenVINOExecutionProvider', 'MIGraphXExecutionProvider', 'ROCMExecutionProvider', 'TensorrtExecutionProvider'] ExecutionProviderSet : TypeAlias = Dict[ExecutionProvider, ExecutionProviderValue] InferenceSessionProvider : TypeAlias = Any ValueAndUnit = TypedDict('ValueAndUnit', @@ -189,7 +199,7 @@ ExecutionDevice = TypedDict('ExecutionDevice', 'framework' : ExecutionDeviceFramework, 'product' : ExecutionDeviceProduct, 'video_memory' : ExecutionDeviceVideoMemory, - 'temperature': ExecutionDeviceTemperature, + 'temperature' : ExecutionDeviceTemperature, 'utilization' : ExecutionDeviceUtilization }) @@ -252,6 +262,7 
@@ StateKey = Literal\ 'output_pattern', 'download_providers', 'download_scope', + 'benchmark_mode', 'benchmark_resolutions', 'benchmark_cycle_count', 'face_detector_model', @@ -276,28 +287,28 @@ StateKey = Literal\ 'face_mask_regions', 'face_mask_blur', 'face_mask_padding', + 'voice_extractor_model', 'trim_frame_start', 'trim_frame_end', 'temp_frame_format', 'keep_temp', 'output_image_quality', - 'output_image_resolution', + 'output_image_scale', 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', - 'output_video_resolution', + 'output_video_scale', 'output_video_fps', 'processors', 'open_browser', 'ui_layouts', 'ui_workflow', - 'execution_device_id', + 'execution_device_ids', 'execution_providers', 'execution_thread_count', - 'execution_queue_count', 'video_memory_strategy', 'system_memory_limit', 'log_level', @@ -318,10 +329,11 @@ State = TypedDict('State', 'source_pattern' : str, 'target_pattern' : str, 'output_pattern' : str, - 'download_providers': List[DownloadProvider], - 'download_scope': DownloadScope, - 'benchmark_resolutions': List[BenchmarkResolution], - 'benchmark_cycle_count': int, + 'download_providers' : List[DownloadProvider], + 'download_scope' : DownloadScope, + 'benchmark_mode' : BenchmarkMode, + 'benchmark_resolutions' : List[BenchmarkResolution], + 'benchmark_cycle_count' : int, 'face_detector_model' : FaceDetectorModel, 'face_detector_size' : str, 'face_detector_angles' : List[Angle], @@ -344,28 +356,28 @@ State = TypedDict('State', 'face_mask_regions' : List[FaceMaskRegion], 'face_mask_blur' : float, 'face_mask_padding' : Padding, + 'voice_extractor_model': VoiceExtractorModel, 'trim_frame_start' : int, 'trim_frame_end' : int, 'temp_frame_format' : TempFrameFormat, 'keep_temp' : bool, 'output_image_quality' : int, - 'output_image_resolution' : str, + 'output_image_scale' : Scale, 'output_audio_encoder' : AudioEncoder, 'output_audio_quality' : int, 'output_audio_volume' : int, 'output_video_encoder' : VideoEncoder, 'output_video_preset' : VideoPreset, 'output_video_quality' : int, - 'output_video_resolution' : str, + 'output_video_scale' : Scale, 'output_video_fps' : float, 'processors' : List[str], 'open_browser' : bool, 'ui_layouts' : List[str], 'ui_workflow' : UiWorkflow, - 'execution_device_id' : str, + 'execution_device_ids' : List[str], 'execution_providers' : List[ExecutionProvider], 'execution_thread_count' : int, - 'execution_queue_count' : int, 'video_memory_strategy' : VideoMemoryStrategy, 'system_memory_limit' : int, 'log_level' : LogLevel, diff --git a/facefusion/uis/choices.py b/facefusion/uis/choices.py index 228a67b..2978c20 100644 --- a/facefusion/uis/choices.py +++ b/facefusion/uis/choices.py @@ -1,9 +1,15 @@ from typing import List -from facefusion.uis.types import JobManagerAction, JobRunnerAction +from facefusion.types import WebcamMode +from facefusion.uis.types import JobManagerAction, JobRunnerAction, PreviewMode job_manager_actions : List[JobManagerAction] = [ 'job-create', 'job-submit', 'job-delete', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step' ] job_runner_actions : List[JobRunnerAction] = [ 'job-run', 'job-run-all', 'job-retry', 'job-retry-all' ] common_options : List[str] = [ 'keep-temp' ] +preview_modes : List[PreviewMode] = [ 'default', 'frame-by-frame', 'face-by-face' ] +preview_resolutions : List[str] = [ '512x512', '768x768', '1024x1024' ] + +webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ] 
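The move from output_image_resolution / output_video_resolution strings to Scale floats means the final dimensions are now derived from the target media at runtime. The derivation helper itself is not part of this excerpt; a minimal sketch, assuming the scale is applied per axis and snapped to even values for encoder compatibility (the name scale_resolution and the rounding behaviour are illustrative, not taken from this patch):

from typing import Tuple

Resolution = Tuple[int, int]
Scale = float


def scale_resolution(resolution : Resolution, scale : Scale) -> Resolution:
	# multiply both dimensions, then snap to even numbers so video
	# encoders such as libx264 accept the output resolution
	width, height = resolution
	return int(width * scale) // 2 * 2, int(height * scale) // 2 * 2


# the default scale of 1.0 keeps 1920x1080 untouched, 0.5 halves it
assert scale_resolution((1920, 1080), 1.0) == (1920, 1080)
assert scale_resolution((1920, 1080), 0.5) == (960, 540)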
+webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080' ] diff --git a/facefusion/uis/components/age_modifier_options.py b/facefusion/uis/components/age_modifier_options.py index e42065e..e0120c9 100755 --- a/facefusion/uis/components/age_modifier_options.py +++ b/facefusion/uis/components/age_modifier_options.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step +from facefusion.common_helper import calculate_float_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module from facefusion.processors.types import AgeModifierModel @@ -27,7 +27,7 @@ def render() -> None: AGE_MODIFIER_DIRECTION_SLIDER = gradio.Slider( label = wording.get('uis.age_modifier_direction_slider'), value = state_manager.get_item('age_modifier_direction'), - step = calc_float_step(processors_choices.age_modifier_direction_range), + step = calculate_float_step(processors_choices.age_modifier_direction_range), minimum = processors_choices.age_modifier_direction_range[0], maximum = processors_choices.age_modifier_direction_range[-1], visible = has_age_modifier diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index d002920..5bfe7d0 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -3,8 +3,6 @@ from typing import Any, Generator, List, Optional import gradio from facefusion import benchmarker, state_manager, wording -from facefusion.types import BenchmarkResolution -from facefusion.uis.core import get_ui_component BENCHMARK_BENCHMARKS_DATAFRAME : Optional[gradio.Dataframe] = None BENCHMARK_START_BUTTON : Optional[gradio.Button] = None @@ -43,19 +41,11 @@ def render() -> None: def listen() -> None: - benchmark_resolutions_checkbox_group = get_ui_component('benchmark_resolutions_checkbox_group') - benchmark_cycle_count_slider = get_ui_component('benchmark_cycle_count_slider') - - if benchmark_resolutions_checkbox_group and benchmark_cycle_count_slider: - BENCHMARK_START_BUTTON.click(start, inputs = [ benchmark_resolutions_checkbox_group, benchmark_cycle_count_slider ], outputs = BENCHMARK_BENCHMARKS_DATAFRAME) + BENCHMARK_START_BUTTON.click(start, outputs = BENCHMARK_BENCHMARKS_DATAFRAME) -def start(benchmark_resolutions : List[BenchmarkResolution], benchmark_cycle_count : int) -> Generator[List[Any], None, None]: - state_manager.set_item('benchmark_resolutions', benchmark_resolutions) - state_manager.set_item('benchmark_cycle_count', benchmark_cycle_count) - state_manager.sync_item('execution_providers') - state_manager.sync_item('execution_thread_count') - state_manager.sync_item('execution_queue_count') +def start() -> Generator[List[Any], None, None]: + state_manager.sync_state() for benchmark in benchmarker.run(): yield [ list(benchmark_set.values()) for benchmark_set in benchmark ] diff --git a/facefusion/uis/components/benchmark_options.py b/facefusion/uis/components/benchmark_options.py index 549084a..2766ece 100644 --- a/facefusion/uis/components/benchmark_options.py +++ b/facefusion/uis/components/benchmark_options.py @@ -1,30 +1,54 @@ -from typing import Optional +from typing import List, Optional import gradio import facefusion.choices -from facefusion import wording -from facefusion.uis.core import register_ui_component +from facefusion import state_manager, wording +from 
facefusion.common_helper import calculate_int_step +from facefusion.types import BenchmarkMode, BenchmarkResolution +BENCHMARK_MODE_DROPDOWN : Optional[gradio.Dropdown] = None BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None BENCHMARK_CYCLE_COUNT_SLIDER : Optional[gradio.Button] = None def render() -> None: + global BENCHMARK_MODE_DROPDOWN global BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP global BENCHMARK_CYCLE_COUNT_SLIDER + BENCHMARK_MODE_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.benchmark_mode_dropdown'), + choices = facefusion.choices.benchmark_modes, + value = state_manager.get_item('benchmark_mode') + ) BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP = gradio.CheckboxGroup( label = wording.get('uis.benchmark_resolutions_checkbox_group'), choices = facefusion.choices.benchmark_resolutions, - value = facefusion.choices.benchmark_resolutions + value = state_manager.get_item('benchmark_resolutions') ) BENCHMARK_CYCLE_COUNT_SLIDER = gradio.Slider( label = wording.get('uis.benchmark_cycle_count_slider'), - value = 5, - step = 1, - minimum = min(facefusion.choices.benchmark_cycle_count_range), - maximum = max(facefusion.choices.benchmark_cycle_count_range) + value = state_manager.get_item('benchmark_cycle_count'), + step = calculate_int_step(facefusion.choices.benchmark_cycle_count_range), + minimum = facefusion.choices.benchmark_cycle_count_range[0], + maximum = facefusion.choices.benchmark_cycle_count_range[-1] ) - register_ui_component('benchmark_resolutions_checkbox_group', BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP) - register_ui_component('benchmark_cycle_count_slider', BENCHMARK_CYCLE_COUNT_SLIDER) + + +def listen() -> None: + BENCHMARK_MODE_DROPDOWN.change(update_benchmark_mode, inputs = BENCHMARK_MODE_DROPDOWN) + BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP.change(update_benchmark_resolutions, inputs = BENCHMARK_RESOLUTIONS_CHECKBOX_GROUP) + BENCHMARK_CYCLE_COUNT_SLIDER.release(update_benchmark_cycle_count, inputs = BENCHMARK_CYCLE_COUNT_SLIDER) + + +def update_benchmark_mode(benchmark_mode : BenchmarkMode) -> None: + state_manager.set_item('benchmark_mode', benchmark_mode) + + +def update_benchmark_resolutions(benchmark_resolutions : List[BenchmarkResolution]) -> None: + state_manager.set_item('benchmark_resolutions', benchmark_resolutions) + + +def update_benchmark_cycle_count(benchmark_cycle_count : int) -> None: + state_manager.set_item('benchmark_cycle_count', benchmark_cycle_count) diff --git a/facefusion/uis/components/deep_swapper_options.py b/facefusion/uis/components/deep_swapper_options.py index 210193d..c27d07c 100755 --- a/facefusion/uis/components/deep_swapper_options.py +++ b/facefusion/uis/components/deep_swapper_options.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from facefusion.common_helper import calculate_int_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module from facefusion.processors.types import DeepSwapperModel @@ -27,7 +27,7 @@ def render() -> None: DEEP_SWAPPER_MORPH_SLIDER = gradio.Slider( label = wording.get('uis.deep_swapper_morph_slider'), value = state_manager.get_item('deep_swapper_morph'), - step = calc_int_step(processors_choices.deep_swapper_morph_range), + step = calculate_int_step(processors_choices.deep_swapper_morph_range), minimum = processors_choices.deep_swapper_morph_range[0], maximum = 
processors_choices.deep_swapper_morph_range[-1], visible = has_deep_swapper and load_processor_module('deep_swapper').get_inference_pool() and load_processor_module('deep_swapper').has_morph_input() diff --git a/facefusion/uis/components/execution_queue_count.py b/facefusion/uis/components/execution_queue_count.py deleted file mode 100644 index b5ab5da..0000000 --- a/facefusion/uis/components/execution_queue_count.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import Optional - -import gradio - -import facefusion.choices -from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step - -EXECUTION_QUEUE_COUNT_SLIDER : Optional[gradio.Slider] = None - - -def render() -> None: - global EXECUTION_QUEUE_COUNT_SLIDER - - EXECUTION_QUEUE_COUNT_SLIDER = gradio.Slider( - label = wording.get('uis.execution_queue_count_slider'), - value = state_manager.get_item('execution_queue_count'), - step = calc_int_step(facefusion.choices.execution_queue_count_range), - minimum = facefusion.choices.execution_queue_count_range[0], - maximum = facefusion.choices.execution_queue_count_range[-1] - ) - - -def listen() -> None: - EXECUTION_QUEUE_COUNT_SLIDER.release(update_execution_queue_count, inputs = EXECUTION_QUEUE_COUNT_SLIDER) - - -def update_execution_queue_count(execution_queue_count : float) -> None: - state_manager.set_item('execution_queue_count', int(execution_queue_count)) diff --git a/facefusion/uis/components/execution_thread_count.py b/facefusion/uis/components/execution_thread_count.py index f5716a9..76a43ac 100644 --- a/facefusion/uis/components/execution_thread_count.py +++ b/facefusion/uis/components/execution_thread_count.py @@ -4,7 +4,7 @@ import gradio import facefusion.choices from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from facefusion.common_helper import calculate_int_step EXECUTION_THREAD_COUNT_SLIDER : Optional[gradio.Slider] = None @@ -15,7 +15,7 @@ def render() -> None: EXECUTION_THREAD_COUNT_SLIDER = gradio.Slider( label = wording.get('uis.execution_thread_count_slider'), value = state_manager.get_item('execution_thread_count'), - step = calc_int_step(facefusion.choices.execution_thread_count_range), + step = calculate_int_step(facefusion.choices.execution_thread_count_range), minimum = facefusion.choices.execution_thread_count_range[0], maximum = facefusion.choices.execution_thread_count_range[-1] ) diff --git a/facefusion/uis/components/expression_restorer_options.py b/facefusion/uis/components/expression_restorer_options.py index bf5eec4..95b99da 100755 --- a/facefusion/uis/components/expression_restorer_options.py +++ b/facefusion/uis/components/expression_restorer_options.py @@ -3,19 +3,21 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step +from facefusion.common_helper import calculate_float_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module -from facefusion.processors.types import ExpressionRestorerModel +from facefusion.processors.types import ExpressionRestorerArea, ExpressionRestorerModel from facefusion.uis.core import get_ui_component, register_ui_component EXPRESSION_RESTORER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None EXPRESSION_RESTORER_FACTOR_SLIDER : Optional[gradio.Slider] = None +EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None def render() -> None: global 
EXPRESSION_RESTORER_MODEL_DROPDOWN global EXPRESSION_RESTORER_FACTOR_SLIDER + global EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP has_expression_restorer = 'expression_restorer' in state_manager.get_item('processors') EXPRESSION_RESTORER_MODEL_DROPDOWN = gradio.Dropdown( @@ -27,27 +29,35 @@ def render() -> None: EXPRESSION_RESTORER_FACTOR_SLIDER = gradio.Slider( label = wording.get('uis.expression_restorer_factor_slider'), value = state_manager.get_item('expression_restorer_factor'), - step = calc_float_step(processors_choices.expression_restorer_factor_range), + step = calculate_float_step(processors_choices.expression_restorer_factor_range), minimum = processors_choices.expression_restorer_factor_range[0], maximum = processors_choices.expression_restorer_factor_range[-1], visible = has_expression_restorer ) + EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.expression_restorer_areas_checkbox_group'), + choices = processors_choices.expression_restorer_areas, + value = state_manager.get_item('expression_restorer_areas'), + visible = has_expression_restorer + ) register_ui_component('expression_restorer_model_dropdown', EXPRESSION_RESTORER_MODEL_DROPDOWN) register_ui_component('expression_restorer_factor_slider', EXPRESSION_RESTORER_FACTOR_SLIDER) + register_ui_component('expression_restorer_areas_checkbox_group', EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP) def listen() -> None: EXPRESSION_RESTORER_MODEL_DROPDOWN.change(update_expression_restorer_model, inputs = EXPRESSION_RESTORER_MODEL_DROPDOWN, outputs = EXPRESSION_RESTORER_MODEL_DROPDOWN) EXPRESSION_RESTORER_FACTOR_SLIDER.release(update_expression_restorer_factor, inputs = EXPRESSION_RESTORER_FACTOR_SLIDER) + EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP.change(update_expression_restorer_areas, inputs = EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP, outputs = EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP) processors_checkbox_group = get_ui_component('processors_checkbox_group') if processors_checkbox_group: - processors_checkbox_group.change(remote_update, inputs = processors_checkbox_group, outputs = [ EXPRESSION_RESTORER_MODEL_DROPDOWN, EXPRESSION_RESTORER_FACTOR_SLIDER ]) + processors_checkbox_group.change(remote_update, inputs = processors_checkbox_group, outputs = [ EXPRESSION_RESTORER_MODEL_DROPDOWN, EXPRESSION_RESTORER_FACTOR_SLIDER, EXPRESSION_RESTORER_AREAS_CHECKBOX_GROUP ]) -def remote_update(processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Slider]: +def remote_update(processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Slider, gradio.CheckboxGroup]: has_expression_restorer = 'expression_restorer' in processors - return gradio.Dropdown(visible = has_expression_restorer), gradio.Slider(visible = has_expression_restorer) + return gradio.Dropdown(visible = has_expression_restorer), gradio.Slider(visible = has_expression_restorer), gradio.CheckboxGroup(visible = has_expression_restorer) def update_expression_restorer_model(expression_restorer_model : ExpressionRestorerModel) -> gradio.Dropdown: @@ -62,3 +72,9 @@ def update_expression_restorer_model(expression_restorer_model : ExpressionResto def update_expression_restorer_factor(expression_restorer_factor : float) -> None: state_manager.set_item('expression_restorer_factor', int(expression_restorer_factor)) + + +def update_expression_restorer_areas(expression_restorer_areas : List[ExpressionRestorerArea]) -> gradio.CheckboxGroup: + expression_restorer_areas = expression_restorer_areas or processors_choices.expression_restorer_areas + 
state_manager.set_item('expression_restorer_areas', expression_restorer_areas) + return gradio.CheckboxGroup(value = state_manager.get_item('expression_restorer_areas')) diff --git a/facefusion/uis/components/face_detector.py b/facefusion/uis/components/face_detector.py index 756154d..64e8182 100644 --- a/facefusion/uis/components/face_detector.py +++ b/facefusion/uis/components/face_detector.py @@ -4,7 +4,7 @@ import gradio import facefusion.choices from facefusion import face_detector, state_manager, wording -from facefusion.common_helper import calc_float_step, get_last +from facefusion.common_helper import calculate_float_step, get_last from facefusion.types import Angle, FaceDetectorModel, Score from facefusion.uis.core import register_ui_component from facefusion.uis.types import ComponentOptions @@ -43,7 +43,7 @@ def render() -> None: FACE_DETECTOR_SCORE_SLIDER = gradio.Slider( label = wording.get('uis.face_detector_score_slider'), value = state_manager.get_item('face_detector_score'), - step = calc_float_step(facefusion.choices.face_detector_score_range), + step = calculate_float_step(facefusion.choices.face_detector_score_range), minimum = facefusion.choices.face_detector_score_range[0], maximum = facefusion.choices.face_detector_score_range[-1] ) diff --git a/facefusion/uis/components/face_editor_options.py b/facefusion/uis/components/face_editor_options.py index 978b12d..9a617df 100755 --- a/facefusion/uis/components/face_editor_options.py +++ b/facefusion/uis/components/face_editor_options.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step +from facefusion.common_helper import calculate_float_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module from facefusion.processors.types import FaceEditorModel @@ -53,7 +53,7 @@ def render() -> None: FACE_EDITOR_EYEBROW_DIRECTION_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_eyebrow_direction_slider'), value = state_manager.get_item('face_editor_eyebrow_direction'), - step = calc_float_step(processors_choices.face_editor_eyebrow_direction_range), + step = calculate_float_step(processors_choices.face_editor_eyebrow_direction_range), minimum = processors_choices.face_editor_eyebrow_direction_range[0], maximum = processors_choices.face_editor_eyebrow_direction_range[-1], visible = has_face_editor @@ -61,7 +61,7 @@ def render() -> None: FACE_EDITOR_EYE_GAZE_HORIZONTAL_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_eye_gaze_horizontal_slider'), value = state_manager.get_item('face_editor_eye_gaze_horizontal'), - step = calc_float_step(processors_choices.face_editor_eye_gaze_horizontal_range), + step = calculate_float_step(processors_choices.face_editor_eye_gaze_horizontal_range), minimum = processors_choices.face_editor_eye_gaze_horizontal_range[0], maximum = processors_choices.face_editor_eye_gaze_horizontal_range[-1], visible = has_face_editor @@ -69,7 +69,7 @@ def render() -> None: FACE_EDITOR_EYE_GAZE_VERTICAL_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_eye_gaze_vertical_slider'), value = state_manager.get_item('face_editor_eye_gaze_vertical'), - step = calc_float_step(processors_choices.face_editor_eye_gaze_vertical_range), + step = calculate_float_step(processors_choices.face_editor_eye_gaze_vertical_range), minimum = processors_choices.face_editor_eye_gaze_vertical_range[0], maximum = 
processors_choices.face_editor_eye_gaze_vertical_range[-1], visible = has_face_editor @@ -77,7 +77,7 @@ def render() -> None: FACE_EDITOR_EYE_OPEN_RATIO_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_eye_open_ratio_slider'), value = state_manager.get_item('face_editor_eye_open_ratio'), - step = calc_float_step(processors_choices.face_editor_eye_open_ratio_range), + step = calculate_float_step(processors_choices.face_editor_eye_open_ratio_range), minimum = processors_choices.face_editor_eye_open_ratio_range[0], maximum = processors_choices.face_editor_eye_open_ratio_range[-1], visible = has_face_editor @@ -85,7 +85,7 @@ def render() -> None: FACE_EDITOR_LIP_OPEN_RATIO_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_lip_open_ratio_slider'), value = state_manager.get_item('face_editor_lip_open_ratio'), - step = calc_float_step(processors_choices.face_editor_lip_open_ratio_range), + step = calculate_float_step(processors_choices.face_editor_lip_open_ratio_range), minimum = processors_choices.face_editor_lip_open_ratio_range[0], maximum = processors_choices.face_editor_lip_open_ratio_range[-1], visible = has_face_editor @@ -93,7 +93,7 @@ def render() -> None: FACE_EDITOR_MOUTH_GRIM_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_grim_slider'), value = state_manager.get_item('face_editor_mouth_grim'), - step = calc_float_step(processors_choices.face_editor_mouth_grim_range), + step = calculate_float_step(processors_choices.face_editor_mouth_grim_range), minimum = processors_choices.face_editor_mouth_grim_range[0], maximum = processors_choices.face_editor_mouth_grim_range[-1], visible = has_face_editor @@ -101,7 +101,7 @@ def render() -> None: FACE_EDITOR_MOUTH_POUT_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_pout_slider'), value = state_manager.get_item('face_editor_mouth_pout'), - step = calc_float_step(processors_choices.face_editor_mouth_pout_range), + step = calculate_float_step(processors_choices.face_editor_mouth_pout_range), minimum = processors_choices.face_editor_mouth_pout_range[0], maximum = processors_choices.face_editor_mouth_pout_range[-1], visible = has_face_editor @@ -109,7 +109,7 @@ def render() -> None: FACE_EDITOR_MOUTH_PURSE_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_purse_slider'), value = state_manager.get_item('face_editor_mouth_purse'), - step = calc_float_step(processors_choices.face_editor_mouth_purse_range), + step = calculate_float_step(processors_choices.face_editor_mouth_purse_range), minimum = processors_choices.face_editor_mouth_purse_range[0], maximum = processors_choices.face_editor_mouth_purse_range[-1], visible = has_face_editor @@ -117,7 +117,7 @@ def render() -> None: FACE_EDITOR_MOUTH_SMILE_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_smile_slider'), value = state_manager.get_item('face_editor_mouth_smile'), - step = calc_float_step(processors_choices.face_editor_mouth_smile_range), + step = calculate_float_step(processors_choices.face_editor_mouth_smile_range), minimum = processors_choices.face_editor_mouth_smile_range[0], maximum = processors_choices.face_editor_mouth_smile_range[-1], visible = has_face_editor @@ -125,7 +125,7 @@ def render() -> None: FACE_EDITOR_MOUTH_POSITION_HORIZONTAL_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_position_horizontal_slider'), value = state_manager.get_item('face_editor_mouth_position_horizontal'), - step = 
calc_float_step(processors_choices.face_editor_mouth_position_horizontal_range), + step = calculate_float_step(processors_choices.face_editor_mouth_position_horizontal_range), minimum = processors_choices.face_editor_mouth_position_horizontal_range[0], maximum = processors_choices.face_editor_mouth_position_horizontal_range[-1], visible = has_face_editor @@ -133,7 +133,7 @@ def render() -> None: FACE_EDITOR_MOUTH_POSITION_VERTICAL_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_mouth_position_vertical_slider'), value = state_manager.get_item('face_editor_mouth_position_vertical'), - step = calc_float_step(processors_choices.face_editor_mouth_position_vertical_range), + step = calculate_float_step(processors_choices.face_editor_mouth_position_vertical_range), minimum = processors_choices.face_editor_mouth_position_vertical_range[0], maximum = processors_choices.face_editor_mouth_position_vertical_range[-1], visible = has_face_editor @@ -141,7 +141,7 @@ def render() -> None: FACE_EDITOR_HEAD_PITCH_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_head_pitch_slider'), value = state_manager.get_item('face_editor_head_pitch'), - step = calc_float_step(processors_choices.face_editor_head_pitch_range), + step = calculate_float_step(processors_choices.face_editor_head_pitch_range), minimum = processors_choices.face_editor_head_pitch_range[0], maximum = processors_choices.face_editor_head_pitch_range[-1], visible = has_face_editor @@ -149,7 +149,7 @@ def render() -> None: FACE_EDITOR_HEAD_YAW_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_head_yaw_slider'), value = state_manager.get_item('face_editor_head_yaw'), - step = calc_float_step(processors_choices.face_editor_head_yaw_range), + step = calculate_float_step(processors_choices.face_editor_head_yaw_range), minimum = processors_choices.face_editor_head_yaw_range[0], maximum = processors_choices.face_editor_head_yaw_range[-1], visible = has_face_editor @@ -157,7 +157,7 @@ def render() -> None: FACE_EDITOR_HEAD_ROLL_SLIDER = gradio.Slider( label = wording.get('uis.face_editor_head_roll_slider'), value = state_manager.get_item('face_editor_head_roll'), - step = calc_float_step(processors_choices.face_editor_head_roll_range), + step = calculate_float_step(processors_choices.face_editor_head_roll_range), minimum = processors_choices.face_editor_head_roll_range[0], maximum = processors_choices.face_editor_head_roll_range[-1], visible = has_face_editor diff --git a/facefusion/uis/components/face_enhancer_options.py b/facefusion/uis/components/face_enhancer_options.py index 0e02d86..e88d871 100755 --- a/facefusion/uis/components/face_enhancer_options.py +++ b/facefusion/uis/components/face_enhancer_options.py @@ -3,10 +3,10 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step, calc_int_step +from facefusion.common_helper import calculate_float_step, calculate_int_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module -from facefusion.processors.types import FaceEnhancerModel +from facefusion.processors.types import FaceEnhancerModel, FaceEnhancerWeight from facefusion.uis.core import get_ui_component, register_ui_component FACE_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None @@ -29,7 +29,7 @@ def render() -> None: FACE_ENHANCER_BLEND_SLIDER = gradio.Slider( label = wording.get('uis.face_enhancer_blend_slider'), value = 
state_manager.get_item('face_enhancer_blend'), - step = calc_int_step(processors_choices.face_enhancer_blend_range), + step = calculate_int_step(processors_choices.face_enhancer_blend_range), minimum = processors_choices.face_enhancer_blend_range[0], maximum = processors_choices.face_enhancer_blend_range[-1], visible = has_face_enhancer @@ -37,7 +37,7 @@ def render() -> None: FACE_ENHANCER_WEIGHT_SLIDER = gradio.Slider( label = wording.get('uis.face_enhancer_weight_slider'), value = state_manager.get_item('face_enhancer_weight'), - step = calc_float_step(processors_choices.face_enhancer_weight_range), + step = calculate_float_step(processors_choices.face_enhancer_weight_range), minimum = processors_choices.face_enhancer_weight_range[0], maximum = processors_choices.face_enhancer_weight_range[-1], visible = has_face_enhancer and load_processor_module('face_enhancer').get_inference_pool() and load_processor_module('face_enhancer').has_weight_input() @@ -76,6 +76,6 @@ def update_face_enhancer_blend(face_enhancer_blend : float) -> None: state_manager.set_item('face_enhancer_blend', int(face_enhancer_blend)) -def update_face_enhancer_weight(face_enhancer_weight : float) -> None: +def update_face_enhancer_weight(face_enhancer_weight : FaceEnhancerWeight) -> None: state_manager.set_item('face_enhancer_weight', face_enhancer_weight) diff --git a/facefusion/uis/components/face_landmarker.py b/facefusion/uis/components/face_landmarker.py index 7fab429..ad3e717 100644 --- a/facefusion/uis/components/face_landmarker.py +++ b/facefusion/uis/components/face_landmarker.py @@ -4,7 +4,7 @@ import gradio import facefusion.choices from facefusion import face_landmarker, state_manager, wording -from facefusion.common_helper import calc_float_step +from facefusion.common_helper import calculate_float_step from facefusion.types import FaceLandmarkerModel, Score from facefusion.uis.core import register_ui_component @@ -24,7 +24,7 @@ def render() -> None: FACE_LANDMARKER_SCORE_SLIDER = gradio.Slider( label = wording.get('uis.face_landmarker_score_slider'), value = state_manager.get_item('face_landmarker_score'), - step = calc_float_step(facefusion.choices.face_landmarker_score_range), + step = calculate_float_step(facefusion.choices.face_landmarker_score_range), minimum = facefusion.choices.face_landmarker_score_range[0], maximum = facefusion.choices.face_landmarker_score_range[-1] ) diff --git a/facefusion/uis/components/face_masker.py b/facefusion/uis/components/face_masker.py index e01a5cd..8d847d1 100755 --- a/facefusion/uis/components/face_masker.py +++ b/facefusion/uis/components/face_masker.py @@ -4,7 +4,7 @@ import gradio import facefusion.choices from facefusion import face_masker, state_manager, wording -from facefusion.common_helper import calc_float_step, calc_int_step +from facefusion.common_helper import calculate_float_step, calculate_int_step from facefusion.types import FaceMaskArea, FaceMaskRegion, FaceMaskType, FaceOccluderModel, FaceParserModel from facefusion.uis.core import register_ui_component @@ -65,7 +65,7 @@ def render() -> None: ) FACE_MASK_BLUR_SLIDER = gradio.Slider( label = wording.get('uis.face_mask_blur_slider'), - step = calc_float_step(facefusion.choices.face_mask_blur_range), + step = calculate_float_step(facefusion.choices.face_mask_blur_range), minimum = facefusion.choices.face_mask_blur_range[0], maximum = facefusion.choices.face_mask_blur_range[-1], value = state_manager.get_item('face_mask_blur'), @@ -75,7 +75,7 @@ def render() -> None: with gradio.Row(): 
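All of these sliders take their step from calculate_int_step and calculate_float_step in common_helper. Their bodies are not shown in this patch; a sketch consistent with the call sites, assuming the choice ranges are evenly spaced:

from typing import Sequence


def calculate_int_step(range_values : Sequence[int]) -> int:
	# the choice ranges are evenly spaced, so the slider step is
	# simply the gap between the first two entries
	return range_values[1] - range_values[0]


def calculate_float_step(range_values : Sequence[float]) -> float:
	return round(range_values[1] - range_values[0], 2)


# a padding range of 0..100 in steps of 1 yields a slider step of 1
assert calculate_int_step(list(range(0, 101))) == 1
# a blur range of 0.0..1.0 in steps of 0.05 yields 0.05
assert calculate_float_step([ x / 100 for x in range(0, 101, 5) ]) == 0.05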
FACE_MASK_PADDING_TOP_SLIDER = gradio.Slider( label = wording.get('uis.face_mask_padding_top_slider'), - step = calc_int_step(facefusion.choices.face_mask_padding_range), + step = calculate_int_step(facefusion.choices.face_mask_padding_range), minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = state_manager.get_item('face_mask_padding')[0], @@ -83,7 +83,7 @@ def render() -> None: ) FACE_MASK_PADDING_RIGHT_SLIDER = gradio.Slider( label = wording.get('uis.face_mask_padding_right_slider'), - step = calc_int_step(facefusion.choices.face_mask_padding_range), + step = calculate_int_step(facefusion.choices.face_mask_padding_range), minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = state_manager.get_item('face_mask_padding')[1], @@ -92,7 +92,7 @@ def render() -> None: with gradio.Row(): FACE_MASK_PADDING_BOTTOM_SLIDER = gradio.Slider( label = wording.get('uis.face_mask_padding_bottom_slider'), - step = calc_int_step(facefusion.choices.face_mask_padding_range), + step = calculate_int_step(facefusion.choices.face_mask_padding_range), minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = state_manager.get_item('face_mask_padding')[2], @@ -100,7 +100,7 @@ def render() -> None: ) FACE_MASK_PADDING_LEFT_SLIDER = gradio.Slider( label = wording.get('uis.face_mask_padding_left_slider'), - step = calc_int_step(facefusion.choices.face_mask_padding_range), + step = calculate_int_step(facefusion.choices.face_mask_padding_range), minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = state_manager.get_item('face_mask_padding')[3], diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py index 58f21c8..778d37d 100644 --- a/facefusion/uis/components/face_selector.py +++ b/facefusion/uis/components/face_selector.py @@ -1,20 +1,21 @@ from typing import List, Optional, Tuple +import cv2 import gradio from gradio_rangeslider import RangeSlider import facefusion.choices from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step, calc_int_step +from facefusion.common_helper import calculate_float_step, calculate_int_step from facefusion.face_analyser import get_many_faces from facefusion.face_selector import sort_and_filter_faces -from facefusion.face_store import clear_reference_faces, clear_static_faces +from facefusion.face_store import clear_static_faces from facefusion.filesystem import is_image, is_video from facefusion.types import FaceSelectorMode, FaceSelectorOrder, Gender, Race, VisionFrame from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component from facefusion.uis.types import ComponentOptions from facefusion.uis.ui_helper import convert_str_none -from facefusion.vision import normalize_frame_color, read_static_image, read_video_frame +from facefusion.vision import fit_cover_frame, read_static_image, read_video_frame FACE_SELECTOR_MODE_DROPDOWN : Optional[gradio.Dropdown] = None FACE_SELECTOR_ORDER_DROPDOWN : Optional[gradio.Dropdown] = None @@ -44,11 +45,11 @@ def render() -> None: 'visible': 'reference' in state_manager.get_item('face_selector_mode') } if is_image(state_manager.get_item('target_path')): - reference_frame = read_static_image(state_manager.get_item('target_path')) - 
reference_face_gallery_options['value'] = extract_gallery_frames(reference_frame) + target_vision_frame = read_static_image(state_manager.get_item('target_path')) + reference_face_gallery_options['value'] = extract_gallery_frames(target_vision_frame) if is_video(state_manager.get_item('target_path')): - reference_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) - reference_face_gallery_options['value'] = extract_gallery_frames(reference_frame) + target_vision_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) + reference_face_gallery_options['value'] = extract_gallery_frames(target_vision_frame) FACE_SELECTOR_MODE_DROPDOWN = gradio.Dropdown( label = wording.get('uis.face_selector_mode_dropdown'), choices = facefusion.choices.face_selector_modes, @@ -69,7 +70,7 @@ def render() -> None: ) FACE_SELECTOR_RACE_DROPDOWN = gradio.Dropdown( label = wording.get('uis.face_selector_race_dropdown'), - choices = ['none'] + facefusion.choices.face_selector_races, + choices = [ 'none' ] + facefusion.choices.face_selector_races, value = state_manager.get_item('face_selector_race') or 'none' ) with gradio.Row(): @@ -80,12 +81,12 @@ def render() -> None: minimum = facefusion.choices.face_selector_age_range[0], maximum = facefusion.choices.face_selector_age_range[-1], value = (face_selector_age_start, face_selector_age_end), - step = calc_int_step(facefusion.choices.face_selector_age_range) + step = calculate_int_step(facefusion.choices.face_selector_age_range) ) REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider( label = wording.get('uis.reference_face_distance_slider'), value = state_manager.get_item('reference_face_distance'), - step = calc_float_step(facefusion.choices.reference_face_distance_range), + step = calculate_float_step(facefusion.choices.reference_face_distance_range), minimum = facefusion.choices.reference_face_distance_range[0], maximum = facefusion.choices.reference_face_distance_range[-1], visible = 'reference' in state_manager.get_item('face_selector_mode') @@ -105,16 +106,21 @@ def listen() -> None: FACE_SELECTOR_GENDER_DROPDOWN.change(update_face_selector_gender, inputs = FACE_SELECTOR_GENDER_DROPDOWN, outputs = REFERENCE_FACE_POSITION_GALLERY) FACE_SELECTOR_RACE_DROPDOWN.change(update_face_selector_race, inputs = FACE_SELECTOR_RACE_DROPDOWN, outputs = REFERENCE_FACE_POSITION_GALLERY) FACE_SELECTOR_AGE_RANGE_SLIDER.release(update_face_selector_age_range, inputs = FACE_SELECTOR_AGE_RANGE_SLIDER, outputs = REFERENCE_FACE_POSITION_GALLERY) - REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_reference_face_position) REFERENCE_FACE_DISTANCE_SLIDER.release(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER) + preview_frame_slider = get_ui_component('preview_frame_slider') + if preview_frame_slider: + REFERENCE_FACE_POSITION_GALLERY.select(update_reference_frame_number, inputs = preview_frame_slider) + REFERENCE_FACE_POSITION_GALLERY.select(update_reference_face_position) + for ui_component in get_ui_components( [ 'target_image', 'target_video' ]): for method in [ 'change', 'clear' ]: - getattr(ui_component, method)(update_reference_face_position) + getattr(ui_component, method)(clear_reference_frame_number) + getattr(ui_component, method)(clear_reference_face_position) getattr(ui_component, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) for ui_component in get_ui_components( @@ -127,13 +133,12 @@ def 
listen() -> None: face_detector_score_slider = get_ui_component('face_detector_score_slider') if face_detector_score_slider: - face_detector_score_slider.release(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + face_detector_score_slider.release(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) preview_frame_slider = get_ui_component('preview_frame_slider') if preview_frame_slider: for method in [ 'change', 'release' ]: - getattr(preview_frame_slider, method)(update_reference_frame_number, inputs = preview_frame_slider, show_progress = 'hidden') - getattr(preview_frame_slider, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY, show_progress = 'hidden') + getattr(preview_frame_slider, method)(update_reference_position_gallery, inputs = preview_frame_slider, outputs = REFERENCE_FACE_POSITION_GALLERY, show_progress = 'hidden') def update_face_selector_mode(face_selector_mode : FaceSelectorMode) -> Tuple[gradio.Gallery, gradio.Slider]: @@ -168,47 +173,48 @@ def update_face_selector_age_range(face_selector_age_range : Tuple[float, float] return update_reference_position_gallery() -def clear_and_update_reference_face_position(event : gradio.SelectData) -> gradio.Gallery: - clear_reference_faces() - clear_static_faces() - update_reference_face_position(event.index) - return update_reference_position_gallery() +def update_reference_face_position(event : gradio.SelectData) -> None: + state_manager.set_item('reference_face_position', event.index) -def update_reference_face_position(reference_face_position : int = 0) -> None: - state_manager.set_item('reference_face_position', reference_face_position) +def clear_reference_face_position() -> None: + state_manager.set_item('reference_face_position', 0) def update_reference_face_distance(reference_face_distance : float) -> None: state_manager.set_item('reference_face_distance', reference_face_distance) -def update_reference_frame_number(reference_frame_number : int) -> None: +def update_reference_frame_number(reference_frame_number : int = 0) -> None: state_manager.set_item('reference_frame_number', reference_frame_number) +def clear_reference_frame_number() -> None: + state_manager.set_item('reference_frame_number', 0) + + def clear_and_update_reference_position_gallery() -> gradio.Gallery: - clear_reference_faces() clear_static_faces() return update_reference_position_gallery() -def update_reference_position_gallery() -> gradio.Gallery: +def update_reference_position_gallery(frame_number : int = 0) -> gradio.Gallery: gallery_vision_frames = [] if is_image(state_manager.get_item('target_path')): - temp_vision_frame = read_static_image(state_manager.get_item('target_path')) - gallery_vision_frames = extract_gallery_frames(temp_vision_frame) + target_vision_frame = read_static_image(state_manager.get_item('target_path')) + gallery_vision_frames = extract_gallery_frames(target_vision_frame) if is_video(state_manager.get_item('target_path')): - temp_vision_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) - gallery_vision_frames = extract_gallery_frames(temp_vision_frame) + target_vision_frame = read_video_frame(state_manager.get_item('target_path'), frame_number) + gallery_vision_frames = extract_gallery_frames(target_vision_frame) if gallery_vision_frames: return gradio.Gallery(value = gallery_vision_frames) return gradio.Gallery(value = None) -def extract_gallery_frames(temp_vision_frame 
: VisionFrame) -> List[VisionFrame]: +def extract_gallery_frames(target_vision_frame : VisionFrame) -> List[VisionFrame]: gallery_vision_frames = [] - faces = sort_and_filter_faces(get_many_faces([ temp_vision_frame ])) + faces = get_many_faces([ target_vision_frame ]) + faces = sort_and_filter_faces(faces) for face in faces: start_x, start_y, end_x, end_y = map(int, face.bounding_box) @@ -218,7 +224,8 @@ def extract_gallery_frames(temp_vision_frame : VisionFrame) -> List[VisionFrame] start_y = max(0, start_y - padding_y) end_x = max(0, end_x + padding_x) end_y = max(0, end_y + padding_y) - crop_vision_frame = temp_vision_frame[start_y:end_y, start_x:end_x] - crop_vision_frame = normalize_frame_color(crop_vision_frame) + crop_vision_frame = target_vision_frame[start_y:end_y, start_x:end_x] + crop_vision_frame = fit_cover_frame(crop_vision_frame, (128, 128)) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_BGR2RGB) gallery_vision_frames.append(crop_vision_frame) return gallery_vision_frames diff --git a/facefusion/uis/components/face_swapper_options.py b/facefusion/uis/components/face_swapper_options.py index 92f08dc..36e1f5c 100755 --- a/facefusion/uis/components/face_swapper_options.py +++ b/facefusion/uis/components/face_swapper_options.py @@ -3,19 +3,21 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import get_first +from facefusion.common_helper import calculate_float_step, get_first from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module -from facefusion.processors.types import FaceSwapperModel +from facefusion.processors.types import FaceSwapperModel, FaceSwapperWeight from facefusion.uis.core import get_ui_component, register_ui_component FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FACE_SWAPPER_PIXEL_BOOST_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_SWAPPER_WEIGHT_SLIDER : Optional[gradio.Slider] = None def render() -> None: global FACE_SWAPPER_MODEL_DROPDOWN global FACE_SWAPPER_PIXEL_BOOST_DROPDOWN + global FACE_SWAPPER_WEIGHT_SLIDER has_face_swapper = 'face_swapper' in state_manager.get_item('processors') FACE_SWAPPER_MODEL_DROPDOWN = gradio.Dropdown( @@ -30,25 +32,35 @@ def render() -> None: value = state_manager.get_item('face_swapper_pixel_boost'), visible = has_face_swapper ) + FACE_SWAPPER_WEIGHT_SLIDER = gradio.Slider( + label = wording.get('uis.face_swapper_weight_slider'), + value = state_manager.get_item('face_swapper_weight'), + minimum = processors_choices.face_swapper_weight_range[0], + maximum = processors_choices.face_swapper_weight_range[-1], + step = calculate_float_step(processors_choices.face_swapper_weight_range), + visible = has_face_swapper and has_face_swapper_weight() + ) register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN) register_ui_component('face_swapper_pixel_boost_dropdown', FACE_SWAPPER_PIXEL_BOOST_DROPDOWN) + register_ui_component('face_swapper_weight_slider', FACE_SWAPPER_WEIGHT_SLIDER) def listen() -> None: - FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = [ FACE_SWAPPER_MODEL_DROPDOWN, FACE_SWAPPER_PIXEL_BOOST_DROPDOWN ]) + FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = [ FACE_SWAPPER_MODEL_DROPDOWN, FACE_SWAPPER_PIXEL_BOOST_DROPDOWN, FACE_SWAPPER_WEIGHT_SLIDER ]) 
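extract_gallery_frames now fits each face crop onto a 128x128 tile via fit_cover_frame and converts BGR to RGB explicitly, replacing normalize_frame_color. fit_cover_frame lives in vision.py and is not shown in this excerpt; a sketch of the cover-fit behaviour implied by the call site (assumed implementation, not the patch's own code):

import math
from typing import Tuple

import cv2
import numpy


def fit_cover_frame(vision_frame : numpy.ndarray, size : Tuple[int, int]) -> numpy.ndarray:
	# scale until both dimensions cover the target size, then center-crop
	target_width, target_height = size
	height, width = vision_frame.shape[:2]
	scale = max(target_width / width, target_height / height)
	resize_width = math.ceil(width * scale)
	resize_height = math.ceil(height * scale)
	vision_frame = cv2.resize(vision_frame, (resize_width, resize_height))
	start_x = (resize_width - target_width) // 2
	start_y = (resize_height - target_height) // 2
	return vision_frame[start_y:start_y + target_height, start_x:start_x + target_width]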
FACE_SWAPPER_PIXEL_BOOST_DROPDOWN.change(update_face_swapper_pixel_boost, inputs = FACE_SWAPPER_PIXEL_BOOST_DROPDOWN) + FACE_SWAPPER_WEIGHT_SLIDER.change(update_face_swapper_weight, inputs = FACE_SWAPPER_WEIGHT_SLIDER) processors_checkbox_group = get_ui_component('processors_checkbox_group') if processors_checkbox_group: - processors_checkbox_group.change(remote_update, inputs = processors_checkbox_group, outputs = [ FACE_SWAPPER_MODEL_DROPDOWN, FACE_SWAPPER_PIXEL_BOOST_DROPDOWN ]) + processors_checkbox_group.change(remote_update, inputs = processors_checkbox_group, outputs = [ FACE_SWAPPER_MODEL_DROPDOWN, FACE_SWAPPER_PIXEL_BOOST_DROPDOWN, FACE_SWAPPER_WEIGHT_SLIDER ]) -def remote_update(processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Dropdown]: +def remote_update(processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Dropdown, gradio.Slider]: has_face_swapper = 'face_swapper' in processors - return gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_face_swapper) + return gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_face_swapper), gradio.Slider(visible = has_face_swapper and has_face_swapper_weight()) -def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> Tuple[gradio.Dropdown, gradio.Dropdown]: +def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> Tuple[gradio.Dropdown, gradio.Dropdown, gradio.Slider]: face_swapper_module = load_processor_module('face_swapper') face_swapper_module.clear_inference_pool() state_manager.set_item('face_swapper_model', face_swapper_model) @@ -56,9 +68,17 @@ def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> Tuple[gr if face_swapper_module.pre_check(): face_swapper_pixel_boost_choices = processors_choices.face_swapper_set.get(state_manager.get_item('face_swapper_model')) state_manager.set_item('face_swapper_pixel_boost', get_first(face_swapper_pixel_boost_choices)) - return gradio.Dropdown(value = state_manager.get_item('face_swapper_model')), gradio.Dropdown(value = state_manager.get_item('face_swapper_pixel_boost'), choices = face_swapper_pixel_boost_choices) - return gradio.Dropdown(), gradio.Dropdown() + return gradio.Dropdown(value = state_manager.get_item('face_swapper_model')), gradio.Dropdown(value = state_manager.get_item('face_swapper_pixel_boost'), choices = face_swapper_pixel_boost_choices), gradio.Slider(visible = has_face_swapper_weight()) + return gradio.Dropdown(), gradio.Dropdown(), gradio.Slider() def update_face_swapper_pixel_boost(face_swapper_pixel_boost : str) -> None: state_manager.set_item('face_swapper_pixel_boost', face_swapper_pixel_boost) + + +def update_face_swapper_weight(face_swapper_weight : FaceSwapperWeight) -> None: + state_manager.set_item('face_swapper_weight', face_swapper_weight) + + +def has_face_swapper_weight() -> bool: + return state_manager.get_item('face_swapper_model') in [ 'ghost_1_256', 'ghost_2_256', 'ghost_3_256', 'hififace_unofficial_256', 'hyperswap_1a_256', 'hyperswap_1b_256', 'hyperswap_1c_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_unofficial_512' ] diff --git a/facefusion/uis/components/frame_colorizer_options.py b/facefusion/uis/components/frame_colorizer_options.py index 1ef4a47..0c317c4 100755 --- a/facefusion/uis/components/frame_colorizer_options.py +++ b/facefusion/uis/components/frame_colorizer_options.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from
facefusion.common_helper import calculate_int_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module from facefusion.processors.types import FrameColorizerModel @@ -35,7 +35,7 @@ def render() -> None: FRAME_COLORIZER_BLEND_SLIDER = gradio.Slider( label = wording.get('uis.frame_colorizer_blend_slider'), value = state_manager.get_item('frame_colorizer_blend'), - step = calc_int_step(processors_choices.frame_colorizer_blend_range), + step = calculate_int_step(processors_choices.frame_colorizer_blend_range), minimum = processors_choices.frame_colorizer_blend_range[0], maximum = processors_choices.frame_colorizer_blend_range[-1], visible = has_frame_colorizer diff --git a/facefusion/uis/components/frame_enhancer_options.py b/facefusion/uis/components/frame_enhancer_options.py index db0df53..fe1fd40 100755 --- a/facefusion/uis/components/frame_enhancer_options.py +++ b/facefusion/uis/components/frame_enhancer_options.py @@ -3,7 +3,7 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from facefusion.common_helper import calculate_int_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module from facefusion.processors.types import FrameEnhancerModel @@ -27,7 +27,7 @@ def render() -> None: FRAME_ENHANCER_BLEND_SLIDER = gradio.Slider( label = wording.get('uis.frame_enhancer_blend_slider'), value = state_manager.get_item('frame_enhancer_blend'), - step = calc_int_step(processors_choices.frame_enhancer_blend_range), + step = calculate_int_step(processors_choices.frame_enhancer_blend_range), minimum = processors_choices.frame_enhancer_blend_range[0], maximum = processors_choices.frame_enhancer_blend_range[-1], visible = has_frame_enhancer diff --git a/facefusion/uis/components/instant_runner.py b/facefusion/uis/components/instant_runner.py index 71a3f7a..ce1b7ca 100644 --- a/facefusion/uis/components/instant_runner.py +++ b/facefusion/uis/components/instant_runner.py @@ -54,7 +54,7 @@ def listen() -> None: if output_image and output_video: INSTANT_RUNNER_START_BUTTON.click(start, outputs = [ INSTANT_RUNNER_START_BUTTON, INSTANT_RUNNER_STOP_BUTTON ]) INSTANT_RUNNER_START_BUTTON.click(run, outputs = [ INSTANT_RUNNER_START_BUTTON, INSTANT_RUNNER_STOP_BUTTON, output_image, output_video ]) - INSTANT_RUNNER_STOP_BUTTON.click(stop, outputs = [ INSTANT_RUNNER_START_BUTTON, INSTANT_RUNNER_STOP_BUTTON ]) + INSTANT_RUNNER_STOP_BUTTON.click(stop, outputs = [ INSTANT_RUNNER_START_BUTTON, INSTANT_RUNNER_STOP_BUTTON, output_image, output_video ]) INSTANT_RUNNER_CLEAR_BUTTON.click(clear, outputs = [ output_image, output_video ]) if ui_workflow_dropdown: ui_workflow_dropdown.change(remote_update, inputs = ui_workflow_dropdown, outputs = INSTANT_RUNNER_WRAPPER) @@ -97,9 +97,9 @@ def create_and_run_job(step_args : Args) -> bool: return job_manager.create_job(job_id) and job_manager.add_step(job_id, step_args) and job_manager.submit_job(job_id) and job_runner.run_job(job_id, process_step) -def stop() -> Tuple[gradio.Button, gradio.Button]: +def stop() -> Tuple[gradio.Button, gradio.Button, gradio.Image, gradio.Video]: process_manager.stop() - return gradio.Button(visible = True), gradio.Button(visible = False) + return gradio.Button(visible = True), gradio.Button(visible = False), gradio.Image(value = None), gradio.Video(value = None) def clear() -> Tuple[gradio.Image, 
gradio.Video]: diff --git a/facefusion/uis/components/job_runner.py b/facefusion/uis/components/job_runner.py index df69eb0..9300e01 100644 --- a/facefusion/uis/components/job_runner.py +++ b/facefusion/uis/components/job_runner.py @@ -89,7 +89,7 @@ def run(job_action : JobRunnerAction, job_id : str) -> Tuple[gradio.Button, grad if job_action == 'job-run': logger.info(wording.get('running_job').format(job_id = job_id), __name__) if job_id and job_runner.run_job(job_id, process_step): - logger.info(wording.get('processing_job_succeed').format(job_id = job_id), __name__) + logger.info(wording.get('processing_job_succeeded').format(job_id = job_id), __name__) else: logger.info(wording.get('processing_job_failed').format(job_id = job_id), __name__) updated_job_ids = job_manager.find_job_ids('queued') or [ 'none' ] @@ -100,14 +100,14 @@ def run(job_action : JobRunnerAction, job_id : str) -> Tuple[gradio.Button, grad logger.info(wording.get('running_jobs'), __name__) halt_on_error = False if job_runner.run_jobs(process_step, halt_on_error): - logger.info(wording.get('processing_jobs_succeed'), __name__) + logger.info(wording.get('processing_jobs_succeeded'), __name__) else: logger.info(wording.get('processing_jobs_failed'), __name__) if job_action == 'job-retry': logger.info(wording.get('retrying_job').format(job_id = job_id), __name__) if job_id and job_runner.retry_job(job_id, process_step): - logger.info(wording.get('processing_job_succeed').format(job_id = job_id), __name__) + logger.info(wording.get('processing_job_succeeded').format(job_id = job_id), __name__) else: logger.info(wording.get('processing_job_failed').format(job_id = job_id), __name__) updated_job_ids = job_manager.find_job_ids('failed') or [ 'none' ] @@ -118,7 +118,7 @@ def run(job_action : JobRunnerAction, job_id : str) -> Tuple[gradio.Button, grad logger.info(wording.get('retrying_jobs'), __name__) halt_on_error = False if job_runner.retry_jobs(process_step, halt_on_error): - logger.info(wording.get('processing_jobs_succeed'), __name__) + logger.info(wording.get('processing_jobs_succeeded'), __name__) else: logger.info(wording.get('processing_jobs_failed'), __name__) return gradio.Button(visible = True), gradio.Button(visible = False), gradio.Dropdown() diff --git a/facefusion/uis/components/lip_syncer_options.py b/facefusion/uis/components/lip_syncer_options.py index e253ee9..58fedc6 100755 --- a/facefusion/uis/components/lip_syncer_options.py +++ b/facefusion/uis/components/lip_syncer_options.py @@ -3,10 +3,10 @@ from typing import List, Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.common_helper import calc_float_step +from facefusion.common_helper import calculate_float_step from facefusion.processors import choices as processors_choices from facefusion.processors.core import load_processor_module -from facefusion.processors.types import LipSyncerModel +from facefusion.processors.types import LipSyncerModel, LipSyncerWeight from facefusion.uis.core import get_ui_component, register_ui_component LIP_SYNCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None @@ -27,7 +27,7 @@ def render() -> None: LIP_SYNCER_WEIGHT_SLIDER = gradio.Slider( label = wording.get('uis.lip_syncer_weight_slider'), value = state_manager.get_item('lip_syncer_weight'), - step = calc_float_step(processors_choices.lip_syncer_weight_range), + step = calculate_float_step(processors_choices.lip_syncer_weight_range), minimum = processors_choices.lip_syncer_weight_range[0], maximum = 
processors_choices.lip_syncer_weight_range[-1], visible = has_lip_syncer @@ -60,5 +60,5 @@ def update_lip_syncer_model(lip_syncer_model : LipSyncerModel) -> gradio.Dropdow return gradio.Dropdown() -def update_lip_syncer_weight(lip_syncer_weight : float) -> None: +def update_lip_syncer_weight(lip_syncer_weight : LipSyncerWeight) -> None: state_manager.set_item('lip_syncer_weight', lip_syncer_weight) diff --git a/facefusion/uis/components/memory.py b/facefusion/uis/components/memory.py index 81c367a..b583234 100644 --- a/facefusion/uis/components/memory.py +++ b/facefusion/uis/components/memory.py @@ -4,7 +4,7 @@ import gradio import facefusion.choices from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from facefusion.common_helper import calculate_int_step from facefusion.types import VideoMemoryStrategy VIDEO_MEMORY_STRATEGY_DROPDOWN : Optional[gradio.Dropdown] = None @@ -22,7 +22,7 @@ def render() -> None: ) SYSTEM_MEMORY_LIMIT_SLIDER = gradio.Slider( label = wording.get('uis.system_memory_limit_slider'), - step = calc_int_step(facefusion.choices.system_memory_limit_range), + step = calculate_int_step(facefusion.choices.system_memory_limit_range), minimum = facefusion.choices.system_memory_limit_range[0], maximum = facefusion.choices.system_memory_limit_range[-1], value = state_manager.get_item('system_memory_limit') diff --git a/facefusion/uis/components/output.py b/facefusion/uis/components/output.py index 84fd089..989e246 100644 --- a/facefusion/uis/components/output.py +++ b/facefusion/uis/components/output.py @@ -1,4 +1,5 @@ import tempfile +from pathlib import Path from typing import Optional import gradio @@ -17,7 +18,12 @@ def render() -> None: global OUTPUT_VIDEO if not state_manager.get_item('output_path'): - state_manager.set_item('output_path', tempfile.gettempdir()) + documents_directory = Path.home().joinpath('Documents') + + if documents_directory.exists(): + state_manager.set_item('output_path', documents_directory) + else: + state_manager.set_item('output_path', tempfile.gettempdir()) OUTPUT_PATH_TEXTBOX = gradio.Textbox( label = wording.get('uis.output_path_textbox'), value = state_manager.get_item('output_path'), diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index 46b875d..dbaa868 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -4,58 +4,53 @@ import gradio import facefusion.choices from facefusion import state_manager, wording -from facefusion.common_helper import calc_int_step +from facefusion.common_helper import calculate_float_step, calculate_int_step from facefusion.ffmpeg import get_available_encoder_set from facefusion.filesystem import is_image, is_video -from facefusion.types import AudioEncoder, Fps, VideoEncoder, VideoPreset +from facefusion.types import AudioEncoder, Fps, Scale, VideoEncoder, VideoPreset from facefusion.uis.core import get_ui_components, register_ui_component -from facefusion.vision import create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_resolution, pack_resolution +from facefusion.vision import detect_video_fps OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None -OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_IMAGE_SCALE_SLIDER : Optional[gradio.Slider] = None OUTPUT_AUDIO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_AUDIO_QUALITY_SLIDER : Optional[gradio.Slider] = 
None OUTPUT_AUDIO_VOLUME_SLIDER : Optional[gradio.Slider] = None OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None -OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_VIDEO_SCALE_SLIDER : Optional[gradio.Slider] = None OUTPUT_VIDEO_QUALITY_SLIDER : Optional[gradio.Slider] = None OUTPUT_VIDEO_FPS_SLIDER : Optional[gradio.Slider] = None def render() -> None: global OUTPUT_IMAGE_QUALITY_SLIDER - global OUTPUT_IMAGE_RESOLUTION_DROPDOWN + global OUTPUT_IMAGE_SCALE_SLIDER global OUTPUT_AUDIO_ENCODER_DROPDOWN global OUTPUT_AUDIO_QUALITY_SLIDER global OUTPUT_AUDIO_VOLUME_SLIDER global OUTPUT_VIDEO_ENCODER_DROPDOWN global OUTPUT_VIDEO_PRESET_DROPDOWN - global OUTPUT_VIDEO_RESOLUTION_DROPDOWN + global OUTPUT_VIDEO_SCALE_SLIDER global OUTPUT_VIDEO_QUALITY_SLIDER global OUTPUT_VIDEO_FPS_SLIDER - output_image_resolutions = [] - output_video_resolutions = [] available_encoder_set = get_available_encoder_set() - if is_image(state_manager.get_item('target_path')): - output_image_resolution = detect_image_resolution(state_manager.get_item('target_path')) - output_image_resolutions = create_image_resolutions(output_image_resolution) - if is_video(state_manager.get_item('target_path')): - output_video_resolution = detect_video_resolution(state_manager.get_item('target_path')) - output_video_resolutions = create_video_resolutions(output_video_resolution) + OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider( label = wording.get('uis.output_image_quality_slider'), value = state_manager.get_item('output_image_quality'), - step = calc_int_step(facefusion.choices.output_image_quality_range), + step = calculate_int_step(facefusion.choices.output_image_quality_range), minimum = facefusion.choices.output_image_quality_range[0], maximum = facefusion.choices.output_image_quality_range[-1], visible = is_image(state_manager.get_item('target_path')) ) - OUTPUT_IMAGE_RESOLUTION_DROPDOWN = gradio.Dropdown( - label = wording.get('uis.output_image_resolution_dropdown'), - choices = output_image_resolutions, - value = state_manager.get_item('output_image_resolution'), + OUTPUT_IMAGE_SCALE_SLIDER = gradio.Slider( + label = wording.get('uis.output_image_scale_slider'), + step = calculate_float_step(facefusion.choices.output_image_scale_range), + value = state_manager.get_item('output_image_scale'), + minimum = facefusion.choices.output_image_scale_range[0], + maximum = facefusion.choices.output_image_scale_range[-1], visible = is_image(state_manager.get_item('target_path')) ) OUTPUT_AUDIO_ENCODER_DROPDOWN = gradio.Dropdown( @@ -67,7 +62,7 @@ def render() -> None: OUTPUT_AUDIO_QUALITY_SLIDER = gradio.Slider( label = wording.get('uis.output_audio_quality_slider'), value = state_manager.get_item('output_audio_quality'), - step = calc_int_step(facefusion.choices.output_audio_quality_range), + step = calculate_int_step(facefusion.choices.output_audio_quality_range), minimum = facefusion.choices.output_audio_quality_range[0], maximum = facefusion.choices.output_audio_quality_range[-1], visible = is_video(state_manager.get_item('target_path')) @@ -75,7 +70,7 @@ def render() -> None: OUTPUT_AUDIO_VOLUME_SLIDER = gradio.Slider( label = wording.get('uis.output_audio_volume_slider'), value = state_manager.get_item('output_audio_volume'), - step = calc_int_step(facefusion.choices.output_audio_volume_range), + step = calculate_int_step(facefusion.choices.output_audio_volume_range), minimum = facefusion.choices.output_audio_volume_range[0], 
maximum = facefusion.choices.output_audio_volume_range[-1], visible = is_video(state_manager.get_item('target_path')) @@ -95,15 +90,17 @@ def render() -> None: OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider( label = wording.get('uis.output_video_quality_slider'), value = state_manager.get_item('output_video_quality'), - step = calc_int_step(facefusion.choices.output_video_quality_range), + step = calculate_int_step(facefusion.choices.output_video_quality_range), minimum = facefusion.choices.output_video_quality_range[0], maximum = facefusion.choices.output_video_quality_range[-1], visible = is_video(state_manager.get_item('target_path')) ) - OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown( - label = wording.get('uis.output_video_resolution_dropdown'), - choices = output_video_resolutions, - value = state_manager.get_item('output_video_resolution'), + OUTPUT_VIDEO_SCALE_SLIDER = gradio.Slider( + label = wording.get('uis.output_video_scale_slider'), + step = calculate_float_step(facefusion.choices.output_video_scale_range), + value = state_manager.get_item('output_video_scale'), + minimum = facefusion.choices.output_video_scale_range[0], + maximum = facefusion.choices.output_video_scale_range[-1], visible = is_video(state_manager.get_item('target_path')) ) OUTPUT_VIDEO_FPS_SLIDER = gradio.Slider( @@ -119,14 +116,14 @@ def render() -> None: def listen() -> None: OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) - OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) + OUTPUT_IMAGE_SCALE_SLIDER.release(update_output_image_scale, inputs = OUTPUT_IMAGE_SCALE_SLIDER) OUTPUT_AUDIO_ENCODER_DROPDOWN.change(update_output_audio_encoder, inputs = OUTPUT_AUDIO_ENCODER_DROPDOWN) OUTPUT_AUDIO_QUALITY_SLIDER.release(update_output_audio_quality, inputs = OUTPUT_AUDIO_QUALITY_SLIDER) OUTPUT_AUDIO_VOLUME_SLIDER.release(update_output_audio_volume, inputs = OUTPUT_AUDIO_VOLUME_SLIDER) OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) - OUTPUT_VIDEO_RESOLUTION_DROPDOWN.change(update_output_video_resolution, inputs = OUTPUT_VIDEO_RESOLUTION_DROPDOWN) + OUTPUT_VIDEO_SCALE_SLIDER.release(update_output_video_scale, inputs = OUTPUT_VIDEO_SCALE_SLIDER) OUTPUT_VIDEO_FPS_SLIDER.release(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) for ui_component in get_ui_components( @@ -135,30 +132,24 @@ def listen() -> None: 'target_video' ]): for method in [ 'change', 'clear' ]: - getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_AUDIO_QUALITY_SLIDER, OUTPUT_AUDIO_VOLUME_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) + getattr(ui_component, method)(remote_update, outputs = [OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_SCALE_SLIDER, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_AUDIO_QUALITY_SLIDER, OUTPUT_AUDIO_VOLUME_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_SCALE_SLIDER, OUTPUT_VIDEO_FPS_SLIDER]) -def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, 
gradio.Slider, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: +def remote_update() -> Tuple[gradio.Slider, gradio.Slider, gradio.Dropdown, gradio.Slider, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Slider, gradio.Slider]: if is_image(state_manager.get_item('target_path')): - output_image_resolution = detect_image_resolution(state_manager.get_item('target_path')) - output_image_resolutions = create_image_resolutions(output_image_resolution) - state_manager.set_item('output_image_resolution', pack_resolution(output_image_resolution)) - return gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_image_resolution'), choices = output_image_resolutions, visible = True), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False) if is_video(state_manager.get_item('target_path')): - output_video_resolution = detect_video_resolution(state_manager.get_item('target_path')) - output_video_resolutions = create_video_resolutions(output_video_resolution) - state_manager.set_item('output_video_resolution', pack_resolution(output_video_resolution)) state_manager.set_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path'))) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_video_resolution'), choices = output_video_resolutions, visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True) + return gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False) def update_output_image_quality(output_image_quality : float) -> None: state_manager.set_item('output_image_quality', int(output_image_quality)) -def update_output_image_resolution(output_image_resolution : str) -> None: - 
state_manager.set_item('output_image_resolution', output_image_resolution) +def update_output_image_scale(output_image_scale : Scale) -> None: + state_manager.set_item('output_image_scale', output_image_scale) def update_output_audio_encoder(output_audio_encoder : AudioEncoder) -> None: @@ -185,8 +176,8 @@ def update_output_video_quality(output_video_quality : float) -> None: state_manager.set_item('output_video_quality', int(output_video_quality)) -def update_output_video_resolution(output_video_resolution : str) -> None: - state_manager.set_item('output_video_resolution', output_video_resolution) +def update_output_video_scale(output_video_scale : Scale) -> None: + state_manager.set_item('output_video_scale', output_video_scale) def update_output_video_fps(output_video_fps : Fps) -> None: diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py index d6283ef..f7d032d 100755 --- a/facefusion/uis/components/preview.py +++ b/facefusion/uis/components/preview.py @@ -1,85 +1,82 @@ from time import sleep -from typing import Optional +from typing import List, Optional, Tuple import cv2 import gradio import numpy from facefusion import logger, process_manager, state_manager, wording -from facefusion.audio import create_empty_audio_frame, get_audio_frame +from facefusion.audio import create_empty_audio_frame, get_voice_frame from facefusion.common_helper import get_first from facefusion.content_analyser import analyse_frame -from facefusion.core import conditional_append_reference_faces -from facefusion.face_analyser import get_average_face, get_many_faces -from facefusion.face_selector import sort_faces_by_order -from facefusion.face_store import clear_reference_faces, clear_static_faces, get_reference_faces +from facefusion.face_analyser import get_one_face +from facefusion.face_selector import select_faces +from facefusion.face_store import clear_static_faces from facefusion.filesystem import filter_audio_paths, is_image, is_video from facefusion.processors.core import get_processors_modules -from facefusion.types import AudioFrame, Face, FaceSet, VisionFrame +from facefusion.types import AudioFrame, Face, VisionFrame +from facefusion.uis import choices as uis_choices from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component -from facefusion.uis.types import ComponentOptions -from facefusion.vision import count_video_frame_total, detect_frame_orientation, normalize_frame_color, read_static_image, read_static_images, read_video_frame, restrict_frame +from facefusion.uis.types import ComponentOptions, PreviewMode +from facefusion.vision import detect_frame_orientation, fit_cover_frame, obscure_frame, read_static_image, read_static_images, read_video_frame, restrict_frame, unpack_resolution PREVIEW_IMAGE : Optional[gradio.Image] = None -PREVIEW_FRAME_SLIDER : Optional[gradio.Slider] = None def render() -> None: global PREVIEW_IMAGE - global PREVIEW_FRAME_SLIDER preview_image_options : ComponentOptions =\ { 'label': wording.get('uis.preview_image') } - preview_frame_slider_options : ComponentOptions =\ - { - 'label': wording.get('uis.preview_frame_slider'), - 'step': 1, - 'minimum': 0, - 'maximum': 100, - 'visible': False - } - conditional_append_reference_faces() - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_frames = read_static_images(state_manager.get_item('source_paths')) - source_faces = get_many_faces(source_frames) - source_face = 
get_average_face(source_faces) + + source_vision_frames = read_static_images(state_manager.get_item('source_paths')) source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths'))) source_audio_frame = create_empty_audio_frame() + source_voice_frame = create_empty_audio_frame() if source_audio_path and state_manager.get_item('output_video_fps') and state_manager.get_item('reference_frame_number'): - temp_audio_frame = get_audio_frame(source_audio_path, state_manager.get_item('output_video_fps'), state_manager.get_item('reference_frame_number')) - if numpy.any(temp_audio_frame): - source_audio_frame = temp_audio_frame + temp_voice_frame = get_voice_frame(source_audio_path, state_manager.get_item('output_video_fps'), state_manager.get_item('reference_frame_number')) + if numpy.any(temp_voice_frame): + source_voice_frame = temp_voice_frame if is_image(state_manager.get_item('target_path')): target_vision_frame = read_static_image(state_manager.get_item('target_path')) - preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) - preview_image_options['value'] = normalize_frame_color(preview_vision_frame) + reference_vision_frame = read_static_image(state_manager.get_item('target_path')) + preview_vision_frame = process_preview_frame(reference_vision_frame, source_vision_frames, source_audio_frame, source_voice_frame, target_vision_frame, uis_choices.preview_modes[0], uis_choices.preview_resolutions[-1]) + preview_image_options['value'] = cv2.cvtColor(preview_vision_frame, cv2.COLOR_BGR2RGB) preview_image_options['elem_classes'] = [ 'image-preview', 'is-' + detect_frame_orientation(preview_vision_frame) ] if is_video(state_manager.get_item('target_path')): temp_vision_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) - preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) - preview_image_options['value'] = normalize_frame_color(preview_vision_frame) + reference_vision_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) + preview_vision_frame = process_preview_frame(reference_vision_frame, source_vision_frames, source_audio_frame, source_voice_frame, temp_vision_frame, uis_choices.preview_modes[0], uis_choices.preview_resolutions[-1]) + preview_image_options['value'] = cv2.cvtColor(preview_vision_frame, cv2.COLOR_BGR2RGB) preview_image_options['elem_classes'] = [ 'image-preview', 'is-' + detect_frame_orientation(preview_vision_frame) ] preview_image_options['visible'] = True - preview_frame_slider_options['value'] = state_manager.get_item('reference_frame_number') - preview_frame_slider_options['maximum'] = count_video_frame_total(state_manager.get_item('target_path')) - preview_frame_slider_options['visible'] = True PREVIEW_IMAGE = gradio.Image(**preview_image_options) - PREVIEW_FRAME_SLIDER = gradio.Slider(**preview_frame_slider_options) - register_ui_component('preview_frame_slider', PREVIEW_FRAME_SLIDER) + register_ui_component('preview_image', PREVIEW_IMAGE) def listen() -> None: - PREVIEW_FRAME_SLIDER.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE, show_progress = 'hidden') - PREVIEW_FRAME_SLIDER.change(slide_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE, show_progress = 'hidden', trigger_mode = 'once') + preview_frame_slider = 
get_ui_component('preview_frame_slider') + preview_mode_dropdown = get_ui_component('preview_mode_dropdown') + preview_resolution_dropdown = get_ui_component('preview_resolution_dropdown') - reference_face_position_gallery = get_ui_component('reference_face_position_gallery') - if reference_face_position_gallery: - reference_face_position_gallery.select(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + if preview_mode_dropdown: + preview_mode_dropdown.change(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) + + if preview_resolution_dropdown: + preview_resolution_dropdown.change(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) + + if preview_frame_slider: + preview_frame_slider.release(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE, show_progress = 'hidden') + preview_frame_slider.change(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE, show_progress = 'hidden', trigger_mode = 'once') + + reference_face_position_gallery = get_ui_component('reference_face_position_gallery') + if reference_face_position_gallery: + reference_face_position_gallery.select(clear_and_update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) for ui_component in get_ui_components( [ @@ -89,15 +86,7 @@ def listen() -> None: 'target_video' ]): for method in [ 'change', 'clear' ]: - getattr(ui_component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) - - for ui_component in get_ui_components( - [ - 'target_image', - 'target_video' - ]): - for method in [ 'change', 'clear' ]: - getattr(ui_component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) + getattr(ui_component, method)(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) for ui_component in get_ui_components( [ @@ -105,9 +94,10 @@ def listen() -> None: 'frame_colorizer_size_dropdown', 'face_mask_types_checkbox_group', 'face_mask_areas_checkbox_group', - 'face_mask_regions_checkbox_group' + 'face_mask_regions_checkbox_group', + 'expression_restorer_areas_checkbox_group' ]): - ui_component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + ui_component.change(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) for ui_component in get_ui_components( [ @@ -130,6 +120,7 @@ def listen() -> None: 'face_editor_head_roll_slider', 'face_enhancer_blend_slider', 'face_enhancer_weight_slider', + 'face_swapper_weight_slider', 'frame_colorizer_blend_slider', 'frame_enhancer_blend_slider', 'lip_syncer_weight_slider', @@ -142,7 +133,7 @@ def listen() -> None: 'face_mask_padding_right_slider', 'output_video_fps_slider' ]): - ui_component.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + ui_component.release(update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) for ui_component in get_ui_components( [ @@ -166,94 +157,125 @@ def listen() -> None: 
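A note on the wiring pattern used throughout this listen() rewrite: every preview handler now takes the same three inputs, and gradio passes the component values positionally in list order. A minimal self-contained sketch of that pattern, with hypothetical component names:

import gradio


def update(mode : str, resolution : str, frame_number : float) -> str:
    # values arrive in the same order as the inputs list below
    return mode + ' at ' + resolution + ', frame ' + str(int(frame_number))


with gradio.Blocks() as demo:
    mode_dropdown = gradio.Dropdown(choices = [ 'frame-by-frame', 'face-by-face' ], value = 'frame-by-frame')
    resolution_dropdown = gradio.Dropdown(choices = [ '512x512', '1024x1024' ], value = '1024x1024')
    frame_slider = gradio.Slider(minimum = 0, maximum = 100, step = 1)
    output_textbox = gradio.Textbox()
    # change fires continuously while dragging, release once on mouse-up,
    # which is why the expensive preview refresh binds to release
    frame_slider.release(update, inputs = [ mode_dropdown, resolution_dropdown, frame_slider ], outputs = output_textbox)

Launching the Blocks app would then rerun update whenever the slider is released, mirroring how update_preview_image is triggered here.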
'face_detector_angles_checkbox_group', 'face_landmarker_model_dropdown', 'face_occluder_model_dropdown', - 'face_parser_model_dropdown' + 'face_parser_model_dropdown', + 'voice_extractor_model_dropdown' ]): - ui_component.change(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + ui_component.change(clear_and_update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) for ui_component in get_ui_components( [ 'face_detector_score_slider', 'face_landmarker_score_slider' ]): - ui_component.release(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + ui_component.release(clear_and_update_preview_image, inputs = [ preview_mode_dropdown, preview_resolution_dropdown, preview_frame_slider ], outputs = PREVIEW_IMAGE) -def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image: - clear_reference_faces() - clear_static_faces() - return update_preview_image(frame_number) - - -def slide_preview_image(frame_number : int = 0) -> gradio.Image: - if is_video(state_manager.get_item('target_path')): - preview_vision_frame = normalize_frame_color(read_video_frame(state_manager.get_item('target_path'), frame_number)) - preview_vision_frame = restrict_frame(preview_vision_frame, (1024, 1024)) - return gradio.Image(value = preview_vision_frame) - return gradio.Image(value = None) - - -def update_preview_image(frame_number : int = 0) -> gradio.Image: +def update_preview_image(preview_mode : PreviewMode, preview_resolution : str, frame_number : int = 0) -> gradio.Image: while process_manager.is_checking(): sleep(0.5) - conditional_append_reference_faces() - reference_faces = get_reference_faces() if 'reference' in state_manager.get_item('face_selector_mode') else None - source_frames = read_static_images(state_manager.get_item('source_paths')) - source_faces = [] - for source_frame in source_frames: - temp_faces = get_many_faces([ source_frame ]) - temp_faces = sort_faces_by_order(temp_faces, 'large-small') - if temp_faces: - source_faces.append(get_first(temp_faces)) - source_face = get_average_face(source_faces) + source_vision_frames = read_static_images(state_manager.get_item('source_paths')) source_audio_path = get_first(filter_audio_paths(state_manager.get_item('source_paths'))) source_audio_frame = create_empty_audio_frame() + source_voice_frame = create_empty_audio_frame() if source_audio_path and state_manager.get_item('output_video_fps') and state_manager.get_item('reference_frame_number'): reference_audio_frame_number = state_manager.get_item('reference_frame_number') if state_manager.get_item('trim_frame_start'): reference_audio_frame_number -= state_manager.get_item('trim_frame_start') - temp_audio_frame = get_audio_frame(source_audio_path, state_manager.get_item('output_video_fps'), reference_audio_frame_number) - if numpy.any(temp_audio_frame): - source_audio_frame = temp_audio_frame + temp_voice_frame = get_voice_frame(source_audio_path, state_manager.get_item('output_video_fps'), reference_audio_frame_number) + if numpy.any(temp_voice_frame): + source_voice_frame = temp_voice_frame if is_image(state_manager.get_item('target_path')): + reference_vision_frame = read_static_image(state_manager.get_item('target_path')) target_vision_frame = read_static_image(state_manager.get_item('target_path')) - preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) - preview_vision_frame = 
normalize_frame_color(preview_vision_frame) + preview_vision_frame = process_preview_frame(reference_vision_frame, source_vision_frames, source_audio_frame, source_voice_frame, target_vision_frame, preview_mode, preview_resolution) + preview_vision_frame = cv2.cvtColor(preview_vision_frame, cv2.COLOR_BGR2RGB) return gradio.Image(value = preview_vision_frame, elem_classes = [ 'image-preview', 'is-' + detect_frame_orientation(preview_vision_frame) ]) if is_video(state_manager.get_item('target_path')): + reference_vision_frame = read_video_frame(state_manager.get_item('target_path'), state_manager.get_item('reference_frame_number')) temp_vision_frame = read_video_frame(state_manager.get_item('target_path'), frame_number) - preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) - preview_vision_frame = normalize_frame_color(preview_vision_frame) + preview_vision_frame = process_preview_frame(reference_vision_frame, source_vision_frames, source_audio_frame, source_voice_frame, temp_vision_frame, preview_mode, preview_resolution) + preview_vision_frame = cv2.cvtColor(preview_vision_frame, cv2.COLOR_BGR2RGB) return gradio.Image(value = preview_vision_frame, elem_classes = [ 'image-preview', 'is-' + detect_frame_orientation(preview_vision_frame) ]) return gradio.Image(value = None, elem_classes = None) -def update_preview_frame_slider() -> gradio.Slider: - if is_video(state_manager.get_item('target_path')): - video_frame_total = count_video_frame_total(state_manager.get_item('target_path')) - return gradio.Slider(maximum = video_frame_total, visible = True) - return gradio.Slider(value = 0, visible = False) +def clear_and_update_preview_image(preview_mode : PreviewMode, preview_resolution : str, frame_number : int = 0) -> gradio.Image: + clear_static_faces() + return update_preview_image(preview_mode, preview_resolution, frame_number) -def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame: - target_vision_frame = restrict_frame(target_vision_frame, (1024, 1024)) - source_vision_frame = target_vision_frame.copy() +def process_preview_frame(reference_vision_frame : VisionFrame, source_vision_frames : List[VisionFrame], source_audio_frame : AudioFrame, source_voice_frame : AudioFrame, target_vision_frame : VisionFrame, preview_mode : PreviewMode, preview_resolution : str) -> VisionFrame: + target_vision_frame = restrict_frame(target_vision_frame, unpack_resolution(preview_resolution)) + temp_vision_frame = target_vision_frame.copy() + if analyse_frame(target_vision_frame): - return cv2.GaussianBlur(target_vision_frame, (99, 99), 0) + if preview_mode == 'frame-by-frame': + temp_vision_frame = obscure_frame(temp_vision_frame) + return numpy.hstack((temp_vision_frame, temp_vision_frame)) + + if preview_mode == 'face-by-face': + target_crop_vision_frame, output_crop_vision_frame = create_face_by_face(reference_vision_frame, target_vision_frame, temp_vision_frame) + target_crop_vision_frame = obscure_frame(target_crop_vision_frame) + output_crop_vision_frame = obscure_frame(output_crop_vision_frame) + return numpy.hstack((target_crop_vision_frame, output_crop_vision_frame)) + + temp_vision_frame = obscure_frame(temp_vision_frame) + return temp_vision_frame for processor_module in get_processors_modules(state_manager.get_item('processors')): logger.disable() if processor_module.pre_process('preview'): - target_vision_frame = 
processor_module.process_frame( + logger.enable() + temp_vision_frame = processor_module.process_frame( { - 'reference_faces': reference_faces, - 'source_face': source_face, + 'reference_vision_frame': reference_vision_frame, 'source_audio_frame': source_audio_frame, - 'source_vision_frame': source_vision_frame, - 'target_vision_frame': target_vision_frame + 'source_voice_frame': source_voice_frame, + 'source_vision_frames': source_vision_frames, + 'target_vision_frame': target_vision_frame, + 'temp_vision_frame': temp_vision_frame }) logger.enable() - return target_vision_frame + + if preview_mode == 'frame-by-frame': + return numpy.hstack((target_vision_frame, temp_vision_frame)) + + if preview_mode == 'face-by-face': + target_crop_vision_frame, output_crop_vision_frame = create_face_by_face(reference_vision_frame, target_vision_frame, temp_vision_frame) + return numpy.hstack((target_crop_vision_frame, output_crop_vision_frame)) + + return temp_vision_frame + + +def create_face_by_face(reference_vision_frame : VisionFrame, target_vision_frame : VisionFrame, temp_vision_frame : VisionFrame) -> Tuple[VisionFrame, VisionFrame]: + target_faces = select_faces(reference_vision_frame, target_vision_frame) + target_face = get_one_face(target_faces) + + if target_face: + target_crop_vision_frame = extract_crop_frame(target_vision_frame, target_face) + output_crop_vision_frame = extract_crop_frame(temp_vision_frame, target_face) + + if numpy.any(target_crop_vision_frame) and numpy.any(output_crop_vision_frame): + target_crop_dimension = min(target_crop_vision_frame.shape[:2]) + target_crop_vision_frame = fit_cover_frame(target_crop_vision_frame, (target_crop_dimension, target_crop_dimension)) + output_crop_vision_frame = fit_cover_frame(output_crop_vision_frame, (target_crop_dimension, target_crop_dimension)) + return target_crop_vision_frame, output_crop_vision_frame + + empty_vision_frame = numpy.zeros((512, 512, 3), dtype = numpy.uint8) + return empty_vision_frame, empty_vision_frame + + +def extract_crop_frame(vision_frame : VisionFrame, face : Face) -> Optional[VisionFrame]: + start_x, start_y, end_x, end_y = map(int, face.bounding_box) + padding_x = int((end_x - start_x) * 0.25) + padding_y = int((end_y - start_y) * 0.25) + start_x = max(0, start_x - padding_x) + start_y = max(0, start_y - padding_y) + end_x = max(0, end_x + padding_x) + end_y = max(0, end_y + padding_y) + crop_vision_frame = vision_frame[start_y:end_y, start_x:end_x] + return crop_vision_frame diff --git a/facefusion/uis/components/preview_options.py b/facefusion/uis/components/preview_options.py new file mode 100644 index 0000000..89f7067 --- /dev/null +++ b/facefusion/uis/components/preview_options.py @@ -0,0 +1,61 @@ +from typing import Optional + +import gradio + +from facefusion import state_manager, wording +from facefusion.filesystem import is_video +from facefusion.uis import choices as uis_choices +from facefusion.uis.core import get_ui_components, register_ui_component +from facefusion.uis.types import ComponentOptions +from facefusion.vision import count_video_frame_total + +PREVIEW_FRAME_SLIDER: Optional[gradio.Slider] = None +PREVIEW_MODE_DROPDOWN: Optional[gradio.Dropdown] = None +PREVIEW_RESOLUTION_DROPDOWN: Optional[gradio.Dropdown] = None + + +def render() -> None: + global PREVIEW_FRAME_SLIDER, PREVIEW_MODE_DROPDOWN, PREVIEW_RESOLUTION_DROPDOWN + + preview_frame_slider_options : ComponentOptions =\ + { + 'label': wording.get('uis.preview_frame_slider'), + 'step': 1, + 'minimum': 0, + 'maximum': 100, + 
'visible': False + } + if is_video(state_manager.get_item('target_path')): + preview_frame_slider_options['value'] = state_manager.get_item('reference_frame_number') + preview_frame_slider_options['maximum'] = count_video_frame_total(state_manager.get_item('target_path')) + preview_frame_slider_options['visible'] = True + PREVIEW_FRAME_SLIDER = gradio.Slider(**preview_frame_slider_options) + with gradio.Row(): + PREVIEW_MODE_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.preview_mode_dropdown'), + value = uis_choices.preview_modes[0], + choices = uis_choices.preview_modes, + visible = True + ) + PREVIEW_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.preview_resolution_dropdown'), + value = uis_choices.preview_resolutions[-1], + choices = uis_choices.preview_resolutions, + visible = True + ) + register_ui_component('preview_mode_dropdown', PREVIEW_MODE_DROPDOWN) + register_ui_component('preview_resolution_dropdown', PREVIEW_RESOLUTION_DROPDOWN) + register_ui_component('preview_frame_slider', PREVIEW_FRAME_SLIDER) + + +def listen() -> None: + for ui_component in get_ui_components([ 'target_image', 'target_video' ]): + for method in [ 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) + + +def update_preview_frame_slider() -> gradio.Slider: + if is_video(state_manager.get_item('target_path')): + video_frame_total = count_video_frame_total(state_manager.get_item('target_path')) + return gradio.Slider(maximum = video_frame_total, visible = True) + return gradio.Slider(value = 0, visible = False) diff --git a/facefusion/uis/components/target.py b/facefusion/uis/components/target.py index 79e0f38..4824b61 100644 --- a/facefusion/uis/components/target.py +++ b/facefusion/uis/components/target.py @@ -3,7 +3,7 @@ from typing import Optional, Tuple import gradio from facefusion import state_manager, wording -from facefusion.face_store import clear_reference_faces, clear_static_faces +from facefusion.face_store import clear_static_faces from facefusion.filesystem import is_image, is_video from facefusion.uis.core import register_ui_component from facefusion.uis.types import ComponentOptions, File @@ -51,7 +51,6 @@ def listen() -> None: def update(file : File) -> Tuple[gradio.Image, gradio.Video]: - clear_reference_faces() clear_static_faces() if file and is_image(file.name): diff --git a/facefusion/uis/components/voice_extractor.py b/facefusion/uis/components/voice_extractor.py new file mode 100644 index 0000000..e6b0c3f --- /dev/null +++ b/facefusion/uis/components/voice_extractor.py @@ -0,0 +1,50 @@ +from typing import Optional + +import gradio + +import facefusion.choices +from facefusion import state_manager, voice_extractor, wording +from facefusion.filesystem import is_video +from facefusion.types import VoiceExtractorModel +from facefusion.uis.core import get_ui_components, register_ui_component + +VOICE_EXTRACTOR_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None + + +def render() -> None: + global VOICE_EXTRACTOR_MODEL_DROPDOWN + + VOICE_EXTRACTOR_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.voice_extractor_model_dropdown'), + choices = facefusion.choices.voice_extractor_models, + value = state_manager.get_item('voice_extractor_model'), + visible = is_video(state_manager.get_item('target_path')) + ) + register_ui_component('voice_extractor_model_dropdown', VOICE_EXTRACTOR_MODEL_DROPDOWN) + + +def listen() -> None: + VOICE_EXTRACTOR_MODEL_DROPDOWN.change(update_voice_extractor_model, inputs 
= VOICE_EXTRACTOR_MODEL_DROPDOWN, outputs = VOICE_EXTRACTOR_MODEL_DROPDOWN) + + for ui_component in get_ui_components( + [ + 'target_image', + 'target_video' + ]): + for method in [ 'change', 'clear' ]: + getattr(ui_component, method)(remote_update, outputs = VOICE_EXTRACTOR_MODEL_DROPDOWN) + + +def remote_update() -> gradio.Dropdown: + if is_video(state_manager.get_item('target_path')): + return gradio.Dropdown(visible = True) + return gradio.Dropdown(visible = False) + + +def update_voice_extractor_model(voice_extractor_model : VoiceExtractorModel) -> gradio.Dropdown: + voice_extractor.clear_inference_pool() + state_manager.set_item('voice_extractor_model', voice_extractor_model) + + if voice_extractor.pre_check(): + return gradio.Dropdown(value = state_manager.get_item('voice_extractor_model')) + return gradio.Dropdown() diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index 5547919..a829d2e 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -1,62 +1,39 @@ -import os -import subprocess -from collections import deque -from concurrent.futures import ThreadPoolExecutor -from typing import Deque, Generator, List, Optional +from typing import Generator, List, Optional, Tuple import cv2 import gradio -from tqdm import tqdm -from facefusion import ffmpeg_builder, logger, state_manager, wording -from facefusion.audio import create_empty_audio_frame -from facefusion.common_helper import is_windows -from facefusion.content_analyser import analyse_stream -from facefusion.face_analyser import get_average_face, get_many_faces -from facefusion.ffmpeg import open_ffmpeg -from facefusion.filesystem import filter_image_paths, is_directory -from facefusion.processors.core import get_processors_modules -from facefusion.types import Face, Fps, StreamMode, VisionFrame, WebcamMode +from facefusion import state_manager, wording +from facefusion.camera_manager import clear_camera_pool, get_local_camera_capture +from facefusion.filesystem import has_image +from facefusion.streamer import multi_process_capture, open_stream +from facefusion.types import Fps, VisionFrame, WebcamMode from facefusion.uis.core import get_ui_component -from facefusion.vision import normalize_frame_color, read_static_images, unpack_resolution +from facefusion.uis.types import File +from facefusion.vision import unpack_resolution -WEBCAM_CAPTURE : Optional[cv2.VideoCapture] = None +SOURCE_FILE : Optional[gradio.File] = None WEBCAM_IMAGE : Optional[gradio.Image] = None WEBCAM_START_BUTTON : Optional[gradio.Button] = None WEBCAM_STOP_BUTTON : Optional[gradio.Button] = None -def get_webcam_capture(webcam_device_id : int) -> Optional[cv2.VideoCapture]: - global WEBCAM_CAPTURE - - if WEBCAM_CAPTURE is None: - cv2.setLogLevel(0) - if is_windows(): - webcam_capture = cv2.VideoCapture(webcam_device_id, cv2.CAP_DSHOW) - else: - webcam_capture = cv2.VideoCapture(webcam_device_id) - cv2.setLogLevel(3) - - if webcam_capture and webcam_capture.isOpened(): - WEBCAM_CAPTURE = webcam_capture - return WEBCAM_CAPTURE - - -def clear_webcam_capture() -> None: - global WEBCAM_CAPTURE - - if WEBCAM_CAPTURE and WEBCAM_CAPTURE.isOpened(): - WEBCAM_CAPTURE.release() - WEBCAM_CAPTURE = None - - def render() -> None: + global SOURCE_FILE global WEBCAM_IMAGE global WEBCAM_START_BUTTON global WEBCAM_STOP_BUTTON + has_source_image = has_image(state_manager.get_item('source_paths')) + SOURCE_FILE = gradio.File( + label = wording.get('uis.source_file'), + file_count = 'multiple', + value =
state_manager.get_item('source_paths') if has_source_image else None + ) WEBCAM_IMAGE = gradio.Image( - label = wording.get('uis.webcam_image') + label = wording.get('uis.webcam_image'), + format = 'jpeg', + visible = False ) WEBCAM_START_BUTTON = gradio.Button( value = wording.get('uis.start_button'), @@ -65,143 +42,74 @@ def render() -> None: ) WEBCAM_STOP_BUTTON = gradio.Button( value = wording.get('uis.stop_button'), - size = 'sm' + size = 'sm', + visible = False ) def listen() -> None: + SOURCE_FILE.change(update_source, inputs = SOURCE_FILE, outputs = SOURCE_FILE) webcam_device_id_dropdown = get_ui_component('webcam_device_id_dropdown') webcam_mode_radio = get_ui_component('webcam_mode_radio') webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown') webcam_fps_slider = get_ui_component('webcam_fps_slider') - source_image = get_ui_component('source_image') if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: + WEBCAM_START_BUTTON.click(pre_start, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ]) start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) + start_event.then(pre_stop) WEBCAM_STOP_BUTTON.click(stop, cancels = start_event, outputs = WEBCAM_IMAGE) + WEBCAM_STOP_BUTTON.click(pre_stop, outputs = [ SOURCE_FILE, WEBCAM_IMAGE, WEBCAM_START_BUTTON, WEBCAM_STOP_BUTTON ]) - if source_image: - source_image.change(stop, cancels = start_event, outputs = WEBCAM_IMAGE) + +def update_source(files : List[File]) -> gradio.File: + file_names = [ file.name for file in files ] if files else None + has_source_image = has_image(file_names) + + if has_source_image: + state_manager.set_item('source_paths', file_names) + return gradio.File(value = file_names) + + state_manager.clear_item('source_paths') + return gradio.File(value = None) + + +def pre_start() -> Tuple[gradio.File, gradio.Image, gradio.Button, gradio.Button]: + return gradio.File(visible = False), gradio.Image(visible = True), gradio.Button(visible = False), gradio.Button(visible = True) + + +def pre_stop() -> Tuple[gradio.File, gradio.Image, gradio.Button, gradio.Button]: + return gradio.File(visible = True), gradio.Image(visible = False), gradio.Button(visible = True), gradio.Button(visible = False) def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: - state_manager.set_item('face_selector_mode', 'one') - source_image_paths = filter_image_paths(state_manager.get_item('source_paths')) - source_frames = read_static_images(source_image_paths) - source_faces = get_many_faces(source_frames) - source_face = get_average_face(source_faces) + state_manager.init_item('face_selector_mode', 'one') + state_manager.sync_state() + + camera_capture = get_local_camera_capture(webcam_device_id) stream = None - webcam_capture = None if webcam_mode in [ 'udp', 'v4l2' ]: - stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) #type:ignore[arg-type] + stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) # type:ignore[arg-type] webcam_width, webcam_height = unpack_resolution(webcam_resolution) - if isinstance(webcam_device_id, int): - webcam_capture = get_webcam_capture(webcam_device_id) + if camera_capture and camera_capture.isOpened(): + camera_capture.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width) + 
camera_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height) + camera_capture.set(cv2.CAP_PROP_FPS, webcam_fps) - if webcam_capture and webcam_capture.isOpened(): - webcam_capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) #type:ignore[attr-defined] - webcam_capture.set(cv2.CAP_PROP_FRAME_WIDTH, webcam_width) - webcam_capture.set(cv2.CAP_PROP_FRAME_HEIGHT, webcam_height) - webcam_capture.set(cv2.CAP_PROP_FPS, webcam_fps) + for capture_frame in multi_process_capture(camera_capture, webcam_fps): + capture_frame = cv2.cvtColor(capture_frame, cv2.COLOR_BGR2RGB) - for capture_frame in multi_process_capture(source_face, webcam_capture, webcam_fps): - capture_frame = normalize_frame_color(capture_frame) if webcam_mode == 'inline': yield capture_frame else: try: stream.stdin.write(capture_frame.tobytes()) except Exception: - clear_webcam_capture() - yield None - - -def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: - deque_capture_frames: Deque[VisionFrame] = deque() - - with tqdm(desc = wording.get('streaming'), unit = 'frame', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress: - with ThreadPoolExecutor(max_workers = state_manager.get_item('execution_thread_count')) as executor: - futures = [] - - while webcam_capture and webcam_capture.isOpened(): - _, capture_frame = webcam_capture.read() - if analyse_stream(capture_frame, webcam_fps): - yield None - future = executor.submit(process_stream_frame, source_face, capture_frame) - futures.append(future) - - for future_done in [ future for future in futures if future.done() ]: - capture_frame = future_done.result() - deque_capture_frames.append(capture_frame) - futures.remove(future_done) - - while deque_capture_frames: - progress.update() - yield deque_capture_frames.popleft() + pass def stop() -> gradio.Image: - clear_webcam_capture() + clear_camera_pool() return gradio.Image(value = None) - - -def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) -> VisionFrame: - source_audio_frame = create_empty_audio_frame() - - for processor_module in get_processors_modules(state_manager.get_item('processors')): - logger.disable() - if processor_module.pre_process('stream'): - target_vision_frame = processor_module.process_frame( - { - 'source_face': source_face, - 'source_audio_frame': source_audio_frame, - 'target_vision_frame': target_vision_frame - }) - logger.enable() - return target_vision_frame - - -def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: - commands = ffmpeg_builder.chain( - ffmpeg_builder.capture_video(), - ffmpeg_builder.set_media_resolution(stream_resolution), - ffmpeg_builder.set_input_fps(stream_fps) - ) - - if stream_mode == 'udp': - commands.extend(ffmpeg_builder.set_input('-')) - commands.extend(ffmpeg_builder.set_stream_mode('udp')) - commands.extend(ffmpeg_builder.set_stream_quality(2000)) - commands.extend(ffmpeg_builder.set_output('udp://localhost:27000?pkt_size=1316')) - - if stream_mode == 'v4l2': - device_directory_path = '/sys/devices/virtual/video4linux' - commands.extend(ffmpeg_builder.set_input('-')) - commands.extend(ffmpeg_builder.set_stream_mode('v4l2')) - - if is_directory(device_directory_path): - device_names = os.listdir(device_directory_path) - - for device_name in device_names: - device_path = '/dev/' + device_name - commands.extend(ffmpeg_builder.set_output(device_path)) - - else: - 
logger.error(wording.get('stream_not_loaded').format(stream_mode = stream_mode), __name__) - - return open_ffmpeg(commands) - - -def get_available_webcam_ids(webcam_id_start : int, webcam_id_end : int) -> List[int]: - available_webcam_ids = [] - - for index in range(webcam_id_start, webcam_id_end): - webcam_capture = get_webcam_capture(index) - - if webcam_capture and webcam_capture.isOpened(): - available_webcam_ids.append(index) - clear_webcam_capture() - - return available_webcam_ids diff --git a/facefusion/uis/components/webcam_options.py b/facefusion/uis/components/webcam_options.py index b7971c2..03d3078 100644 --- a/facefusion/uis/components/webcam_options.py +++ b/facefusion/uis/components/webcam_options.py @@ -2,10 +2,10 @@ from typing import Optional import gradio -import facefusion.choices from facefusion import wording +from facefusion.camera_manager import detect_local_camera_ids from facefusion.common_helper import get_first -from facefusion.uis.components.webcam import get_available_webcam_ids +from facefusion.uis import choices as uis_choices from facefusion.uis.core import register_ui_component WEBCAM_DEVICE_ID_DROPDOWN : Optional[gradio.Dropdown] = None @@ -20,28 +20,28 @@ def render() -> None: global WEBCAM_RESOLUTION_DROPDOWN global WEBCAM_FPS_SLIDER - available_webcam_ids = get_available_webcam_ids(0, 10) or [ 'none' ] #type:ignore[list-item] + local_camera_ids = detect_local_camera_ids(0, 10) or [ 'none' ] #type:ignore[list-item] WEBCAM_DEVICE_ID_DROPDOWN = gradio.Dropdown( - value = get_first(available_webcam_ids), + value = get_first(local_camera_ids), label = wording.get('uis.webcam_device_id_dropdown'), - choices = available_webcam_ids + choices = local_camera_ids ) WEBCAM_MODE_RADIO = gradio.Radio( label = wording.get('uis.webcam_mode_radio'), - choices = facefusion.choices.webcam_modes, - value = 'inline' + choices = uis_choices.webcam_modes, + value = uis_choices.webcam_modes[0] ) WEBCAM_RESOLUTION_DROPDOWN = gradio.Dropdown( label = wording.get('uis.webcam_resolution_dropdown'), - choices = facefusion.choices.webcam_resolutions, - value = facefusion.choices.webcam_resolutions[0] + choices = uis_choices.webcam_resolutions, + value = uis_choices.webcam_resolutions[0] ) WEBCAM_FPS_SLIDER = gradio.Slider( label = wording.get('uis.webcam_fps_slider'), - value = 25, + value = 30, step = 1, minimum = 1, - maximum = 60 + maximum = 30 ) register_ui_component('webcam_device_id_dropdown', WEBCAM_DEVICE_ID_DROPDOWN) register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO) diff --git a/facefusion/uis/layouts/benchmark.py b/facefusion/uis/layouts/benchmark.py index b6d6686..5e31416 100644 --- a/facefusion/uis/layouts/benchmark.py +++ b/facefusion/uis/layouts/benchmark.py @@ -1,12 +1,11 @@ import gradio -from facefusion import state_manager -from facefusion.benchmarker import pre_check as benchmarker_pre_check -from facefusion.uis.components import about, age_modifier_options, benchmark, benchmark_options, deep_swapper_options, download, execution, execution_queue_count, execution_thread_count, expression_restorer_options, face_debugger_options, face_editor_options, face_enhancer_options, face_swapper_options, frame_colorizer_options, frame_enhancer_options, lip_syncer_options, memory, processors +from facefusion import benchmarker, state_manager +from facefusion.uis.components import about, age_modifier_options, benchmark, benchmark_options, deep_swapper_options, download, execution, execution_thread_count, expression_restorer_options, face_debugger_options, 
face_editor_options, face_enhancer_options, face_swapper_options, frame_colorizer_options, frame_enhancer_options, lip_syncer_options, memory, processors def pre_check() -> bool: - return benchmarker_pre_check() + return benchmarker.pre_check() def render() -> gradio.Blocks: @@ -15,6 +14,8 @@ def render() -> gradio.Blocks: with gradio.Column(scale = 4): with gradio.Blocks(): about.render() + with gradio.Blocks(): + benchmark_options.render() with gradio.Blocks(): processors.render() with gradio.Blocks(): @@ -40,14 +41,11 @@ def render() -> gradio.Blocks: with gradio.Blocks(): execution.render() execution_thread_count.render() - execution_queue_count.render() with gradio.Blocks(): download.render() with gradio.Blocks(): state_manager.set_item('video_memory_strategy', 'tolerant') memory.render() - with gradio.Blocks(): - benchmark_options.render() with gradio.Column(scale = 11): with gradio.Blocks(): benchmark.render() @@ -69,9 +67,9 @@ def listen() -> None: lip_syncer_options.listen() execution.listen() execution_thread_count.listen() - execution_queue_count.listen() memory.listen() benchmark.listen() + benchmark_options.listen() def run(ui : gradio.Blocks) -> None: diff --git a/facefusion/uis/layouts/default.py b/facefusion/uis/layouts/default.py index 96553f8..ccf5b2d 100755 --- a/facefusion/uis/layouts/default.py +++ b/facefusion/uis/layouts/default.py @@ -1,7 +1,7 @@ import gradio from facefusion import state_manager -from facefusion.uis.components import about, age_modifier_options, common_options, deep_swapper_options, download, execution, execution_queue_count, execution_thread_count, expression_restorer_options, face_debugger_options, face_detector, face_editor_options, face_enhancer_options, face_landmarker, face_masker, face_selector, face_swapper_options, frame_colorizer_options, frame_enhancer_options, instant_runner, job_manager, job_runner, lip_syncer_options, memory, output, output_options, preview, processors, source, target, temp_frame, terminal, trim_frame, ui_workflow +from facefusion.uis.components import about, age_modifier_options, common_options, deep_swapper_options, download, execution, execution_thread_count, expression_restorer_options, face_debugger_options, face_detector, face_editor_options, face_enhancer_options, face_landmarker, face_masker, face_selector, face_swapper_options, frame_colorizer_options, frame_enhancer_options, instant_runner, job_manager, job_runner, lip_syncer_options, memory, output, output_options, preview, preview_options, processors, source, target, temp_frame, terminal, trim_frame, ui_workflow, voice_extractor def pre_check() -> bool: @@ -36,10 +36,11 @@ def render() -> gradio.Blocks: frame_enhancer_options.render() with gradio.Blocks(): lip_syncer_options.render() + with gradio.Blocks(): + voice_extractor.render() with gradio.Blocks(): execution.render() execution_thread_count.render() - execution_queue_count.render() with gradio.Blocks(): download.render() with gradio.Blocks(): @@ -65,6 +66,7 @@ def render() -> gradio.Blocks: with gradio.Column(scale = 7): with gradio.Blocks(): preview.render() + preview_options.render() with gradio.Blocks(): trim_frame.render() with gradio.Blocks(): @@ -94,7 +96,6 @@ def listen() -> None: lip_syncer_options.listen() execution.listen() execution_thread_count.listen() - execution_queue_count.listen() download.listen() memory.listen() temp_frame.listen() @@ -107,11 +108,13 @@ def listen() -> None: job_manager.listen() terminal.listen() preview.listen() + preview_options.listen() trim_frame.listen() 
face_selector.listen() face_masker.listen() face_detector.listen() face_landmarker.listen() + voice_extractor.listen() common_options.listen() diff --git a/facefusion/uis/layouts/webcam.py b/facefusion/uis/layouts/webcam.py index 7b1fcd7..2181a45 100644 --- a/facefusion/uis/layouts/webcam.py +++ b/facefusion/uis/layouts/webcam.py @@ -1,7 +1,7 @@ import gradio from facefusion import state_manager -from facefusion.uis.components import about, age_modifier_options, deep_swapper_options, download, execution, execution_thread_count, expression_restorer_options, face_debugger_options, face_editor_options, face_enhancer_options, face_swapper_options, frame_colorizer_options, frame_enhancer_options, lip_syncer_options, processors, source, webcam, webcam_options +from facefusion.uis.components import about, age_modifier_options, deep_swapper_options, download, execution, execution_thread_count, expression_restorer_options, face_debugger_options, face_editor_options, face_enhancer_options, face_swapper_options, frame_colorizer_options, frame_enhancer_options, lip_syncer_options, processors, webcam, webcam_options def pre_check() -> bool: @@ -14,6 +14,8 @@ def render() -> gradio.Blocks: with gradio.Column(scale = 4): with gradio.Blocks(): about.render() + with gradio.Blocks(): + webcam_options.render() with gradio.Blocks(): processors.render() with gradio.Blocks(): @@ -41,10 +43,6 @@ def render() -> gradio.Blocks: execution_thread_count.render() with gradio.Blocks(): download.render() - with gradio.Blocks(): - webcam_options.render() - with gradio.Blocks(): - source.render() with gradio.Column(scale = 11): with gradio.Blocks(): webcam.render() @@ -66,7 +64,6 @@ def listen() -> None: lip_syncer_options.listen() execution.listen() execution_thread_count.listen() - source.listen() webcam.listen() diff --git a/facefusion/uis/types.py b/facefusion/uis/types.py index 159f389..e2d4a20 100644 --- a/facefusion/uis/types.py +++ b/facefusion/uis/types.py @@ -5,12 +5,11 @@ ComponentName = Literal\ [ 'age_modifier_direction_slider', 'age_modifier_model_dropdown', - 'benchmark_cycle_count_slider', - 'benchmark_resolutions_checkbox_group', 'deep_swapper_model_dropdown', 'deep_swapper_morph_slider', 'expression_restorer_factor_slider', 'expression_restorer_model_dropdown', + 'expression_restorer_areas_checkbox_group', 'face_debugger_items_checkbox_group', 'face_detector_angles_checkbox_group', 'face_detector_model_dropdown', @@ -51,8 +50,10 @@ ComponentName = Literal\ 'face_selector_race_dropdown', 'face_swapper_model_dropdown', 'face_swapper_pixel_boost_dropdown', + 'face_swapper_weight_slider', 'face_occluder_model_dropdown', 'face_parser_model_dropdown', + 'voice_extractor_model_dropdown', 'frame_colorizer_blend_slider', 'frame_colorizer_model_dropdown', 'frame_colorizer_size_dropdown', @@ -64,7 +65,10 @@ ComponentName = Literal\ 'output_image', 'output_video', 'output_video_fps_slider', + 'preview_image', 'preview_frame_slider', + 'preview_mode_dropdown', + 'preview_resolution_dropdown', 'processors_checkbox_group', 'reference_face_distance_slider', 'reference_face_position_gallery', @@ -83,3 +87,5 @@ ComponentOptions : TypeAlias = Dict[str, Any] JobManagerAction = Literal['job-create', 'job-submit', 'job-delete', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step'] JobRunnerAction = Literal['job-run', 'job-run-all', 'job-retry', 'job-retry-all'] + +PreviewMode = Literal[ 'default', 'frame-by-frame', 'face-by-face' ] diff --git a/facefusion/video_manager.py b/facefusion/video_manager.py index 
d687caf..8923afb 100644 --- a/facefusion/video_manager.py +++ b/facefusion/video_manager.py @@ -2,18 +2,39 @@ import cv2 from facefusion.types import VideoPoolSet -VIDEO_POOL_SET : VideoPoolSet = {} +VIDEO_POOL_SET : VideoPoolSet =\ +{ + 'capture': {}, + 'writer': {} +} def get_video_capture(video_path : str) -> cv2.VideoCapture: - if video_path not in VIDEO_POOL_SET: - VIDEO_POOL_SET[video_path] = cv2.VideoCapture(video_path) + if video_path not in VIDEO_POOL_SET.get('capture'): + video_capture = cv2.VideoCapture(video_path) - return VIDEO_POOL_SET.get(video_path) + if video_capture.isOpened(): + VIDEO_POOL_SET['capture'][video_path] = video_capture + + return VIDEO_POOL_SET.get('capture').get(video_path) + + +def get_video_writer(video_path : str) -> cv2.VideoWriter: + if video_path not in VIDEO_POOL_SET.get('writer'): + video_writer = cv2.VideoWriter() + + if video_writer.isOpened(): + VIDEO_POOL_SET['writer'][video_path] = video_writer + + return VIDEO_POOL_SET.get('writer').get(video_path) def clear_video_pool() -> None: - for video_capture in VIDEO_POOL_SET.values(): + for video_capture in VIDEO_POOL_SET.get('capture').values(): video_capture.release() - VIDEO_POOL_SET.clear() + for video_writer in VIDEO_POOL_SET.get('writer').values(): + video_writer.release() + + VIDEO_POOL_SET['capture'].clear() + VIDEO_POOL_SET['writer'].clear() diff --git a/facefusion/vision.py b/facefusion/vision.py index b146170..3166248 100644 --- a/facefusion/vision.py +++ b/facefusion/vision.py @@ -6,26 +6,25 @@ import cv2 import numpy from cv2.typing import Size -import facefusion.choices from facefusion.common_helper import is_windows from facefusion.filesystem import get_file_extension, is_image, is_video from facefusion.thread_helper import thread_semaphore -from facefusion.types import Duration, Fps, Orientation, Resolution, VisionFrame +from facefusion.types import Duration, Fps, Orientation, Resolution, Scale, VisionFrame from facefusion.video_manager import get_video_capture -@lru_cache() -def read_static_image(image_path : str) -> Optional[VisionFrame]: - return read_image(image_path) - - def read_static_images(image_paths : List[str]) -> List[VisionFrame]: - frames = [] + vision_frames = [] if image_paths: for image_path in image_paths: - frames.append(read_static_image(image_path)) - return frames + vision_frames.append(read_static_image(image_path)) + return vision_frames + + +@lru_cache(maxsize = 1024) +def read_static_image(image_path : str) -> Optional[VisionFrame]: + return read_image(image_path) def read_image(image_path : str) -> Optional[VisionFrame]: @@ -66,19 +65,9 @@ def restrict_image_resolution(image_path : str, resolution : Resolution) -> Reso return resolution -def create_image_resolutions(resolution : Resolution) -> List[str]: - resolutions = [] - temp_resolutions = [] - - if resolution: - width, height = resolution - temp_resolutions.append(normalize_resolution(resolution)) - for image_template_size in facefusion.choices.image_template_sizes: - temp_resolutions.append(normalize_resolution((width * image_template_size, height * image_template_size))) - temp_resolutions = sorted(set(temp_resolutions)) - for temp_resolution in temp_resolutions: - resolutions.append(pack_resolution(temp_resolution)) - return resolutions +@lru_cache(maxsize = 1024) +def read_static_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: + return read_video_frame(video_path, frame_number) def read_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: @@ 
-192,22 +181,10 @@ def restrict_video_resolution(video_path : str, resolution : Resolution) -> Reso return resolution -def create_video_resolutions(resolution : Resolution) -> List[str]: - resolutions = [] - temp_resolutions = [] - - if resolution: - width, height = resolution - temp_resolutions.append(normalize_resolution(resolution)) - for video_template_size in facefusion.choices.video_template_sizes: - if width > height: - temp_resolutions.append(normalize_resolution((video_template_size * width / height, video_template_size))) - else: - temp_resolutions.append(normalize_resolution((video_template_size, video_template_size * height / width))) - temp_resolutions = sorted(set(temp_resolutions)) - for temp_resolution in temp_resolutions: - resolutions.append(pack_resolution(temp_resolution)) - return resolutions +def scale_resolution(resolution : Resolution, scale : Scale) -> Resolution: + resolution = (int(resolution[0] * scale), int(resolution[1] * scale)) + resolution = normalize_resolution(resolution) + return resolution def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: @@ -250,26 +227,48 @@ def restrict_frame(vision_frame : VisionFrame, resolution : Resolution) -> Visio return vision_frame -def fit_frame(vision_frame : VisionFrame, resolution: Resolution) -> VisionFrame: - fit_width, fit_height = resolution +def fit_contain_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame: + contain_width, contain_height = resolution height, width = vision_frame.shape[:2] - scale = min(fit_height / height, fit_width / width) + scale = min(contain_height / height, contain_width / width) new_width = int(width * scale) new_height = int(height * scale) - paste_vision_frame = cv2.resize(vision_frame, (new_width, new_height)) - x_pad = (fit_width - new_width) // 2 - y_pad = (fit_height - new_height) // 2 - temp_vision_frame = numpy.pad(paste_vision_frame, ((y_pad, fit_height - new_height - y_pad), (x_pad, fit_width - new_width - x_pad), (0, 0))) + start_x = max(0, (contain_width - new_width) // 2) + start_y = max(0, (contain_height - new_height) // 2) + end_x = max(0, contain_width - new_width - start_x) + end_y = max(0, contain_height - new_height - start_y) + temp_vision_frame = cv2.resize(vision_frame, (new_width, new_height)) + temp_vision_frame = numpy.pad(temp_vision_frame, ((start_y, end_y), (start_x, end_x), (0, 0))) return temp_vision_frame -def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame: - return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB) +def fit_cover_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame: + cover_width, cover_height = resolution + height, width = vision_frame.shape[:2] + scale = max(cover_width / width, cover_height / height) + new_width = int(width * scale) + new_height = int(height * scale) + start_x = max(0, (new_width - cover_width) // 2) + start_y = max(0, (new_height - cover_height) // 2) + end_x = min(new_width, start_x + cover_width) + end_y = min(new_height, start_y + cover_height) + temp_vision_frame = cv2.resize(vision_frame, (new_width, new_height)) + temp_vision_frame = temp_vision_frame[start_y:end_y, start_x:end_x] + return temp_vision_frame + + +def obscure_frame(vision_frame : VisionFrame) -> VisionFrame: + return cv2.GaussianBlur(vision_frame, (99, 99), 0) + + +def blend_frame(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame, blend_factor : float) -> VisionFrame: + blend_vision_frame = cv2.addWeighted(source_vision_frame, 1 - blend_factor, 
target_vision_frame, blend_factor, 0) + return blend_vision_frame def conditional_match_frame_color(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> VisionFrame: - histogram_factor = calc_histogram_difference(source_vision_frame, target_vision_frame) - target_vision_frame = blend_vision_frames(target_vision_frame, match_frame_color(source_vision_frame, target_vision_frame), histogram_factor) + histogram_factor = calculate_histogram_difference(source_vision_frame, target_vision_frame) + target_vision_frame = blend_frame(target_vision_frame, match_frame_color(source_vision_frame, target_vision_frame), histogram_factor) return target_vision_frame @@ -291,7 +290,7 @@ def equalize_frame_color(source_vision_frame : VisionFrame, target_vision_frame return target_vision_frame -def calc_histogram_difference(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> float: +def calculate_histogram_difference(source_vision_frame : VisionFrame, target_vision_frame : VisionFrame) -> float: histogram_source = cv2.calcHist([cv2.cvtColor(source_vision_frame, cv2.COLOR_BGR2HSV)], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ]) histogram_target = cv2.calcHist([cv2.cvtColor(target_vision_frame, cv2.COLOR_BGR2HSV)], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ]) histogram_difference = float(numpy.interp(cv2.compareHist(histogram_source, histogram_target, cv2.HISTCMP_CORREL), [ -1, 1 ], [ 0, 1 ])) @@ -304,11 +303,11 @@ def blend_vision_frames(source_vision_frame : VisionFrame, target_vision_frame : def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]: - vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0))) tile_width = size[0] - 2 * size[2] - pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width - pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width - pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0))) + pad_size_top = size[1] + size[2] + pad_size_bottom = pad_size_top + tile_width - (vision_frame.shape[0] + 2 * size[1]) % tile_width + pad_size_right = pad_size_top + tile_width - (vision_frame.shape[1] + 2 * size[1]) % tile_width + pad_vision_frame = numpy.pad(vision_frame, ((pad_size_top, pad_size_bottom), (pad_size_top, pad_size_right), (0, 0))) pad_height, pad_width = pad_vision_frame.shape[:2] row_range = range(size[2], pad_height - size[2], tile_width) col_range = range(size[2], pad_width - size[2], tile_width) diff --git a/facefusion/voice_extractor.py b/facefusion/voice_extractor.py index 6fca54a..1ecbff9 100644 --- a/facefusion/voice_extractor.py +++ b/facefusion/voice_extractor.py @@ -4,17 +4,36 @@ from typing import Tuple import numpy import scipy -from facefusion import inference_manager +from facefusion import inference_manager, state_manager from facefusion.download import conditional_download_hashes, conditional_download_sources, resolve_download_url from facefusion.filesystem import resolve_relative_path from facefusion.thread_helper import thread_semaphore -from facefusion.types import Audio, AudioChunk, DownloadScope, InferencePool, ModelOptions, ModelSet +from facefusion.types import Audio, AudioChunk, DownloadScope, DownloadSet, InferencePool, ModelSet, Voice, VoiceChunk -@lru_cache(maxsize = None) +@lru_cache() def create_static_model_set(download_scope : DownloadScope) -> ModelSet: return\ { + 'kim_vocal_1': + { + 'hashes': + { + 'voice_extractor': + { + 'url': 
resolve_download_url('models-3.4.0', 'kim_vocal_1.hash'), + 'path': resolve_relative_path('../.assets/models/kim_vocal_1.hash') + } + }, + 'sources': + { + 'voice_extractor': + { + 'url': resolve_download_url('models-3.4.0', 'kim_vocal_1.onnx'), + 'path': resolve_relative_path('../.assets/models/kim_vocal_1.onnx') + } + } + }, 'kim_vocal_2': { 'hashes': @@ -33,60 +52,87 @@ def create_static_model_set(download_scope : DownloadScope) -> ModelSet: 'path': resolve_relative_path('../.assets/models/kim_vocal_2.onnx') } } + }, + 'uvr_mdxnet': + { + 'hashes': + { + 'voice_extractor': + { + 'url': resolve_download_url('models-3.4.0', 'uvr_mdxnet.hash'), + 'path': resolve_relative_path('../.assets/models/uvr_mdxnet.hash') + } + }, + 'sources': + { + 'voice_extractor': + { + 'url': resolve_download_url('models-3.4.0', 'uvr_mdxnet.onnx'), + 'path': resolve_relative_path('../.assets/models/uvr_mdxnet.onnx') + } + } } } def get_inference_pool() -> InferencePool: - model_names = [ 'kim_vocal_2' ] - model_source_set = get_model_options().get('sources') + model_names = [ state_manager.get_item('voice_extractor_model') ] + _, model_source_set = collect_model_downloads() return inference_manager.get_inference_pool(__name__, model_names, model_source_set) def clear_inference_pool() -> None: - model_names = [ 'kim_vocal_2' ] + model_names = [ state_manager.get_item('voice_extractor_model') ] inference_manager.clear_inference_pool(__name__, model_names) -def get_model_options() -> ModelOptions: - return create_static_model_set('full').get('kim_vocal_2') +def collect_model_downloads() -> Tuple[DownloadSet, DownloadSet]: + model_set = create_static_model_set('full') + model_hash_set = {} + model_source_set = {} + + for voice_extractor_model in [ 'kim_vocal_1', 'kim_vocal_2', 'uvr_mdxnet' ]: + if state_manager.get_item('voice_extractor_model') == voice_extractor_model: + model_hash_set[voice_extractor_model] = model_set.get(voice_extractor_model).get('hashes').get('voice_extractor') + model_source_set[voice_extractor_model] = model_set.get(voice_extractor_model).get('sources').get('voice_extractor') + + return model_hash_set, model_source_set def pre_check() -> bool: - model_hash_set = get_model_options().get('hashes') - model_source_set = get_model_options().get('sources') + model_hash_set, model_source_set = collect_model_downloads() return conditional_download_hashes(model_hash_set) and conditional_download_sources(model_source_set) -def batch_extract_voice(audio : Audio, chunk_size : int, step_size : int) -> Audio: - temp_audio = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) - temp_chunk = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) +def batch_extract_voice(audio : Audio, chunk_size : int, step_size : int) -> Voice: + temp_voice = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + temp_voice_chunk = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) for start in range(0, audio.shape[0], step_size): end = min(start + chunk_size, audio.shape[0]) - temp_audio[start:end, ...] += extract_voice(audio[start:end, ...]) - temp_chunk[start:end, ...] += 1 + temp_voice[start:end, ...] += extract_voice(audio[start:end, ...]) + temp_voice_chunk[start:end, ...] 
+= 1 - audio = temp_audio / temp_chunk - return audio + voice = temp_voice / temp_voice_chunk + return voice -def extract_voice(temp_audio_chunk : AudioChunk) -> AudioChunk: - voice_extractor = get_inference_pool().get('voice_extractor') - chunk_size = (voice_extractor.get_inputs()[0].shape[3] - 1) * 1024 - trim_size = 3840 - temp_audio_chunk, pad_size = prepare_audio_chunk(temp_audio_chunk.T, chunk_size, trim_size) - temp_audio_chunk = decompose_audio_chunk(temp_audio_chunk, trim_size) +def extract_voice(temp_audio_chunk : AudioChunk) -> VoiceChunk: + voice_extractor = get_inference_pool().get(state_manager.get_item('voice_extractor_model')) + voice_trim_size = 3840 + voice_chunk_size = (voice_extractor.get_inputs()[0].shape[3] - 1) * 1024 + temp_audio_chunk, audio_pad_size = prepare_audio_chunk(temp_audio_chunk.T, voice_chunk_size, voice_trim_size) + temp_audio_chunk = decompose_audio_chunk(temp_audio_chunk, voice_trim_size) temp_audio_chunk = forward(temp_audio_chunk) - temp_audio_chunk = compose_audio_chunk(temp_audio_chunk, trim_size) - temp_audio_chunk = normalize_audio_chunk(temp_audio_chunk, chunk_size, trim_size, pad_size) + temp_audio_chunk = compose_audio_chunk(temp_audio_chunk, voice_trim_size) + temp_audio_chunk = normalize_audio_chunk(temp_audio_chunk, voice_chunk_size, voice_trim_size, audio_pad_size) return temp_audio_chunk def forward(temp_audio_chunk : AudioChunk) -> AudioChunk: - voice_extractor = get_inference_pool().get('voice_extractor') + voice_extractor = get_inference_pool().get(state_manager.get_item('voice_extractor_model')) with thread_semaphore(): temp_audio_chunk = voice_extractor.run(None, @@ -97,53 +143,53 @@ def forward(temp_audio_chunk : AudioChunk) -> AudioChunk: return temp_audio_chunk -def prepare_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int) -> Tuple[AudioChunk, int]: - step_size = chunk_size - 2 * trim_size - pad_size = step_size - temp_audio_chunk.shape[1] % step_size - audio_chunk_size = temp_audio_chunk.shape[1] + pad_size +def prepare_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, audio_trim_size : int) -> Tuple[AudioChunk, int]: + audio_step_size = chunk_size - 2 * audio_trim_size + audio_pad_size = audio_step_size - temp_audio_chunk.shape[1] % audio_step_size + audio_chunk_size = temp_audio_chunk.shape[1] + audio_pad_size temp_audio_chunk = temp_audio_chunk.astype(numpy.float32) / numpy.iinfo(numpy.int16).max - temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (trim_size, trim_size + pad_size))) + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (audio_trim_size, audio_trim_size + audio_pad_size))) temp_audio_chunks = [] - for index in range(0, audio_chunk_size, step_size): + for index in range(0, audio_chunk_size, audio_step_size): temp_audio_chunks.append(temp_audio_chunk[:, index:index + chunk_size]) temp_audio_chunk = numpy.concatenate(temp_audio_chunks, axis = 0) temp_audio_chunk = temp_audio_chunk.reshape((-1, chunk_size)) - return temp_audio_chunk, pad_size + return temp_audio_chunk, audio_pad_size -def decompose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: - frame_size = 7680 - frame_overlap = 6656 - frame_total = 3072 - bin_total = 256 - channel_total = 4 - window = scipy.signal.windows.hann(frame_size) - temp_audio_chunk = scipy.signal.stft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[2] +def decompose_audio_chunk(temp_audio_chunk : AudioChunk, audio_trim_size : int) -> AudioChunk: + audio_frame_size = 7680 + 
audio_frame_overlap = 6656 + audio_frame_total = 3072 + audio_bin_total = 256 + audio_channel_total = 4 + window = scipy.signal.windows.hann(audio_frame_size) + temp_audio_chunk = scipy.signal.stft(temp_audio_chunk, nperseg = audio_frame_size, noverlap = audio_frame_overlap, window = window)[2] temp_audio_chunk = numpy.stack((numpy.real(temp_audio_chunk), numpy.imag(temp_audio_chunk)), axis = -1).transpose((0, 3, 1, 2)) - temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, 2, trim_size + 1, bin_total).reshape(-1, channel_total, trim_size + 1, bin_total) - temp_audio_chunk = temp_audio_chunk[:, :, :frame_total] + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, 2, audio_trim_size + 1, audio_bin_total).reshape(-1, audio_channel_total, audio_trim_size + 1, audio_bin_total) + temp_audio_chunk = temp_audio_chunk[:, :, :audio_frame_total] temp_audio_chunk /= numpy.sqrt(1.0 / window.sum() ** 2) return temp_audio_chunk -def compose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: - frame_size = 7680 - frame_overlap = 6656 - frame_total = 3072 - bin_total = 256 - window = scipy.signal.windows.hann(frame_size) - temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (0, 0), (0, trim_size + 1 - frame_total), (0, 0))) - temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, trim_size + 1, bin_total).transpose((0, 2, 3, 1)) +def compose_audio_chunk(temp_audio_chunk : AudioChunk, audio_trim_size : int) -> AudioChunk: + audio_frame_size = 7680 + audio_frame_overlap = 6656 + audio_frame_total = 3072 + audio_bin_total = 256 + window = scipy.signal.windows.hann(audio_frame_size) + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (0, 0), (0, audio_trim_size + 1 - audio_frame_total), (0, 0))) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, audio_trim_size + 1, audio_bin_total).transpose((0, 2, 3, 1)) temp_audio_chunk = temp_audio_chunk[:, :, :, 0] + 1j * temp_audio_chunk[:, :, :, 1] - temp_audio_chunk = scipy.signal.istft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[1] + temp_audio_chunk = scipy.signal.istft(temp_audio_chunk, nperseg = audio_frame_size, noverlap = audio_frame_overlap, window = window)[1] temp_audio_chunk *= numpy.sqrt(1.0 / window.sum() ** 2) return temp_audio_chunk -def normalize_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int, pad_size : int) -> AudioChunk: +def normalize_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, audio_trim_size : int, audio_pad_size : int) -> AudioChunk: temp_audio_chunk = temp_audio_chunk.reshape((-1, 2, chunk_size)) - temp_audio_chunk = temp_audio_chunk[:, :, trim_size:-trim_size].transpose(1, 0, 2) - temp_audio_chunk = temp_audio_chunk.reshape(2, -1)[:, :-pad_size].T + temp_audio_chunk = temp_audio_chunk[:, :, audio_trim_size:-audio_trim_size].transpose(1, 0, 2) + temp_audio_chunk = temp_audio_chunk.reshape(2, -1)[:, :-audio_pad_size].T return temp_audio_chunk diff --git a/facefusion/wording.py b/facefusion/wording.py index 8094f10..70dcd38 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -8,7 +8,7 @@ WORDING : Dict[str, Any] =\ 'ffmpeg_not_installed': 'FFMpeg is not installed', 'creating_temp': 'Creating temporary resources', 'extracting_frames': 'Extracting frames with a resolution of {resolution} and {fps} frames per second', - 'extracting_frames_succeed': 'Extracting frames succeed', + 'extracting_frames_succeeded': 'Extracting frames succeeded', 'extracting_frames_failed': 'Extracting frames failed', 'analysing': 'Analysing', 
'extracting': 'Extracting', @@ -18,29 +18,29 @@ WORDING : Dict[str, Any] =\ 'downloading': 'Downloading', 'temp_frames_not_found': 'Temporary frames not found', 'copying_image': 'Copying image with a resolution of {resolution}', - 'copying_image_succeed': 'Copying image succeed', + 'copying_image_succeeded': 'Copying image succeeded', 'copying_image_failed': 'Copying image failed', 'finalizing_image': 'Finalizing image with a resolution of {resolution}', - 'finalizing_image_succeed': 'Finalizing image succeed', + 'finalizing_image_succeeded': 'Finalizing image succeeded', 'finalizing_image_skipped': 'Finalizing image skipped', 'merging_video': 'Merging video with a resolution of {resolution} and {fps} frames per second', - 'merging_video_succeed': 'Merging video succeed', + 'merging_video_succeeded': 'Merging video succeeded', 'merging_video_failed': 'Merging video failed', 'skipping_audio': 'Skipping audio', - 'replacing_audio_succeed': 'Replacing audio succeed', + 'replacing_audio_succeeded': 'Replacing audio succeeded', 'replacing_audio_skipped': 'Replacing audio skipped', - 'restoring_audio_succeed': 'Restoring audio succeed', + 'restoring_audio_succeeded': 'Restoring audio succeeded', 'restoring_audio_skipped': 'Restoring audio skipped', 'clearing_temp': 'Clearing temporary resources', 'processing_stopped': 'Processing stopped', - 'processing_image_succeed': 'Processing to image succeed in {seconds} seconds', + 'processing_image_succeeded': 'Processing to image succeeded in {seconds} seconds', 'processing_image_failed': 'Processing to image failed', - 'processing_video_succeed': 'Processing to video succeed in {seconds} seconds', + 'processing_video_succeeded': 'Processing to video succeeded in {seconds} seconds', 'processing_video_failed': 'Processing to video failed', - 'choose_image_source': 'Choose a image for the source', - 'choose_audio_source': 'Choose a audio for the source', + 'choose_image_source': 'Choose an image for the source', + 'choose_audio_source': 'Choose an audio for the source', 'choose_video_target': 'Choose a video for the target', - 'choose_image_or_video_target': 'Choose a image or video for the target', + 'choose_image_or_video_target': 'Choose an image or video for the target', 'specify_image_or_video_output': 'Specify the output image or video within a directory', 'match_target_and_output_extension': 'Match the target and output extension', 'no_source_face_detected': 'No source face detected', @@ -72,16 +72,18 @@ WORDING : Dict[str, Any] =\ 'running_jobs': 'Running all queued jobs', 'retrying_job': 'Retrying failed job {job_id}', 'retrying_jobs': 'Retrying all failed jobs', - 'processing_job_succeed': 'Processing of job {job_id} succeed', - 'processing_jobs_succeed': 'Processing of all job succeed', + 'processing_job_succeeded': 'Processing of job {job_id} succeeded', + 'processing_jobs_succeeded': 'Processing of all jobs succeeded', 'processing_job_failed': 'Processing of job {job_id} failed', 'processing_jobs_failed': 'Processing of all jobs failed', 'processing_step': 'Processing step {step_current} of {step_total}', - 'validating_hash_succeed': 'Validating hash for {hash_file_name} succeed', + 'validating_hash_succeeded': 'Validating hash for {hash_file_name} succeeded', 'validating_hash_failed': 'Validating hash for {hash_file_name} failed', - 'validating_source_succeed': 'Validating source for {source_file_name} succeed', + 'validating_source_succeeded': 'Validating source for {source_file_name} succeeded', 'validating_source_failed': 'Validating 
source for {source_file_name} failed', 'deleting_corrupt_source': 'Deleting corrupt source for {source_file_name}', + 'loading_model_succeeded': 'Loading model {model_name} succeeded in {seconds} seconds', + 'loading_model_failed': 'Loading model {model_name} failed', 'time_ago_now': 'just now', 'time_ago_minutes': '{minutes} minutes ago', 'time_ago_hours': '{hours} hours and {minutes} minutes ago', @@ -111,15 +113,15 @@ WORDING : Dict[str, Any] =\ 'face_detector_model': 'choose the model responsible for detecting the faces', 'face_detector_size': 'specify the frame size provided to the face detector', 'face_detector_angles': 'specify the angles to rotate the frame before detecting faces', - 'face_detector_score': 'filter the detected faces base on the confidence score', + 'face_detector_score': 'filter the detected faces based on the confidence score', # face landmarker 'face_landmarker_model': 'choose the model responsible for detecting the face landmarks', - 'face_landmarker_score': 'filter the detected face landmarks base on the confidence score', + 'face_landmarker_score': 'filter the detected face landmarks based on the confidence score', # face selector 'face_selector_mode': 'use reference based tracking or simple matching', 'face_selector_order': 'specify the order of the detected faces', - 'face_selector_age_start': 'filter the detected faces based the starting age', - 'face_selector_age_end': 'filter the detected faces based the ending age', + 'face_selector_age_start': 'filter the detected faces based on the starting age', + 'face_selector_age_end': 'filter the detected faces based on the ending age', 'face_selector_gender': 'filter the detected faces based on their gender', 'face_selector_race': 'filter the detected faces based on their race', 'reference_face_position': 'specify the position used to create the reference face', @@ -133,6 +135,8 @@ WORDING : Dict[str, Any] =\ 'face_mask_regions': 'choose the items used for the region mask (choices: {choices})', 'face_mask_blur': 'specify the degree of blur applied to the box mask', 'face_mask_padding': 'apply top, right, bottom and left padding to the box mask', + # voice extractor + 'voice_extractor_model': 'choose the model responsible for extracting the voices', # frame extraction 'trim_frame_start': 'specify the starting frame of the target video', 'trim_frame_end': 'specify the ending frame of the target video', @@ -140,14 +144,14 @@ WORDING : Dict[str, Any] =\ 'keep_temp': 'keep the temporary resources after processing', # output creation 'output_image_quality': 'specify the image quality which translates to the image compression', - 'output_image_resolution': 'specify the image resolution based on the target image', + 'output_image_scale': 'specify the image scale based on the target image', 'output_audio_encoder': 'specify the encoder used for the audio', 'output_audio_quality': 'specify the audio quality which translates to the audio compression', 'output_audio_volume': 'specify the audio volume based on the target video', 'output_video_encoder': 'specify the encoder used for the video', 'output_video_preset': 'balance fast video processing and video file size', 'output_video_quality': 'specify the video quality which translates to the video compression', - 'output_video_resolution': 'specify the video resolution based on the target video', + 'output_video_scale': 'specify the video scale based on the target video', 'output_video_fps': 'specify the video fps based on the target video', # processors 'processors': 'load a 
single or multiple processors (choices: {choices}, ...)', @@ -157,6 +161,7 @@ WORDING : Dict[str, Any] =\ 'deep_swapper_morph': 'morph between source face and target faces', 'expression_restorer_model': 'choose the model responsible for restoring the expression', 'expression_restorer_factor': 'restore factor of expression from the target face', + 'expression_restorer_areas': 'choose the items used for the expression areas (choices: {choices})', 'face_debugger_items': 'load a single or multiple processors (choices: {choices})', 'face_editor_model': 'choose the model responsible for editing the face', 'face_editor_eyebrow_direction': 'specify the eyebrow direction', @@ -178,6 +183,7 @@ WORDING : Dict[str, Any] =\ 'face_enhancer_weight': 'specify the degree of weight applied to the face', 'face_swapper_model': 'choose the model responsible for swapping the face', 'face_swapper_pixel_boost': 'choose the pixel boost resolution for the face swapper', + 'face_swapper_weight': 'specify the degree of weight applied to the face', 'frame_colorizer_model': 'choose the model responsible for colorizing the frame', 'frame_colorizer_size': 'specify the frame size provided to the frame colorizer', 'frame_colorizer_blend': 'blend the colorized into the previous frame', @@ -193,13 +199,13 @@ WORDING : Dict[str, Any] =\ 'download_providers': 'download using different providers (choices: {choices}, ...)', 'download_scope': 'specify the download scope', # benchmark + 'benchmark_mode': 'choose the benchmark mode', 'benchmark_resolutions': 'choose the resolutions for the benchmarks (choices: {choices}, ...)', 'benchmark_cycle_count': 'specify the amount of cycles per benchmark', # execution - 'execution_device_id': 'specify the device used for processing', + 'execution_device_ids': 'specify the devices used for processing', 'execution_providers': 'inference using different providers (choices: {choices}, ...)', 'execution_thread_count': 'specify the amount of parallel threads while processing', - 'execution_queue_count': 'specify the amount of frames each thread is processing', # memory 'video_memory_strategy': 'balance fast processing and low VRAM usage', 'system_memory_limit': 'limit the available RAM that can be used while processing', @@ -244,6 +250,7 @@ WORDING : Dict[str, Any] =\ 'age_modifier_direction_slider': 'AGE MODIFIER DIRECTION', 'age_modifier_model_dropdown': 'AGE MODIFIER MODEL', 'apply_button': 'APPLY', + 'benchmark_mode_dropdown': 'BENCHMARK MODE', 'benchmark_cycle_count_slider': 'BENCHMARK CYCLE COUNT', 'benchmark_resolutions_checkbox_group': 'BENCHMARK RESOLUTIONS', 'clear_button': 'CLEAR', @@ -252,10 +259,10 @@ WORDING : Dict[str, Any] =\ 'deep_swapper_model_dropdown': 'DEEP SWAPPER MODEL', 'deep_swapper_morph_slider': 'DEEP SWAPPER MORPH', 'execution_providers_checkbox_group': 'EXECUTION PROVIDERS', - 'execution_queue_count_slider': 'EXECUTION QUEUE COUNT', 'execution_thread_count_slider': 'EXECUTION THREAD COUNT', 'expression_restorer_factor_slider': 'EXPRESSION RESTORER FACTOR', 'expression_restorer_model_dropdown': 'EXPRESSION RESTORER MODEL', + 'expression_restorer_areas_checkbox_group': 'EXPRESSION RESTORER AREAS', 'face_debugger_items_checkbox_group': 'FACE DEBUGGER ITEMS', 'face_detector_angles_checkbox_group': 'FACE DETECTOR ANGLES', 'face_detector_model_dropdown': 'FACE DETECTOR MODEL', @@ -296,8 +303,10 @@ WORDING : Dict[str, Any] =\ 'face_selector_race_dropdown': 'FACE SELECTOR RACE', 'face_swapper_model_dropdown': 'FACE SWAPPER MODEL', 'face_swapper_pixel_boost_dropdown': 'FACE 
SWAPPER PIXEL BOOST', + 'face_swapper_weight_slider': 'FACE SWAPPER WEIGHT', 'face_occluder_model_dropdown': 'FACE OCCLUDER MODEL', 'face_parser_model_dropdown': 'FACE PARSER MODEL', + 'voice_extractor_model_dropdown': 'VOICE EXTRACTOR MODEL', 'frame_colorizer_blend_slider': 'FRAME COLORIZER BLEND', 'frame_colorizer_model_dropdown': 'FRAME COLORIZER MODEL', 'frame_colorizer_size_dropdown': 'FRAME COLORIZER SIZE', @@ -317,15 +326,17 @@ WORDING : Dict[str, Any] =\ 'output_audio_volume_slider': 'OUTPUT AUDIO VOLUME', 'output_image_or_video': 'OUTPUT', 'output_image_quality_slider': 'OUTPUT IMAGE QUALITY', - 'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION', + 'output_image_scale_slider': 'OUTPUT IMAGE SCALE', 'output_path_textbox': 'OUTPUT PATH', 'output_video_encoder_dropdown': 'OUTPUT VIDEO ENCODER', 'output_video_fps_slider': 'OUTPUT VIDEO FPS', 'output_video_preset_dropdown': 'OUTPUT VIDEO PRESET', 'output_video_quality_slider': 'OUTPUT VIDEO QUALITY', - 'output_video_resolution_dropdown': 'OUTPUT VIDEO RESOLUTION', + 'output_video_scale_slider': 'OUTPUT VIDEO SCALE', 'preview_frame_slider': 'PREVIEW FRAME', 'preview_image': 'PREVIEW', + 'preview_mode_dropdown': 'PREVIEW MODE', + 'preview_resolution_dropdown': 'PREVIEW RESOLUTION', 'processors_checkbox_group': 'PROCESSORS', 'reference_face_distance_slider': 'REFERENCE FACE DISTANCE', 'reference_face_gallery': 'REFERENCE FACE', diff --git a/requirements.txt b/requirements.txt index 77c061a..fd98ac2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ gradio-rangeslider==0.0.8 -gradio==5.25.2 -numpy==2.2.4 -onnx==1.17.0 +gradio==5.42.0 +numpy==2.3.2 +onnx==1.18.0 onnxruntime==1.22.0 -opencv-python==4.11.0.86 +opencv-python==4.12.0.88 psutil==7.0.0 tqdm==4.67.1 -scipy==1.15.2 +scipy==1.16.1 diff --git a/tests/test_cli_output_scale.py b/tests/test_cli_output_scale.py new file mode 100644 index 0000000..7c187fc --- /dev/null +++ b/tests/test_cli_output_scale.py @@ -0,0 +1,57 @@ +import subprocess +import sys + +import pytest + +from facefusion.download import conditional_download +from facefusion.jobs.job_manager import clear_jobs, init_jobs +from facefusion.types import Resolution, Scale +from facefusion.vision import detect_image_resolution, detect_video_resolution +from .helper import get_test_example_file, get_test_examples_directory, get_test_jobs_directory, get_test_output_file, prepare_test_output_directory + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download(get_test_examples_directory(), + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples-3.0.0/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples-3.0.0/target-240p.mp4' + ]) + subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vframes', '1', get_test_example_file('target-240p.jpg') ]) + + +@pytest.fixture(scope = 'function', autouse = True) +def before_each() -> None: + clear_jobs(get_test_jobs_directory()) + init_jobs(get_test_jobs_directory()) + prepare_test_output_directory() + + +@pytest.mark.parametrize('output_image_scale, output_image_resolution', +[ + (0.5, (212, 112)), + (1.0, (426, 226)), + (2.0, (852, 452)), + (8.0, (3408, 1808)) +]) +def test_output_image_scale(output_image_scale : Scale, output_image_resolution : Resolution) -> None: + output_file_path = get_test_output_file('test-output-image-scale-' + str(output_image_scale) + '.jpg') + commands = [ sys.executable, 'facefusion.py', 'headless-run', 
'--jobs-path', get_test_jobs_directory(), '--processors', 'frame_enhancer', '-t', get_test_example_file('target-240p.jpg'), '-o', output_file_path, '--output-image-scale', str(output_image_scale) ] + + assert subprocess.run(commands).returncode == 0 + assert detect_image_resolution(output_file_path) == output_image_resolution + + +@pytest.mark.parametrize('output_video_scale, output_video_resolution', +[ + (0.5, (212, 112)), + (1.0, (426, 226)), + (2.0, (852, 452)), + (8.0, (3408, 1808)) +]) +def test_output_video_scale(output_video_scale : Scale, output_video_resolution : Resolution) -> None: + output_file_path = get_test_output_file('test-output-video-scale-' + str(output_video_scale) + '.mp4') + commands = [ sys.executable, 'facefusion.py', 'headless-run', '--jobs-path', get_test_jobs_directory(), '--processors', 'frame_enhancer', '-t', get_test_example_file('target-240p.mp4'), '-o', output_file_path, '--trim-frame-end', '1', '--output-video-scale', str(output_video_scale) ] + + assert subprocess.run(commands).returncode == 0 + assert detect_video_resolution(output_file_path) == output_video_resolution diff --git a/tests/test_common_helper.py b/tests/test_common_helper.py index ed7dcfd..cec3682 100644 --- a/tests/test_common_helper.py +++ b/tests/test_common_helper.py @@ -1,4 +1,4 @@ -from facefusion.common_helper import calc_float_step, calc_int_step, create_float_metavar, create_float_range, create_int_metavar, create_int_range +from facefusion.common_helper import calculate_float_step, calculate_int_step, create_float_metavar, create_float_range, create_int_metavar, create_int_range def test_create_int_metavar() -> None: @@ -20,8 +20,8 @@ def test_create_float_range() -> None: def test_calc_int_step() -> None: - assert calc_int_step([ 0, 1 ]) == 1 + assert calculate_int_step([0, 1]) == 1 def test_calc_float_step() -> None: - assert calc_float_step([ 0.1, 0.2 ]) == 0.1 + assert calculate_float_step([0.1, 0.2]) == 0.1 diff --git a/tests/test_face_analyser.py b/tests/test_face_analyser.py index 8692684..9499a6b 100644 --- a/tests/test_face_analyser.py +++ b/tests/test_face_analyser.py @@ -4,8 +4,7 @@ import pytest from facefusion import face_classifier, face_detector, face_landmarker, face_recognizer, state_manager from facefusion.download import conditional_download -from facefusion.face_analyser import get_many_faces, get_one_face -from facefusion.types import Face +from facefusion.face_analyser import get_many_faces from facefusion.vision import read_static_image from .helper import get_test_example_file, get_test_examples_directory @@ -19,7 +18,7 @@ def before_all() -> None: subprocess.run([ 'ffmpeg', '-i', get_test_example_file('source.jpg'), '-vf', 'crop=iw*0.8:ih*0.8', get_test_example_file('source-80crop.jpg') ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('source.jpg'), '-vf', 'crop=iw*0.7:ih*0.7', get_test_example_file('source-70crop.jpg') ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('source.jpg'), '-vf', 'crop=iw*0.6:ih*0.6', get_test_example_file('source-60crop.jpg') ]) - state_manager.init_item('execution_device_id', '0') + state_manager.init_item('execution_device_ids', [ '0' ]) state_manager.init_item('execution_providers', [ 'cpu' ]) state_manager.init_item('download_providers', [ 'github' ]) state_manager.init_item('face_detector_angles', [ 0 ]) @@ -56,9 +55,8 @@ def test_get_one_face_with_retinaface() -> None: for source_path in source_paths: source_frame = read_static_image(source_path) many_faces = get_many_faces([ source_frame ]) - face = 
get_one_face(many_faces) - assert isinstance(face, Face) + assert len(many_faces) == 1 def test_get_one_face_with_scrfd() -> None: @@ -77,9 +75,8 @@ def test_get_one_face_with_scrfd() -> None: for source_path in source_paths: source_frame = read_static_image(source_path) many_faces = get_many_faces([ source_frame ]) - face = get_one_face(many_faces) - assert isinstance(face, Face) + assert len(many_faces) == 1 def test_get_one_face_with_yoloface() -> None: @@ -98,9 +95,28 @@ def test_get_one_face_with_yoloface() -> None: for source_path in source_paths: source_frame = read_static_image(source_path) many_faces = get_many_faces([ source_frame ]) - face = get_one_face(many_faces) - assert isinstance(face, Face) + assert len(many_faces) == 1 + + +def test_get_one_face_with_yunet() -> None: + state_manager.init_item('face_detector_model', 'yunet') + state_manager.init_item('face_detector_size', '640x640') + face_detector.pre_check() + + source_paths =\ + [ + get_test_example_file('source.jpg'), + get_test_example_file('source-80crop.jpg'), + get_test_example_file('source-70crop.jpg'), + get_test_example_file('source-60crop.jpg') + ] + + for source_path in source_paths: + source_frame = read_static_image(source_path) + many_faces = get_many_faces([ source_frame ]) + + assert len(many_faces) == 1 def test_get_many_faces() -> None: @@ -108,6 +124,4 @@ def test_get_many_faces() -> None: source_frame = read_static_image(source_path) many_faces = get_many_faces([ source_frame, source_frame, source_frame ]) - assert isinstance(many_faces[0], Face) - assert isinstance(many_faces[1], Face) - assert isinstance(many_faces[2], Face) + assert len(many_faces) == 3 diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index 677822b..a3fa0f8 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -28,7 +28,7 @@ def before_all() -> None: subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vf', 'fps=30', get_test_example_file('target-240p-30fps.mp4') ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vf', 'fps=60', get_test_example_file('target-240p-60fps.mp4') ]) - for output_video_format in [ 'avi', 'm4v', 'mkv', 'mov', 'mp4', 'webm' ]: + for output_video_format in [ 'avi', 'm4v', 'mkv', 'mov', 'mp4', 'webm', 'wmv' ]: subprocess.run([ 'ffmpeg', '-i', get_test_example_file('source.mp3'), '-i', get_test_example_file('target-240p.mp4'), '-ar', '16000', get_test_example_file('target-240p-16khz.' 
+ output_video_format) ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('source.mp3'), '-i', get_test_example_file('target-240p.mp4'), '-ar', '48000', get_test_example_file('target-240p-48khz.mp4') ]) @@ -84,7 +84,7 @@ def test_extract_frames() -> None: for target_path, trim_frame_start, trim_frame_end, frame_total in test_set: create_temp_directory(target_path) - assert extract_frames(target_path, '452x240', 30.0, trim_frame_start, trim_frame_end) is True + assert extract_frames(target_path, (452, 240), 30.0, trim_frame_start, trim_frame_end) is True assert len(resolve_temp_frame_paths(target_path)) == frame_total clear_temp_directory(target_path) @@ -98,7 +98,8 @@ def test_merge_video() -> None: get_test_example_file('target-240p-16khz.mkv'), get_test_example_file('target-240p-16khz.mp4'), get_test_example_file('target-240p-16khz.mov'), - get_test_example_file('target-240p-16khz.webm') + get_test_example_file('target-240p-16khz.webm'), + get_test_example_file('target-240p-16khz.wmv') ] output_video_encoders = get_available_encoder_set().get('video') @@ -106,9 +107,9 @@ def test_merge_video() -> None: for output_video_encoder in output_video_encoders: state_manager.init_item('output_video_encoder', output_video_encoder) create_temp_directory(target_path) - extract_frames(target_path, '452x240', 25.0, 0, 1) + extract_frames(target_path, (452, 240), 25.0, 0, 1) - assert merge_video(target_path, 25.0, '452x240', 25.0, 0, 1) is True + assert merge_video(target_path, 25.0, (452, 240), 25.0, 0, 1) is True clear_temp_directory(target_path) @@ -141,7 +142,8 @@ def test_restore_audio() -> None: (get_test_example_file('target-240p-16khz.mov'), get_test_output_file('target-240p-16khz.mov')), (get_test_example_file('target-240p-16khz.mp4'), get_test_output_file('target-240p-16khz.mp4')), (get_test_example_file('target-240p-48khz.mp4'), get_test_output_file('target-240p-48khz.mp4')), - (get_test_example_file('target-240p-16khz.webm'), get_test_output_file('target-240p-16khz.webm')) + (get_test_example_file('target-240p-16khz.webm'), get_test_output_file('target-240p-16khz.webm')), + (get_test_example_file('target-240p-16khz.wmv'), get_test_output_file('target-240p-16khz.wmv')) ] output_audio_encoders = get_available_encoder_set().get('audio') diff --git a/tests/test_ffmpeg_builder.py b/tests/test_ffmpeg_builder.py index 9179b88..9778f58 100644 --- a/tests/test_ffmpeg_builder.py +++ b/tests/test_ffmpeg_builder.py @@ -51,6 +51,9 @@ def test_set_video_quality() -> None: assert set_video_quality('libx264', 0) == [ '-crf', '51' ] assert set_video_quality('libx264', 50) == [ '-crf', '26' ] assert set_video_quality('libx264', 100) == [ '-crf', '0' ] + assert set_video_quality('libx264rgb', 0) == [ '-crf', '51' ] + assert set_video_quality('libx264rgb', 50) == [ '-crf', '26' ] + assert set_video_quality('libx264rgb', 100) == [ '-crf', '0' ] assert set_video_quality('libx265', 0) == [ '-crf', '51' ] assert set_video_quality('libx265', 50) == [ '-crf', '26' ] assert set_video_quality('libx265', 100) == [ '-crf', '0' ] diff --git a/tests/test_inference_manager.py b/tests/test_inference_manager.py index 2b4d61e..78cb46d 100644 --- a/tests/test_inference_manager.py +++ b/tests/test_inference_manager.py @@ -9,7 +9,7 @@ from facefusion.inference_manager import INFERENCE_POOL_SET, get_inference_pool @pytest.fixture(scope = 'module', autouse = True) def before_all() -> None: - state_manager.init_item('execution_device_id', '0') + state_manager.init_item('execution_device_ids', [ '0' ]) 
state_manager.init_item('execution_providers', [ 'cpu' ]) state_manager.init_item('download_providers', [ 'github' ]) content_analyser.pre_check() diff --git a/tests/test_job_helper.py b/tests/test_job_helper.py index 08fe6f8..1f724bb 100644 --- a/tests/test_job_helper.py +++ b/tests/test_job_helper.py @@ -6,3 +6,4 @@ from facefusion.jobs.job_helper import get_step_output_path def test_get_step_output_path() -> None: assert get_step_output_path('test-job', 0, 'test.mp4') == 'test-test-job-0.mp4' assert get_step_output_path('test-job', 0, 'test/test.mp4') == os.path.join('test', 'test-test-job-0.mp4') + assert get_step_output_path('test-job', 0, 'invalid') is None diff --git a/tests/test_date_helper.py b/tests/test_time_helper.py similarity index 91% rename from tests/test_date_helper.py rename to tests/test_time_helper.py index e8d0cd0..987817b 100644 --- a/tests/test_date_helper.py +++ b/tests/test_time_helper.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from facefusion.date_helper import describe_time_ago +from facefusion.time_helper import describe_time_ago def get_time_ago(days : int, hours : int, minutes : int) -> datetime: diff --git a/tests/test_vision.py b/tests/test_vision.py index 01463a4..a440bab 100644 --- a/tests/test_vision.py +++ b/tests/test_vision.py @@ -3,7 +3,7 @@ import subprocess import pytest from facefusion.download import conditional_download -from facefusion.vision import calc_histogram_difference, count_trim_frame_total, count_video_frame_total, create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_duration, detect_video_fps, detect_video_resolution, match_frame_color, normalize_resolution, pack_resolution, predict_video_frame_total, read_image, read_video_frame, restrict_image_resolution, restrict_trim_frame, restrict_video_fps, restrict_video_resolution, unpack_resolution, write_image +from facefusion.vision import calculate_histogram_difference, count_trim_frame_total, count_video_frame_total, detect_image_resolution, detect_video_duration, detect_video_fps, detect_video_resolution, match_frame_color, normalize_resolution, pack_resolution, predict_video_frame_total, read_image, read_video_frame, restrict_image_resolution, restrict_trim_frame, restrict_video_fps, restrict_video_resolution, scale_resolution, unpack_resolution, write_image from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, prepare_test_output_directory @@ -60,14 +60,6 @@ def test_restrict_image_resolution() -> None: assert restrict_image_resolution(get_test_example_file('target-1080p.jpg'), (4096, 2160)) == (2048, 1080) -def test_create_image_resolutions() -> None: - assert create_image_resolutions((426, 226)) == [ '106x56', '212x112', '320x170', '426x226', '640x340', '852x452', '1064x564', '1278x678', '1492x792', '1704x904' ] - assert create_image_resolutions((226, 426)) == [ '56x106', '112x212', '170x320', '226x426', '340x640', '452x852', '564x1064', '678x1278', '792x1492', '904x1704' ] - assert create_image_resolutions((2048, 1080)) == [ '512x270', '1024x540', '1536x810', '2048x1080', '3072x1620', '4096x2160', '5120x2700', '6144x3240', '7168x3780', '8192x4320' ] - assert create_image_resolutions((1080, 2048)) == [ '270x512', '540x1024', '810x1536', '1080x2048', '1620x3072', '2160x4096', '2700x5120', '3240x6144', '3780x7168', '4320x8192' ] - assert create_image_resolutions(None) == [] - - def test_read_video_frame() -> None: assert 
hasattr(read_video_frame(get_test_example_file('target-240p-25fps.mp4')), '__array_interface__') assert read_video_frame('invalid') is None @@ -139,12 +131,10 @@ def test_restrict_video_resolution() -> None: assert restrict_video_resolution(get_test_example_file('target-1080p.mp4'), (4096, 2160)) == (2048, 1080) -def test_create_video_resolutions() -> None: - assert create_video_resolutions((426, 226)) == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160', '8144x4320' ] - assert create_video_resolutions((226, 426)) == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072', '4320x8144' ] - assert create_video_resolutions((2048, 1080)) == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160', '8192x4320' ] - assert create_video_resolutions((1080, 2048)) == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096', '4320x8192' ] - assert create_video_resolutions(None) == [] +def test_scale_resolution() -> None: + assert scale_resolution((426, 226), 0.5) == (212, 112) + assert scale_resolution((2048, 1080), 1.0) == (2048, 1080) + assert scale_resolution((4096, 2160), 2.0) == (8192, 4320) def test_normalize_resolution() -> None: @@ -167,8 +157,8 @@ def test_calc_histogram_difference() -> None: source_vision_frame = read_image(get_test_example_file('target-240p.jpg')) target_vision_frame = read_image(get_test_example_file('target-240p-0sat.jpg')) - assert calc_histogram_difference(source_vision_frame, source_vision_frame) == 1.0 - assert calc_histogram_difference(source_vision_frame, target_vision_frame) < 0.5 + assert calculate_histogram_difference(source_vision_frame, source_vision_frame) == 1.0 + assert calculate_histogram_difference(source_vision_frame, target_vision_frame) < 0.5 def test_match_frame_color() -> None: @@ -176,4 +166,4 @@ def test_match_frame_color() -> None: target_vision_frame = read_image(get_test_example_file('target-240p-0sat.jpg')) output_vision_frame = match_frame_color(source_vision_frame, target_vision_frame) - assert calc_histogram_difference(source_vision_frame, output_vision_frame) > 0.5 + assert calculate_histogram_difference(source_vision_frame, output_vision_frame) > 0.5
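
A few of the behavior changes above, sketched in isolation as plain Python. First, the webcam UI: pre_start and pre_stop flip component visibility by returning fresh component constructors, the gradio idiom for partial updates. A minimal self-contained sketch of that show/hide handshake, assuming nothing beyond stock gradio; the two-button layout here is illustrative, not the patch's:

from typing import Tuple

import gradio


def pre_start() -> Tuple[gradio.Button, gradio.Button]:
	# hide START and reveal STOP while the stream runs
	return gradio.Button(visible = False), gradio.Button(visible = True)


def pre_stop() -> Tuple[gradio.Button, gradio.Button]:
	# restore the idle state once streaming ends
	return gradio.Button(visible = True), gradio.Button(visible = False)


with gradio.Blocks() as demo:
	start_button = gradio.Button('START')
	stop_button = gradio.Button('STOP', visible = False)

	start_button.click(pre_start, outputs = [ start_button, stop_button ])
	stop_button.click(pre_stop, outputs = [ start_button, stop_button ])

demo.launch()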
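
webcam_options.py now takes its device list from camera_manager.detect_local_camera_ids(0, 10) rather than the removed get_available_webcam_ids. The camera_manager internals are not shown in these hunks, so the body below is an assumption modeled on the removed helper: probe each id, keep the ones that open, and release every probe handle.

from typing import List

import cv2


def detect_local_camera_ids(camera_id_start : int, camera_id_end : int) -> List[int]:
	camera_ids = []

	for camera_id in range(camera_id_start, camera_id_end):
		camera_capture = cv2.VideoCapture(camera_id)

		# keep only ids that open successfully
		if camera_capture.isOpened():
			camera_ids.append(camera_id)
		camera_capture.release()  # never keep probe handles open

	return camera_ids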
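
In vision.py, the fixed template lists of create_image_resolutions and create_video_resolutions give way to a single multiplier via scale_resolution. A standalone sketch of that sizing logic; the even-number rounding inside normalize_resolution is an assumption, chosen because it reproduces the values asserted in test_scale_resolution and test_cli_output_scale:

from typing import Tuple

Resolution = Tuple[int, int]
Scale = float


def normalize_resolution(resolution : Tuple[float, float]) -> Resolution:
	# snap both dimensions to the nearest even number (assumed behavior)
	width, height = resolution
	return int(round(width / 2) * 2), int(round(height / 2) * 2)


def scale_resolution(resolution : Resolution, scale : Scale) -> Resolution:
	return normalize_resolution((resolution[0] * scale, resolution[1] * scale))


assert scale_resolution((426, 226), 0.5) == (212, 112)
assert scale_resolution((2048, 1080), 1.0) == (2048, 1080)
assert scale_resolution((4096, 2160), 2.0) == (8192, 4320)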
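
fit_contain_frame letterboxes (scale to fit, pad the remainder) while fit_cover_frame fills (scale to cover, center-crop the overflow). The difference on a concrete panoramic frame, using only the cv2 and numpy calls that appear in the hunks:

import cv2
import numpy

vision_frame = numpy.zeros((256, 640, 3), dtype = numpy.uint8)  # 640x256 input

# contain into 512x512: min() scale of 0.8 -> 512x204, padded back to square
contain_scale = min(512 / 256, 512 / 640)
new_width, new_height = int(640 * contain_scale), int(256 * contain_scale)
pad_y = (512 - new_height) // 2
contain_frame = numpy.pad(cv2.resize(vision_frame, (new_width, new_height)), ((pad_y, 512 - new_height - pad_y), (0, 0), (0, 0)))
assert contain_frame.shape == (512, 512, 3)

# cover into 512x512: max() scale of 2.0 -> 1280x512, center-cropped to square
cover_scale = max(512 / 256, 512 / 640)
new_width, new_height = int(640 * cover_scale), int(256 * cover_scale)
start_x = (new_width - 512) // 2
cover_frame = cv2.resize(vision_frame, (new_width, new_height))[:512, start_x:start_x + 512]
assert cover_frame.shape == (512, 512, 3)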
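
conditional_match_frame_color now feeds calculate_histogram_difference into the shared blend_frame helper, so color matching is applied in proportion to how closely the HSV histograms of the two frames agree. The correlation-to-blend mapping in isolation, on synthetic frames:

import cv2
import numpy

source_vision_frame = numpy.random.default_rng(0).integers(0, 255, (64, 64, 3), dtype = numpy.uint8)
target_vision_frame = numpy.random.default_rng(1).integers(0, 255, (64, 64, 3), dtype = numpy.uint8)

histogram_source = cv2.calcHist([ cv2.cvtColor(source_vision_frame, cv2.COLOR_BGR2HSV) ], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])
histogram_target = cv2.calcHist([ cv2.cvtColor(target_vision_frame, cv2.COLOR_BGR2HSV) ], [ 0, 1 ], None, [ 50, 60 ], [ 0, 180, 0, 256 ])

# map histogram correlation from [-1, 1] onto a [0, 1] blend factor
correlation = cv2.compareHist(histogram_source, histogram_target, cv2.HISTCMP_CORREL)
blend_factor = float(numpy.interp(correlation, [ -1, 1 ], [ 0, 1 ]))

# blend_frame boils down to a single weighted sum
blend_vision_frame = cv2.addWeighted(source_vision_frame, 1 - blend_factor, target_vision_frame, blend_factor, 0)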
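
The reworked create_tile_frames folds the outer border and the tile overlap into one numpy.pad call, rounding the bottom and right pads up so the frame splits into whole tiles. The arithmetic on concrete numbers, reading size as (tile size, outer pad, overlap), which is an inference from how the indices are used in the hunk:

import numpy

vision_frame = numpy.zeros((452, 852, 3), dtype = numpy.uint8)
size = (256, 16, 8)  # (tile size, outer pad, overlap): assumed semantics

tile_width = size[0] - 2 * size[2]  # 240 usable pixels per tile
pad_size_top = size[1] + size[2]    # 24: border and overlap folded into one pad
pad_size_bottom = pad_size_top + tile_width - (vision_frame.shape[0] + 2 * size[1]) % tile_width
pad_size_right = pad_size_top + tile_width - (vision_frame.shape[1] + 2 * size[1]) % tile_width
pad_vision_frame = numpy.pad(vision_frame, ((pad_size_top, pad_size_bottom), (pad_size_top, pad_size_right), (0, 0)))

# the padded area between the overlap margins divides evenly into tile_width steps
assert (pad_vision_frame.shape[0] - 2 * size[2]) % tile_width == 0
assert (pad_vision_frame.shape[1] - 2 * size[2]) % tile_width == 0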
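
On the audio side, batch_extract_voice slides a chunk window across the audio in step_size hops, accumulates each chunk's output in temp_voice, and counts per-sample coverage in temp_voice_chunk so the overlaps average out. The bookkeeping in isolation, with an identity function standing in for the ONNX extractor (an assumption for the demo):

import numpy

audio = numpy.ones((10, 2), dtype = numpy.float32)
chunk_size, step_size = 4, 2

temp_voice = numpy.zeros_like(audio)
temp_voice_chunk = numpy.zeros_like(audio)

for start in range(0, audio.shape[0], step_size):
	end = min(start + chunk_size, audio.shape[0])
	temp_voice[start:end, ...] += audio[start:end, ...]  # identity stand-in for extract_voice()
	temp_voice_chunk[start:end, ...] += 1                # per-sample coverage count

voice = temp_voice / temp_voice_chunk
assert numpy.allclose(voice, audio)  # averaging cancels the double-counted overlap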
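
Finally, decompose_audio_chunk and compose_audio_chunk pair scipy's STFT and inverse STFT with a Hann window and mutually cancelling gain factors. A round-trip check with the same frame geometry (7680-sample frames, 6656-sample overlap, i.e. 1024-sample hops to match the (shape[3] - 1) * 1024 chunking); the reshaping into the model's four-channel layout is omitted:

import numpy
import scipy.signal

audio_frame_size = 7680
audio_frame_overlap = 6656
window = scipy.signal.windows.hann(audio_frame_size)

signal = numpy.random.default_rng(0).standard_normal((2, 30720)).astype(numpy.float32)

# forward transform, same call as decompose_audio_chunk, keeping only the STFT matrix
spectrum = scipy.signal.stft(signal, nperseg = audio_frame_size, noverlap = audio_frame_overlap, window = window)[2]

# inverse transform, same call as compose_audio_chunk
restored = scipy.signal.istft(spectrum, nperseg = audio_frame_size, noverlap = audio_frame_overlap, window = window)[1]

assert numpy.allclose(restored[:, :signal.shape[1]], signal, atol = 1e-3)  # lossless up to float error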