* Add real_hatgan_x4 model

* Mark it as NEXT

* Force download to be executed and exit

* Fix frames per second interpolation

* 5 to 68 landmark (#456)

* changes

* changes

* Adjust model url

* Clean up 5 to 68 landmark conversion
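For context, the opposite direction (collapsing 68 landmarks back to the 5-point set, as done by convert_face_landmark_68_to_5 in the imports below) needs no model; a minimal sketch, assuming the standard 68-point layout (36-41 left eye, 42-47 right eye, 30 nose tip, 48/54 mouth corners):

import numpy

def convert_face_landmark_68_to_5(face_landmark_68):
    # average the eye contours, take nose tip and mouth corners directly
    face_landmark_5 = numpy.array(
    [
        numpy.mean(face_landmark_68[36:42], axis = 0),
        numpy.mean(face_landmark_68[42:48], axis = 0),
        face_landmark_68[30],
        face_landmark_68[48],
        face_landmark_68[54]
    ])
    return face_landmark_5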

* Move everything to face analyser

* Introduce matrix only face helper
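The matrix-only helper referenced here presumably just estimates a similarity transform from the 5-point landmark to a warp template, without doing the warp itself; a minimal sketch, where the WARP_TEMPLATES values are illustrative placeholders rather than the project's calibrated templates:

import cv2
import numpy

# illustrative placeholder; the real project ships calibrated templates such as 'ffhq_512'
WARP_TEMPLATES =\
{
    'ffhq_512': numpy.array(
    [
        [ 0.37, 0.47 ],
        [ 0.62, 0.47 ],
        [ 0.50, 0.61 ],
        [ 0.39, 0.73 ],
        [ 0.61, 0.72 ]
    ])
}

def estimate_matrix_by_face_landmark_5(face_landmark_5, warp_template, crop_size):
    # scale the normalized template to the crop size, then fit a similarity transform via RANSAC
    normed_warp_template = WARP_TEMPLATES.get(warp_template) * crop_size
    affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0]
    return affine_matrix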

* Revert facefusion.ini

* Adjust limit due to false positive analysis

* changes (#457)

* Use pixel format yuv422p to merge video

* Fix some code

* Minor cleanup

* Add gpen_bfr_1024 and gpen_bfr_2048

* Revert it back to yuv420p due to compatibility issues
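yuv420p keeps the merged video decodable by the widest range of players and hardware decoders, since 4:2:2 chroma subsampling is often unsupported. A hedged sketch of what such a merge invocation could look like; paths, frame rate and encoder settings are illustrative, not the project's actual command assembly:

import subprocess

commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error', '-r', '25', '-i', 'frames/%04d.jpg', '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-y', 'output.mp4' ]
subprocess.run(commands)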

* Add debug back to ffmpeg

* Add debug back to ffmpeg

* Migrate to conda (#461)

* Migrate from venv to conda

* Migrate from venv to conda

* Message when conda is not activated

* Use release for every slider (#463)

* Use release event handler for every slider

* Move more sliders to release handler

* Move more sliders to release handler

* Add get_ui_components() to simplify code

* Revert some changes on frame slider
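For reference, a minimal sketch of the release-based wiring described in the slider changes above; component and callback names are illustrative, not the project's actual UI code:

import gradio

def apply_value(value : float) -> float:
    # placeholder callback; the real handlers update globals and refresh the preview
    return value

with gradio.Blocks() as layout:
    slider = gradio.Slider(label = 'example', minimum = 0, maximum = 100, value = 50)
    output = gradio.Number(label = 'applied value')
    # .release() fires once when the handle is let go, instead of on every intermediate value like .change()
    slider.release(apply_value, inputs = slider, outputs = output)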

* Add the first iteration of a frame colorizer

* Support for the DDColor model

* Improve model file handling

* Improve model file handling part2

* Remove deoldify

* Remove deoldify

* Voice separator (#468)

* changes

* changes

* changes

* changes

* changes

* changes

* Rename audio extractor to voice extractor

* Cosmetic changes

* Cosmetic changes

* Fix fps lowering and boosting

* Fix fps lowering and boosting

* Fix fps lowering and boosting

* Some refactoring for audio.py and some astype() here and there (#470)

* Some refactoring for audio.py and some astype() here and there

* Fix lint

* Spacing

* Add mp3 to benchmark suite for lip syncer testing

* Improve naming

* Adjust chunk size
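Chunked processing is the usual way to keep memory bounded when running a separation model over a long track, and the chunk size trades memory against seam artifacts. A generic, hedged sketch of the overlap pattern; the per-chunk call separate_voice and the sizes are placeholders, not the project's actual values:

import numpy

def extract_voice_in_chunks(audio : numpy.ndarray, separate_voice, chunk_size : int, step_size : int) -> numpy.ndarray:
    # run the (assumed) per-chunk separation over overlapping windows and average the overlaps
    voice = numpy.zeros(audio.shape, dtype = numpy.float32)
    coverage = numpy.zeros(audio.shape, dtype = numpy.float32)
    for start in range(0, audio.shape[0], step_size):
        end = min(start + chunk_size, audio.shape[0])
        voice[start:end] += separate_voice(audio[start:end])
        coverage[start:end] += 1
    return voice / numpy.maximum(coverage, 1)

A step_size smaller than chunk_size is what produces the overlap between consecutive model calls.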

* Use higher quality

* Revert "Use higher quality"

This reverts commit d32f28757251ecc0f48214073adf54f3631b1289.

* Improve naming in ffmpeg.py

* Simplify code

* Better fps calculation
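One straightforward rule when calculating the target frame rate is to clamp the requested fps to the fps detected from the source, so the output is never boosted above what the input provides; a minimal sketch of that idea, not the project's actual helper:

def restrict_video_fps(detected_video_fps : float, output_video_fps : float) -> float:
    # lowering the frame rate is always allowed, boosting beyond the source is not
    if detected_video_fps and detected_video_fps < output_video_fps:
        return detected_video_fps
    return output_video_fps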

* Fix naming here and there

* Add back real esrgan x2

* Remove trailing comma

* Update wording and README

* Use semaphore to prevent frame colorizer memory issues
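The usual pattern here is a module-level semaphore that caps concurrent inference calls, so multiple worker threads cannot all hold large colorizer tensors at once; a minimal sketch of the idea, with names chosen for illustration:

import threading

# allow only one colorizer inference at a time; raise the initial value to permit limited concurrency
THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()

def run_colorizer_model(vision_frame):
    # placeholder for the actual ONNX session run; the real call returns the colorized frame
    return vision_frame

def colorize_frame(vision_frame):
    with THREAD_SEMAPHORE:
        return run_colorizer_model(vision_frame)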

* Revert "Remove deoldify"

This reverts commit bd8034cbc71fe701f78dddec3057dc98593b2162.

* Remove unused type from frame colorizer

* Adjust naming

* Add missing clear of model initializer

* Change nvenc preset mapping to support old FFmpeg 4
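Older FFmpeg 4 builds of the nvenc encoders reject the newer p1-p7 preset names, so the libx264-style presets have to be mapped onto the legacy nvenc names. An illustrative mapping; the exact buckets are an assumption, not the project's final table:

from typing import Optional

def map_nvenc_preset(output_video_preset : str) -> Optional[str]:
    # collapse the libx264-style presets onto the legacy nvenc names accepted by FFmpeg 4
    if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]:
        return 'fast'
    if output_video_preset == 'medium':
        return 'medium'
    if output_video_preset in [ 'slow', 'slower', 'veryslow' ]:
        return 'slow'
    return None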

* Update onnxruntime to 1.17.1

* Fix lint

* Prepare 2.5.0

* Fix Gradio overrides

* Add Deoldify Artistic back

* Feat/audio refactoring (#476)

* Improve audio naming and variables

* Improve audio naming and variables

* Refactor voice extractor like crazy

* Refactor voice extractor like crazy

* Remove spaces

* Update the usage

---------

Co-authored-by: Harisreedhar <46858047+harisreedhar@users.noreply.github.com>
Authored by Henry Ruhs on 2024-04-09 15:40:55 +02:00, committed by GitHub
parent 6e67d7bff6
commit 4ccf4c24c7
45 changed files with 1007 additions and 405 deletions

facefusion/face_analyser.py

@@ -8,11 +8,11 @@ import onnxruntime
 import facefusion.globals
 from facefusion import process_manager
 from facefusion.common_helper import get_first
-from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender
+from facefusion.face_helper import estimate_matrix_by_face_landmark_5, warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender
 from facefusion.face_store import get_static_faces, set_static_faces
 from facefusion.execution import apply_execution_provider_options
 from facefusion.download import conditional_download
-from facefusion.filesystem import resolve_relative_path
+from facefusion.filesystem import resolve_relative_path, is_file
 from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding
 from facefusion.vision import resize_frame_resolution, unpack_resolution
@@ -61,11 +61,16 @@ MODELS : ModelSet =\
         'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx',
         'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx')
     },
-    'face_landmarker':
+    'face_landmarker_68':
     {
         'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/2dfan4.onnx',
         'path': resolve_relative_path('../.assets/models/2dfan4.onnx')
     },
+    'face_landmarker_68_5':
+    {
+        'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_landmarker_68_5.onnx',
+        'path': resolve_relative_path('../.assets/models/face_landmarker_68_5.onnx')
+    },
     'gender_age':
     {
         'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gender_age.onnx',
@@ -78,22 +83,20 @@ def get_face_analyser() -> Any:
     global FACE_ANALYSER
     face_detectors = {}
+    face_landmarkers = {}
     with THREAD_LOCK:
         while process_manager.is_checking():
             sleep(0.5)
         if FACE_ANALYSER is None:
             if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]:
-                face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
-                face_detectors['retinaface'] = face_detector
+                face_detectors['retinaface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]:
-                face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
-                face_detectors['scrfd'] = face_detector
+                face_detectors['scrfd'] = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]:
-                face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
-                face_detectors['yoloface'] = face_detector
+                face_detectors['yoloface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             if facefusion.globals.face_detector_model in [ 'yunet' ]:
-                face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0))
-                face_detectors['yunet'] = face_detector
+                face_detectors['yunet'] = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0))
             if facefusion.globals.face_recognizer_model == 'arcface_blendswap':
                 face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             if facefusion.globals.face_recognizer_model == 'arcface_inswapper':
@@ -102,13 +105,14 @@ def get_face_analyser() -> Any:
                 face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             if facefusion.globals.face_recognizer_model == 'arcface_uniface':
                 face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_uniface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
-            face_landmarker = onnxruntime.InferenceSession(MODELS.get('face_landmarker').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
+            face_landmarkers['68'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
+            face_landmarkers['68_5'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68_5').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers))
             FACE_ANALYSER =\
             {
                 'face_detectors': face_detectors,
                 'face_recognizer': face_recognizer,
-                'face_landmarker': face_landmarker,
+                'face_landmarkers': face_landmarkers,
                 'gender_age': gender_age
             }
     return FACE_ANALYSER
@@ -121,34 +125,50 @@ def clear_face_analyser() -> Any:
 def pre_check() -> bool:
-    if not facefusion.globals.skip_download:
-        download_directory_path = resolve_relative_path('../.assets/models')
-        model_urls =\
-        [
-            MODELS.get('face_landmarker').get('url'),
-            MODELS.get('gender_age').get('url')
-        ]
+    download_directory_path = resolve_relative_path('../.assets/models')
+    model_urls =\
+    [
+        MODELS.get('face_landmarker_68').get('url'),
+        MODELS.get('face_landmarker_68_5').get('url'),
+        MODELS.get('gender_age').get('url')
+    ]
+    model_paths =\
+    [
+        MODELS.get('face_landmarker_68').get('path'),
+        MODELS.get('face_landmarker_68_5').get('path'),
+        MODELS.get('gender_age').get('path')
+    ]
-        if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]:
-            model_urls.append(MODELS.get('face_detector_retinaface').get('url'))
-        if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]:
-            model_urls.append(MODELS.get('face_detector_scrfd').get('url'))
-        if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]:
-            model_urls.append(MODELS.get('face_detector_yoloface').get('url'))
-        if facefusion.globals.face_detector_model in [ 'yunet' ]:
-            model_urls.append(MODELS.get('face_detector_yunet').get('url'))
-        if facefusion.globals.face_recognizer_model == 'arcface_blendswap':
-            model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url'))
-        if facefusion.globals.face_recognizer_model == 'arcface_inswapper':
-            model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url'))
-        if facefusion.globals.face_recognizer_model == 'arcface_simswap':
-            model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url'))
-        if facefusion.globals.face_recognizer_model == 'arcface_uniface':
-            model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url'))
+    if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]:
+        model_urls.append(MODELS.get('face_detector_retinaface').get('url'))
+        model_paths.append(MODELS.get('face_detector_retinaface').get('path'))
+    if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]:
+        model_urls.append(MODELS.get('face_detector_scrfd').get('url'))
+        model_paths.append(MODELS.get('face_detector_scrfd').get('path'))
+    if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]:
+        model_urls.append(MODELS.get('face_detector_yoloface').get('url'))
+        model_paths.append(MODELS.get('face_detector_yoloface').get('path'))
+    if facefusion.globals.face_detector_model in [ 'yunet' ]:
+        model_urls.append(MODELS.get('face_detector_yunet').get('url'))
+        model_paths.append(MODELS.get('face_detector_yunet').get('path'))
+    if facefusion.globals.face_recognizer_model == 'arcface_blendswap':
+        model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url'))
+        model_paths.append(MODELS.get('face_recognizer_arcface_blendswap').get('path'))
+    if facefusion.globals.face_recognizer_model == 'arcface_inswapper':
+        model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url'))
+        model_paths.append(MODELS.get('face_recognizer_arcface_inswapper').get('path'))
+    if facefusion.globals.face_recognizer_model == 'arcface_simswap':
+        model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url'))
+        model_paths.append(MODELS.get('face_recognizer_arcface_simswap').get('path'))
+    if facefusion.globals.face_recognizer_model == 'arcface_uniface':
+        model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url'))
+        model_paths.append(MODELS.get('face_recognizer_arcface_uniface').get('path'))
+    if not facefusion.globals.skip_download:
         process_manager.check()
         conditional_download(download_directory_path, model_urls)
         process_manager.end()
-    return True
+    return all(is_file(model_path) for model_path in model_paths)
 def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]:
@@ -321,7 +341,8 @@ def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBo
         for index in keep_indices:
             bounding_box = bounding_box_list[index]
             face_landmark_5_68 = face_landmark_5_list[index]
-            face_landmark_68 = None
+            face_landmark_68_5 = expand_face_landmark_68_from_5(face_landmark_5_68)
+            face_landmark_68 = face_landmark_68_5
             face_landmark_68_score = 0.0
             if facefusion.globals.face_landmarker_score > 0:
                 face_landmark_68, face_landmark_68_score = detect_face_landmark_68(vision_frame, bounding_box)
@@ -331,7 +352,8 @@ def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBo
             {
                 '5': face_landmark_5_list[index],
                 '5/68': face_landmark_5_68,
-                '68': face_landmark_68
+                '68': face_landmark_68,
+                '68/5': face_landmark_68_5
             }
             scores : FaceScoreSet = \
             {
@@ -368,7 +390,7 @@ def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandma
 def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[FaceLandmark68, Score]:
-    face_landmarker = get_face_analyser().get('face_landmarker')
+    face_landmarker = get_face_analyser().get('face_landmarkers').get('68')
     scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max()
     translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5
     crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256))
@@ -390,6 +412,18 @@ def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : Boun
     return face_landmark_68, face_landmark_68_score
+def expand_face_landmark_68_from_5(face_landmark_5 : FaceLandmark5) -> FaceLandmark68:
+    face_landmarker = get_face_analyser().get('face_landmarkers').get('68_5')
+    affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, 'ffhq_512', (1, 1))
+    face_landmark_5 = cv2.transform(face_landmark_5.reshape(1, -1, 2), affine_matrix).reshape(-1, 2)
+    face_landmark_68_5 = face_landmarker.run(None,
+    {
+        face_landmarker.get_inputs()[0].name: [ face_landmark_5 ]
+    })[0][0]
+    face_landmark_68_5 = cv2.transform(face_landmark_68_5.reshape(1, -1, 2), cv2.invertAffineTransform(affine_matrix)).reshape(-1, 2)
+    return face_landmark_68_5
 def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]:
     gender_age = get_face_analyser().get('gender_age')
     bounding_box = bounding_box.reshape(2, -1)