re-arrange to methods

This commit is contained in:
harisreedhar
2025-06-12 17:07:51 +05:30
parent f712570d1e
commit 8aec81d63b

View File

@@ -23,7 +23,7 @@ from facefusion.processors import choices as processors_choices
 from facefusion.processors.types import LipSyncerInputs
 from facefusion.program_helper import find_argument_group
 from facefusion.thread_helper import conditional_thread_semaphore
-from facefusion.types import ApplyStateItem, Args, AudioFrame, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame
+from facefusion.types import ApplyStateItem, Args, AudioFrame, BoundingBox, DownloadScope, Face, InferencePool, ModelOptions, ModelSet, ProcessMode, QueuePayload, UpdateProgress, VisionFrame
 from facefusion.vision import read_image, read_static_image, restrict_video_fps, write_image
@@ -144,12 +144,9 @@ def post_process() -> None:
 def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame:
-	model_size = get_model_options().get('size')
 	temp_audio_frame = prepare_audio_frame(temp_audio_frame)
 	crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark_set.get('5/68'), 'ffhq_512', (512, 512))
 	face_landmark_68 = cv2.transform(target_face.landmark_set.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2)
-	bounding_box = create_bounding_box(face_landmark_68)
-	bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125
 	area_mask = create_area_mask(face_landmark_68, [ 'lower-face' ])
 	crop_masks =\
 	[
@@ -160,16 +157,24 @@ def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_fram
 		occlusion_mask = create_occlusion_mask(crop_vision_frame)
 		crop_masks.append(occlusion_mask)
-	close_vision_frame, close_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, model_size)
-	close_vision_frame = prepare_crop_frame(close_vision_frame)
-	close_vision_frame = forward(temp_audio_frame, close_vision_frame)
-	close_vision_frame = normalize_close_frame(close_vision_frame)
-	crop_vision_frame = cv2.warpAffine(close_vision_frame, cv2.invertAffineTransform(close_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE)
+	bounding_box = create_bounding_box(face_landmark_68)
+	bounding_box = prepare_bounding_box(bounding_box)
+	crop_vision_frame = process_wav2lip(crop_vision_frame, temp_audio_frame, bounding_box)
 	crop_mask = numpy.minimum.reduce(crop_masks)
 	paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix)
 	return paste_vision_frame
+
+def process_wav2lip(crop_vision_frame : VisionFrame, temp_audio_frame : AudioFrame, bounding_box : BoundingBox) -> VisionFrame:
+	model_size = get_model_options().get('size')
+	close_vision_frame, close_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, model_size)
+	close_vision_frame = prepare_close_frame(close_vision_frame)
+	close_vision_frame = forward(temp_audio_frame, close_vision_frame)
+	close_vision_frame = normalize_close_frame(close_vision_frame)
+	crop_vision_frame = cv2.warpAffine(close_vision_frame, cv2.invertAffineTransform(close_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE)
+	return crop_vision_frame
def forward(temp_audio_frame : AudioFrame, close_vision_frame : VisionFrame) -> VisionFrame: def forward(temp_audio_frame : AudioFrame, close_vision_frame : VisionFrame) -> VisionFrame:
lip_syncer = get_inference_pool().get('lip_syncer') lip_syncer = get_inference_pool().get('lip_syncer')
@@ -192,13 +197,22 @@ def prepare_audio_frame(temp_audio_frame : AudioFrame) -> AudioFrame:
 	return temp_audio_frame
-def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame:
-	crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0)
-	prepare_vision_frame = crop_vision_frame.copy()
-	prepare_vision_frame[:, 48:] = 0
-	crop_vision_frame = numpy.concatenate((prepare_vision_frame, crop_vision_frame), axis = 3)
-	crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0
-	return crop_vision_frame
+def prepare_close_frame(close_vision_frame : VisionFrame) -> VisionFrame:
+	model_size = get_model_options().get('size')
+	close_vision_frame = numpy.expand_dims(close_vision_frame, axis=0)
+	prepare_vision_frame = close_vision_frame.copy()
+	prepare_vision_frame[:, model_size[0] // 2:] = 0
+	close_vision_frame = numpy.concatenate((prepare_vision_frame, close_vision_frame), axis=3)
+	close_vision_frame = close_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0
+	return close_vision_frame
+
+def prepare_bounding_box(bounding_box : BoundingBox) -> BoundingBox:
+	bounding_box[3] += min(8, 511)
+	x1, y1, x2, y2 = bounding_box
+	y1 = y2 - (4 / 3) * (x2 - x1)
+	bounding_box[1] = max(y1, 0)
+	return bounding_box
 def normalize_close_frame(crop_vision_frame : VisionFrame) -> VisionFrame: