Feat/improve content analyser (#861)
* Introduce fit_frame to improve the content analyser, rename resize_frame_resolution to restrict_frame
* Fix CI, add some spaces
* Normalize according to the face detector
This commit is contained in:
@@ -9,7 +9,7 @@ from facefusion.download import conditional_download_hashes, conditional_downloa
|
||||
from facefusion.filesystem import resolve_relative_path
|
||||
from facefusion.thread_helper import conditional_thread_semaphore
|
||||
from facefusion.typing import Detection, DownloadScope, Fps, InferencePool, ModelOptions, ModelSet, Score, VisionFrame
|
||||
from facefusion.vision import detect_video_fps, read_image, read_video_frame, resize_frame_resolution
|
||||
from facefusion.vision import detect_video_fps, fit_frame, read_image, read_video_frame
|
||||
|
||||
STREAM_COUNTER = 0
|
||||
|
||||
@@ -106,7 +106,7 @@ def analyse_video(video_path : str, trim_frame_start : int, trim_frame_end : int
|
||||
def detect_nsfw(vision_frame : VisionFrame) -> List[Score]:
|
||||
nsfw_scores = []
|
||||
model_size = get_model_options().get('size')
|
||||
temp_vision_frame = resize_frame_resolution(vision_frame, model_size)
|
||||
temp_vision_frame = fit_frame(vision_frame, model_size)
|
||||
detect_vision_frame = prepare_detect_frame(temp_vision_frame)
|
||||
detection = forward(detect_vision_frame)
|
||||
detection = numpy.squeeze(detection).T
|
||||
@@ -133,9 +133,6 @@ def forward(vision_frame : VisionFrame) -> Detection:
|
||||
|
||||
|
||||
def prepare_detect_frame(temp_vision_frame : VisionFrame) -> VisionFrame:
	"""
	Convert a vision frame into the NCHW float32 tensor expected by the NSFW detector.

	:param temp_vision_frame: frame already fitted to the model size (see fit_frame)
	:return: tensor of shape (1, 3, height, width) with values scaled to [0, 1]
	"""
	# Scale 8-bit pixel values into [0, 1].
	detect_vision_frame = temp_vision_frame / 255.0
	# HWC -> CHW, then add the batch axis and cast for inference.
	detect_vision_frame = numpy.expand_dims(detect_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32)
	return detect_vision_frame
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from functools import lru_cache
|
||||
from typing import List, Tuple
|
||||
from typing import List, Sequence, Tuple
|
||||
|
||||
import cv2
|
||||
import numpy
|
||||
@@ -10,7 +10,7 @@ from facefusion.face_helper import create_rotated_matrix_and_size, create_static
|
||||
from facefusion.filesystem import resolve_relative_path
|
||||
from facefusion.thread_helper import thread_semaphore
|
||||
from facefusion.typing import Angle, BoundingBox, Detection, DownloadScope, DownloadSet, FaceLandmark5, InferencePool, ModelSet, Score, VisionFrame
|
||||
from facefusion.vision import resize_frame_resolution, unpack_resolution
|
||||
from facefusion.vision import restrict_frame, unpack_resolution
|
||||
|
||||
|
||||
@lru_cache(maxsize = None)
|
||||
@@ -151,10 +151,11 @@ def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str)
|
||||
anchor_total = 2
|
||||
face_detector_score = state_manager.get_item('face_detector_score')
|
||||
face_detector_width, face_detector_height = unpack_resolution(face_detector_size)
|
||||
temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height))
|
||||
temp_vision_frame = restrict_frame(vision_frame, (face_detector_width, face_detector_height))
|
||||
ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0]
|
||||
ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1]
|
||||
detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
|
||||
detect_vision_frame = normalize_detect_frame(detect_vision_frame, [ -1, 1 ])
|
||||
detection = forward_with_retinaface(detect_vision_frame)
|
||||
|
||||
for index, feature_stride in enumerate(feature_strides):
|
||||
@@ -194,10 +195,11 @@ def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> T
|
||||
anchor_total = 2
|
||||
face_detector_score = state_manager.get_item('face_detector_score')
|
||||
face_detector_width, face_detector_height = unpack_resolution(face_detector_size)
|
||||
temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height))
|
||||
temp_vision_frame = restrict_frame(vision_frame, (face_detector_width, face_detector_height))
|
||||
ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0]
|
||||
ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1]
|
||||
detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
|
||||
detect_vision_frame = normalize_detect_frame(detect_vision_frame, [ -1, 1 ])
|
||||
detection = forward_with_scrfd(detect_vision_frame)
|
||||
|
||||
for index, feature_stride in enumerate(feature_strides):
|
||||
@@ -234,10 +236,11 @@ def detect_with_yolo_face(vision_frame : VisionFrame, face_detector_size : str)
|
||||
face_landmarks_5 = []
|
||||
face_detector_score = state_manager.get_item('face_detector_score')
|
||||
face_detector_width, face_detector_height = unpack_resolution(face_detector_size)
|
||||
temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height))
|
||||
temp_vision_frame = restrict_frame(vision_frame, (face_detector_width, face_detector_height))
|
||||
ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0]
|
||||
ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1]
|
||||
detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size)
|
||||
detect_vision_frame = normalize_detect_frame(detect_vision_frame, [ 0, 1 ])
|
||||
detection = forward_with_yolo_face(detect_vision_frame)
|
||||
detection = numpy.squeeze(detection).T
|
||||
bounding_boxes_raw, face_scores_raw, face_landmarks_5_raw = numpy.split(detection, [ 4, 5 ], axis = 1)
|
||||
@@ -305,6 +308,13 @@ def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : s
|
||||
face_detector_width, face_detector_height = unpack_resolution(face_detector_size)
|
||||
detect_vision_frame = numpy.zeros((face_detector_height, face_detector_width, 3))
|
||||
detect_vision_frame[:temp_vision_frame.shape[0], :temp_vision_frame.shape[1], :] = temp_vision_frame
|
||||
detect_vision_frame = (detect_vision_frame - 127.5) / 128.0
|
||||
detect_vision_frame = numpy.expand_dims(detect_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32)
|
||||
return detect_vision_frame
|
||||
|
||||
|
||||
def normalize_detect_frame(detect_vision_frame : VisionFrame, normalize_range : Sequence[int]) -> VisionFrame:
	"""
	Normalize detector input pixels into the requested value range.

	:param detect_vision_frame: frame with 8-bit pixel values
	:param normalize_range: [ -1, 1 ] or [ 0, 1 ]; any other value is a no-op
	:return: the normalized frame
	"""
	if normalize_range == [ -1, 1 ]:
		# Center around zero, as used by the retinaface and scrfd detectors.
		detect_vision_frame = (detect_vision_frame - 127.5) / 128.0
	elif normalize_range == [ 0, 1 ]:
		# Plain unit scaling, as used by the yolo_face detector.
		detect_vision_frame = detect_vision_frame / 255.0
	return detect_vision_frame
|
||||
|
||||
@@ -18,7 +18,7 @@ from facefusion.processors.core import get_processors_modules
|
||||
from facefusion.typing import AudioFrame, Face, FaceSet, VisionFrame
|
||||
from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component
|
||||
from facefusion.uis.typing import ComponentOptions
|
||||
from facefusion.vision import count_video_frame_total, detect_frame_orientation, normalize_frame_color, read_static_image, read_static_images, read_video_frame, resize_frame_resolution
|
||||
from facefusion.vision import count_video_frame_total, detect_frame_orientation, normalize_frame_color, read_static_image, read_static_images, read_video_frame, restrict_frame
|
||||
|
||||
PREVIEW_IMAGE : Optional[gradio.Image] = None
|
||||
PREVIEW_FRAME_SLIDER : Optional[gradio.Slider] = None
|
||||
@@ -185,7 +185,7 @@ def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image:
|
||||
def slide_preview_image(frame_number : int = 0) -> gradio.Image:
|
||||
if is_video(state_manager.get_item('target_path')):
|
||||
preview_vision_frame = normalize_frame_color(read_video_frame(state_manager.get_item('target_path'), frame_number))
|
||||
preview_vision_frame = resize_frame_resolution(preview_vision_frame, (1024, 1024))
|
||||
preview_vision_frame = restrict_frame(preview_vision_frame, (1024, 1024))
|
||||
return gradio.Image(value = preview_vision_frame)
|
||||
return gradio.Image(value = None)
|
||||
|
||||
@@ -237,7 +237,7 @@ def update_preview_frame_slider() -> gradio.Slider:
|
||||
|
||||
|
||||
def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame:
|
||||
target_vision_frame = resize_frame_resolution(target_vision_frame, (1024, 1024))
|
||||
target_vision_frame = restrict_frame(target_vision_frame, (1024, 1024))
|
||||
source_vision_frame = target_vision_frame.copy()
|
||||
if analyse_frame(target_vision_frame):
|
||||
return cv2.GaussianBlur(target_vision_frame, (99, 99), 0)
|
||||
|
||||
@@ -217,18 +217,31 @@ def detect_frame_orientation(vision_frame : VisionFrame) -> Orientation:
|
||||
return 'portrait'
|
||||
|
||||
|
||||
def restrict_frame(vision_frame : VisionFrame, resolution : Resolution) -> VisionFrame:
	"""
	Downscale a frame so it fits within the given resolution, preserving aspect ratio.

	Frames already within bounds are returned untouched; the frame is never upscaled.

	:param vision_frame: input frame of shape (height, width, channels)
	:param resolution: (width, height) upper bound
	:return: the downscaled frame, or the original frame when no restriction is needed
	"""
	height, width = vision_frame.shape[:2]
	restrict_width, restrict_height = resolution

	if height > restrict_height or width > restrict_width:
		# A single uniform scale keeps the aspect ratio while satisfying both bounds.
		scale = min(restrict_height / height, restrict_width / width)
		new_width = int(width * scale)
		new_height = int(height * scale)
		return cv2.resize(vision_frame, (new_width, new_height))
	return vision_frame
|
||||
|
||||
|
||||
def fit_frame(vision_frame: VisionFrame, resolution: Resolution) -> VisionFrame:
	"""
	Scale a frame to fit inside the given resolution and pad it to that exact size.

	The aspect ratio is preserved by a uniform scale; the scaled frame is centred
	on a zero-valued (black) canvas via padding.

	:param vision_frame: input frame of shape (height, width, channels)
	:param resolution: (width, height) of the output frame
	:return: frame of exactly the requested resolution
	"""
	target_width, target_height = resolution
	source_height, source_width = vision_frame.shape[:2]
	ratio = min(target_height / source_height, target_width / source_width)
	scaled_width = int(source_width * ratio)
	scaled_height = int(source_height * ratio)
	scaled_vision_frame = cv2.resize(vision_frame, (scaled_width, scaled_height))
	# Split the leftover space evenly; the far edges absorb any odd remainder.
	pad_left = (target_width - scaled_width) // 2
	pad_top = (target_height - scaled_height) // 2
	pad_right = target_width - scaled_width - pad_left
	pad_bottom = target_height - scaled_height - pad_top
	return numpy.pad(scaled_vision_frame, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)))
|
||||
|
||||
|
||||
def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame:
|
||||
return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB)
|
||||
|
||||
@@ -283,10 +296,12 @@ def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[Vi
|
||||
for row_vision_frame in row_range:
|
||||
top = row_vision_frame - size[2]
|
||||
bottom = row_vision_frame + size[2] + tile_width
|
||||
|
||||
for column_vision_frame in col_range:
|
||||
left = column_vision_frame - size[2]
|
||||
right = column_vision_frame + size[2] + tile_width
|
||||
tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :])
|
||||
|
||||
return tile_vision_frames, pad_width, pad_height
|
||||
|
||||
|
||||
@@ -304,5 +319,6 @@ def merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int,
|
||||
left = col_index * tile_vision_frame.shape[1]
|
||||
right = left + tile_vision_frame.shape[1]
|
||||
merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame
|
||||
|
||||
merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1]: size[1] + temp_width, :]
|
||||
return merge_vision_frame
|
||||
|
||||
Reference in New Issue
Block a user