From 854f895403db2fe5b14f74b7123eccabde553862 Mon Sep 17 00:00:00 2001 From: henryruhs Date: Sun, 25 May 2025 18:02:37 +0200 Subject: [PATCH] Introduce fix_audio_encoder and fix_video_encoder --- facefusion/ffmpeg.py | 63 ++++++++++++++++++++++++++++---------------- tests/test_ffmpeg.py | 19 ++++++------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index eaf8f03..91eec2c 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -1,7 +1,7 @@ import os import subprocess import tempfile -from typing import List, Optional +from typing import List, Optional, cast from tqdm import tqdm @@ -9,7 +9,7 @@ import facefusion.choices from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording from facefusion.filesystem import get_file_format, remove_file from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern -from facefusion.types import AudioBuffer, Commands, EncoderSet, Fps, UpdateProgress +from facefusion.types import AudioBuffer, AudioEncoder, Commands, EncoderSet, Fps, UpdateProgress, VideoEncoder, VideoFormat from facefusion.vision import detect_video_duration, detect_video_fps, predict_video_frame_total @@ -164,12 +164,10 @@ def restore_audio(target_path : str, output_path : str, trim_frame_start : int, output_audio_volume = state_manager.get_item('output_audio_volume') target_video_fps = detect_video_fps(target_path) temp_video_path = get_temp_file_path(target_path) - temp_video_format = get_file_format(temp_video_path) + temp_video_format = cast(VideoFormat, get_file_format(temp_video_path)) temp_video_duration = detect_video_duration(temp_video_path) - if temp_video_format == 'webm': - output_audio_encoder = 'libopus' - + output_audio_encoder = fix_audio_encoder(temp_video_format, output_audio_encoder) commands = ffmpeg_builder.chain( ffmpeg_builder.set_input(temp_video_path), ffmpeg_builder.select_media_range(trim_frame_start, trim_frame_end, target_video_fps), @@ -191,12 +189,10 @@ def replace_audio(target_path : str, audio_path : str, output_path : str) -> boo output_audio_quality = state_manager.get_item('output_audio_quality') output_audio_volume = state_manager.get_item('output_audio_volume') temp_video_path = get_temp_file_path(target_path) - temp_video_format = get_file_format(temp_video_path) + temp_video_format = cast(VideoFormat, get_file_format(temp_video_path)) temp_video_duration = detect_video_duration(temp_video_path) - if temp_video_format == 'webm': - output_audio_encoder = 'libopus' - + output_audio_encoder = fix_audio_encoder(temp_video_format, output_audio_encoder) commands = ffmpeg_builder.chain( ffmpeg_builder.set_input(temp_video_path), ffmpeg_builder.set_input(audio_path), @@ -216,21 +212,10 @@ def merge_video(target_path : str, temp_video_fps : Fps, output_video_resolution output_video_preset = state_manager.get_item('output_video_preset') merge_frame_total = predict_video_frame_total(target_path, output_video_fps, trim_frame_start, trim_frame_end) temp_video_path = get_temp_file_path(target_path) - temp_video_format = get_file_format(temp_video_path) + temp_video_format = cast(VideoFormat, get_file_format(temp_video_path)) temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d') - if temp_video_format == 'm4v': - output_video_encoder = 'libx264' - - if temp_video_format in [ 'mkv', 'mp4' ] and output_video_encoder == 'rawvideo': - output_video_encoder = 'libx264' - - if temp_video_format == 'mov' and output_video_encoder == 'libvpx-vp9': - output_video_encoder = 'libx264' - - if temp_video_format == 'webm': - output_video_encoder = 'libvpx-vp9' - + output_video_encoder = fix_video_encoder(temp_video_format, output_video_encoder) commands = ffmpeg_builder.chain( ffmpeg_builder.set_conditional_fps(temp_video_fps), ffmpeg_builder.set_input(temp_frames_pattern), @@ -270,3 +255,35 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool: process.communicate() remove_file(concat_video_path) return process.returncode == 0 + + +def fix_audio_encoder(video_format : VideoFormat, audio_encoder : AudioEncoder) -> AudioEncoder: + if video_format == 'avi' and audio_encoder == 'libopus': + return 'aac' + + if video_format == 'm4v': + return 'aac' + + if video_format == 'mov' and audio_encoder in [ 'flac', 'libopus' ]: + return 'aac' + + if video_format == 'webm': + return 'libopus' + + return audio_encoder + + +def fix_video_encoder(video_format : VideoFormat, video_encoder : VideoEncoder) -> VideoEncoder: + if video_format == 'm4v': + return 'libx264' + + if video_format in [ 'mkv', 'mp4' ] and video_encoder == 'rawvideo': + return 'libx264' + + if video_format == 'mov' and video_encoder == 'libvpx-vp9': + return 'libx264' + + if video_format == 'webm': + return 'libvpx-vp9' + + return video_encoder diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index 60a199e..8d8be54 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -9,9 +9,11 @@ from facefusion import process_manager, state_manager from facefusion.download import conditional_download from facefusion.ffmpeg import concat_video, extract_frames, merge_video, read_audio_buffer, replace_audio, restore_audio from facefusion.filesystem import copy_file -from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, resolve_temp_frame_paths +from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, \ + resolve_temp_frame_paths from facefusion.types import EncoderSet -from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, prepare_test_output_directory +from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, \ + prepare_test_output_directory @pytest.fixture(scope = 'module', autouse = True) @@ -63,7 +65,7 @@ def test_get_available_encoder_set() -> None: def test_extract_frames() -> None: - extract_set =\ + test_set =\ [ (get_test_example_file('target-240p-25fps.mp4'), 0, 270, 324), (get_test_example_file('target-240p-25fps.mp4'), 224, 270, 55), @@ -79,7 +81,7 @@ def test_extract_frames() -> None: (get_test_example_file('target-240p-60fps.mp4'), 0, 100, 50) ] - for target_path, trim_frame_start, trim_frame_end, frame_total in extract_set: + for target_path, trim_frame_start, trim_frame_end, frame_total in test_set: create_temp_directory(target_path) assert extract_frames(target_path, '452x240', 30.0, trim_frame_start, trim_frame_end) is True @@ -131,7 +133,7 @@ def test_read_audio_buffer() -> None: def test_restore_audio() -> None: - restore_set =\ + test_set =\ [ (get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')), (get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')), @@ -143,12 +145,11 @@ def test_restore_audio() -> None: ] output_audio_encoders = get_available_encoder_set().get('audio') - for target_path, output_path in restore_set: + for target_path, output_path in test_set: create_temp_directory(target_path) for output_audio_encoder in output_audio_encoders: state_manager.init_item('output_audio_encoder', output_audio_encoder) - copy_file(target_path, get_temp_file_path(target_path)) assert restore_audio(target_path, output_path, 0, 270) is True @@ -159,7 +160,7 @@ def test_restore_audio() -> None: def test_replace_audio() -> None: - replace_set =\ + test_set =\ [ (get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')), (get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')), @@ -171,7 +172,7 @@ def test_replace_audio() -> None: ] output_audio_encoders = get_available_encoder_set().get('audio') - for target_path, output_path in replace_set: + for target_path, output_path in test_set: create_temp_directory(target_path) for output_audio_encoder in output_audio_encoders: