Introduce fix_audio_encoder and fix_video_encoder

This commit is contained in:
henryruhs
2025-05-25 18:02:37 +02:00
parent 1b5621cac5
commit 854f895403
2 changed files with 50 additions and 32 deletions

View File

@@ -1,7 +1,7 @@
import os import os
import subprocess import subprocess
import tempfile import tempfile
from typing import List, Optional from typing import List, Optional, cast
from tqdm import tqdm from tqdm import tqdm
@@ -9,7 +9,7 @@ import facefusion.choices
from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording
from facefusion.filesystem import get_file_format, remove_file from facefusion.filesystem import get_file_format, remove_file
from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern
from facefusion.types import AudioBuffer, Commands, EncoderSet, Fps, UpdateProgress from facefusion.types import AudioBuffer, AudioEncoder, Commands, EncoderSet, Fps, UpdateProgress, VideoEncoder, VideoFormat
from facefusion.vision import detect_video_duration, detect_video_fps, predict_video_frame_total from facefusion.vision import detect_video_duration, detect_video_fps, predict_video_frame_total
@@ -164,12 +164,10 @@ def restore_audio(target_path : str, output_path : str, trim_frame_start : int,
output_audio_volume = state_manager.get_item('output_audio_volume') output_audio_volume = state_manager.get_item('output_audio_volume')
target_video_fps = detect_video_fps(target_path) target_video_fps = detect_video_fps(target_path)
temp_video_path = get_temp_file_path(target_path) temp_video_path = get_temp_file_path(target_path)
temp_video_format = get_file_format(temp_video_path) temp_video_format = cast(VideoFormat, get_file_format(temp_video_path))
temp_video_duration = detect_video_duration(temp_video_path) temp_video_duration = detect_video_duration(temp_video_path)
if temp_video_format == 'webm': output_audio_encoder = fix_audio_encoder(temp_video_format, output_audio_encoder)
output_audio_encoder = 'libopus'
commands = ffmpeg_builder.chain( commands = ffmpeg_builder.chain(
ffmpeg_builder.set_input(temp_video_path), ffmpeg_builder.set_input(temp_video_path),
ffmpeg_builder.select_media_range(trim_frame_start, trim_frame_end, target_video_fps), ffmpeg_builder.select_media_range(trim_frame_start, trim_frame_end, target_video_fps),
@@ -191,12 +189,10 @@ def replace_audio(target_path : str, audio_path : str, output_path : str) -> boo
output_audio_quality = state_manager.get_item('output_audio_quality') output_audio_quality = state_manager.get_item('output_audio_quality')
output_audio_volume = state_manager.get_item('output_audio_volume') output_audio_volume = state_manager.get_item('output_audio_volume')
temp_video_path = get_temp_file_path(target_path) temp_video_path = get_temp_file_path(target_path)
temp_video_format = get_file_format(temp_video_path) temp_video_format = cast(VideoFormat, get_file_format(temp_video_path))
temp_video_duration = detect_video_duration(temp_video_path) temp_video_duration = detect_video_duration(temp_video_path)
if temp_video_format == 'webm': output_audio_encoder = fix_audio_encoder(temp_video_format, output_audio_encoder)
output_audio_encoder = 'libopus'
commands = ffmpeg_builder.chain( commands = ffmpeg_builder.chain(
ffmpeg_builder.set_input(temp_video_path), ffmpeg_builder.set_input(temp_video_path),
ffmpeg_builder.set_input(audio_path), ffmpeg_builder.set_input(audio_path),
@@ -216,21 +212,10 @@ def merge_video(target_path : str, temp_video_fps : Fps, output_video_resolution
output_video_preset = state_manager.get_item('output_video_preset') output_video_preset = state_manager.get_item('output_video_preset')
merge_frame_total = predict_video_frame_total(target_path, output_video_fps, trim_frame_start, trim_frame_end) merge_frame_total = predict_video_frame_total(target_path, output_video_fps, trim_frame_start, trim_frame_end)
temp_video_path = get_temp_file_path(target_path) temp_video_path = get_temp_file_path(target_path)
temp_video_format = get_file_format(temp_video_path) temp_video_format = cast(VideoFormat, get_file_format(temp_video_path))
temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d') temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d')
if temp_video_format == 'm4v': output_video_encoder = fix_video_encoder(temp_video_format, output_video_encoder)
output_video_encoder = 'libx264'
if temp_video_format in [ 'mkv', 'mp4' ] and output_video_encoder == 'rawvideo':
output_video_encoder = 'libx264'
if temp_video_format == 'mov' and output_video_encoder == 'libvpx-vp9':
output_video_encoder = 'libx264'
if temp_video_format == 'webm':
output_video_encoder = 'libvpx-vp9'
commands = ffmpeg_builder.chain( commands = ffmpeg_builder.chain(
ffmpeg_builder.set_conditional_fps(temp_video_fps), ffmpeg_builder.set_conditional_fps(temp_video_fps),
ffmpeg_builder.set_input(temp_frames_pattern), ffmpeg_builder.set_input(temp_frames_pattern),
@@ -270,3 +255,35 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool:
process.communicate() process.communicate()
remove_file(concat_video_path) remove_file(concat_video_path)
return process.returncode == 0 return process.returncode == 0
def fix_audio_encoder(video_format : VideoFormat, audio_encoder : AudioEncoder) -> AudioEncoder:
    """
    Return the audio encoder to use for the given video format.

    Some format / encoder pairings are overridden, every other pairing is
    passed through untouched:

    - 'm4v' always yields 'aac'
    - 'webm' always yields 'libopus'
    - 'avi' combined with 'libopus' yields 'aac'
    - 'mov' combined with 'flac' or 'libopus' yields 'aac'

    :param video_format: format of the video file the audio is written into
    :param audio_encoder: audio encoder that was requested
    :return: the requested encoder, or its replacement for the pairings above
    """
    # formats that force one encoder regardless of the request
    if video_format == 'm4v':
        return 'aac'
    if video_format == 'webm':
        return 'libopus'

    # formats that only replace specific encoders
    if video_format == 'avi' and audio_encoder == 'libopus':
        return 'aac'
    if video_format == 'mov' and audio_encoder in [ 'flac', 'libopus' ]:
        return 'aac'

    return audio_encoder
def fix_video_encoder(video_format : VideoFormat, video_encoder : VideoEncoder) -> VideoEncoder:
    """
    Return the video encoder to use for the given video format.

    Some format / encoder pairings are overridden, every other pairing is
    passed through untouched:

    - 'm4v' always yields 'libx264'
    - 'webm' always yields 'libvpx-vp9'
    - 'mkv' or 'mp4' combined with 'rawvideo' yields 'libx264'
    - 'mov' combined with 'libvpx-vp9' yields 'libx264'

    :param video_format: format of the video file being written
    :param video_encoder: video encoder that was requested
    :return: the requested encoder, or its replacement for the pairings above
    """
    # formats that force one encoder regardless of the request
    if video_format == 'm4v':
        return 'libx264'
    if video_format == 'webm':
        return 'libvpx-vp9'

    # formats that only replace specific encoders
    if video_format in [ 'mkv', 'mp4' ] and video_encoder == 'rawvideo':
        return 'libx264'
    if video_format == 'mov' and video_encoder == 'libvpx-vp9':
        return 'libx264'

    return video_encoder

View File

@@ -9,9 +9,11 @@ from facefusion import process_manager, state_manager
from facefusion.download import conditional_download from facefusion.download import conditional_download
from facefusion.ffmpeg import concat_video, extract_frames, merge_video, read_audio_buffer, replace_audio, restore_audio from facefusion.ffmpeg import concat_video, extract_frames, merge_video, read_audio_buffer, replace_audio, restore_audio
from facefusion.filesystem import copy_file from facefusion.filesystem import copy_file
from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, resolve_temp_frame_paths from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, \
resolve_temp_frame_paths
from facefusion.types import EncoderSet from facefusion.types import EncoderSet
from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, prepare_test_output_directory from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, \
prepare_test_output_directory
@pytest.fixture(scope = 'module', autouse = True) @pytest.fixture(scope = 'module', autouse = True)
@@ -63,7 +65,7 @@ def test_get_available_encoder_set() -> None:
def test_extract_frames() -> None: def test_extract_frames() -> None:
extract_set =\ test_set =\
[ [
(get_test_example_file('target-240p-25fps.mp4'), 0, 270, 324), (get_test_example_file('target-240p-25fps.mp4'), 0, 270, 324),
(get_test_example_file('target-240p-25fps.mp4'), 224, 270, 55), (get_test_example_file('target-240p-25fps.mp4'), 224, 270, 55),
@@ -79,7 +81,7 @@ def test_extract_frames() -> None:
(get_test_example_file('target-240p-60fps.mp4'), 0, 100, 50) (get_test_example_file('target-240p-60fps.mp4'), 0, 100, 50)
] ]
for target_path, trim_frame_start, trim_frame_end, frame_total in extract_set: for target_path, trim_frame_start, trim_frame_end, frame_total in test_set:
create_temp_directory(target_path) create_temp_directory(target_path)
assert extract_frames(target_path, '452x240', 30.0, trim_frame_start, trim_frame_end) is True assert extract_frames(target_path, '452x240', 30.0, trim_frame_start, trim_frame_end) is True
@@ -131,7 +133,7 @@ def test_read_audio_buffer() -> None:
def test_restore_audio() -> None: def test_restore_audio() -> None:
restore_set =\ test_set =\
[ [
(get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')), (get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')),
(get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')), (get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')),
@@ -143,12 +145,11 @@ def test_restore_audio() -> None:
] ]
output_audio_encoders = get_available_encoder_set().get('audio') output_audio_encoders = get_available_encoder_set().get('audio')
for target_path, output_path in restore_set: for target_path, output_path in test_set:
create_temp_directory(target_path) create_temp_directory(target_path)
for output_audio_encoder in output_audio_encoders: for output_audio_encoder in output_audio_encoders:
state_manager.init_item('output_audio_encoder', output_audio_encoder) state_manager.init_item('output_audio_encoder', output_audio_encoder)
copy_file(target_path, get_temp_file_path(target_path)) copy_file(target_path, get_temp_file_path(target_path))
assert restore_audio(target_path, output_path, 0, 270) is True assert restore_audio(target_path, output_path, 0, 270) is True
@@ -159,7 +160,7 @@ def test_restore_audio() -> None:
def test_replace_audio() -> None: def test_replace_audio() -> None:
replace_set =\ test_set =\
[ [
(get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')), (get_test_example_file('target-240p-16khz.avi'), get_test_output_file('target-240p-16khz.avi')),
(get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')), (get_test_example_file('target-240p-16khz.m4v'), get_test_output_file('target-240p-16khz.m4v')),
@@ -171,7 +172,7 @@ def test_replace_audio() -> None:
] ]
output_audio_encoders = get_available_encoder_set().get('audio') output_audio_encoders = get_available_encoder_set().get('audio')
for target_path, output_path in replace_set: for target_path, output_path in test_set:
create_temp_directory(target_path) create_temp_directory(target_path)
for output_audio_encoder in output_audio_encoders: for output_audio_encoder in output_audio_encoders: