Feat/commands builder (#852)

* Prototype for ffmpeg builder * Prototype for ffmpeg builder * Add curl builder * Fix typing import * Adjust commands indent * Prototype for ffmpeg builder part2 * Prototype for ffmpeg builder part3 * Prototype for ffmpeg builder part3 * Add chain() helper to the builders * Prototype for ffmpeg builder part4 * Prototype for ffmpeg builder part5 * Prototype for ffmpeg builder part5 * Prototype for ffmpeg builder part6 * Allow dynamic audio size * Fix testing * Prototype for ffmpeg builder part7 * Fix and polish ffmpeg builder * Hardcode the log level for ffmpeg * More ffmpeg rework * Prototype for ffmpeg builder part8 * Prototype for ffmpeg builder part9 * Fix CI * Fix Styles * Add lazy testing, User Agent for CURL * More testing * More testing
2025-01-13 10:42:40 +01:00
parent 7f90ca72bb
commit 8a9e08f3a2
16 changed files with 491 additions and 188 deletions
--- a/facefusion/ffmpeg.py
+++ b/facefusion/ffmpeg.py
@@ -1,21 +1,22 @@
 import os
-import shutil
 import subprocess
 import tempfile
 from typing import List, Optional

 from tqdm import tqdm

-from facefusion import logger, process_manager, state_manager, wording
+from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording
 from facefusion.filesystem import get_file_format, remove_file
 from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern, resolve_temp_frame_paths
-from facefusion.typing import AudioBuffer, Commands, Fps, OutputVideoPreset, UpdateProgress
+from facefusion.typing import AudioBuffer, Commands, Fps, UpdateProgress
 from facefusion.vision import count_trim_frame_total, detect_video_duration, restrict_video_fps


 def run_ffmpeg_with_progress(commands : Commands, update_progress : UpdateProgress) -> subprocess.Popen[bytes]:
 	log_level = state_manager.get_item('log_level')
-	commands = [ shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-loglevel', 'error', '-progress', '-' ] + commands
+	commands.extend(ffmpeg_builder.set_progress())
+	commands.extend(ffmpeg_builder.cast_stream())
+	commands = ffmpeg_builder.run(commands)
 	process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE)

 	while process_manager.is_processing():
@@ -40,7 +41,7 @@ def run_ffmpeg_with_progress(commands : Commands, update_progress : UpdateProgre

 def run_ffmpeg(commands : Commands) -> subprocess.Popen[bytes]:
 	log_level = state_manager.get_item('log_level')
-	commands = [ shutil.which('ffmpeg'), '-hide_banner', '-nostats', '-loglevel', 'error' ] + commands
+	commands = ffmpeg_builder.run(commands)
 	process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE)

 	while process_manager.is_processing():
@@ -58,7 +59,7 @@ def run_ffmpeg(commands : Commands) -> subprocess.Popen[bytes]:


 def open_ffmpeg(commands : Commands) -> subprocess.Popen[bytes]:
-	commands = [ shutil.which('ffmpeg'), '-loglevel', 'quiet' ] + commands
+	commands = ffmpeg_builder.run(commands)
 	return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE)


@@ -74,17 +75,14 @@ def log_debug(process : subprocess.Popen[bytes]) -> None:
 def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
 	extract_frame_total = count_trim_frame_total(target_path, trim_frame_start, trim_frame_end)
 	temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d')
-	commands = [ '-i', target_path, '-s', str(temp_video_resolution), '-q:v', '0' ]
-
-	if isinstance(trim_frame_start, int) and isinstance(trim_frame_end, int):
-		commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ])
-	elif isinstance(trim_frame_start, int):
-		commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',fps=' + str(temp_video_fps) ])
-	elif isinstance(trim_frame_end, int):
-		commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',fps=' + str(temp_video_fps) ])
-	else:
-		commands.extend([ '-vf', 'fps=' + str(temp_video_fps) ])
-	commands.extend([ '-vsync', '0', temp_frames_pattern ])
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(target_path),
+		ffmpeg_builder.set_media_resolution(temp_video_resolution),
+		ffmpeg_builder.set_frame_quality(0),
+		ffmpeg_builder.select_frame_range(trim_frame_start, trim_frame_end, temp_video_fps),
+		ffmpeg_builder.prevent_frame_drop(),
+		ffmpeg_builder.set_output(temp_frames_pattern)
+	)

 	with tqdm(total = extract_frame_total, desc = wording.get('extracting'), unit = 'frame', ascii = ' =', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
 		process = run_ffmpeg_with_progress(commands, lambda frame_number: progress.update(frame_number - progress.n))
@@ -93,27 +91,37 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp

 def copy_image(target_path : str, temp_image_resolution : str) -> bool:
 	temp_file_path = get_temp_file_path(target_path)
-	if get_file_format(target_path) == 'webp':
-		output_image_compression = 100
-	else:
-		output_image_compression = 1
-	commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(output_image_compression), '-y', temp_file_path ]
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(target_path),
+		ffmpeg_builder.set_media_resolution(temp_image_resolution),
+		ffmpeg_builder.set_image_quality(target_path, 100),
+		ffmpeg_builder.force_output(temp_file_path)
+	)
 	return run_ffmpeg(commands).returncode == 0


 def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool:
 	output_image_quality = state_manager.get_item('output_image_quality')
 	temp_file_path = get_temp_file_path(target_path)
-	if get_file_format(target_path) == 'webp':
-		output_image_compression = output_image_quality
-	else:
-		output_image_compression = round(31 - (output_image_quality * 0.31))
-	commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ]
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(temp_file_path),
+		ffmpeg_builder.set_media_resolution(output_image_resolution),
+		ffmpeg_builder.set_image_quality(target_path, output_image_quality),
+		ffmpeg_builder.force_output(output_path)
+	)
 	return run_ffmpeg(commands).returncode == 0


-def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]:
-	commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
+def read_audio_buffer(target_path : str, audio_sample_rate : int, audio_sample_size : int, audio_channel_total : int) -> Optional[AudioBuffer]:
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(target_path),
+		ffmpeg_builder.ignore_video_stream(),
+		ffmpeg_builder.set_audio_sample_rate(audio_sample_rate),
+		ffmpeg_builder.set_audio_sample_size(audio_sample_size),
+		ffmpeg_builder.set_audio_channel_total(audio_channel_total),
+		ffmpeg_builder.cast_stream()
+	)
+
 	process = open_ffmpeg(commands)
 	audio_buffer, _ = process.communicate()
 	if process.returncode == 0:
@@ -127,26 +135,20 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps,
 	output_audio_volume = state_manager.get_item('output_audio_volume')
 	temp_file_path = get_temp_file_path(target_path)
 	temp_video_duration = detect_video_duration(temp_file_path)
-	commands = [ '-i', temp_file_path ]

-	if isinstance(trim_frame_start, int):
-		start_time = trim_frame_start / output_video_fps
-		commands.extend([ '-ss', str(start_time) ])
-	if isinstance(trim_frame_end, int):
-		end_time = trim_frame_end / output_video_fps
-		commands.extend([ '-to', str(end_time) ])
-	commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', output_audio_encoder ])
-	if output_audio_encoder in [ 'aac' ]:
-		output_audio_compression = round(10 - (output_audio_quality * 0.9))
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	if output_audio_encoder in [ 'libmp3lame' ]:
-		output_audio_compression = round(9 - (output_audio_quality * 0.9))
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	if output_audio_encoder in [ 'libopus', 'libvorbis' ]:
-		output_audio_compression = round((100 - output_audio_quality) / 10)
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	output_audio_volume = output_audio_volume / 100
-	commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume), '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ])
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(temp_file_path),
+		ffmpeg_builder.select_media_range(trim_frame_start, trim_frame_end, output_video_fps),
+		ffmpeg_builder.set_input(target_path),
+		ffmpeg_builder.copy_video_encoder(),
+		ffmpeg_builder.set_audio_encoder(output_audio_encoder),
+		ffmpeg_builder.set_audio_quality(output_audio_encoder, output_audio_quality),
+		ffmpeg_builder.set_audio_volume(output_audio_volume),
+		ffmpeg_builder.select_media_stream('0:v:0'),
+		ffmpeg_builder.select_media_stream('1:a:0'),
+		ffmpeg_builder.set_video_duration(temp_video_duration),
+		ffmpeg_builder.force_output(output_path)
+	)
 	return run_ffmpeg(commands).returncode == 0


@@ -156,19 +158,17 @@ def replace_audio(target_path : str, audio_path : str, output_path : str) -> boo
 	output_audio_volume = state_manager.get_item('output_audio_volume')
 	temp_file_path = get_temp_file_path(target_path)
 	temp_video_duration = detect_video_duration(temp_file_path)
-	commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', output_audio_encoder ]

-	if output_audio_encoder in [ 'aac' ]:
-		output_audio_compression = round(10 - (output_audio_quality * 0.9))
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	if output_audio_encoder in [ 'libmp3lame' ]:
-		output_audio_compression = round(9 - (output_audio_quality * 0.9))
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	if output_audio_encoder in [ 'libopus', 'libvorbis' ]:
-		output_audio_compression = round((100 - output_audio_quality) / 10)
-		commands.extend([ '-q:a', str(output_audio_compression) ])
-	output_audio_volume = output_audio_volume / 100
-	commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume), '-t', str(temp_video_duration), '-y', output_path ])
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_input(temp_file_path),
+		ffmpeg_builder.set_input(audio_path),
+		ffmpeg_builder.copy_video_encoder(),
+		ffmpeg_builder.set_audio_encoder(output_audio_encoder),
+		ffmpeg_builder.set_audio_quality(output_audio_encoder, output_audio_quality),
+		ffmpeg_builder.set_audio_volume(output_audio_volume),
+		ffmpeg_builder.set_video_duration(temp_video_duration),
+		ffmpeg_builder.force_output(output_path)
+	)
 	return run_ffmpeg(commands).returncode == 0


@@ -183,22 +183,19 @@ def merge_video(target_path : str, output_video_resolution : str, output_video_f

 	if get_file_format(target_path) == 'webm':
 		output_video_encoder = 'libvpx-vp9'
-	commands = [ '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-s', str(output_video_resolution), '-c:v', output_video_encoder ]
-	if output_video_encoder in [ 'libx264', 'libx265' ]:
-		output_video_compression = round(51 - (output_video_quality * 0.51))
-		commands.extend([ '-crf', str(output_video_compression), '-preset', output_video_preset ])
-	if output_video_encoder in [ 'libvpx-vp9' ]:
-		output_video_compression = round(63 - (output_video_quality * 0.63))
-		commands.extend([ '-crf', str(output_video_compression) ])
-	if output_video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]:
-		output_video_compression = round(51 - (output_video_quality * 0.51))
-		commands.extend([ '-cq', str(output_video_compression), '-preset', map_nvenc_preset(output_video_preset) ])
-	if output_video_encoder in [ 'h264_amf', 'hevc_amf' ]:
-		output_video_compression = round(51 - (output_video_quality * 0.51))
-		commands.extend([ '-qp_i', str(output_video_compression), '-qp_p', str(output_video_compression), '-quality', map_amf_preset(output_video_preset) ])
-	if output_video_encoder in [ 'h264_videotoolbox', 'hevc_videotoolbox' ]:
-		commands.extend([ '-q:v', str(output_video_quality) ])
-	commands.extend([ '-vf', 'framerate=fps=' + str(output_video_fps), '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_file_path ])
+
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.set_conditional_fps(temp_video_fps),
+		ffmpeg_builder.set_input(temp_frames_pattern),
+		ffmpeg_builder.set_video_encoder(output_video_encoder),
+		ffmpeg_builder.set_media_resolution(output_video_resolution),
+		ffmpeg_builder.set_video_quality(output_video_encoder, output_video_quality),
+		ffmpeg_builder.set_video_preset(output_video_encoder, output_video_preset),
+		ffmpeg_builder.set_video_fps(output_video_fps),
+		ffmpeg_builder.set_pixel_format('yuv420p'),
+		ffmpeg_builder.set_video_colorspace('bt709'),
+		ffmpeg_builder.force_output(temp_file_path)
+	)

 	with tqdm(total = merge_frame_total, desc = wording.get('merging'), unit = 'frame', ascii = ' =', disable = state_manager.get_item('log_level') in [ 'warn', 'error' ]) as progress:
 		process = run_ffmpeg_with_progress(commands, lambda frame_number: progress.update(frame_number - progress.n))
@@ -213,38 +210,16 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool:
 			concat_video_file.write('file \'' + os.path.abspath(temp_output_path) + '\'' + os.linesep)
 		concat_video_file.flush()
 		concat_video_file.close()
-	commands = [ '-f', 'concat', '-safe', '0', '-i', concat_video_file.name, '-c:v', 'copy', '-c:a', 'copy', '-y', os.path.abspath(output_path) ]
+
+	output_path = os.path.abspath(output_path)
+	commands = ffmpeg_builder.chain(
+		ffmpeg_builder.unsafe_concat(),
+		ffmpeg_builder.set_input(concat_video_file.name),
+		ffmpeg_builder.copy_video_encoder(),
+		ffmpeg_builder.copy_audio_encoder(),
+		ffmpeg_builder.force_output(output_path)
+	)
 	process = run_ffmpeg(commands)
 	process.communicate()
 	remove_file(concat_video_path)
 	return process.returncode == 0
-
-
-def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]:
-	if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]:
-		return 'fast'
-	if output_video_preset == 'medium':
-		return 'medium'
-	if output_video_preset in [ 'slow', 'slower', 'veryslow' ]:
-		return 'slow'
-	return None
-
-
-def map_amf_preset(output_video_preset : OutputVideoPreset) -> Optional[str]:
-	if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]:
-		return 'speed'
-	if output_video_preset in [ 'faster', 'fast', 'medium' ]:
-		return 'balanced'
-	if output_video_preset in [ 'slow', 'slower', 'veryslow' ]:
-		return 'quality'
-	return None
-
-
-def map_qsv_preset(output_video_preset : OutputVideoPreset) -> Optional[str]:
-	if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]:
-		return 'fast'
-	if output_video_preset == 'medium':
-		return 'medium'
-	if output_video_preset in [ 'slow', 'slower', 'veryslow' ]:
-		return 'slow'
-	return None