Feat/more audio settings (#849)

* Add more audio settings, revamp some ffmpeg commands

* Add more audio settings, revamp some ffmpeg commands

* Add more audio settings, revamp some ffmpeg commands

* Add more audio settings, revamp some ffmpeg commands
This commit is contained in:
Henry Ruhs
2025-01-07 22:10:54 +01:00
committed by henryruhs
parent c5bc7c50a5
commit 5b76f54332
14 changed files with 144 additions and 81 deletions

View File

@@ -49,6 +49,8 @@ keep_temp =
output_image_quality = output_image_quality =
output_image_resolution = output_image_resolution =
output_audio_encoder = output_audio_encoder =
output_audio_quality =
output_audio_volume =
output_video_encoder = output_video_encoder =
output_video_preset = output_video_preset =
output_video_quality = output_video_quality =

View File

@@ -92,6 +92,8 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None:
else: else:
apply_state_item('output_image_resolution', pack_resolution(output_image_resolution)) apply_state_item('output_image_resolution', pack_resolution(output_image_resolution))
apply_state_item('output_audio_encoder', args.get('output_audio_encoder')) apply_state_item('output_audio_encoder', args.get('output_audio_encoder'))
apply_state_item('output_audio_quality', args.get('output_audio_quality'))
apply_state_item('output_audio_volume', args.get('output_audio_volume'))
apply_state_item('output_video_encoder', args.get('output_video_encoder')) apply_state_item('output_video_encoder', args.get('output_video_encoder'))
apply_state_item('output_video_preset', args.get('output_video_preset')) apply_state_item('output_video_preset', args.get('output_video_preset'))
apply_state_item('output_video_quality', args.get('output_video_quality')) apply_state_item('output_video_quality', args.get('output_video_quality'))
@@ -105,7 +107,6 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None:
if args.get('output_video_fps') or is_video(args.get('target_path')): if args.get('output_video_fps') or is_video(args.get('target_path')):
output_video_fps = normalize_fps(args.get('output_video_fps')) or detect_video_fps(args.get('target_path')) output_video_fps = normalize_fps(args.get('output_video_fps')) or detect_video_fps(args.get('target_path'))
apply_state_item('output_video_fps', output_video_fps) apply_state_item('output_video_fps', output_video_fps)
apply_state_item('skip_audio', args.get('skip_audio'))
# processors # processors
available_processors = [ get_file_name(file_path) for file_path in resolve_file_paths('facefusion/processors/modules') ] available_processors = [ get_file_name(file_path) for file_path in resolve_file_paths('facefusion/processors/modules') ]
apply_state_item('processors', args.get('processors')) apply_state_item('processors', args.get('processors'))

View File

@@ -127,4 +127,6 @@ face_mask_padding_range : Sequence[int] = create_int_range(0, 100, 1)
face_selector_age_range : Sequence[int] = create_int_range(0, 100, 1) face_selector_age_range : Sequence[int] = create_int_range(0, 100, 1)
reference_face_distance_range : Sequence[float] = create_float_range(0.0, 1.5, 0.05) reference_face_distance_range : Sequence[float] = create_float_range(0.0, 1.5, 0.05)
output_image_quality_range : Sequence[int] = create_int_range(0, 100, 1) output_image_quality_range : Sequence[int] = create_int_range(0, 100, 1)
output_audio_quality_range : Sequence[int] = create_int_range(0, 100, 1)
output_audio_volume_range : Sequence[int] = create_int_range(0, 100, 1)
output_video_quality_range : Sequence[int] = create_int_range(0, 100, 1) output_video_quality_range : Sequence[int] = create_int_range(0, 100, 1)

View File

@@ -437,7 +437,7 @@ def process_video(start_time : float) -> ErrorCode:
process_manager.end() process_manager.end()
return 1 return 1
# handle audio # handle audio
if state_manager.get_item('skip_audio'): if state_manager.get_item('output_audio_volume') == 0:
logger.info(wording.get('skipping_audio'), __name__) logger.info(wording.get('skipping_audio'), __name__)
move_temp_file(state_manager.get_item('target_path'), state_manager.get_item('output_path')) move_temp_file(state_manager.get_item('target_path'), state_manager.get_item('output_path'))
else: else:

View File

@@ -94,6 +94,87 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp
return process.returncode == 0 return process.returncode == 0
def copy_image(target_path : str, temp_image_resolution : str) -> bool:
    """Copy the target image into the temp location at the given resolution, at maximum quality."""
    temp_file_path = get_temp_file_path(target_path)
    # WebP's -q:v is 0-100 (higher = better); other formats use qscale where 1 is best.
    is_webp = get_file_format(target_path) == 'webp'
    output_image_compression = 100 if is_webp else 1
    commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(output_image_compression), '-y', temp_file_path ]
    return run_ffmpeg(commands).returncode == 0
def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool:
    """Encode the temp image to the output path, mapping the 0-100 quality setting onto the format's scale."""
    output_image_quality = state_manager.get_item('output_image_quality')
    temp_file_path = get_temp_file_path(target_path)
    if get_file_format(target_path) == 'webp':
        # WebP takes the 0-100 quality value directly (higher = better).
        output_image_compression = output_image_quality
    else:
        # Other formats use ffmpeg's qscale: map 0-100 quality onto 31..0 (lower = better).
        output_image_compression = round(31 - (output_image_quality * 0.31))
    commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ]
    return run_ffmpeg(commands).returncode == 0
def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]:
    """Decode the target's audio track to raw signed 16-bit PCM, or return None on failure."""
    commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
    process = open_ffmpeg(commands)
    audio_buffer, _ = process.communicate()
    # Only trust the buffer when ffmpeg exited cleanly.
    return audio_buffer if process.returncode == 0 else None
def restore_audio(target_path : str, output_path : str, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
    """Mux the processed temp video with the original target's audio track.

    Trims the temp video by the given frame bounds (converted to seconds via
    output_video_fps), re-encodes the audio with the configured encoder/quality
    and applies the configured volume filter.
    """
    output_audio_encoder = state_manager.get_item('output_audio_encoder')
    output_audio_quality = state_manager.get_item('output_audio_quality')
    output_audio_volume = state_manager.get_item('output_audio_volume')
    temp_file_path = get_temp_file_path(target_path)
    temp_video_duration = detect_video_duration(temp_file_path)
    commands = [ '-i', temp_file_path ]
    if isinstance(trim_frame_start, int):
        commands.extend([ '-ss', str(trim_frame_start / output_video_fps) ])
    if isinstance(trim_frame_end, int):
        commands.extend([ '-to', str(trim_frame_end / output_video_fps) ])
    commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', output_audio_encoder ])
    # Map the 0-100 quality setting onto the encoder-specific -q:a scale
    # (lower = better for aac/libmp3lame, higher = better for libopus/libvorbis).
    if output_audio_encoder == 'aac':
        commands.extend([ '-q:a', str(round(10 - (output_audio_quality * 0.9))) ])
    elif output_audio_encoder == 'libmp3lame':
        commands.extend([ '-q:a', str(round(9 - (output_audio_quality * 0.9))) ])
    elif output_audio_encoder in ('libopus', 'libvorbis'):
        commands.extend([ '-q:a', str(round((100 - output_audio_quality) / 10)) ])
    commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume / 100), '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ])
    return run_ffmpeg(commands).returncode == 0
def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool:
    """Mux the processed temp video with an external audio file, re-encoding the
    audio with the configured encoder/quality and applying the configured volume."""
    output_audio_encoder = state_manager.get_item('output_audio_encoder')
    output_audio_quality = state_manager.get_item('output_audio_quality')
    output_audio_volume = state_manager.get_item('output_audio_volume')
    temp_file_path = get_temp_file_path(target_path)
    temp_video_duration = detect_video_duration(temp_file_path)
    commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', output_audio_encoder ]
    # Map the 0-100 quality setting onto the encoder-specific -q:a scale
    # (lower = better for aac/libmp3lame, higher = better for libopus/libvorbis).
    if output_audio_encoder == 'aac':
        commands.extend([ '-q:a', str(round(10 - (output_audio_quality * 0.9))) ])
    elif output_audio_encoder == 'libmp3lame':
        commands.extend([ '-q:a', str(round(9 - (output_audio_quality * 0.9))) ])
    elif output_audio_encoder in ('libopus', 'libvorbis'):
        commands.extend([ '-q:a', str(round((100 - output_audio_quality) / 10)) ])
    commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume / 100), '-t', str(temp_video_duration), '-y', output_path ])
    return run_ffmpeg(commands).returncode == 0
def merge_video(target_path : str, output_video_resolution : str, output_video_fps: Fps) -> bool: def merge_video(target_path : str, output_video_resolution : str, output_video_fps: Fps) -> bool:
output_video_encoder = state_manager.get_item('output_video_encoder') output_video_encoder = state_manager.get_item('output_video_encoder')
output_video_quality = state_manager.get_item('output_video_quality') output_video_quality = state_manager.get_item('output_video_quality')
@@ -128,7 +209,6 @@ def merge_video(target_path : str, output_video_resolution : str, output_video_f
def concat_video(output_path : str, temp_output_paths : List[str]) -> bool: def concat_video(output_path : str, temp_output_paths : List[str]) -> bool:
output_audio_encoder = state_manager.get_item('output_audio_encoder')
concat_video_path = tempfile.mktemp() concat_video_path = tempfile.mktemp()
with open(concat_video_path, 'w') as concat_video_file: with open(concat_video_path, 'w') as concat_video_file:
@@ -136,67 +216,13 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool:
concat_video_file.write('file \'' + os.path.abspath(temp_output_path) + '\'' + os.linesep) concat_video_file.write('file \'' + os.path.abspath(temp_output_path) + '\'' + os.linesep)
concat_video_file.flush() concat_video_file.flush()
concat_video_file.close() concat_video_file.close()
commands = [ '-f', 'concat', '-safe', '0', '-i', concat_video_file.name, '-c:v', 'copy', '-c:a', output_audio_encoder, '-y', os.path.abspath(output_path) ] commands = [ '-f', 'concat', '-safe', '0', '-i', concat_video_file.name, '-c:v', 'copy', '-c:a', 'copy', '-y', os.path.abspath(output_path) ]
process = run_ffmpeg(commands) process = run_ffmpeg(commands)
process.communicate() process.communicate()
remove_file(concat_video_path) remove_file(concat_video_path)
return process.returncode == 0 return process.returncode == 0
def copy_image(target_path : str, temp_image_resolution : str) -> bool:
    """Copy the target image into the temp location at the given resolution, using maximum quality."""
    temp_file_path = get_temp_file_path(target_path)
    # Quality 100 keeps the intermediate copy as lossless as the format allows.
    temp_image_compression = calc_image_compression(target_path, 100)
    commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(temp_image_compression), '-y', temp_file_path ]
    return run_ffmpeg(commands).returncode == 0
def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool:
    """Encode the temp image to the output path at the configured quality and resolution."""
    output_image_quality = state_manager.get_item('output_image_quality')
    temp_file_path = get_temp_file_path(target_path)
    output_image_compression = calc_image_compression(target_path, output_image_quality)
    commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ]
    return run_ffmpeg(commands).returncode == 0
def calc_image_compression(image_path : str, image_quality : int) -> int:
    """Map a 0-100 quality value to the ffmpeg ``-q:v`` value for the image's format.

    BUG FIX: the format check previously tested ``'webm'`` (a video container),
    so the WebP branch was unreachable and WebP images were encoded with the
    JPEG qscale mapping. WebP's ``-q:v`` takes the 0-100 quality value directly
    (higher = better), while other formats use ffmpeg's qscale where 0-100
    quality maps onto 31..0 (lower = better).
    """
    if get_file_format(image_path) == 'webp':
        return image_quality
    return round(31 - (image_quality * 0.31))
def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]:
    """Decode the target's audio track to raw signed 16-bit PCM bytes, or None on failure."""
    commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ]
    process = open_ffmpeg(commands)
    audio_buffer, _ = process.communicate()
    if process.returncode != 0:
        return None
    return audio_buffer
def restore_audio(target_path : str, output_path : str, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
    """Mux the processed temp video with the original target's audio track,
    trimming by the given frame bounds (converted to seconds via output_video_fps)."""
    output_audio_encoder = state_manager.get_item('output_audio_encoder')
    temp_file_path = get_temp_file_path(target_path)
    temp_video_duration = detect_video_duration(temp_file_path)
    commands = [ '-i', temp_file_path ]
    if isinstance(trim_frame_start, int):
        commands.extend([ '-ss', str(trim_frame_start / output_video_fps) ])
    if isinstance(trim_frame_end, int):
        commands.extend([ '-to', str(trim_frame_end / output_video_fps) ])
    # Copy the video stream, take audio from the original target, and cap the
    # duration to the temp video so trailing audio is dropped.
    commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', output_audio_encoder, '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ])
    return run_ffmpeg(commands).returncode == 0
def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool:
    """Mux the processed temp video with an external audio file, capping the
    duration to the temp video's length."""
    output_audio_encoder = state_manager.get_item('output_audio_encoder')
    temp_file_path = get_temp_file_path(target_path)
    temp_video_duration = detect_video_duration(temp_file_path)
    commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', output_audio_encoder, '-t', str(temp_video_duration), '-y', output_path ]
    return run_ffmpeg(commands).returncode == 0
def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]:
if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]: if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]:
return 'fast' return 'fast'

View File

@@ -159,13 +159,14 @@ def create_output_creation_program() -> ArgumentParser:
group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range)) group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range))
group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution')) group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution'))
group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', 'aac'), choices = facefusion.choices.output_audio_encoders) group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', 'aac'), choices = facefusion.choices.output_audio_encoders)
group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation.output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range))
group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation.output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range))
group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders)
group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets)
group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range)) group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range))
group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution'))
group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation.output_video_fps')) group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation.output_video_fps'))
group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) job_store.register_step_keys([ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps' ])
job_store.register_step_keys([ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps', 'skip_audio' ])
return program return program

View File

@@ -252,12 +252,13 @@ StateKey = Literal\
'output_image_quality', 'output_image_quality',
'output_image_resolution', 'output_image_resolution',
'output_audio_encoder', 'output_audio_encoder',
'output_audio_quality',
'output_audio_volume',
'output_video_encoder', 'output_video_encoder',
'output_video_preset', 'output_video_preset',
'output_video_quality', 'output_video_quality',
'output_video_resolution', 'output_video_resolution',
'output_video_fps', 'output_video_fps',
'skip_audio',
'processors', 'processors',
'open_browser', 'open_browser',
'ui_layouts', 'ui_layouts',
@@ -315,12 +316,13 @@ State = TypedDict('State',
'output_image_quality' : int, 'output_image_quality' : int,
'output_image_resolution' : str, 'output_image_resolution' : str,
'output_audio_encoder' : OutputAudioEncoder, 'output_audio_encoder' : OutputAudioEncoder,
'output_audio_quality' : int,
'output_audio_volume' : int,
'output_video_encoder' : OutputVideoEncoder, 'output_video_encoder' : OutputVideoEncoder,
'output_video_preset' : OutputVideoPreset, 'output_video_preset' : OutputVideoPreset,
'output_video_quality' : int, 'output_video_quality' : int,
'output_video_resolution' : str, 'output_video_resolution' : str,
'output_video_fps' : float, 'output_video_fps' : float,
'skip_audio' : bool,
'processors' : List[str], 'processors' : List[str],
'open_browser' : bool, 'open_browser' : bool,
'ui_layouts' : List[str], 'ui_layouts' : List[str],

View File

@@ -5,7 +5,7 @@ from facefusion.uis.typing import JobManagerAction, JobRunnerAction, WebcamMode
job_manager_actions : List[JobManagerAction] = [ 'job-create', 'job-submit', 'job-delete', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step' ] job_manager_actions : List[JobManagerAction] = [ 'job-create', 'job-submit', 'job-delete', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step' ]
job_runner_actions : List[JobRunnerAction] = [ 'job-run', 'job-run-all', 'job-retry', 'job-retry-all' ] job_runner_actions : List[JobRunnerAction] = [ 'job-run', 'job-run-all', 'job-retry', 'job-retry-all' ]
common_options : List[str] = [ 'keep-temp', 'skip-audio' ] common_options : List[str] = [ 'keep-temp' ]
webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ] webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ]
webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ] webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ]

View File

@@ -81,8 +81,8 @@ def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[
state_manager.init_item('source_paths', [ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) state_manager.init_item('source_paths', [ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ])
state_manager.init_item('face_landmarker_score', 0) state_manager.init_item('face_landmarker_score', 0)
state_manager.init_item('temp_frame_format', 'bmp') state_manager.init_item('temp_frame_format', 'bmp')
state_manager.init_item('output_audio_volume', 0)
state_manager.init_item('output_video_preset', 'ultrafast') state_manager.init_item('output_video_preset', 'ultrafast')
state_manager.init_item('skip_audio', True)
state_manager.sync_item('execution_providers') state_manager.sync_item('execution_providers')
state_manager.sync_item('execution_thread_count') state_manager.sync_item('execution_thread_count')
state_manager.sync_item('execution_queue_count') state_manager.sync_item('execution_queue_count')

View File

@@ -15,8 +15,6 @@ def render() -> None:
if state_manager.get_item('keep_temp'): if state_manager.get_item('keep_temp'):
common_options.append('keep-temp') common_options.append('keep-temp')
if state_manager.get_item('skip_audio'):
common_options.append('skip-audio')
COMMON_OPTIONS_CHECKBOX_GROUP = gradio.Checkboxgroup( COMMON_OPTIONS_CHECKBOX_GROUP = gradio.Checkboxgroup(
label = wording.get('uis.common_options_checkbox_group'), label = wording.get('uis.common_options_checkbox_group'),
@@ -31,6 +29,4 @@ def listen() -> None:
def update(common_options : List[str]) -> None: def update(common_options : List[str]) -> None:
keep_temp = 'keep-temp' in common_options keep_temp = 'keep-temp' in common_options
skip_audio = 'skip-audio' in common_options
state_manager.set_item('keep_temp', keep_temp) state_manager.set_item('keep_temp', keep_temp)
state_manager.set_item('skip_audio', skip_audio)

View File

@@ -13,6 +13,8 @@ from facefusion.vision import create_image_resolutions, create_video_resolutions
OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None
OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
OUTPUT_AUDIO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_AUDIO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None
OUTPUT_AUDIO_QUALITY_SLIDER : Optional[gradio.Slider] = None
OUTPUT_AUDIO_VOLUME_SLIDER : Optional[gradio.Slider] = None
OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None
OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None
OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
@@ -24,6 +26,8 @@ def render() -> None:
global OUTPUT_IMAGE_QUALITY_SLIDER global OUTPUT_IMAGE_QUALITY_SLIDER
global OUTPUT_IMAGE_RESOLUTION_DROPDOWN global OUTPUT_IMAGE_RESOLUTION_DROPDOWN
global OUTPUT_AUDIO_ENCODER_DROPDOWN global OUTPUT_AUDIO_ENCODER_DROPDOWN
global OUTPUT_AUDIO_QUALITY_SLIDER
global OUTPUT_AUDIO_VOLUME_SLIDER
global OUTPUT_VIDEO_ENCODER_DROPDOWN global OUTPUT_VIDEO_ENCODER_DROPDOWN
global OUTPUT_VIDEO_PRESET_DROPDOWN global OUTPUT_VIDEO_PRESET_DROPDOWN
global OUTPUT_VIDEO_RESOLUTION_DROPDOWN global OUTPUT_VIDEO_RESOLUTION_DROPDOWN
@@ -58,6 +62,22 @@ def render() -> None:
value = state_manager.get_item('output_audio_encoder'), value = state_manager.get_item('output_audio_encoder'),
visible = is_video(state_manager.get_item('target_path')) visible = is_video(state_manager.get_item('target_path'))
) )
OUTPUT_AUDIO_QUALITY_SLIDER = gradio.Slider(
label = wording.get('uis.output_audio_quality_slider'),
value = state_manager.get_item('output_audio_quality'),
step = calc_int_step(facefusion.choices.output_audio_quality_range),
minimum = facefusion.choices.output_audio_quality_range[0],
maximum = facefusion.choices.output_audio_quality_range[-1],
visible = is_video(state_manager.get_item('target_path'))
)
OUTPUT_AUDIO_VOLUME_SLIDER = gradio.Slider(
label = wording.get('uis.output_audio_volume_slider'),
value = state_manager.get_item('output_audio_volume'),
step = calc_int_step(facefusion.choices.output_audio_volume_range),
minimum = facefusion.choices.output_audio_volume_range[0],
maximum = facefusion.choices.output_audio_volume_range[-1],
visible = is_video(state_manager.get_item('target_path'))
)
OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown( OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
label = wording.get('uis.output_video_encoder_dropdown'), label = wording.get('uis.output_video_encoder_dropdown'),
choices = facefusion.choices.output_video_encoders, choices = facefusion.choices.output_video_encoders,
@@ -99,6 +119,8 @@ def listen() -> None:
OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER)
OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN)
OUTPUT_AUDIO_ENCODER_DROPDOWN.change(update_output_audio_encoder, inputs = OUTPUT_AUDIO_ENCODER_DROPDOWN) OUTPUT_AUDIO_ENCODER_DROPDOWN.change(update_output_audio_encoder, inputs = OUTPUT_AUDIO_ENCODER_DROPDOWN)
OUTPUT_AUDIO_QUALITY_SLIDER.release(update_output_audio_quality, inputs = OUTPUT_AUDIO_QUALITY_SLIDER)
OUTPUT_AUDIO_VOLUME_SLIDER.release(update_output_audio_volume, inputs = OUTPUT_AUDIO_VOLUME_SLIDER)
OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN)
OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN)
OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER)
@@ -111,22 +133,22 @@ def listen() -> None:
'target_video' 'target_video'
]): ]):
for method in [ 'upload', 'change', 'clear' ]: for method in [ 'upload', 'change', 'clear' ]:
getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_AUDIO_QUALITY_SLIDER, OUTPUT_AUDIO_VOLUME_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ])
def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]:
if is_image(state_manager.get_item('target_path')): if is_image(state_manager.get_item('target_path')):
output_image_resolution = detect_image_resolution(state_manager.get_item('target_path')) output_image_resolution = detect_image_resolution(state_manager.get_item('target_path'))
output_image_resolutions = create_image_resolutions(output_image_resolution) output_image_resolutions = create_image_resolutions(output_image_resolution)
state_manager.set_item('output_image_resolution', pack_resolution(output_image_resolution)) state_manager.set_item('output_image_resolution', pack_resolution(output_image_resolution))
return gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_image_resolution'), choices = output_image_resolutions, visible = True), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) return gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_image_resolution'), choices = output_image_resolutions, visible = True), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False)
if is_video(state_manager.get_item('target_path')): if is_video(state_manager.get_item('target_path')):
output_video_resolution = detect_video_resolution(state_manager.get_item('target_path')) output_video_resolution = detect_video_resolution(state_manager.get_item('target_path'))
output_video_resolutions = create_video_resolutions(output_video_resolution) output_video_resolutions = create_video_resolutions(output_video_resolution)
state_manager.set_item('output_video_resolution', pack_resolution(output_video_resolution)) state_manager.set_item('output_video_resolution', pack_resolution(output_video_resolution))
state_manager.set_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path'))) state_manager.set_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path')))
return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_video_resolution'), choices = output_video_resolutions, visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True) return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_video_resolution'), choices = output_video_resolutions, visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True)
return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False)
def update_output_image_quality(output_image_quality : float) -> None: def update_output_image_quality(output_image_quality : float) -> None:
@@ -141,6 +163,14 @@ def update_output_audio_encoder(output_audio_encoder : OutputAudioEncoder) -> No
state_manager.set_item('output_audio_encoder', output_audio_encoder) state_manager.set_item('output_audio_encoder', output_audio_encoder)
def update_output_audio_quality(output_audio_quality : float) -> None:
state_manager.set_item('output_audio_quality', int(output_audio_quality))
def update_output_audio_volume(output_audio_volume: float) -> None:
state_manager.set_item('output_audio_volume', int(output_audio_volume))
def update_output_video_encoder(output_video_encoder : OutputVideoEncoder) -> None: def update_output_video_encoder(output_video_encoder : OutputVideoEncoder) -> None:
state_manager.set_item('output_video_encoder', output_video_encoder) state_manager.set_item('output_video_encoder', output_video_encoder)

View File

@@ -139,14 +139,15 @@ WORDING : Dict[str, Any] =\
'keep_temp': 'keep the temporary resources after processing', 'keep_temp': 'keep the temporary resources after processing',
# output creation # output creation
'output_image_quality': 'specify the image quality which translates to the compression factor', 'output_image_quality': 'specify the image quality which translates to the compression factor',
'output_image_resolution': 'specify the image output resolution based on the target image', 'output_image_resolution': 'specify the image resolution based on the target image',
'output_audio_encoder': 'specify the encoder used for the audio output', 'output_audio_encoder': 'specify the encoder used for the audio',
'output_video_encoder': 'specify the encoder used for the video output', 'output_audio_quality': 'specify the audio quality which translates to the compression factor',
'output_audio_volume': 'specify the audio volume based on the target video',
'output_video_encoder': 'specify the encoder used for the video',
'output_video_preset': 'balance fast video processing and video file size', 'output_video_preset': 'balance fast video processing and video file size',
'output_video_quality': 'specify the video quality which translates to the compression factor', 'output_video_quality': 'specify the video quality which translates to the compression factor',
'output_video_resolution': 'specify the video output resolution based on the target video', 'output_video_resolution': 'specify the video resolution based on the target video',
'output_video_fps': 'specify the video output fps based on the target video', 'output_video_fps': 'specify the video fps based on the target video',
'skip_audio': 'omit the audio from the target video',
# processors # processors
'processors': 'load a single or multiple processors (choices: {choices}, ...)', 'processors': 'load a single or multiple processors (choices: {choices}, ...)',
'age_modifier_model': 'choose the model responsible for aging the face', 'age_modifier_model': 'choose the model responsible for aging the face',
@@ -303,6 +304,8 @@ WORDING : Dict[str, Any] =\
'lip_syncer_model_dropdown': 'LIP SYNCER MODEL', 'lip_syncer_model_dropdown': 'LIP SYNCER MODEL',
'log_level_dropdown': 'LOG LEVEL', 'log_level_dropdown': 'LOG LEVEL',
'output_audio_encoder_dropdown': 'OUTPUT AUDIO ENCODER', 'output_audio_encoder_dropdown': 'OUTPUT AUDIO ENCODER',
'output_audio_quality_slider': 'OUTPUT AUDIO QUALITY',
'output_audio_volume_slider': 'OUTPUT AUDIO VOLUME',
'output_image_or_video': 'OUTPUT', 'output_image_or_video': 'OUTPUT',
'output_image_quality_slider': 'OUTPUT IMAGE QUALITY', 'output_image_quality_slider': 'OUTPUT IMAGE QUALITY',
'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION', 'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION',

View File

@@ -29,6 +29,8 @@ def before_all() -> None:
state_manager.init_item('temp_path', tempfile.gettempdir()) state_manager.init_item('temp_path', tempfile.gettempdir())
state_manager.init_item('temp_frame_format', 'png') state_manager.init_item('temp_frame_format', 'png')
state_manager.init_item('output_audio_encoder', 'aac') state_manager.init_item('output_audio_encoder', 'aac')
state_manager.init_item('output_audio_quality', 80)
state_manager.init_item('output_audio_volume', 100)
@pytest.fixture(scope = 'function', autouse = True) @pytest.fixture(scope = 'function', autouse = True)

View File

@@ -2,7 +2,6 @@ import subprocess
import pytest import pytest
from facefusion import state_manager
from facefusion.download import conditional_download from facefusion.download import conditional_download
from facefusion.filesystem import copy_file from facefusion.filesystem import copy_file
from facefusion.jobs.job_manager import add_step, clear_jobs, create_job, init_jobs, submit_job, submit_jobs from facefusion.jobs.job_manager import add_step, clear_jobs, create_job, init_jobs, submit_job, submit_jobs
@@ -19,7 +18,6 @@ def before_all() -> None:
'https://github.com/facefusion/facefusion-assets/releases/download/examples-3.0.0/target-240p.mp4' 'https://github.com/facefusion/facefusion-assets/releases/download/examples-3.0.0/target-240p.mp4'
]) ])
subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vframes', '1', get_test_example_file('target-240p.jpg') ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vframes', '1', get_test_example_file('target-240p.jpg') ])
state_manager.init_item('output_audio_encoder', 'aac')
@pytest.fixture(scope = 'function', autouse = True) @pytest.fixture(scope = 'function', autouse = True)