diff --git a/facefusion.ini b/facefusion.ini index 539b83f..96b006e 100644 --- a/facefusion.ini +++ b/facefusion.ini @@ -49,6 +49,8 @@ keep_temp = output_image_quality = output_image_resolution = output_audio_encoder = +output_audio_quality = +output_audio_volume = output_video_encoder = output_video_preset = output_video_quality = diff --git a/facefusion/args.py b/facefusion/args.py index 778c335..5538a4b 100644 --- a/facefusion/args.py +++ b/facefusion/args.py @@ -92,6 +92,8 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None: else: apply_state_item('output_image_resolution', pack_resolution(output_image_resolution)) apply_state_item('output_audio_encoder', args.get('output_audio_encoder')) + apply_state_item('output_audio_quality', args.get('output_audio_quality')) + apply_state_item('output_audio_volume', args.get('output_audio_volume')) apply_state_item('output_video_encoder', args.get('output_video_encoder')) apply_state_item('output_video_preset', args.get('output_video_preset')) apply_state_item('output_video_quality', args.get('output_video_quality')) @@ -105,7 +107,6 @@ def apply_args(args : Args, apply_state_item : ApplyStateItem) -> None: if args.get('output_video_fps') or is_video(args.get('target_path')): output_video_fps = normalize_fps(args.get('output_video_fps')) or detect_video_fps(args.get('target_path')) apply_state_item('output_video_fps', output_video_fps) - apply_state_item('skip_audio', args.get('skip_audio')) # processors available_processors = [ get_file_name(file_path) for file_path in resolve_file_paths('facefusion/processors/modules') ] apply_state_item('processors', args.get('processors')) diff --git a/facefusion/choices.py b/facefusion/choices.py index 78996da..2176a84 100755 --- a/facefusion/choices.py +++ b/facefusion/choices.py @@ -127,4 +127,6 @@ face_mask_padding_range : Sequence[int] = create_int_range(0, 100, 1) face_selector_age_range : Sequence[int] = create_int_range(0, 100, 1) reference_face_distance_range : Sequence[float] = create_float_range(0.0, 1.5, 0.05) output_image_quality_range : Sequence[int] = create_int_range(0, 100, 1) +output_audio_quality_range : Sequence[int] = create_int_range(0, 100, 1) +output_audio_volume_range : Sequence[int] = create_int_range(0, 100, 1) output_video_quality_range : Sequence[int] = create_int_range(0, 100, 1) diff --git a/facefusion/core.py b/facefusion/core.py index 4df5410..5863d8d 100755 --- a/facefusion/core.py +++ b/facefusion/core.py @@ -437,7 +437,7 @@ def process_video(start_time : float) -> ErrorCode: process_manager.end() return 1 # handle audio - if state_manager.get_item('skip_audio'): + if state_manager.get_item('output_audio_volume') == 0: logger.info(wording.get('skipping_audio'), __name__) move_temp_file(state_manager.get_item('target_path'), state_manager.get_item('output_path')) else: diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index 85fe581..381b027 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -94,6 +94,87 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp return process.returncode == 0 +def copy_image(target_path : str, temp_image_resolution : str) -> bool: + temp_file_path = get_temp_file_path(target_path) + if get_file_format(target_path) == 'webp': + output_image_compression = 100 + else: + output_image_compression = 1 + commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(output_image_compression), '-y', temp_file_path ] + return run_ffmpeg(commands).returncode == 0 + + +def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool: + output_image_quality = state_manager.get_item('output_image_quality') + temp_file_path = get_temp_file_path(target_path) + if get_file_format(target_path) == 'webp': + output_image_compression = output_image_quality + else: + output_image_compression = round(31 - (output_image_quality * 0.31)) + commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ] + return run_ffmpeg(commands).returncode == 0 + + +def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: + commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ] + process = open_ffmpeg(commands) + audio_buffer, _ = process.communicate() + if process.returncode == 0: + return audio_buffer + return None + + +def restore_audio(target_path : str, output_path : str, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: + output_audio_encoder = state_manager.get_item('output_audio_encoder') + output_audio_quality = state_manager.get_item('output_audio_quality') + output_audio_volume = state_manager.get_item('output_audio_volume') + temp_file_path = get_temp_file_path(target_path) + temp_video_duration = detect_video_duration(temp_file_path) + commands = [ '-i', temp_file_path ] + + if isinstance(trim_frame_start, int): + start_time = trim_frame_start / output_video_fps + commands.extend([ '-ss', str(start_time) ]) + if isinstance(trim_frame_end, int): + end_time = trim_frame_end / output_video_fps + commands.extend([ '-to', str(end_time) ]) + commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', output_audio_encoder ]) + if output_audio_encoder in [ 'aac' ]: + output_audio_compression = round(10 - (output_audio_quality * 0.9)) + commands.extend([ '-q:a', str(output_audio_compression) ]) + if output_audio_encoder in [ 'libmp3lame' ]: + output_audio_compression = round(9 - (output_audio_quality * 0.9)) + commands.extend([ '-q:a', str(output_audio_compression) ]) + if output_audio_encoder in [ 'libopus', 'libvorbis' ]: + output_audio_compression = round((100 - output_audio_quality) / 10) + commands.extend([ '-q:a', str(output_audio_compression) ]) + output_audio_volume = output_audio_volume / 100 + commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume), '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ]) + return run_ffmpeg(commands).returncode == 0 + + +def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: + output_audio_encoder = state_manager.get_item('output_audio_encoder') + output_audio_quality = state_manager.get_item('output_audio_quality') + output_audio_volume = state_manager.get_item('output_audio_volume') + temp_file_path = get_temp_file_path(target_path) + temp_video_duration = detect_video_duration(temp_file_path) + commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', output_audio_encoder ] + + if output_audio_encoder in [ 'aac' ]: + output_audio_compression = round(10 - (output_audio_quality * 0.9)) + commands.extend([ '-q:a', str(output_audio_compression) ]) + if output_audio_encoder in [ 'libmp3lame' ]: + output_audio_compression = round(9 - (output_audio_quality * 0.9)) + commands.extend([ '-q:a', str(output_audio_compression) ]) + if output_audio_encoder in [ 'libopus', 'libvorbis' ]: + output_audio_compression = round((100 - output_audio_quality) / 10) + commands.extend([ '-q:a', str(output_audio_compression) ]) + output_audio_volume = output_audio_volume / 100 + commands.extend([ '-filter:a', 'volume=' + str(output_audio_volume), '-t', str(temp_video_duration), '-y', output_path ]) + return run_ffmpeg(commands).returncode == 0 + + def merge_video(target_path : str, output_video_resolution : str, output_video_fps: Fps) -> bool: output_video_encoder = state_manager.get_item('output_video_encoder') output_video_quality = state_manager.get_item('output_video_quality') @@ -128,7 +209,6 @@ def merge_video(target_path : str, output_video_resolution : str, output_video_f def concat_video(output_path : str, temp_output_paths : List[str]) -> bool: - output_audio_encoder = state_manager.get_item('output_audio_encoder') concat_video_path = tempfile.mktemp() with open(concat_video_path, 'w') as concat_video_file: @@ -136,67 +216,13 @@ def concat_video(output_path : str, temp_output_paths : List[str]) -> bool: concat_video_file.write('file \'' + os.path.abspath(temp_output_path) + '\'' + os.linesep) concat_video_file.flush() concat_video_file.close() - commands = [ '-f', 'concat', '-safe', '0', '-i', concat_video_file.name, '-c:v', 'copy', '-c:a', output_audio_encoder, '-y', os.path.abspath(output_path) ] + commands = [ '-f', 'concat', '-safe', '0', '-i', concat_video_file.name, '-c:v', 'copy', '-c:a', 'copy', '-y', os.path.abspath(output_path) ] process = run_ffmpeg(commands) process.communicate() remove_file(concat_video_path) return process.returncode == 0 -def copy_image(target_path : str, temp_image_resolution : str) -> bool: - temp_file_path = get_temp_file_path(target_path) - temp_image_compression = calc_image_compression(target_path, 100) - commands = [ '-i', target_path, '-s', str(temp_image_resolution), '-q:v', str(temp_image_compression), '-y', temp_file_path ] - return run_ffmpeg(commands).returncode == 0 - - -def finalize_image(target_path : str, output_path : str, output_image_resolution : str) -> bool: - output_image_quality = state_manager.get_item('output_image_quality') - temp_file_path = get_temp_file_path(target_path) - output_image_compression = calc_image_compression(target_path, output_image_quality) - commands = [ '-i', temp_file_path, '-s', str(output_image_resolution), '-q:v', str(output_image_compression), '-y', output_path ] - return run_ffmpeg(commands).returncode == 0 - - -def calc_image_compression(image_path : str, image_quality : int) -> int: - if get_file_format(image_path) == 'webm': - image_quality = 100 - image_quality - return round(31 - (image_quality * 0.31)) - - -def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: - commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ] - process = open_ffmpeg(commands) - audio_buffer, _ = process.communicate() - if process.returncode == 0: - return audio_buffer - return None - - -def restore_audio(target_path : str, output_path : str, output_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool: - output_audio_encoder = state_manager.get_item('output_audio_encoder') - temp_file_path = get_temp_file_path(target_path) - temp_video_duration = detect_video_duration(temp_file_path) - commands = [ '-i', temp_file_path ] - - if isinstance(trim_frame_start, int): - start_time = trim_frame_start / output_video_fps - commands.extend([ '-ss', str(start_time) ]) - if isinstance(trim_frame_end, int): - end_time = trim_frame_end / output_video_fps - commands.extend([ '-to', str(end_time) ]) - commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', output_audio_encoder, '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ]) - return run_ffmpeg(commands).returncode == 0 - - -def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: - output_audio_encoder = state_manager.get_item('output_audio_encoder') - temp_file_path = get_temp_file_path(target_path) - temp_video_duration = detect_video_duration(temp_file_path) - commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', output_audio_encoder, '-t', str(temp_video_duration), '-y', output_path ] - return run_ffmpeg(commands).returncode == 0 - - def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]: return 'fast' diff --git a/facefusion/program.py b/facefusion/program.py index 71e1def..7e7a42c 100755 --- a/facefusion/program.py +++ b/facefusion/program.py @@ -159,13 +159,14 @@ def create_output_creation_program() -> ArgumentParser: group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range)) group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution')) group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', 'aac'), choices = facefusion.choices.output_audio_encoders) + group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation.output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range)) + group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation.output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range)) group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range)) group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation.output_video_fps')) - group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) - job_store.register_step_keys([ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps', 'skip_audio' ]) + job_store.register_step_keys([ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', 'output_audio_quality', 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps' ]) return program diff --git a/facefusion/typing.py b/facefusion/typing.py index ad2bf0a..1bc08e6 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -252,12 +252,13 @@ StateKey = Literal\ 'output_image_quality', 'output_image_resolution', 'output_audio_encoder', + 'output_audio_quality', + 'output_audio_volume', 'output_video_encoder', 'output_video_preset', 'output_video_quality', 'output_video_resolution', 'output_video_fps', - 'skip_audio', 'processors', 'open_browser', 'ui_layouts', @@ -315,12 +316,13 @@ State = TypedDict('State', 'output_image_quality' : int, 'output_image_resolution' : str, 'output_audio_encoder' : OutputAudioEncoder, + 'output_audio_quality' : int, + 'output_audio_volume' : int, 'output_video_encoder' : OutputVideoEncoder, 'output_video_preset' : OutputVideoPreset, 'output_video_quality' : int, 'output_video_resolution' : str, 'output_video_fps' : float, - 'skip_audio' : bool, 'processors' : List[str], 'open_browser' : bool, 'ui_layouts' : List[str], diff --git a/facefusion/uis/choices.py b/facefusion/uis/choices.py index da0aa26..be98eb8 100644 --- a/facefusion/uis/choices.py +++ b/facefusion/uis/choices.py @@ -5,7 +5,7 @@ from facefusion.uis.typing import JobManagerAction, JobRunnerAction, WebcamMode job_manager_actions : List[JobManagerAction] = [ 'job-create', 'job-submit', 'job-delete', 'job-add-step', 'job-remix-step', 'job-insert-step', 'job-remove-step' ] job_runner_actions : List[JobRunnerAction] = [ 'job-run', 'job-run-all', 'job-retry', 'job-retry-all' ] -common_options : List[str] = [ 'keep-temp', 'skip-audio' ] +common_options : List[str] = [ 'keep-temp' ] webcam_modes : List[WebcamMode] = [ 'inline', 'udp', 'v4l2' ] webcam_resolutions : List[str] = [ '320x240', '640x480', '800x600', '1024x768', '1280x720', '1280x960', '1920x1080', '2560x1440', '3840x2160' ] diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index 183ebd1..a9b409a 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -81,8 +81,8 @@ def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[ state_manager.init_item('source_paths', [ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) state_manager.init_item('face_landmarker_score', 0) state_manager.init_item('temp_frame_format', 'bmp') + state_manager.init_item('output_audio_volume', 0) state_manager.init_item('output_video_preset', 'ultrafast') - state_manager.init_item('skip_audio', True) state_manager.sync_item('execution_providers') state_manager.sync_item('execution_thread_count') state_manager.sync_item('execution_queue_count') diff --git a/facefusion/uis/components/common_options.py b/facefusion/uis/components/common_options.py index b44b60c..1cf96fa 100644 --- a/facefusion/uis/components/common_options.py +++ b/facefusion/uis/components/common_options.py @@ -15,8 +15,6 @@ def render() -> None: if state_manager.get_item('keep_temp'): common_options.append('keep-temp') - if state_manager.get_item('skip_audio'): - common_options.append('skip-audio') COMMON_OPTIONS_CHECKBOX_GROUP = gradio.Checkboxgroup( label = wording.get('uis.common_options_checkbox_group'), @@ -31,6 +29,4 @@ def listen() -> None: def update(common_options : List[str]) -> None: keep_temp = 'keep-temp' in common_options - skip_audio = 'skip-audio' in common_options state_manager.set_item('keep_temp', keep_temp) - state_manager.set_item('skip_audio', skip_audio) diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index 31fe154..3011f2b 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -13,6 +13,8 @@ from facefusion.vision import create_image_resolutions, create_video_resolutions OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_AUDIO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None +OUTPUT_AUDIO_QUALITY_SLIDER : Optional[gradio.Slider] = None +OUTPUT_AUDIO_VOLUME_SLIDER : Optional[gradio.Slider] = None OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None @@ -24,6 +26,8 @@ def render() -> None: global OUTPUT_IMAGE_QUALITY_SLIDER global OUTPUT_IMAGE_RESOLUTION_DROPDOWN global OUTPUT_AUDIO_ENCODER_DROPDOWN + global OUTPUT_AUDIO_QUALITY_SLIDER + global OUTPUT_AUDIO_VOLUME_SLIDER global OUTPUT_VIDEO_ENCODER_DROPDOWN global OUTPUT_VIDEO_PRESET_DROPDOWN global OUTPUT_VIDEO_RESOLUTION_DROPDOWN @@ -58,6 +62,22 @@ def render() -> None: value = state_manager.get_item('output_audio_encoder'), visible = is_video(state_manager.get_item('target_path')) ) + OUTPUT_AUDIO_QUALITY_SLIDER = gradio.Slider( + label = wording.get('uis.output_audio_quality_slider'), + value = state_manager.get_item('output_audio_quality'), + step = calc_int_step(facefusion.choices.output_audio_quality_range), + minimum = facefusion.choices.output_audio_quality_range[0], + maximum = facefusion.choices.output_audio_quality_range[-1], + visible = is_video(state_manager.get_item('target_path')) + ) + OUTPUT_AUDIO_VOLUME_SLIDER = gradio.Slider( + label = wording.get('uis.output_audio_volume_slider'), + value = state_manager.get_item('output_audio_volume'), + step = calc_int_step(facefusion.choices.output_audio_volume_range), + minimum = facefusion.choices.output_audio_volume_range[0], + maximum = facefusion.choices.output_audio_volume_range[-1], + visible = is_video(state_manager.get_item('target_path')) + ) OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown( label = wording.get('uis.output_video_encoder_dropdown'), choices = facefusion.choices.output_video_encoders, @@ -99,6 +119,8 @@ def listen() -> None: OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) OUTPUT_AUDIO_ENCODER_DROPDOWN.change(update_output_audio_encoder, inputs = OUTPUT_AUDIO_ENCODER_DROPDOWN) + OUTPUT_AUDIO_QUALITY_SLIDER.release(update_output_audio_quality, inputs = OUTPUT_AUDIO_QUALITY_SLIDER) + OUTPUT_AUDIO_VOLUME_SLIDER.release(update_output_audio_volume, inputs = OUTPUT_AUDIO_VOLUME_SLIDER) OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) @@ -111,22 +133,22 @@ def listen() -> None: 'target_video' ]): for method in [ 'upload', 'change', 'clear' ]: - getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) + getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_AUDIO_ENCODER_DROPDOWN, OUTPUT_AUDIO_QUALITY_SLIDER, OUTPUT_AUDIO_VOLUME_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) -def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: +def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: if is_image(state_manager.get_item('target_path')): output_image_resolution = detect_image_resolution(state_manager.get_item('target_path')) output_image_resolutions = create_image_resolutions(output_image_resolution) state_manager.set_item('output_image_resolution', pack_resolution(output_image_resolution)) - return gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_image_resolution'), choices = output_image_resolutions, visible = True), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_image_resolution'), choices = output_image_resolutions, visible = True), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) if is_video(state_manager.get_item('target_path')): output_video_resolution = detect_video_resolution(state_manager.get_item('target_path')) output_video_resolutions = create_video_resolutions(output_video_resolution) state_manager.set_item('output_video_resolution', pack_resolution(output_video_resolution)) state_manager.set_item('output_video_fps', detect_video_fps(state_manager.get_item('target_path'))) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_video_resolution'), choices = output_video_resolutions, visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(value = state_manager.get_item('output_video_resolution'), choices = output_video_resolutions, visible = True), gradio.Slider(value = state_manager.get_item('output_video_fps'), visible = True) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False) def update_output_image_quality(output_image_quality : float) -> None: @@ -141,6 +163,14 @@ def update_output_audio_encoder(output_audio_encoder : OutputAudioEncoder) -> No state_manager.set_item('output_audio_encoder', output_audio_encoder) +def update_output_audio_quality(output_audio_quality : float) -> None: + state_manager.set_item('output_audio_quality', int(output_audio_quality)) + + +def update_output_audio_volume(output_audio_volume: float) -> None: + state_manager.set_item('output_audio_volume', int(output_audio_volume)) + + def update_output_video_encoder(output_video_encoder : OutputVideoEncoder) -> None: state_manager.set_item('output_video_encoder', output_video_encoder) diff --git a/facefusion/wording.py b/facefusion/wording.py index db7faab..9a55c72 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -139,14 +139,15 @@ WORDING : Dict[str, Any] =\ 'keep_temp': 'keep the temporary resources after processing', # output creation 'output_image_quality': 'specify the image quality which translates to the compression factor', - 'output_image_resolution': 'specify the image output resolution based on the target image', - 'output_audio_encoder': 'specify the encoder used for the audio output', - 'output_video_encoder': 'specify the encoder used for the video output', + 'output_image_resolution': 'specify the image resolution based on the target image', + 'output_audio_encoder': 'specify the encoder used for the audio', + 'output_audio_quality': 'specify the audio quality which translates to the compression factor', + 'output_audio_volume': 'specify the audio volume based on the target video', + 'output_video_encoder': 'specify the encoder used for the video', 'output_video_preset': 'balance fast video processing and video file size', 'output_video_quality': 'specify the video quality which translates to the compression factor', - 'output_video_resolution': 'specify the video output resolution based on the target video', - 'output_video_fps': 'specify the video output fps based on the target video', - 'skip_audio': 'omit the audio from the target video', + 'output_video_resolution': 'specify the video resolution based on the target video', + 'output_video_fps': 'specify the video fps based on the target video', # processors 'processors': 'load a single or multiple processors (choices: {choices}, ...)', 'age_modifier_model': 'choose the model responsible for aging the face', @@ -303,6 +304,8 @@ WORDING : Dict[str, Any] =\ 'lip_syncer_model_dropdown': 'LIP SYNCER MODEL', 'log_level_dropdown': 'LOG LEVEL', 'output_audio_encoder_dropdown': 'OUTPUT AUDIO ENCODER', + 'output_audio_quality_slider': 'OUTPUT AUDIO QUALITY', + 'output_audio_volume_slider': 'OUTPUT AUDIO VOLUME', 'output_image_or_video': 'OUTPUT', 'output_image_quality_slider': 'OUTPUT IMAGE QUALITY', 'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION', diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index 514b6ed..5506885 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -29,6 +29,8 @@ def before_all() -> None: state_manager.init_item('temp_path', tempfile.gettempdir()) state_manager.init_item('temp_frame_format', 'png') state_manager.init_item('output_audio_encoder', 'aac') + state_manager.init_item('output_audio_quality', 80) + state_manager.init_item('output_audio_volume', 100) @pytest.fixture(scope = 'function', autouse = True) diff --git a/tests/test_job_runner.py b/tests/test_job_runner.py index 84e8644..a51968d 100644 --- a/tests/test_job_runner.py +++ b/tests/test_job_runner.py @@ -2,7 +2,6 @@ import subprocess import pytest -from facefusion import state_manager from facefusion.download import conditional_download from facefusion.filesystem import copy_file from facefusion.jobs.job_manager import add_step, clear_jobs, create_job, init_jobs, submit_job, submit_jobs @@ -19,7 +18,6 @@ def before_all() -> None: 'https://github.com/facefusion/facefusion-assets/releases/download/examples-3.0.0/target-240p.mp4' ]) subprocess.run([ 'ffmpeg', '-i', get_test_example_file('target-240p.mp4'), '-vframes', '1', get_test_example_file('target-240p.jpg') ]) - state_manager.init_item('output_audio_encoder', 'aac') @pytest.fixture(scope = 'function', autouse = True)