Feat/available encoders (#860)
* Introduce available audio encoders and video encoders
* Introduce available audio encoders and video encoders
* Introduce available audio encoders and video encoders
* Introduce available audio encoders and video encoders
* Add flac to audio encoders
This commit is contained in:
@@ -2,7 +2,7 @@ import logging
|
|||||||
from typing import List, Sequence
|
from typing import List, Sequence
|
||||||
|
|
||||||
from facefusion.common_helper import create_float_range, create_int_range
|
from facefusion.common_helper import create_float_range, create_int_range
|
||||||
from facefusion.typing import Angle, AudioEncoder, AudioFormat, AudioTypeSet, DownloadProvider, DownloadProviderSet, DownloadScope, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, WebcamMode
|
from facefusion.typing import Angle, AudioEncoder, AudioFormat, AudioTypeSet, DownloadProvider, DownloadProviderSet, DownloadScope, EncoderSet, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, WebcamMode
|
||||||
|
|
||||||
face_detector_set : FaceDetectorSet =\
|
face_detector_set : FaceDetectorSet =\
|
||||||
{
|
{
|
||||||
@@ -62,8 +62,13 @@ image_formats : List[ImageFormat] = list(image_type_set.keys())
|
|||||||
video_formats : List[VideoFormat] = list(video_type_set.keys())
|
video_formats : List[VideoFormat] = list(video_type_set.keys())
|
||||||
temp_frame_formats : List[ImageFormat] = [ 'bmp', 'jpeg', 'png', 'tiff' ]
|
temp_frame_formats : List[ImageFormat] = [ 'bmp', 'jpeg', 'png', 'tiff' ]
|
||||||
|
|
||||||
output_audio_encoders : List[AudioEncoder] = [ 'aac', 'libmp3lame', 'libopus', 'libvorbis' ]
|
output_encoder_set : EncoderSet =\
|
||||||
output_video_encoders : List[VideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox' ]
|
{
|
||||||
|
'audio': [ 'aac', 'libmp3lame', 'libopus', 'libvorbis', 'flac' ],
|
||||||
|
'video': [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox' ]
|
||||||
|
}
|
||||||
|
output_audio_encoders : List[AudioEncoder] = output_encoder_set.get('audio')
|
||||||
|
output_video_encoders : List[VideoEncoder] = output_encoder_set.get('video')
|
||||||
output_video_presets : List[VideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ]
|
output_video_presets : List[VideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ]
|
||||||
|
|
||||||
image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ]
|
image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ]
|
||||||
|
|||||||
@@ -5,10 +5,11 @@ from typing import List, Optional
|
|||||||
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
import facefusion.choices
|
||||||
from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording
|
from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording
|
||||||
from facefusion.filesystem import get_file_format, remove_file
|
from facefusion.filesystem import get_file_format, remove_file
|
||||||
from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern, resolve_temp_frame_paths
|
from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern, resolve_temp_frame_paths
|
||||||
from facefusion.typing import AudioBuffer, Commands, Fps, UpdateProgress
|
from facefusion.typing import AudioBuffer, Commands, EncoderSet, Fps, UpdateProgress
|
||||||
from facefusion.vision import count_trim_frame_total, detect_video_duration, restrict_video_fps
|
from facefusion.vision import count_trim_frame_total, detect_video_duration, restrict_video_fps
|
||||||
|
|
||||||
|
|
||||||
@@ -72,6 +73,32 @@ def log_debug(process : subprocess.Popen[bytes]) -> None:
|
|||||||
logger.debug(error.strip(), __name__)
|
logger.debug(error.strip(), __name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_available_encoder_set() -> EncoderSet:
    """
    Collect the audio and video encoders reported by ffmpeg that are also supported choices.

    The ffmpeg process is started with the commands from ffmpeg_builder.get_encoders()
    and its standard output is read line by line. Each line is decoded and lowercased,
    so a line starting with ' a' is treated as an audio row and a line starting with
    ' v' as a video row. The second whitespace separated token of such a row is taken
    as the encoder name and kept only when it is listed in
    facefusion.choices.output_audio_encoders or facefusion.choices.output_video_encoders.

    Returns:
        An EncoderSet whose 'audio' and 'video' lists hold the matching encoder names
        in the order they were read. Both lists are empty when nothing matched.
    """
    available_encoder_set : EncoderSet =\
    {
        'audio': [],
        'video': []
    }
    commands = ffmpeg_builder.chain(
        ffmpeg_builder.get_encoders()
    )
    process = run_ffmpeg(commands)

    # readline() yields an empty bytes object at end of stream, which ends the loop
    while line := process.stdout.readline().decode().lower():
        tokens = line.split()

        # a row without a second token has no encoder name, indexing it would raise IndexError
        if len(tokens) < 2:
            continue
        encoder = tokens[1]

        if line.startswith(' a') and encoder in facefusion.choices.output_audio_encoders:
            available_encoder_set['audio'].append(encoder) #type:ignore[arg-type]
        if line.startswith(' v') and encoder in facefusion.choices.output_video_encoders:
            available_encoder_set['video'].append(encoder) #type:ignore[arg-type]

    return available_encoder_set
|
||||||
|
|
||||||
|
|
||||||
def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
|
def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
|
||||||
extract_frame_total = count_trim_frame_total(target_path, trim_frame_start, trim_frame_end)
|
extract_frame_total = count_trim_frame_total(target_path, trim_frame_start, trim_frame_end)
|
||||||
temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d')
|
temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d')
|
||||||
|
|||||||
@@ -16,6 +16,10 @@ def chain(*commands : Commands) -> Commands:
|
|||||||
return list(itertools.chain(*commands))
|
return list(itertools.chain(*commands))
|
||||||
|
|
||||||
|
|
||||||
|
def get_encoders() -> Commands:
    """Return the command list that holds the single '-encoders' argument."""
    encoder_commands = [ '-encoders' ]
    return encoder_commands
|
||||||
|
|
||||||
|
|
||||||
def set_progress() -> Commands:
|
def set_progress() -> Commands:
|
||||||
return [ '-progress' ]
|
return [ '-progress' ]
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from argparse import ArgumentParser, HelpFormatter
|
|||||||
from facefusion import metadata, wording
|
from facefusion import metadata, wording
|
||||||
from facefusion.common_helper import is_linux, is_windows
|
from facefusion.common_helper import is_linux, is_windows
|
||||||
|
|
||||||
|
|
||||||
ONNXRUNTIME_SET =\
|
ONNXRUNTIME_SET =\
|
||||||
{
|
{
|
||||||
'default': ('onnxruntime', '1.20.1')
|
'default': ('onnxruntime', '1.20.1')
|
||||||
|
|||||||
@@ -3,8 +3,9 @@ from argparse import ArgumentParser, HelpFormatter
|
|||||||
|
|
||||||
import facefusion.choices
|
import facefusion.choices
|
||||||
from facefusion import config, metadata, state_manager, wording
|
from facefusion import config, metadata, state_manager, wording
|
||||||
from facefusion.common_helper import create_float_metavar, create_int_metavar, get_last
|
from facefusion.common_helper import create_float_metavar, create_int_metavar, get_first, get_last
|
||||||
from facefusion.execution import get_available_execution_providers, suggest_execution_provider
|
from facefusion.execution import get_available_execution_providers, suggest_execution_provider
|
||||||
|
from facefusion.ffmpeg import get_available_encoder_set
|
||||||
from facefusion.filesystem import get_file_name, resolve_file_paths
|
from facefusion.filesystem import get_file_name, resolve_file_paths
|
||||||
from facefusion.jobs import job_store
|
from facefusion.jobs import job_store
|
||||||
from facefusion.processors.core import get_processors_modules
|
from facefusion.processors.core import get_processors_modules
|
||||||
@@ -155,13 +156,14 @@ def create_frame_extraction_program() -> ArgumentParser:
|
|||||||
|
|
||||||
def create_output_creation_program() -> ArgumentParser:
|
def create_output_creation_program() -> ArgumentParser:
|
||||||
program = ArgumentParser(add_help = False)
|
program = ArgumentParser(add_help = False)
|
||||||
|
available_encoder_set = get_available_encoder_set()
|
||||||
group_output_creation = program.add_argument_group('output creation')
|
group_output_creation = program.add_argument_group('output creation')
|
||||||
group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range))
|
group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range))
|
||||||
group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution'))
|
group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution'))
|
||||||
group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', 'aac'), choices = facefusion.choices.output_audio_encoders)
|
group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', get_first(available_encoder_set.get('audio'))), choices = available_encoder_set.get('audio'))
|
||||||
group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation.output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range))
|
group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation.output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range))
|
||||||
group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation.output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range))
|
group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation.output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range))
|
||||||
group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders)
|
group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', get_first(available_encoder_set.get('video'))), choices = available_encoder_set.get('video'))
|
||||||
group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets)
|
group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets)
|
||||||
group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range))
|
group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range))
|
||||||
group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution'))
|
group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution'))
|
||||||
|
|||||||
@@ -117,8 +117,13 @@ AudioTypeSet = Dict[AudioFormat, str]
|
|||||||
ImageTypeSet = Dict[ImageFormat, str]
|
ImageTypeSet = Dict[ImageFormat, str]
|
||||||
VideoTypeSet = Dict[VideoFormat, str]
|
VideoTypeSet = Dict[VideoFormat, str]
|
||||||
|
|
||||||
AudioEncoder = Literal['aac', 'libmp3lame', 'libopus', 'libvorbis']
|
AudioEncoder = Literal['aac', 'libmp3lame', 'libopus', 'libvorbis', 'flac']
|
||||||
VideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox']
|
VideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox']
|
||||||
|
class EncoderSet(TypedDict):
    # encoder names grouped by media kind, one list per key
    audio : List[AudioEncoder]
    video : List[VideoEncoder]
|
||||||
VideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow']
|
VideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow']
|
||||||
|
|
||||||
WebcamMode = Literal['inline', 'udp', 'v4l2']
|
WebcamMode = Literal['inline', 'udp', 'v4l2']
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import gradio
|
|||||||
import facefusion.choices
|
import facefusion.choices
|
||||||
from facefusion import state_manager, wording
|
from facefusion import state_manager, wording
|
||||||
from facefusion.common_helper import calc_int_step
|
from facefusion.common_helper import calc_int_step
|
||||||
|
from facefusion.ffmpeg import get_available_encoder_set
|
||||||
from facefusion.filesystem import is_image, is_video
|
from facefusion.filesystem import is_image, is_video
|
||||||
from facefusion.typing import AudioEncoder, Fps, VideoEncoder, VideoPreset
|
from facefusion.typing import AudioEncoder, Fps, VideoEncoder, VideoPreset
|
||||||
from facefusion.uis.core import get_ui_components, register_ui_component
|
from facefusion.uis.core import get_ui_components, register_ui_component
|
||||||
@@ -36,6 +37,7 @@ def render() -> None:
|
|||||||
|
|
||||||
output_image_resolutions = []
|
output_image_resolutions = []
|
||||||
output_video_resolutions = []
|
output_video_resolutions = []
|
||||||
|
available_encoder_set = get_available_encoder_set()
|
||||||
if is_image(state_manager.get_item('target_path')):
|
if is_image(state_manager.get_item('target_path')):
|
||||||
output_image_resolution = detect_image_resolution(state_manager.get_item('target_path'))
|
output_image_resolution = detect_image_resolution(state_manager.get_item('target_path'))
|
||||||
output_image_resolutions = create_image_resolutions(output_image_resolution)
|
output_image_resolutions = create_image_resolutions(output_image_resolution)
|
||||||
@@ -58,7 +60,7 @@ def render() -> None:
|
|||||||
)
|
)
|
||||||
OUTPUT_AUDIO_ENCODER_DROPDOWN = gradio.Dropdown(
|
OUTPUT_AUDIO_ENCODER_DROPDOWN = gradio.Dropdown(
|
||||||
label = wording.get('uis.output_audio_encoder_dropdown'),
|
label = wording.get('uis.output_audio_encoder_dropdown'),
|
||||||
choices = facefusion.choices.output_audio_encoders,
|
choices = available_encoder_set.get('audio'),
|
||||||
value = state_manager.get_item('output_audio_encoder'),
|
value = state_manager.get_item('output_audio_encoder'),
|
||||||
visible = is_video(state_manager.get_item('target_path'))
|
visible = is_video(state_manager.get_item('target_path'))
|
||||||
)
|
)
|
||||||
@@ -80,7 +82,7 @@ def render() -> None:
|
|||||||
)
|
)
|
||||||
OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
|
OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
|
||||||
label = wording.get('uis.output_video_encoder_dropdown'),
|
label = wording.get('uis.output_video_encoder_dropdown'),
|
||||||
choices = facefusion.choices.output_video_encoders,
|
choices = available_encoder_set.get('video'),
|
||||||
value = state_manager.get_item('output_video_encoder'),
|
value = state_manager.get_item('output_video_encoder'),
|
||||||
visible = is_video(state_manager.get_item('target_path'))
|
visible = is_video(state_manager.get_item('target_path'))
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import pytest
|
|||||||
|
|
||||||
from facefusion import process_manager, state_manager
|
from facefusion import process_manager, state_manager
|
||||||
from facefusion.download import conditional_download
|
from facefusion.download import conditional_download
|
||||||
from facefusion.ffmpeg import concat_video, extract_frames, read_audio_buffer, replace_audio, restore_audio
|
from facefusion.ffmpeg import concat_video, extract_frames, get_available_encoder_set, read_audio_buffer, replace_audio, restore_audio
|
||||||
from facefusion.filesystem import copy_file
|
from facefusion.filesystem import copy_file
|
||||||
from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, resolve_temp_frame_paths
|
from facefusion.temp_helper import clear_temp_directory, create_temp_directory, get_temp_file_path, resolve_temp_frame_paths
|
||||||
from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, prepare_test_output_directory
|
from .helper import get_test_example_file, get_test_examples_directory, get_test_output_file, prepare_test_output_directory
|
||||||
@@ -38,6 +38,13 @@ def before_each() -> None:
|
|||||||
prepare_test_output_directory()
|
prepare_test_output_directory()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_available_encoder_set() -> None:
    # the probed encoder set is expected to contain one known audio and one known video encoder
    encoder_set = get_available_encoder_set()
    audio_encoders = encoder_set.get('audio')
    video_encoders = encoder_set.get('video')

    assert 'aac' in audio_encoders
    assert 'libx264' in video_encoders
|
||||||
|
|
||||||
|
|
||||||
def test_extract_frames() -> None:
|
def test_extract_frames() -> None:
|
||||||
extract_set =\
|
extract_set =\
|
||||||
[
|
[
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ def test_set_audio_quality() -> None:
|
|||||||
assert set_audio_quality('libvorbis', 0) == [ '-q:a', '-1.0' ]
|
assert set_audio_quality('libvorbis', 0) == [ '-q:a', '-1.0' ]
|
||||||
assert set_audio_quality('libvorbis', 50) == [ '-q:a', '4.5' ]
|
assert set_audio_quality('libvorbis', 50) == [ '-q:a', '4.5' ]
|
||||||
assert set_audio_quality('libvorbis', 100) == [ '-q:a', '10.0' ]
|
assert set_audio_quality('libvorbis', 100) == [ '-q:a', '10.0' ]
|
||||||
|
assert set_audio_quality('flac', 0) == []
|
||||||
|
assert set_audio_quality('flac', 50) == []
|
||||||
|
assert set_audio_quality('flac', 100) == []
|
||||||
|
|
||||||
|
|
||||||
def test_set_video_quality() -> None:
|
def test_set_video_quality() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user