Feat/available encoders (#860)

* Introduce available audio encoders and video encoders

* Introduce available audio encoders and video encoders

* Introduce available audio encoders and video encoders

* Introduce available audio encoders and video encoders

* Add flac to audio encoders
This commit is contained in:
Henry Ruhs
2025-01-26 22:54:07 +01:00
committed by henryruhs
parent 30d9b038e4
commit d260c28cf3
9 changed files with 66 additions and 12 deletions

View File

@@ -2,7 +2,7 @@ import logging
from typing import List, Sequence
from facefusion.common_helper import create_float_range, create_int_range
from facefusion.typing import Angle, AudioEncoder, AudioFormat, AudioTypeSet, DownloadProvider, DownloadProviderSet, DownloadScope, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, WebcamMode
from facefusion.typing import Angle, AudioEncoder, AudioFormat, AudioTypeSet, DownloadProvider, DownloadProviderSet, DownloadScope, EncoderSet, ExecutionProvider, ExecutionProviderSet, FaceDetectorModel, FaceDetectorSet, FaceLandmarkerModel, FaceMaskRegion, FaceMaskRegionSet, FaceMaskType, FaceOccluderModel, FaceParserModel, FaceSelectorMode, FaceSelectorOrder, Gender, ImageFormat, ImageTypeSet, JobStatus, LogLevel, LogLevelSet, Race, Score, UiWorkflow, VideoEncoder, VideoFormat, VideoMemoryStrategy, VideoPreset, VideoTypeSet, WebcamMode
face_detector_set : FaceDetectorSet =\
{
@@ -62,8 +62,13 @@ image_formats : List[ImageFormat] = list(image_type_set.keys())
video_formats : List[VideoFormat] = list(video_type_set.keys())
temp_frame_formats : List[ImageFormat] = [ 'bmp', 'jpeg', 'png', 'tiff' ]
output_audio_encoders : List[AudioEncoder] = [ 'aac', 'libmp3lame', 'libopus', 'libvorbis' ]
output_video_encoders : List[VideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf', 'h264_qsv', 'hevc_qsv', 'h264_videotoolbox', 'hevc_videotoolbox' ]
# Output encoder names grouped by media type ('audio' / 'video')
output_encoder_set : EncoderSet =\
{
	'audio':
	[
		'aac',
		'libmp3lame',
		'libopus',
		'libvorbis',
		'flac'
	],
	'video':
	[
		'libx264',
		'libx265',
		'libvpx-vp9',
		'h264_nvenc',
		'hevc_nvenc',
		'h264_amf',
		'hevc_amf',
		'h264_qsv',
		'hevc_qsv',
		'h264_videotoolbox',
		'hevc_videotoolbox'
	]
}
# Both names below reference the very same list objects stored in output_encoder_set (no copy is made)
output_audio_encoders : List[AudioEncoder] = output_encoder_set['audio']
output_video_encoders : List[VideoEncoder] = output_encoder_set['video']
output_video_presets : List[VideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ]
image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ]

View File

@@ -5,10 +5,11 @@ from typing import List, Optional
from tqdm import tqdm
import facefusion.choices
from facefusion import ffmpeg_builder, logger, process_manager, state_manager, wording
from facefusion.filesystem import get_file_format, remove_file
from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern, resolve_temp_frame_paths
from facefusion.typing import AudioBuffer, Commands, Fps, UpdateProgress
from facefusion.typing import AudioBuffer, Commands, EncoderSet, Fps, UpdateProgress
from facefusion.vision import count_trim_frame_total, detect_video_duration, restrict_video_fps
@@ -72,6 +73,32 @@ def log_debug(process : subprocess.Popen[bytes]) -> None:
logger.debug(error.strip(), __name__)
def get_available_encoder_set() -> EncoderSet:
	"""
	Probe ffmpeg and report which of the supported output encoders it lists.

	Runs ffmpeg with the arguments produced by ffmpeg_builder.get_encoders(),
	reads the process' standard output line by line and keeps every encoder
	name that also appears in facefusion.choices.output_audio_encoders or
	facefusion.choices.output_video_encoders.

	Returns:
		An EncoderSet whose 'audio' and 'video' lists contain the matching
		encoder names in the order they were read from ffmpeg.
	"""
	available_encoder_set : EncoderSet =\
	{
		'audio': [],
		'video': []
	}
	commands = ffmpeg_builder.chain(
		ffmpeg_builder.get_encoders()
	)
	process = run_ffmpeg(commands)

	# readline() yields b'' once the stream is exhausted, which ends the loop
	while line := process.stdout.readline().decode().lower():
		columns = line.split()

		# a usable row carries at least a flags column and an encoder name,
		# shorter rows are skipped instead of raising IndexError
		if len(columns) < 2:
			continue
		encoder = columns[1]

		# the line was lowercased above, so audio rows begin with ' a' and video rows with ' v'
		if line.startswith(' a') and encoder in facefusion.choices.output_audio_encoders:
			available_encoder_set['audio'].append(encoder) #type:ignore[arg-type]
		if line.startswith(' v') and encoder in facefusion.choices.output_video_encoders:
			available_encoder_set['video'].append(encoder) #type:ignore[arg-type]
	return available_encoder_set
def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps, trim_frame_start : int, trim_frame_end : int) -> bool:
extract_frame_total = count_trim_frame_total(target_path, trim_frame_start, trim_frame_end)
temp_frames_pattern = get_temp_frames_pattern(target_path, '%08d')

View File

@@ -16,6 +16,10 @@ def chain(*commands : Commands) -> Commands:
return list(itertools.chain(*commands))
def get_encoders() -> Commands:
	"""
	Build the command fragment that holds the single ffmpeg argument '-encoders'.

	Returns:
		A new list containing only '-encoders'.
	"""
	commands : Commands = []
	commands.append('-encoders')
	return commands
def set_progress() -> Commands:
	"""
	Build the command fragment that holds the single ffmpeg argument '-progress'.

	Returns:
		A new list containing only '-progress'.
	"""
	commands : Commands = []
	commands.append('-progress')
	return commands

View File

@@ -8,7 +8,6 @@ from argparse import ArgumentParser, HelpFormatter
from facefusion import metadata, wording
from facefusion.common_helper import is_linux, is_windows
ONNXRUNTIME_SET =\
{
'default': ('onnxruntime', '1.20.1')

View File

@@ -3,8 +3,9 @@ from argparse import ArgumentParser, HelpFormatter
import facefusion.choices
from facefusion import config, metadata, state_manager, wording
from facefusion.common_helper import create_float_metavar, create_int_metavar, get_last
from facefusion.common_helper import create_float_metavar, create_int_metavar, get_first, get_last
from facefusion.execution import get_available_execution_providers, suggest_execution_provider
from facefusion.ffmpeg import get_available_encoder_set
from facefusion.filesystem import get_file_name, resolve_file_paths
from facefusion.jobs import job_store
from facefusion.processors.core import get_processors_modules
@@ -155,13 +156,14 @@ def create_frame_extraction_program() -> ArgumentParser:
def create_output_creation_program() -> ArgumentParser:
program = ArgumentParser(add_help = False)
available_encoder_set = get_available_encoder_set()
group_output_creation = program.add_argument_group('output creation')
group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_int_metavar(facefusion.choices.output_image_quality_range))
group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), default = config.get_str_value('output_creation.output_image_resolution'))
group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', 'aac'), choices = facefusion.choices.output_audio_encoders)
group_output_creation.add_argument('--output-audio-encoder', help = wording.get('help.output_audio_encoder'), default = config.get_str_value('output_creation.output_audio_encoder', get_first(available_encoder_set.get('audio'))), choices = available_encoder_set.get('audio'))
group_output_creation.add_argument('--output-audio-quality', help = wording.get('help.output_audio_quality'), type = int, default = config.get_int_value('output_creation.output_audio_quality', '80'), choices = facefusion.choices.output_audio_quality_range, metavar = create_int_metavar(facefusion.choices.output_audio_quality_range))
group_output_creation.add_argument('--output-audio-volume', help = wording.get('help.output_audio_volume'), type = int, default = config.get_int_value('output_creation.output_audio_volume', '100'), choices = facefusion.choices.output_audio_volume_range, metavar = create_int_metavar(facefusion.choices.output_audio_volume_range))
group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders)
group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', get_first(available_encoder_set.get('video'))), choices = available_encoder_set.get('video'))
group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets)
group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_int_metavar(facefusion.choices.output_video_quality_range))
group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution'))

View File

@@ -117,8 +117,13 @@ AudioTypeSet = Dict[AudioFormat, str]
ImageTypeSet = Dict[ImageFormat, str]
VideoTypeSet = Dict[VideoFormat, str]
AudioEncoder = Literal['aac', 'libmp3lame', 'libopus', 'libvorbis']
# Names of the audio encoders that may be selected for output
AudioEncoder = Literal[
	'aac',
	'libmp3lame',
	'libopus',
	'libvorbis',
	'flac'
]
# Names of the video encoders that may be selected for output
VideoEncoder = Literal[
	'libx264',
	'libx265',
	'libvpx-vp9',
	'h264_nvenc',
	'hevc_nvenc',
	'h264_amf',
	'hevc_amf',
	'h264_qsv',
	'hevc_qsv',
	'h264_videotoolbox',
	'hevc_videotoolbox'
]
# Encoder names grouped by media type: one list for audio, one for video
EncoderSet = TypedDict('EncoderSet', { 'audio' : List[AudioEncoder], 'video' : List[VideoEncoder] })
VideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow']
WebcamMode = Literal['inline', 'udp', 'v4l2']

View File

@@ -5,6 +5,7 @@ import gradio
import facefusion.choices
from facefusion import state_manager, wording
from facefusion.common_helper import calc_int_step
from facefusion.ffmpeg import get_available_encoder_set
from facefusion.filesystem import is_image, is_video
from facefusion.typing import AudioEncoder, Fps, VideoEncoder, VideoPreset
from facefusion.uis.core import get_ui_components, register_ui_component
@@ -36,6 +37,7 @@ def render() -> None:
output_image_resolutions = []
output_video_resolutions = []
available_encoder_set = get_available_encoder_set()
if is_image(state_manager.get_item('target_path')):
output_image_resolution = detect_image_resolution(state_manager.get_item('target_path'))
output_image_resolutions = create_image_resolutions(output_image_resolution)
@@ -58,7 +60,7 @@ def render() -> None:
)
OUTPUT_AUDIO_ENCODER_DROPDOWN = gradio.Dropdown(
label = wording.get('uis.output_audio_encoder_dropdown'),
choices = facefusion.choices.output_audio_encoders,
choices = available_encoder_set.get('audio'),
value = state_manager.get_item('output_audio_encoder'),
visible = is_video(state_manager.get_item('target_path'))
)
@@ -80,7 +82,7 @@ def render() -> None:
)
OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
label = wording.get('uis.output_video_encoder_dropdown'),
choices = facefusion.choices.output_video_encoders,
choices = available_encoder_set.get('video'),
value = state_manager.get_item('output_video_encoder'),
visible = is_video(state_manager.get_item('target_path'))
)