from typing import Any, Literal, Callable, List, Tuple, Dict, TypedDict
from collections import namedtuple
import numpy

BoundingBox = numpy.ndarray[Any, Any]
FaceLandmark5 = numpy.ndarray[Any, Any]
FaceLandmark68 = numpy.ndarray[Any, Any]
FaceLandmarkSet = TypedDict('FaceLandmarkSet',
{
	'5' : FaceLandmark5, # type: ignore[valid-type]
	'5/68' : FaceLandmark5, # type: ignore[valid-type]
	'68' : FaceLandmark68, # type: ignore[valid-type]
	'68/5' : FaceLandmark68 # type: ignore[valid-type]
})
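# Note on the keys (an assumption from the names and the 5-to-68 landmark
# conversion introduced in this release): '5/68' is a 5-point layout derived
# from the 68-point landmarks, while '68/5' is a 68-point layout estimated
# from a 5-point detection, so callers have a fallback when the landmarker
# only produced one of the two formats.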
Score = float
FaceScoreSet = TypedDict('FaceScoreSet',
{
	'detector' : Score,
	'landmarker' : Score
})
Embedding = numpy.ndarray[Any, Any]
Face = namedtuple('Face',
[
	'bounding_box',
	'landmarks',
	'scores',
	'embedding',
	'normed_embedding',
	'gender',
	'age'
])
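# A minimal sketch of constructing a Face; the field values are illustrative,
# not taken from a real detection, and plain ints for gender and age are an
# assumption:
#
#   face = Face(
#       bounding_box = numpy.array([ 10, 20, 110, 140 ]),
#       landmarks = { '5': ..., '5/68': ..., '68': ..., '68/5': ... },
#       scores = { 'detector': 0.97, 'landmarker': 0.88 },
#       embedding = numpy.zeros(512),
#       normed_embedding = numpy.zeros(512),
#       gender = 0,
#       age = 32
#   )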
FaceSet = Dict[str, List[Face]]
FaceStore = TypedDict('FaceStore',
{
	'static_faces' : FaceSet,
	'reference_faces' : FaceSet
})
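# Assumption: the FaceSet key is whatever identifier the face analyser caches
# against (for example a frame hash), so a single key maps to every face found
# in that frame; FaceStore keeps those cached faces apart from the
# user-selected reference faces.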

VisionFrame = numpy.ndarray[Any, Any]
Mask = numpy.ndarray[Any, Any]
Matrix = numpy.ndarray[Any, Any]
Translation = numpy.ndarray[Any, Any]

AudioBuffer = bytes
Audio = numpy.ndarray[Any, Any]
AudioChunk = numpy.ndarray[Any, Any]
AudioFrame = numpy.ndarray[Any, Any]
Spectrogram = numpy.ndarray[Any, Any]
MelFilterBank = numpy.ndarray[Any, Any]

Fps = float
Padding = Tuple[int, int, int, int]
Resolution = Tuple[int, int]

ProcessState = Literal['checking', 'processing', 'stopping', 'pending']
QueuePayload = TypedDict('QueuePayload',
{
	'frame_number' : int,
	'frame_path' : str
})
UpdateProgress = Callable[[int], None]
ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProgress], None]
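# A minimal sketch of a function matching the ProcessFrames signature;
# treating the first argument as source paths is an assumption, read_image is
# a hypothetical helper, and the body is illustrative only:
#
#   def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None:
#       for queue_payload in queue_payloads:
#           frame = read_image(queue_payload['frame_path'])
#           update_progress(1)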

WarpTemplate = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512']
WarpTemplateSet = Dict[WarpTemplate, numpy.ndarray[Any, Any]]
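# Assumption: each WarpTemplateSet value stores the normalized 5-point
# destination coordinates used to align a face crop to the named template.
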
ProcessMode = Literal['output', 'preview', 'stream']

LogLevel = Literal['error', 'warn', 'info', 'debug']
VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant']
FaceSelectorMode = Literal['many', 'one', 'reference']
FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best']
FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior']
FaceAnalyserGender = Literal['female', 'male']
FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yoloface', 'yunet']
FaceDetectorTweak = Literal['low-luminance', 'high-luminance']
FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap', 'arcface_uniface']
FaceMaskType = Literal['box', 'occlusion', 'region']
FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip']
TempFrameFormat = Literal['jpg', 'png', 'bmp']
OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf']
OutputVideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow']

ModelValue = Dict[str, Any]
ModelSet = Dict[str, ModelValue]
OptionsWithModel = TypedDict('OptionsWithModel',
{
	'model' : ModelValue
})
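# A minimal sketch of a ModelSet entry; the 'url' and 'path' keys are
# assumptions modelled on how the processors locate their model files, and
# the URL is a placeholder:
#
#   MODELS : ModelSet =
#   {
#       'gpen_bfr_1024':
#       {
#           'url': 'https://example.com/gpen_bfr_1024.onnx',
#           'path': 'models/gpen_bfr_1024.onnx'
#       }
#   }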

ValueAndUnit = TypedDict('ValueAndUnit',
{
	'value' : str,
	'unit' : str
})
ExecutionDeviceFramework = TypedDict('ExecutionDeviceFramework',
{
	'name' : str,
	'version' : str
})
ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct',
{
	'vendor' : str,
	'name' : str,
	'architecture' : str
})
ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory',
{
	'total' : ValueAndUnit,
	'free' : ValueAndUnit
})
ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization',
{
	'gpu' : ValueAndUnit,
	'memory' : ValueAndUnit
})
ExecutionDevice = TypedDict('ExecutionDevice',
{
	'driver_version' : str,
	'framework' : ExecutionDeviceFramework,
	'product' : ExecutionDeviceProduct,
	'video_memory' : ExecutionDeviceVideoMemory,
	'utilization' : ExecutionDeviceUtilization
})
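# A minimal sketch of an ExecutionDevice value; all names and figures are
# illustrative, not read from real hardware:
#
#   execution_device : ExecutionDevice =
#   {
#       'driver_version': '550.54',
#       'framework': { 'name': 'CUDA', 'version': '12.2' },
#       'product': { 'vendor': 'NVIDIA', 'name': 'GeForce RTX 3090', 'architecture': 'Ampere' },
#       'video_memory': { 'total': { 'value': '24576', 'unit': 'MiB' }, 'free': { 'value': '20480', 'unit': 'MiB' } },
#       'utilization': { 'gpu': { 'value': '35', 'unit': '%' }, 'memory': { 'value': '40', 'unit': '%' } }
#   }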