* Cosmetic changes

* Cosmetic changes

* Run single warm up for the benchmark suite

* Use latest version of Gradio

* More testing

* Introduce basic installer

* Fix typo

* Move more to installer file

* Fix the installer with the uninstall all trick

* Adjust wording

* Fix coreml in installer

* Allow Python 3.9

* Add VENV to installer

* Just some cosmetics

* Just some cosmetics

* Dedicated headless mode, Refine API of UI layouts

* Use --headless for pytest

* Fix testing for Windows

* Normalize output path that lacks extension

* Fix CI for Windows

* Fix CI for Windows

* UI to change output path

* Add conda support for the installer

* Improve installer quite a bit

* Drop conda support

* Install community wheels for coreml silicon

* Improve output video component

* Fix silicon wheel downloading

* Remove venv from installer as we cannot activate via subprocess

* Use join to create wheel name

* Refine the output path normalization

* Refine the output path normalization

* Introduce ProcessMode and rename some methods

* Introduce ProcessMode and rename some methods

* Basic webcam integration and open_ffmpeg()

* Basic webcam integration part2

* Benchmark resolutions now selectable

* Rename benchmark resolution back to benchmark runs

* Fix repeating output path in UI

* Keep output_path untouched if not resolvable

* Add more cases to normalize output path

* None for those tests that don't take source path into account

* Finish basic webcam integration, UI layout now with custom run()

* Fix CI and hide link in webcam UI

* Cosmetics on webcam UI

* Move get_device to utilities

* Fix CI

* Introduce output-image-quality, Show and hide UI according to target media type

* Benchmark with partial result updates

* fix: trim frame sliders not appearing after dragging video

* fix: output and temp frame setting inputs not appearing

* Fix: set increased update delay to 250ms to let Gradio update conditional inputs properly

* Reverted .gitignore

* Adjust timings

* Remove timeout hacks and get fully event driven

* Update dependencies

* Update dependencies

* Revert NSFW library, Conditional unset trim args

* Face selector works better on preview slider release

* Add limit resources to UI

* Introduce vision.py for all CV2 operations, Rename some methods

* Add restoring audio failed

* Decouple updates for preview image and preview frame slider, Move reduce_preview_frame to vision

* Refactor detect_fps based on JSON output

* Only webcam when open

* More conditions to vision.py

* Add udp and v4l2 streaming to webcam UI

* Detect v4l2 device to be used

* Refactor code a bit

* Use static max memory for UI

* Fix CI

* Looks stable to me

* Update preview

* Update preview

---------

Co-authored-by: Sumit <vizsumit@gmail.com>
This commit is contained in:
Henry Ruhs
2023-09-06 00:25:18 +02:00
committed by GitHub
parent 4ffae94bac
commit 82eaf76da8
39 changed files with 788 additions and 282 deletions

View File

@@ -1,4 +1,4 @@
from typing import Any, Optional, List
from typing import Any, Optional, List, Dict, Generator
import time
import tempfile
import statistics
@@ -6,26 +6,38 @@ import gradio
import facefusion.globals
from facefusion import wording
from facefusion.capturer import get_video_frame_total
from facefusion.core import conditional_process
from facefusion.vision import count_video_frame_total
from facefusion.core import limit_resources, conditional_process
from facefusion.uis.typing import Update
from facefusion.utilities import normalize_output_path, clear_temp
BENCHMARK_RESULT_DATAFRAME : Optional[gradio.Dataframe] = None
BENCHMARK_RESULTS_DATAFRAME : Optional[gradio.Dataframe] = None
BENCHMARK_RUNS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None
BENCHMARK_CYCLES_SLIDER : Optional[gradio.Button] = None
BENCHMARK_START_BUTTON : Optional[gradio.Button] = None
BENCHMARK_CLEAR_BUTTON : Optional[gradio.Button] = None
BENCHMARKS : Dict[str, str] = \
{
'240p': '.assets/examples/target-240p.mp4',
'360p': '.assets/examples/target-360p.mp4',
'540p': '.assets/examples/target-540p.mp4',
'720p': '.assets/examples/target-720p.mp4',
'1080p': '.assets/examples/target-1080p.mp4',
'1440p': '.assets/examples/target-1440p.mp4',
'2160p': '.assets/examples/target-2160p.mp4'
}
def render() -> None:
global BENCHMARK_RESULT_DATAFRAME
global BENCHMARK_RESULTS_DATAFRAME
global BENCHMARK_RUNS_CHECKBOX_GROUP
global BENCHMARK_CYCLES_SLIDER
global BENCHMARK_START_BUTTON
global BENCHMARK_CLEAR_BUTTON
with gradio.Box():
BENCHMARK_RESULT_DATAFRAME = gradio.Dataframe(
label = wording.get('benchmark_result_dataframe_label'),
BENCHMARK_RESULTS_DATAFRAME = gradio.Dataframe(
label = wording.get('benchmark_results_dataframe_label'),
headers =
[
'target_path',
@@ -35,8 +47,7 @@ def render() -> None:
'slowest_run',
'relative_fps'
],
col_count = (6, 'fixed'),
row_count = (7, 'fixed'),
row_count = len(BENCHMARKS),
datatype =
[
'str',
@@ -47,54 +58,65 @@ def render() -> None:
'number'
]
)
BENCHMARK_CYCLES_SLIDER = gradio.Slider(
label = wording.get('benchmark_cycles_slider_label'),
minimum = 1,
step = 1,
value = 3,
maximum = 10
)
with gradio.Box():
BENCHMARK_RUNS_CHECKBOX_GROUP = gradio.CheckboxGroup(
label = wording.get('benchmark_runs_checkbox_group_label'),
value = list(BENCHMARKS.keys()),
choices = list(BENCHMARKS.keys())
)
BENCHMARK_CYCLES_SLIDER = gradio.Slider(
label = wording.get('benchmark_cycles_slider_label'),
minimum = 1,
step = 1,
value = 3,
maximum = 10
)
with gradio.Row():
BENCHMARK_START_BUTTON = gradio.Button(wording.get('start_button_label'))
BENCHMARK_CLEAR_BUTTON = gradio.Button(wording.get('clear_button_label'))
def listen() -> None:
BENCHMARK_START_BUTTON.click(update, inputs = BENCHMARK_CYCLES_SLIDER, outputs = BENCHMARK_RESULT_DATAFRAME)
BENCHMARK_CLEAR_BUTTON.click(clear, outputs = BENCHMARK_RESULT_DATAFRAME)
BENCHMARK_RUNS_CHECKBOX_GROUP.change(update_benchmark_runs, inputs = BENCHMARK_RUNS_CHECKBOX_GROUP, outputs = BENCHMARK_RUNS_CHECKBOX_GROUP)
BENCHMARK_START_BUTTON.click(start, inputs = [ BENCHMARK_RUNS_CHECKBOX_GROUP, BENCHMARK_CYCLES_SLIDER ], outputs = BENCHMARK_RESULTS_DATAFRAME)
BENCHMARK_CLEAR_BUTTON.click(clear, outputs = BENCHMARK_RESULTS_DATAFRAME)
def update(benchmark_cycles : int) -> Update:
def update_benchmark_runs(benchmark_runs : List[str]) -> Update:
return gradio.update(value = benchmark_runs)
def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]:
facefusion.globals.source_path = '.assets/examples/source.jpg'
target_paths =\
[
'.assets/examples/target-240p.mp4',
'.assets/examples/target-360p.mp4',
'.assets/examples/target-540p.mp4',
'.assets/examples/target-720p.mp4',
'.assets/examples/target-1080p.mp4',
'.assets/examples/target-1440p.mp4',
'.assets/examples/target-2160p.mp4'
]
value = [ benchmark(target_path, benchmark_cycles) for target_path in target_paths ]
return gradio.update(value = value)
target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ]
benchmark_results = []
if target_paths:
warm_up(BENCHMARKS['240p'])
for target_path in target_paths:
benchmark_results.append(benchmark(target_path, benchmark_cycles))
yield benchmark_results
def warm_up(target_path : str) -> None:
facefusion.globals.target_path = target_path
facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_path, facefusion.globals.target_path, tempfile.gettempdir())
conditional_process()
def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]:
process_times = []
total_fps = 0.0
for i in range(benchmark_cycles + 1):
for i in range(benchmark_cycles):
facefusion.globals.target_path = target_path
facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_path, facefusion.globals.target_path, tempfile.gettempdir())
video_frame_total = get_video_frame_total(facefusion.globals.target_path)
video_frame_total = count_video_frame_total(facefusion.globals.target_path)
start_time = time.perf_counter()
limit_resources()
conditional_process()
end_time = time.perf_counter()
process_time = end_time - start_time
fps = video_frame_total / process_time
if i > 0:
process_times.append(process_time)
total_fps += fps
total_fps += video_frame_total / process_time
process_times.append(process_time)
average_run = round(statistics.mean(process_times), 2)
fastest_run = round(min(process_times), 2)
slowest_run = round(max(process_times), 2)

View File

@@ -1,5 +1,4 @@
from typing import List, Optional, Tuple, Any, Dict
from time import sleep
import cv2
import gradio
@@ -7,7 +6,7 @@ import gradio
import facefusion.choices
import facefusion.globals
from facefusion import wording
from facefusion.capturer import get_video_frame
from facefusion.vision import get_video_frame, normalize_frame_color
from facefusion.face_analyser import get_many_faces
from facefusion.face_reference import clear_face_reference
from facefusion.typing import Frame, FaceRecognition
@@ -26,7 +25,8 @@ def render() -> None:
global REFERENCE_FACE_DISTANCE_SLIDER
with gradio.Box():
reference_face_gallery_args: Dict[str, Any] = {
reference_face_gallery_args: Dict[str, Any] =\
{
'label': wording.get('reference_face_gallery_label'),
'height': 120,
'object_fit': 'cover',
@@ -62,15 +62,17 @@ def listen() -> None:
FACE_RECOGNITION_DROPDOWN.select(update_face_recognition, inputs = FACE_RECOGNITION_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ])
REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_face_reference_position)
REFERENCE_FACE_DISTANCE_SLIDER.change(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER)
update_component_names : List[ComponentName] =\
multi_component_names : List[ComponentName] =\
[
'target_file',
'preview_frame_slider'
'source_image',
'target_image',
'target_video'
]
for component_name in update_component_names:
for component_name in multi_component_names:
component = ui.get_component(component_name)
if component:
component.change(update_face_reference_position, outputs = REFERENCE_FACE_POSITION_GALLERY)
for method in [ 'upload', 'change', 'clear' ]:
getattr(component, method)(update_face_reference_position, outputs = REFERENCE_FACE_POSITION_GALLERY)
select_component_names : List[ComponentName] =\
[
'face_analyser_direction_dropdown',
@@ -81,6 +83,9 @@ def listen() -> None:
component = ui.get_component(component_name)
if component:
component.select(update_face_reference_position, outputs = REFERENCE_FACE_POSITION_GALLERY)
preview_frame_slider = ui.get_component('preview_frame_slider')
if preview_frame_slider:
preview_frame_slider.release(update_face_reference_position, outputs = REFERENCE_FACE_POSITION_GALLERY)
def update_face_recognition(face_recognition : FaceRecognition) -> Tuple[Update, Update]:
@@ -98,7 +103,6 @@ def clear_and_update_face_reference_position(event: gradio.SelectData) -> Update
def update_face_reference_position(reference_face_position : int = 0) -> Update:
sleep(0.2)
gallery_frames = []
facefusion.globals.reference_face_position = reference_face_position
if is_image(facefusion.globals.target_path):
@@ -129,5 +133,6 @@ def extract_gallery_frames(reference_frame : Frame) -> List[Frame]:
end_x = max(0, end_x + padding_x)
end_y = max(0, end_y + padding_y)
crop_frame = reference_frame[start_y:end_y, start_x:end_x]
crop_frames.append(ui.normalize_frame(crop_frame))
crop_frame = normalize_frame_color(crop_frame)
crop_frames.append(crop_frame)
return crop_frames

View File

@@ -0,0 +1,29 @@
from typing import Optional
import gradio
import facefusion.globals
from facefusion import wording
from facefusion.uis.typing import Update
MAX_MEMORY_SLIDER : Optional[gradio.Slider] = None
def render() -> None:
	"""Render the max memory slider into the current layout."""
	global MAX_MEMORY_SLIDER

	# collect the slider arguments first, then instantiate inside the box
	max_memory_slider_args =\
	{
		'label': wording.get('max_memory_slider_label'),
		'minimum': 0,
		'maximum': 128,
		'step': 1
	}
	with gradio.Box():
		MAX_MEMORY_SLIDER = gradio.Slider(**max_memory_slider_args)
def listen() -> None:
	"""Wire the slider change event to its update handler."""
	max_memory_slider = MAX_MEMORY_SLIDER
	max_memory_slider.change(update_max_memory, inputs = max_memory_slider, outputs = max_memory_slider)
def update_max_memory(max_memory : int) -> Update:
	"""Store the max memory limit in the globals; zero disables the limit."""
	facefusion.globals.max_memory = None if max_memory <= 0 else max_memory
	return gradio.update(value = max_memory)

View File

@@ -1,23 +1,26 @@
import tempfile
from typing import Tuple, Optional
import gradio
import facefusion.globals
from facefusion import wording
from facefusion.core import conditional_process
from facefusion.core import limit_resources, conditional_process
from facefusion.uis.typing import Update
from facefusion.utilities import is_image, is_video, normalize_output_path, clear_temp
OUTPUT_START_BUTTON : Optional[gradio.Button] = None
OUTPUT_CLEAR_BUTTON : Optional[gradio.Button] = None
OUTPUT_IMAGE : Optional[gradio.Image] = None
OUTPUT_VIDEO : Optional[gradio.Video] = None
OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None
OUTPUT_START_BUTTON : Optional[gradio.Button] = None
OUTPUT_CLEAR_BUTTON : Optional[gradio.Button] = None
def render() -> None:
global OUTPUT_START_BUTTON
global OUTPUT_CLEAR_BUTTON
global OUTPUT_IMAGE
global OUTPUT_VIDEO
global OUTPUT_PATH_TEXTBOX
global OUTPUT_START_BUTTON
global OUTPUT_CLEAR_BUTTON
with gradio.Row():
with gradio.Box():
@@ -28,25 +31,36 @@ def render() -> None:
OUTPUT_VIDEO = gradio.Video(
label = wording.get('output_image_or_video_label')
)
OUTPUT_PATH_TEXTBOX = gradio.Textbox(
label = wording.get('output_path_textbox_label'),
value = facefusion.globals.output_path or tempfile.gettempdir(),
max_lines = 1
)
with gradio.Row():
OUTPUT_START_BUTTON = gradio.Button(wording.get('start_button_label'))
OUTPUT_CLEAR_BUTTON = gradio.Button(wording.get('clear_button_label'))
def listen() -> None:
OUTPUT_START_BUTTON.click(update, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ])
OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX, outputs = OUTPUT_PATH_TEXTBOX)
OUTPUT_START_BUTTON.click(start, inputs = OUTPUT_PATH_TEXTBOX, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ])
OUTPUT_CLEAR_BUTTON.click(clear, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ])
def update() -> Tuple[Update, Update]:
facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_path, facefusion.globals.target_path, '.')
if facefusion.globals.output_path:
conditional_process()
if is_image(facefusion.globals.output_path):
return gradio.update(value = facefusion.globals.output_path, visible = True), gradio.update(value = None, visible = False)
if is_video(facefusion.globals.output_path):
return gradio.update(value = None, visible = False), gradio.update(value = facefusion.globals.output_path, visible = True)
return gradio.update(value = None, visible = False), gradio.update(value = None, visible = False)
def start(output_path : str) -> Tuple[Update, Update]:
facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_path, facefusion.globals.target_path, output_path)
limit_resources()
conditional_process()
if is_image(facefusion.globals.output_path):
return gradio.update(value = facefusion.globals.output_path, visible = True), gradio.update(value = None, visible = False)
if is_video(facefusion.globals.output_path):
return gradio.update(value = None, visible = False), gradio.update(value = facefusion.globals.output_path, visible = True)
return gradio.update(), gradio.update()
def update_output_path(output_path : str) -> Update:
facefusion.globals.output_path = output_path
return gradio.update(value = output_path)
def clear() -> Tuple[Update, Update]:

View File

@@ -1,36 +1,73 @@
from typing import Optional
from typing import Optional, Tuple, List
import gradio
import facefusion.choices
import facefusion.globals
from facefusion import wording
from facefusion.typing import OutputVideoEncoder
from facefusion.uis.typing import Update
from facefusion.uis import core as ui
from facefusion.uis.typing import Update, ComponentName
from facefusion.utilities import is_image, is_video
OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None
OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None
OUTPUT_VIDEO_QUALITY_SLIDER : Optional[gradio.Slider] = None
def render() -> None:
global OUTPUT_IMAGE_QUALITY_SLIDER
global OUTPUT_VIDEO_ENCODER_DROPDOWN
global OUTPUT_VIDEO_QUALITY_SLIDER
with gradio.Box():
OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider(
label = wording.get('output_image_quality_slider_label'),
value = facefusion.globals.output_image_quality,
step = 1,
visible = is_image(facefusion.globals.target_path)
)
OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown(
label = wording.get('output_video_encoder_dropdown_label'),
choices = facefusion.choices.output_video_encoder,
value = facefusion.globals.output_video_encoder
value = facefusion.globals.output_video_encoder,
visible = is_video(facefusion.globals.target_path)
)
OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider(
label = wording.get('output_video_quality_slider_label'),
value = facefusion.globals.output_video_quality,
step = 1
step = 1,
visible = is_video(facefusion.globals.target_path)
)
def listen() -> None:
OUTPUT_IMAGE_QUALITY_SLIDER.change(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER, outputs = OUTPUT_IMAGE_QUALITY_SLIDER)
OUTPUT_VIDEO_ENCODER_DROPDOWN.select(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN, outputs = OUTPUT_VIDEO_ENCODER_DROPDOWN)
OUTPUT_VIDEO_QUALITY_SLIDER.change(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER, outputs = OUTPUT_VIDEO_QUALITY_SLIDER)
multi_component_names : List[ComponentName] =\
[
'source_image',
'target_image',
'target_video'
]
for component_name in multi_component_names:
component = ui.get_component(component_name)
if component:
for method in [ 'upload', 'change', 'clear' ]:
getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER ])
def remote_update() -> Tuple[Update, Update, Update]:
if is_image(facefusion.globals.target_path):
return gradio.update(visible = True), gradio.update(visible = False), gradio.update(visible = False)
if is_video(facefusion.globals.target_path):
return gradio.update(visible = False), gradio.update(visible = True), gradio.update(visible = True)
return gradio.update(visible = False), gradio.update(visible = False), gradio.update(visible = False)
def update_output_image_quality(output_image_quality : int) -> Update:
facefusion.globals.output_image_quality = output_image_quality
return gradio.update(value = output_image_quality)
def update_output_video_encoder(output_video_encoder: OutputVideoEncoder) -> Update:

View File

@@ -1,11 +1,10 @@
from time import sleep
from typing import Any, Dict, Tuple, List, Optional
from typing import Any, Dict, List, Optional
import cv2
import gradio
import facefusion.globals
from facefusion import wording
from facefusion.capturer import get_video_frame, get_video_frame_total
from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_dimension
from facefusion.face_analyser import get_one_face
from facefusion.face_reference import get_face_reference, set_face_reference
from facefusion.predictor import predict_frame
@@ -24,25 +23,27 @@ def render() -> None:
global PREVIEW_FRAME_SLIDER
with gradio.Box():
preview_image_args: Dict[str, Any] = {
preview_image_args: Dict[str, Any] =\
{
'label': wording.get('preview_image_label')
}
preview_frame_slider_args: Dict[str, Any] = {
preview_frame_slider_args: Dict[str, Any] =\
{
'label': wording.get('preview_frame_slider_label'),
'step': 1,
'visible': False
}
if is_image(facefusion.globals.target_path):
target_frame = cv2.imread(facefusion.globals.target_path)
preview_frame = extract_preview_frame(target_frame)
preview_image_args['value'] = ui.normalize_frame(preview_frame)
preview_frame = process_preview_frame(target_frame)
preview_image_args['value'] = normalize_frame_color(preview_frame)
if is_video(facefusion.globals.target_path):
temp_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number)
preview_frame = extract_preview_frame(temp_frame)
preview_image_args['value'] = ui.normalize_frame(preview_frame)
preview_frame = process_preview_frame(temp_frame)
preview_image_args['value'] = normalize_frame_color(preview_frame)
preview_image_args['visible'] = True
preview_frame_slider_args['value'] = facefusion.globals.reference_frame_number
preview_frame_slider_args['maximum'] = get_video_frame_total(facefusion.globals.target_path)
preview_frame_slider_args['maximum'] = count_video_frame_total(facefusion.globals.target_path)
preview_frame_slider_args['visible'] = True
PREVIEW_IMAGE = gradio.Image(**preview_image_args)
PREVIEW_FRAME_SLIDER = gradio.Slider(**preview_frame_slider_args)
@@ -50,19 +51,28 @@ def render() -> None:
def listen() -> None:
PREVIEW_FRAME_SLIDER.change(update, inputs = PREVIEW_FRAME_SLIDER, outputs = [ PREVIEW_IMAGE, PREVIEW_FRAME_SLIDER ])
PREVIEW_FRAME_SLIDER.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE)
multi_component_names : List[ComponentName] =\
[
'source_image',
'target_image',
'target_video'
]
for component_name in multi_component_names:
component = ui.get_component(component_name)
if component:
for method in [ 'upload', 'change', 'clear' ]:
getattr(component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE)
getattr(component, method)(update_preview_frame_slider, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_FRAME_SLIDER)
update_component_names : List[ComponentName] =\
[
'source_file',
'target_file',
'face_recognition_dropdown',
'reference_face_distance_slider',
'frame_processors_checkbox_group'
]
for component_name in update_component_names:
component = ui.get_component(component_name)
if component:
component.change(update, inputs = PREVIEW_FRAME_SLIDER, outputs = [ PREVIEW_IMAGE, PREVIEW_FRAME_SLIDER ])
component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE)
select_component_names : List[ComponentName] =\
[
'reference_face_position_gallery',
@@ -73,37 +83,48 @@ def listen() -> None:
for component_name in select_component_names:
component = ui.get_component(component_name)
if component:
component.select(update, inputs = PREVIEW_FRAME_SLIDER, outputs = [ PREVIEW_IMAGE, PREVIEW_FRAME_SLIDER ])
component.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE)
reference_face_distance_slider = ui.get_component('reference_face_distance_slider')
if reference_face_distance_slider:
reference_face_distance_slider.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE)
def update(frame_number : int = 0) -> Tuple[Update, Update]:
sleep(0.1)
def update_preview_image(frame_number : int = 0) -> Update:
if is_image(facefusion.globals.target_path):
conditional_set_face_reference()
target_frame = cv2.imread(facefusion.globals.target_path)
preview_frame = extract_preview_frame(target_frame)
return gradio.update(value = ui.normalize_frame(preview_frame)), gradio.update(value = None, maximum = None, visible = False)
preview_frame = process_preview_frame(target_frame)
preview_frame = normalize_frame_color(preview_frame)
return gradio.update(value = preview_frame)
if is_video(facefusion.globals.target_path):
conditional_set_face_reference()
facefusion.globals.reference_frame_number = frame_number
temp_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number)
preview_frame = process_preview_frame(temp_frame)
preview_frame = normalize_frame_color(preview_frame)
return gradio.update(value = preview_frame)
return gradio.update(value = None)
def update_preview_frame_slider(frame_number : int = 0) -> Update:
if is_image(facefusion.globals.target_path):
return gradio.update(value = None, maximum = None, visible = False)
if is_video(facefusion.globals.target_path):
facefusion.globals.reference_frame_number = frame_number
video_frame_total = get_video_frame_total(facefusion.globals.target_path)
temp_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number)
preview_frame = extract_preview_frame(temp_frame)
return gradio.update(value = ui.normalize_frame(preview_frame)), gradio.update(maximum = video_frame_total, visible = True)
return gradio.update(value = None), gradio.update(value = None, maximum = None, visible = False)
video_frame_total = count_video_frame_total(facefusion.globals.target_path)
return gradio.update(maximum = video_frame_total, visible = True)
return gradio.update(value = None, maximum = None, visible = False)
def extract_preview_frame(temp_frame : Frame) -> Frame:
def process_preview_frame(temp_frame : Frame) -> Frame:
if predict_frame(temp_frame):
return cv2.GaussianBlur(temp_frame, (99, 99), 0)
source_face = get_one_face(cv2.imread(facefusion.globals.source_path)) if facefusion.globals.source_path else None
temp_frame = reduce_preview_frame(temp_frame)
if 'reference' in facefusion.globals.face_recognition and not get_face_reference():
reference_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number)
reference_face = get_one_face(reference_frame, facefusion.globals.reference_face_position)
set_face_reference(reference_face)
reference_face = get_face_reference() if 'reference' in facefusion.globals.face_recognition else None
temp_frame = resize_frame_dimension(temp_frame, 480)
for frame_processor in facefusion.globals.frame_processors:
frame_processor_module = load_frame_processor_module(frame_processor)
if frame_processor_module.pre_process():
if frame_processor_module.pre_process('preview'):
temp_frame = frame_processor_module.process_frame(
source_face,
reference_face,
@@ -112,10 +133,8 @@ def extract_preview_frame(temp_frame : Frame) -> Frame:
return temp_frame
def reduce_preview_frame(temp_frame : Frame, max_height : int = 480) -> Frame:
height, width = temp_frame.shape[:2]
if height > max_height:
scale = max_height / height
max_width = int(width * scale)
temp_frame = cv2.resize(temp_frame, (max_width, max_height))
return temp_frame
def conditional_set_face_reference() -> None:
if 'reference' in facefusion.globals.face_recognition and not get_face_reference():
reference_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number)
reference_face = get_one_face(reference_frame, facefusion.globals.reference_face_position)
set_face_reference(reference_face)

View File

@@ -19,7 +19,7 @@ def render() -> None:
is_source_image = is_image(facefusion.globals.source_path)
SOURCE_FILE = gradio.File(
file_count = 'single',
file_types=
file_types =
[
'.png',
'.jpg',
@@ -28,12 +28,12 @@ def render() -> None:
label = wording.get('source_file_label'),
value = facefusion.globals.source_path if is_source_image else None
)
ui.register_component('source_file', SOURCE_FILE)
SOURCE_IMAGE = gradio.Image(
value = SOURCE_FILE.value['name'] if is_source_image else None,
visible = is_source_image,
show_label = False
)
ui.register_component('source_image', SOURCE_IMAGE)
def listen() -> None:

View File

@@ -43,7 +43,8 @@ def render() -> None:
visible = is_target_video,
show_label = False
)
ui.register_component('target_file', TARGET_FILE)
ui.register_component('target_image', TARGET_IMAGE)
ui.register_component('target_video', TARGET_VIDEO)
def listen() -> None:

View File

@@ -1,12 +1,13 @@
from typing import Optional
from typing import Optional, Tuple
import gradio
import facefusion.choices
import facefusion.globals
from facefusion import wording
from facefusion.typing import TempFrameFormat
from facefusion.uis import core as ui
from facefusion.uis.typing import Update
from facefusion.utilities import is_video
TEMP_FRAME_FORMAT_DROPDOWN : Optional[gradio.Dropdown] = None
TEMP_FRAME_QUALITY_SLIDER : Optional[gradio.Slider] = None
@@ -20,18 +21,30 @@ def render() -> None:
TEMP_FRAME_FORMAT_DROPDOWN = gradio.Dropdown(
label = wording.get('temp_frame_format_dropdown_label'),
choices = facefusion.choices.temp_frame_format,
value = facefusion.globals.temp_frame_format
value = facefusion.globals.temp_frame_format,
visible = is_video(facefusion.globals.target_path)
)
TEMP_FRAME_QUALITY_SLIDER = gradio.Slider(
label = wording.get('temp_frame_quality_slider_label'),
value = facefusion.globals.temp_frame_quality,
step = 1
step = 1,
visible = is_video(facefusion.globals.target_path)
)
def listen() -> None:
TEMP_FRAME_FORMAT_DROPDOWN.select(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN, outputs = TEMP_FRAME_FORMAT_DROPDOWN)
TEMP_FRAME_QUALITY_SLIDER.change(update_temp_frame_quality, inputs = TEMP_FRAME_QUALITY_SLIDER, outputs = TEMP_FRAME_QUALITY_SLIDER)
target_video = ui.get_component('target_video')
if target_video:
for method in [ 'upload', 'change', 'clear' ]:
getattr(target_video, method)(remote_update, outputs = [ TEMP_FRAME_FORMAT_DROPDOWN, TEMP_FRAME_QUALITY_SLIDER ])
def remote_update() -> Tuple[Update, Update]:
if is_video(facefusion.globals.target_path):
return gradio.update(visible = True), gradio.update(visible = True)
return gradio.update(visible = False), gradio.update(visible = False)
def update_temp_frame_format(temp_frame_format : TempFrameFormat) -> Update:

View File

@@ -1,11 +1,9 @@
from time import sleep
from typing import Any, Dict, Tuple, Optional
import gradio
import facefusion.globals
from facefusion import wording
from facefusion.capturer import get_video_frame_total
from facefusion.vision import count_video_frame_total
from facefusion.uis import core as ui
from facefusion.uis.typing import Update
from facefusion.utilities import is_video
@@ -19,18 +17,20 @@ def render() -> None:
global TRIM_FRAME_END_SLIDER
with gradio.Box():
trim_frame_start_slider_args : Dict[str, Any] = {
trim_frame_start_slider_args : Dict[str, Any] =\
{
'label': wording.get('trim_frame_start_slider_label'),
'step': 1,
'visible': False
}
trim_frame_end_slider_args : Dict[str, Any] = {
trim_frame_end_slider_args : Dict[str, Any] =\
{
'label': wording.get('trim_frame_end_slider_label'),
'step': 1,
'visible': False
}
if is_video(facefusion.globals.target_path):
video_frame_total = get_video_frame_total(facefusion.globals.target_path)
video_frame_total = count_video_frame_total(facefusion.globals.target_path)
trim_frame_start_slider_args['value'] = facefusion.globals.trim_frame_start or 0
trim_frame_start_slider_args['maximum'] = video_frame_total
trim_frame_start_slider_args['visible'] = True
@@ -43,23 +43,29 @@ def render() -> None:
def listen() -> None:
target_file = ui.get_component('target_file')
if target_file:
target_file.change(remote_update, outputs = [ TRIM_FRAME_START_SLIDER, TRIM_FRAME_END_SLIDER ])
TRIM_FRAME_START_SLIDER.change(lambda value : update_number('trim_frame_start', int(value)), inputs = TRIM_FRAME_START_SLIDER, outputs = TRIM_FRAME_START_SLIDER)
TRIM_FRAME_END_SLIDER.change(lambda value : update_number('trim_frame_end', int(value)), inputs = TRIM_FRAME_END_SLIDER, outputs = TRIM_FRAME_END_SLIDER)
TRIM_FRAME_START_SLIDER.change(update_trim_frame_start, inputs = TRIM_FRAME_START_SLIDER, outputs = TRIM_FRAME_START_SLIDER)
TRIM_FRAME_END_SLIDER.change(update_trim_frame_end, inputs = TRIM_FRAME_END_SLIDER, outputs = TRIM_FRAME_END_SLIDER)
target_video = ui.get_component('target_video')
if target_video:
for method in [ 'upload', 'change', 'clear' ]:
getattr(target_video, method)(remote_update, outputs = [ TRIM_FRAME_START_SLIDER, TRIM_FRAME_END_SLIDER ])
def remote_update() -> Tuple[Update, Update]:
sleep(0.1)
if is_video(facefusion.globals.target_path):
video_frame_total = get_video_frame_total(facefusion.globals.target_path)
facefusion.globals.trim_frame_start = 0
facefusion.globals.trim_frame_end = video_frame_total
video_frame_total = count_video_frame_total(facefusion.globals.target_path)
facefusion.globals.trim_frame_start = None
facefusion.globals.trim_frame_end = None
return gradio.update(value = 0, maximum = video_frame_total, visible = True), gradio.update(value = video_frame_total, maximum = video_frame_total, visible = True)
return gradio.update(value = None, maximum = None, visible = False), gradio.update(value = None, maximum = None, visible = False)
def update_number(name : str, value : int) -> Update:
    """Store *value* under attribute *name* on facefusion.globals and echo it to the UI."""
    setattr(facefusion.globals, name, value)
    return gradio.update(value = value)
def update_trim_frame_start(trim_frame_start : int) -> Update:
    """Persist the trim start; a value of zero (or less) means "no trimming" and is stored as None."""
    facefusion.globals.trim_frame_start = None if trim_frame_start <= 0 else trim_frame_start
    return gradio.update(value = trim_frame_start)
def update_trim_frame_end(trim_frame_end : int) -> Update:
    """Persist the trim end; a value at (or past) the last frame means "no trimming" and is stored as None."""
    video_frame_total = count_video_frame_total(facefusion.globals.target_path)
    if trim_frame_end < video_frame_total:
        facefusion.globals.trim_frame_end = trim_frame_end
    else:
        facefusion.globals.trim_frame_end = None
    return gradio.update(value = trim_frame_end)

View File

@@ -0,0 +1,103 @@
from typing import Optional, Generator
import os
import subprocess
import cv2
import gradio
import facefusion.globals
from facefusion import wording
from facefusion.typing import Frame
from facefusion.face_analyser import get_one_face
from facefusion.processors.frame.core import load_frame_processor_module
from facefusion.uis.typing import StreamMode, WebcamMode, Update
from facefusion.utilities import open_ffmpeg
from facefusion.vision import normalize_frame_color
# Gradio components of the webcam tab; populated once by render() and shared via module globals.
WEBCAM_IMAGE : Optional[gradio.Image] = None
WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None
WEBCAM_START_BUTTON : Optional[gradio.Button] = None
WEBCAM_STOP_BUTTON : Optional[gradio.Button] = None
def render() -> None:
    """Build the webcam tab components and publish them through the module globals."""
    global WEBCAM_IMAGE
    global WEBCAM_MODE_RADIO
    global WEBCAM_START_BUTTON
    global WEBCAM_STOP_BUTTON

    WEBCAM_IMAGE = gradio.Image(label = wording.get('webcam_image_label'))
    WEBCAM_MODE_RADIO = gradio.Radio(
        label = wording.get('webcam_mode_radio_label'),
        choices = [ 'inline', 'stream_udp', 'stream_v4l2' ],
        value = 'inline'
    )
    WEBCAM_START_BUTTON = gradio.Button(wording.get('start_button_label'))
    WEBCAM_STOP_BUTTON = gradio.Button(wording.get('stop_button_label'))
def listen() -> None:
    """Connect the webcam controls; mode changes and the stop button cancel a running capture."""
    # Keep a handle on the click event so it can be cancelled later.
    capture_event = WEBCAM_START_BUTTON.click(start, inputs = WEBCAM_MODE_RADIO, outputs = WEBCAM_IMAGE)
    WEBCAM_MODE_RADIO.change(update, outputs = WEBCAM_IMAGE, cancels = capture_event)
    WEBCAM_STOP_BUTTON.click(None, cancels = capture_event)
def update() -> Update:
    """Clear the webcam preview image (used when the mode radio changes)."""
    return gradio.update(value = None)
def start(webcam_mode : WebcamMode) -> Generator[Frame, None, None]:
    """Delegate to the capture generator that matches the selected webcam mode."""
    # The modes are mutually exclusive, so an elif chain is equivalent to the original if cascade.
    if webcam_mode == 'inline':
        yield from start_inline()
    elif webcam_mode == 'stream_udp':
        yield from start_stream('udp')
    elif webcam_mode == 'stream_v4l2':
        yield from start_stream('v4l2')
def start_inline() -> Generator[Frame, None, None]:
    """Capture frames from the default webcam, run the frame processors and yield color-normalized frames for inline display.

    Fixes: the original ignored the success flag of capture.read() (a failed
    read handed None to the processors) and never released the camera when the
    generator was cancelled.
    """
    facefusion.globals.face_recognition = 'many'  # process every detected face in the stream
    capture = cv2.VideoCapture(0)
    if capture.isOpened():
        try:
            while True:
                has_frame, temp_frame = capture.read()
                if not has_frame:
                    # Camera disconnected or read failure: stop instead of processing None.
                    break
                temp_frame = process_stream_frame(temp_frame)
                if temp_frame is not None:
                    yield normalize_frame_color(temp_frame)
        finally:
            # Release the camera even when Gradio cancels the generator (GeneratorExit).
            capture.release()
def start_stream(mode : StreamMode) -> Generator[None, None, None]:
    """Capture webcam frames, pipe processed frames into ffmpeg (udp or v4l2) and yield previews.

    Fixes: the original ignored the success flag of capture.read() and leaked
    both the camera and the ffmpeg stdin pipe when the generator was cancelled.
    """
    facefusion.globals.face_recognition = 'many'  # process every detected face in the stream
    capture = cv2.VideoCapture(0)
    ffmpeg_process = open_stream(mode)
    if capture.isOpened():
        try:
            while True:
                has_frame, frame = capture.read()
                if not has_frame:
                    # Camera disconnected or read failure: stop instead of processing None.
                    break
                temp_frame = process_stream_frame(frame)
                if temp_frame is not None:
                    # ffmpeg expects raw bgr24 bytes on stdin (see open_stream).
                    ffmpeg_process.stdin.write(temp_frame.tobytes())
                    yield normalize_frame_color(temp_frame)
        finally:
            # Clean up on stop/cancel: free the camera and let ffmpeg flush and exit.
            capture.release()
            ffmpeg_process.stdin.close()
def process_stream_frame(temp_frame : Frame) -> Frame:
    """Run every configured frame processor over a single webcam frame and return the result."""
    # Resolve the source face on each call so a changed source path takes effect immediately.
    if facefusion.globals.source_path:
        source_face = get_one_face(cv2.imread(facefusion.globals.source_path))
    else:
        source_face = None
    for frame_processor in facefusion.globals.frame_processors:
        frame_processor_module = load_frame_processor_module(frame_processor)
        # Processors may opt out of stream mode via pre_process('stream').
        if frame_processor_module.pre_process('stream'):
            temp_frame = frame_processor_module.process_frame(source_face, None, temp_frame)
    return temp_frame
def open_stream(mode : StreamMode) -> subprocess.Popen[bytes]:
    """Spawn an ffmpeg process reading raw bgr24 640x480 frames at 30 fps from stdin.

    'udp' streams mpegts to udp://localhost:27000; 'v4l2' writes to the first
    virtual video4linux device.
    """
    commands = [ '-f', 'rawvideo', '-pix_fmt', 'bgr24', '-s', '640x480', '-r', '30', '-i', '-' ]
    if mode == 'udp':
        commands.extend([ '-b:v', '2000k', '-f', 'mpegts', 'udp://localhost:27000' ])
    elif mode == 'v4l2':
        # NOTE(review): picks the first virtual v4l2 device and raises IndexError when
        # none exists — presumably a loopback device must be set up beforehand; confirm.
        device_name = os.listdir('/sys/devices/virtual/video4linux')[0]
        commands.extend([ '-f', 'v4l2', '/dev/' + device_name ])
    return open_ffmpeg(commands)