diff --git a/docs/docs/configuration/hardware_acceleration_enrichments.md b/docs/docs/configuration/hardware_acceleration_enrichments.md
index 84688b8b4..45c7cd4d1 100644
--- a/docs/docs/configuration/hardware_acceleration_enrichments.md
+++ b/docs/docs/configuration/hardware_acceleration_enrichments.md
@@ -5,7 +5,7 @@ title: Enrichments
 
 # Enrichments
 
-Some of Frigate's enrichments can use a discrete GPU / NPU for accelerated processing.
+Some of Frigate's enrichments can use a discrete GPU or integrated GPU for accelerated processing.
 
 ## Requirements
 
@@ -18,8 +18,10 @@ Object detection and enrichments (like Semantic Search, Face Recognition, and Li
 
 - **Intel**
   - OpenVINO will automatically be detected and used for enrichments in the default Frigate image.
+  - **Note:** Intel NPUs have limited model support for enrichments; a GPU is recommended when available.
 
 - **Nvidia**
+  - Nvidia GPUs will automatically be detected and used for enrichments in the `-tensorrt` Frigate image.
   - Jetson devices will automatically be detected and used for enrichments in the `-tensorrt-jp6` Frigate image.
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 2da602979..2dd3330c2 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -261,6 +261,8 @@ OpenVINO is supported on 6th Gen Intel platforms (Skylake) and newer. It will al
 :::tip
 
+**NPU + GPU Systems:** If you have both an NPU and a GPU available (e.g., Intel Core Ultra processors), use the NPU for object detection and the GPU for enrichments (semantic search, face recognition, etc.) for the best performance and compatibility.
+
 When using many cameras one detector may not be enough to keep up. Multiple detectors can be defined assuming GPU resources are available. An example configuration would be:
 
 ```yaml
 detectors:
@@ -283,7 +285,7 @@
 | [RF-DETR](#rf-detr)                   | ✅ | ✅ | Requires XE iGPU or Arc                                      |
 | [YOLO-NAS](#yolo-nas)                 | ✅ | ✅ |                                                              |
 | [MobileNet v2](#ssdlite-mobilenet-v2) | ✅ | ✅ | Fast and lightweight model, less accurate than larger models |
-| [YOLOX](#yolox)                       | ✅ | ?  |                                                              |
+| [YOLOX](#yolox)                       | ✅ | ?  |                                                              |
 | [D-FINE](#d-fine)                     | ❌ | ❌ |                                                              |
 
 #### SSDLite MobileNet v2
diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md
index 70ef161be..b3ce37177 100644
--- a/docs/docs/configuration/semantic_search.md
+++ b/docs/docs/configuration/semantic_search.md
@@ -78,7 +78,7 @@ Switching between V1 and V2 requires reindexing your embeddings. The embeddings
 
 ### GPU Acceleration
 
-The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU / NPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
+The CLIP models are downloaded in ONNX format, and the `large` model can be accelerated using GPU hardware, when available. This depends on the Docker build that is used. You can also target a specific device in a multi-GPU installation.
 
 ```yaml
 semantic_search:
@@ -90,7 +90,7 @@ semantic_search:
 
 :::info
 
-If the correct build is used for your GPU / NPU and the `large` model is configured, then the GPU / NPU will be detected and used automatically.
+If the correct build is used for your GPU / NPU and the `large` model is configured, then the GPU will be detected and used automatically.
 
 Specify the `device` option to target a specific GPU in a multi-GPU system (see [onnxruntime's provider options](https://onnxruntime.ai/docs/execution-providers/)). If you do not specify a device, the first available GPU will be used.
diff --git a/frigate/detectors/detection_runners.py b/frigate/detectors/detection_runners.py
index 5b45238be..6b9c1531b 100644
--- a/frigate/detectors/detection_runners.py
+++ b/frigate/detectors/detection_runners.py
@@ -161,12 +161,12 @@ class CudaGraphRunner(BaseModelRunner):
     """
 
     @staticmethod
-    def is_complex_model(model_type: str) -> bool:
+    def is_model_supported(model_type: str) -> bool:
         # Import here to avoid circular imports
         from frigate.detectors.detector_config import ModelTypeEnum
         from frigate.embeddings.types import EnrichmentModelTypeEnum
 
-        return model_type in [
+        return model_type not in [
             ModelTypeEnum.yolonas.value,
             EnrichmentModelTypeEnum.paddleocr.value,
             EnrichmentModelTypeEnum.jina_v1.value,
@@ -239,9 +239,30 @@ class OpenVINOModelRunner(BaseModelRunner):
             EnrichmentModelTypeEnum.jina_v2.value,
         ]
 
+    @staticmethod
+    def is_model_npu_supported(model_type: str) -> bool:
+        # Import here to avoid circular imports
+        from frigate.embeddings.types import EnrichmentModelTypeEnum
+
+        return model_type not in [
+            EnrichmentModelTypeEnum.paddleocr.value,
+            EnrichmentModelTypeEnum.jina_v1.value,
+            EnrichmentModelTypeEnum.jina_v2.value,
+            EnrichmentModelTypeEnum.arcface.value,
+        ]
+
     def __init__(self, model_path: str, device: str, model_type: str, **kwargs):
         self.model_path = model_path
+
+        if device == "NPU" and not OpenVINOModelRunner.is_model_npu_supported(
+            model_type
+        ):
+            logger.warning(
+                f"OpenVINO model {model_type} is not supported on NPU, using GPU instead"
+            )
+            device = "GPU"
+
         self.device = device
         self.complex_model = OpenVINOModelRunner.is_complex_model(model_type)
 
         if not os.path.isfile(model_path):
@@ -500,7 +521,7 @@ def get_optimized_runner(
         return OpenVINOModelRunner(model_path, device, model_type, **kwargs)
 
     if (
-        not CudaGraphRunner.is_complex_model(model_type)
+        not CudaGraphRunner.is_model_supported(model_type)
         and providers[0] == "CUDAExecutionProvider"
     ):
         options[0] = {
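
Two reviewer sketches to accompany this patch. First, a minimal config illustrating the split recommended by the new NPU + GPU tip, assuming an Intel Core Ultra system on the default OpenVINO image; the detector name `ov` is a placeholder, and `semantic_search` relies on the automatic GPU detection described in the docs change rather than an explicit `device`:

```yaml
detectors:
  ov:
    type: openvino
    device: NPU # object detection runs on the NPU

semantic_search:
  enabled: true
  model_size: large # the large CLIP model is picked up on the GPU automatically
```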
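Second, a self-contained sketch of the fallback behavior added to `OpenVINOModelRunner.__init__`, a mirror for illustration rather than the shipped code; the enum string values are assumptions based on the member names in the patch:

```python
import logging

logger = logging.getLogger(__name__)

# Assumed string values for the EnrichmentModelTypeEnum members listed
# in is_model_npu_supported above.
NPU_UNSUPPORTED_MODELS = {"paddleocr", "jina_v1", "jina_v2", "arcface"}


def resolve_openvino_device(device: str, model_type: str) -> str:
    """Degrade an NPU request to GPU for models OpenVINO cannot run on the NPU."""
    if device == "NPU" and model_type in NPU_UNSUPPORTED_MODELS:
        logger.warning(
            f"OpenVINO model {model_type} is not supported on NPU, using GPU instead"
        )
        return "GPU"
    return device


print(resolve_openvino_device("NPU", "arcface"))  # -> GPU (with a warning)
print(resolve_openvino_device("NPU", "yolonas"))  # -> NPU (detection models are unaffected)
```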