Support model caching

This commit is contained in:
Nicolas Mowen 2024-09-16 15:29:04 -06:00
parent 2f69f5afe6
commit 20dd63e7bd
2 changed files with 23 additions and 2 deletions

View File

@@ -7,7 +7,7 @@ cython == 0.29.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
 nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.5.0.*; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
 nvidia-cufft-cu11==10.*; platform_machine == 'x86_64'
 onnx==1.14.0; platform_machine == 'x86_64'
 onnxruntime-gpu==1.17.*; platform_machine == 'x86_64'

View File

@@ -36,7 +36,28 @@ class ONNXDetector(DetectionApi):
 path = detector_config.model.path
 logger.info(f"ONNX: loading {detector_config.model.path}")
-self.model = ort.InferenceSession(path, providers=ort.get_available_providers())
providers = ort.get_available_providers()
options = []
for provider in providers:
if provider == "TensorrtExecutionProvider":
options.append(
{
"trt_timing_cache_enable": True,
"trt_timing_cache_path": "/config/model_cache/onnx",
"trt_engine_cache_enable": True,
"trt_engine_cache_path": "/config/model_cache/onnx/trt-engines",
}
)
elif provider == "OpenVINOExecutionProvider":
options.append({"cache_dir": "/config/model_cache/onnx"})
else:
options.append({})
self.model = ort.InferenceSession(
path, providers=providers, provider_options=options
)
 self.h = detector_config.model.height
 self.w = detector_config.model.width