Support model caching

This commit is contained in:
Nicolas Mowen 2024-09-16 15:29:04 -06:00
parent 2f69f5afe6
commit 20dd63e7bd
2 changed files with 23 additions and 2 deletions

View File

@@ -7,7 +7,7 @@ cython == 0.29.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu12 == 12.1.*; platform_machine == 'x86_64'
 nvidia-cuda-runtime-cu11 == 11.8.*; platform_machine == 'x86_64'
 nvidia-cublas-cu11 == 11.11.3.6; platform_machine == 'x86_64'
-nvidia-cudnn-cu11 == 8.5.0.*; platform_machine == 'x86_64'
+nvidia-cudnn-cu11 == 8.6.0.*; platform_machine == 'x86_64'
 nvidia-cufft-cu11==10.*; platform_machine == 'x86_64'
 onnx==1.14.0; platform_machine == 'x86_64'
 onnxruntime-gpu==1.17.*; platform_machine == 'x86_64'

View File

@@ -36,7 +36,28 @@ class ONNXDetector(DetectionApi):
 path = detector_config.model.path
 logger.info(f"ONNX: loading {detector_config.model.path}")
-self.model = ort.InferenceSession(path, providers=ort.get_available_providers())
providers = ort.get_available_providers()
options = []
for provider in providers:
if provider == "TensorrtExecutionProvider":
options.append(
{
"trt_timing_cache_enable": True,
"trt_timing_cache_path": "/config/model_cache/onnx",
"trt_engine_cache_enable": True,
"trt_engine_cache_path": "/config/model_cache/onnx/trt-engines",
}
)
elif provider == "OpenVINOExecutionProvider":
options.append({"cache_dir": "/config/model_cache/onnx"})
else:
options.append({})
self.model = ort.InferenceSession(
path, providers=providers, provider_options=options
)
 self.h = detector_config.model.height
 self.w = detector_config.model.width