From 43a3943c50aeace0353fe3a7b5e115d83d7b8e90 Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Mon, 26 May 2025 08:19:41 -0500
Subject: [PATCH] use float16 and small model on gpu for real-time

---
 frigate/data_processing/real_time/whisper_online.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frigate/data_processing/real_time/whisper_online.py b/frigate/data_processing/real_time/whisper_online.py
index 6122dbdd9..96c1ce0cf 100644
--- a/frigate/data_processing/real_time/whisper_online.py
+++ b/frigate/data_processing/real_time/whisper_online.py
@@ -122,9 +122,9 @@ class FasterWhisperASR(ASRBase):
         # this worked fast and reliably on NVIDIA L40
         model = WhisperModel(
-            model_size_or_path="tiny",
+            model_size_or_path="small" if device == "cuda" else "tiny",
             device=device,
-            compute_type="int8",
+            compute_type="float16" if device == "cuda" else "int8",
             local_files_only=False,
             download_root=model_dir,
         )
 