Use float16 and the small model on GPU for real-time transcription

2026-05-01 19:17:41 +03:00 · 2025-05-26 08:19:41 -05:00 · 2025-05-26 08:19:41 -05:00 · 43a3943c50
commit 43a3943c50
parent e60b51d036
1 changed files with 2 additions and 2 deletions
--- a/frigate/data_processing/real_time/whisper_online.py
+++ b/frigate/data_processing/real_time/whisper_online.py
@@ -122,9 +122,9 @@ class FasterWhisperASR(ASRBase):

        # this worked fast and reliably on NVIDIA L40
        model = WhisperModel(
-            model_size_or_path="tiny",
+            model_size_or_path="small" if device == "cuda" else "tiny",
            device=device,
-            compute_type="int8",
+            compute_type="float16" if device == "cuda" else "int8",
            local_files_only=False,
            download_root=model_dir,
        )