From 43a3943c50aeace0353fe3a7b5e115d83d7b8e90 Mon Sep 17 00:00:00 2001
From: Josh Hawkins <32435876+hawkeye217@users.noreply.github.com>
Date: Mon, 26 May 2025 08:19:41 -0500
Subject: [PATCH] use float16 and small model on gpu for real-time

---
 frigate/data_processing/real_time/whisper_online.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/frigate/data_processing/real_time/whisper_online.py b/frigate/data_processing/real_time/whisper_online.py
index 6122dbdd9..96c1ce0cf 100644
--- a/frigate/data_processing/real_time/whisper_online.py
+++ b/frigate/data_processing/real_time/whisper_online.py
@@ -122,9 +122,9 @@ class FasterWhisperASR(ASRBase):
         # this worked fast and reliably on NVIDIA L40
         model = WhisperModel(
-            model_size_or_path="tiny",
+            model_size_or_path="small" if device == "cuda" else "tiny",
             device=device,
-            compute_type="int8",
+            compute_type="float16" if device == "cuda" else "int8",
             local_files_only=False,
             download_root=model_dir,
         )
 