use float16 and small model on gpu for real-time

This commit is contained in:
Josh Hawkins 2025-05-26 08:19:41 -05:00
parent e60b51d036
commit 43a3943c50

View File

@ -122,9 +122,9 @@ class FasterWhisperASR(ASRBase):
    # this worked fast and reliably on NVIDIA L40
    model = WhisperModel(
-       model_size_or_path="tiny",
+       model_size_or_path="small" if device == "cuda" else "tiny",
        device=device,
-       compute_type="int8",
+       compute_type="float16" if device == "cuda" else "int8",
        local_files_only=False,
        download_root=model_dir,
    )