Various Tweaks (#20742)

* Pull context size from openai models

* Adjust wording based on type of model

* Instruct to not use parenthesis

* Simplify genai config

* Don't use GPU for training
This commit is contained in:
Nicolas Mowen 2025-10-31 12:40:31 -06:00 committed by GitHub
parent 685f2c5030
commit 338b681ed0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 39 additions and 10 deletions

View File

@ -13,7 +13,6 @@ nvidia_cusolver_cu12==11.6.3.*; platform_machine == 'x86_64'
nvidia_cusparse_cu12==12.5.1.*; platform_machine == 'x86_64'
nvidia_nccl_cu12==2.23.4; platform_machine == 'x86_64'
nvidia_nvjitlink_cu12==12.5.82; platform_machine == 'x86_64'
tensorflow==2.19.*; platform_machine == 'x86_64'
onnx==1.16.*; platform_machine == 'x86_64'
onnxruntime-gpu==1.22.*; platform_machine == 'x86_64'
protobuf==3.20.3; platform_machine == 'x86_64'

View File

@ -10,7 +10,6 @@ Object classification allows you to train a custom MobileNetV2 classification mo
Object classification models are lightweight and run very fast on CPU. Inference should be usable on virtually any machine that can run Frigate.
Training the model does briefly use a high amount of system resources for about 13 minutes per training run. On lower-power devices, training may take longer.
When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training.
## Classes

View File

@ -10,7 +10,6 @@ State classification allows you to train a custom MobileNetV2 classification mod
State classification models are lightweight and run very fast on CPU. Inference should be usable on virtually any machine that can run Frigate.
Training the model does briefly use a high amount of system resources for about 13 minutes per training run. On lower-power devices, training may take longer.
When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training.
## Classes

View File

@ -114,7 +114,7 @@ Your response MUST be a flat JSON object with:
## Objects in Scene
Each line represents a detection state, not necessarily unique individuals. Objects with names in parentheses (e.g., "Name (person)") are verified identities. Objects without names (e.g., "Person") are detected but not identified.
Each line represents a detection state, not necessarily unique individuals. Parentheses indicate object type or category, use only the name/label in your response, not the parentheses.
**CRITICAL: When you see both recognized and unrecognized entries of the same type (e.g., "Joe (person)" and "Person"), visually count how many distinct people/objects you actually see based on appearance and clothing. If you observe only ONE person throughout the sequence, use ONLY the recognized name (e.g., "Joe"). The same person may be recognized in some frames but not others. Only describe both if you visually see MULTIPLE distinct people with clearly different appearances.**

View File

@ -18,6 +18,7 @@ class OpenAIClient(GenAIClient):
"""Generative AI client for Frigate using OpenAI."""
provider: OpenAI
context_size: Optional[int] = None
def _init_provider(self):
"""Initialize the client."""
@ -69,5 +70,33 @@ class OpenAIClient(GenAIClient):
def get_context_size(self) -> int:
"""Get the context window size for OpenAI."""
# OpenAI GPT-4 Vision models have 128K token context window
return 128000
if self.context_size is not None:
return self.context_size
try:
models = self.provider.models.list()
for model in models.data:
if model.id == self.genai_config.model:
if hasattr(model, "max_model_len") and model.max_model_len:
self.context_size = model.max_model_len
logger.debug(
f"Retrieved context size {self.context_size} for model {self.genai_config.model}"
)
return self.context_size
except Exception as e:
logger.debug(
f"Failed to fetch model context size from API: {e}, using default"
)
# Default to 128K for ChatGPT models, 8K for others
model_name = self.genai_config.model.lower()
if "gpt-4o" in model_name:
self.context_size = 128000
else:
self.context_size = 8192
logger.debug(
f"Using default context size {self.context_size} for model {self.genai_config.model}"
)
return self.context_size

View File

@ -384,10 +384,10 @@ def migrate_017_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]
new_object_config["genai"] = {}
for key in global_genai.keys():
if key not in ["enabled", "model", "provider", "base_url", "api_key"]:
new_object_config["genai"][key] = global_genai[key]
else:
if key in ["model", "provider", "base_url", "api_key"]:
new_genai_config[key] = global_genai[key]
else:
new_object_config["genai"][key] = global_genai[key]
config["genai"] = new_genai_config

View File

@ -86,6 +86,7 @@
"classificationSubLabel": "Sub Label",
"classificationAttribute": "Attribute",
"classes": "Classes",
"states": "States",
"classesTip": "Learn about classes",
"classesStateDesc": "Define the different states your camera area can be in. For example: 'open' and 'closed' for a garage door.",
"classesObjectDesc": "Define the different categories to classify detected objects into. For example: 'delivery_person', 'resident', 'stranger' for person classification.",

View File

@ -394,7 +394,9 @@ export default function Step1NameAndDefine({
<div className="flex items-center justify-between">
<div className="flex items-center gap-1">
<FormLabel className="text-primary-variant">
{t("wizard.step1.classes")}
{watchedModelType === "state"
? t("wizard.step1.states")
: t("wizard.step1.classes")}
</FormLabel>
<Popover>
<PopoverTrigger asChild>