Various Tweaks (#20742)

* Pull context size from openai models
* Adjust wording based on type of model
* Instruct to not use parentheses
* Simplify genai config
* Don't use GPU for training
commit 338b681ed0 (parent 685f2c5030)
```diff
@@ -13,7 +13,6 @@ nvidia_cusolver_cu12==11.6.3.*; platform_machine == 'x86_64'
 nvidia_cusparse_cu12==12.5.1.*; platform_machine == 'x86_64'
 nvidia_nccl_cu12==2.23.4; platform_machine == 'x86_64'
 nvidia_nvjitlink_cu12==12.5.82; platform_machine == 'x86_64'
-tensorflow==2.19.*; platform_machine == 'x86_64'
 onnx==1.16.*; platform_machine == 'x86_64'
 onnxruntime-gpu==1.22.*; platform_machine == 'x86_64'
 protobuf==3.20.3; platform_machine == 'x86_64'
```
```diff
@@ -10,7 +10,6 @@ Object classification allows you to train a custom MobileNetV2 classification mo
 Object classification models are lightweight and run very fast on CPU. Inference should be usable on virtually any machine that can run Frigate.
 
 Training the model does briefly use a high amount of system resources for about 1–3 minutes per training run. On lower-power devices, training may take longer.
-When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training.
 
 ## Classes
 
```
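The docs no longer promise GPU-accelerated training, which lines up with the GPU `tensorflow` build being dropped from the x86_64 requirements above. As a hedged illustration only (not Frigate's actual training code), this is how a TensorFlow training run can be pinned to CPU:

```python
import tensorflow as tf

# Hide all GPUs from TensorFlow before any op runs, so a
# MobileNetV2 training run stays on CPU even on a GPU host.
tf.config.set_visible_devices([], "GPU")

assert tf.config.get_visible_devices("GPU") == []
```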
```diff
@@ -10,7 +10,6 @@ State classification allows you to train a custom MobileNetV2 classification mod
 State classification models are lightweight and run very fast on CPU. Inference should be usable on virtually any machine that can run Frigate.
 
 Training the model does briefly use a high amount of system resources for about 1–3 minutes per training run. On lower-power devices, training may take longer.
-When running the `-tensorrt` image, Nvidia GPUs will automatically be used to accelerate training.
 
 ## Classes
 
```
```diff
@@ -114,7 +114,7 @@ Your response MUST be a flat JSON object with:
 
 ## Objects in Scene
 
-Each line represents a detection state, not necessarily unique individuals. Objects with names in parentheses (e.g., "Name (person)") are verified identities. Objects without names (e.g., "Person") are detected but not identified.
+Each line represents a detection state, not necessarily unique individuals. Parentheses indicate the object type or category; use only the name/label in your response, not the parentheses.
 
 **CRITICAL: When you see both recognized and unrecognized entries of the same type (e.g., "Joe (person)" and "Person"), visually count how many distinct people/objects you actually see based on appearance and clothing. If you observe only ONE person throughout the sequence, use ONLY the recognized name (e.g., "Joe"). The same person may be recognized in some frames but not others. Only describe both if you visually see MULTIPLE distinct people with clearly different appearances.**
 
```
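To make the reworded instruction concrete: detection lines arrive like `Joe (person)` or `Person`, and the prompt now tells the model to answer with only the bare name/label. A minimal sketch of the equivalent string handling (`display_label` is a hypothetical helper, not part of this commit):

```python
import re

def display_label(detection_line: str) -> str:
    """Drop a trailing parenthetical type/category: 'Joe (person)' -> 'Joe'."""
    return re.sub(r"\s*\([^)]*\)\s*$", "", detection_line)

assert display_label("Joe (person)") == "Joe"
assert display_label("Person") == "Person"
```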
```diff
@@ -18,6 +18,7 @@ class OpenAIClient(GenAIClient):
     """Generative AI client for Frigate using OpenAI."""
 
     provider: OpenAI
+    context_size: Optional[int] = None
 
     def _init_provider(self):
        """Initialize the client."""
```
```diff
@@ -69,5 +70,33 @@ class OpenAIClient(GenAIClient):
 
     def get_context_size(self) -> int:
         """Get the context window size for OpenAI."""
-        # OpenAI GPT-4 Vision models have 128K token context window
-        return 128000
+        if self.context_size is not None:
+            return self.context_size
+
+        try:
+            models = self.provider.models.list()
+            for model in models.data:
+                if model.id == self.genai_config.model:
+                    if hasattr(model, "max_model_len") and model.max_model_len:
+                        self.context_size = model.max_model_len
+                        logger.debug(
+                            f"Retrieved context size {self.context_size} for model {self.genai_config.model}"
+                        )
+                        return self.context_size
+
+        except Exception as e:
+            logger.debug(
+                f"Failed to fetch model context size from API: {e}, using default"
+            )
+
+        # Default to 128K for ChatGPT models, 8K for others
+        model_name = self.genai_config.model.lower()
+        if "gpt-4o" in model_name:
+            self.context_size = 128000
+        else:
+            self.context_size = 8192
+
+        logger.debug(
+            f"Using default context size {self.context_size} for model {self.genai_config.model}"
+        )
+        return self.context_size
```
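The lookup above leans on the OpenAI-compatible `/v1/models` listing: vLLM-style servers report a `max_model_len` per model, while openai.com responses omit it, which is why the heuristic defaults remain. A standalone sketch of the same probe (the base URL and model id are placeholders):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")  # placeholder server

def probe_context_size(model_id: str) -> int:
    """Prefer the server-reported context window; fall back to heuristics."""
    try:
        for model in client.models.list().data:
            # vLLM-style servers attach max_model_len; openai.com does not.
            if model.id == model_id and getattr(model, "max_model_len", None):
                return model.max_model_len
    except Exception:
        pass  # unreachable server, auth error, etc.
    return 128000 if "gpt-4o" in model_id.lower() else 8192

print(probe_context_size("gpt-4o"))  # 128000 unless the server says otherwise
```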
```diff
@@ -384,10 +384,10 @@ def migrate_017_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]
     new_object_config["genai"] = {}
 
     for key in global_genai.keys():
-        if key not in ["enabled", "model", "provider", "base_url", "api_key"]:
-            new_object_config["genai"][key] = global_genai[key]
-        else:
+        if key in ["model", "provider", "base_url", "api_key"]:
             new_genai_config[key] = global_genai[key]
+        else:
+            new_object_config["genai"][key] = global_genai[key]
 
     config["genai"] = new_genai_config
 
```
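A worked example of the tightened migration: only the four provider connection keys stay in the global `genai` section, and everything else, now including `enabled`, moves under the per-object `genai` config (sample values are hypothetical):

```python
global_genai = {
    "enabled": True,
    "provider": "openai",
    "base_url": "http://localhost:8000/v1",
    "api_key": "secret",
    "model": "gpt-4o",
}

new_genai_config, object_genai = {}, {}
for key, value in global_genai.items():
    if key in ["model", "provider", "base_url", "api_key"]:
        new_genai_config[key] = value  # stays global
    else:
        object_genai[key] = value  # moves under objects -> genai

print(sorted(new_genai_config))  # ['api_key', 'base_url', 'model', 'provider']
print(object_genai)              # {'enabled': True}
```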
```diff
@@ -86,6 +86,7 @@
   "classificationSubLabel": "Sub Label",
   "classificationAttribute": "Attribute",
   "classes": "Classes",
+  "states": "States",
   "classesTip": "Learn about classes",
   "classesStateDesc": "Define the different states your camera area can be in. For example: 'open' and 'closed' for a garage door.",
   "classesObjectDesc": "Define the different categories to classify detected objects into. For example: 'delivery_person', 'resident', 'stranger' for person classification.",
```
```diff
@@ -394,7 +394,9 @@ export default function Step1NameAndDefine({
               <div className="flex items-center justify-between">
                 <div className="flex items-center gap-1">
                   <FormLabel className="text-primary-variant">
-                    {t("wizard.step1.classes")}
+                    {watchedModelType === "state"
+                      ? t("wizard.step1.states")
+                      : t("wizard.step1.classes")}
                   </FormLabel>
                   <Popover>
                     <PopoverTrigger asChild>
```