diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 02fde5861..ebe107d3d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -107,7 +107,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build - run: make + run: make debug - name: Run mypy run: docker run --rm --entrypoint=python3 frigate:latest -u -m mypy --config-file frigate/mypy.ini frigate - name: Run tests diff --git a/Makefile b/Makefile index 5500174af..2baac5aad 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ default_target: local COMMIT_HASH := $(shell git log -1 --pretty=format:"%h"|tail -1) -VERSION = 0.16.0 +VERSION = 0.17.0 IMAGE_REPO ?= ghcr.io/blakeblackshear/frigate GITHUB_REF_NAME ?= $(shell git rev-parse --abbrev-ref HEAD) BOARDS= #Initialized empty @@ -20,6 +20,12 @@ local: version --tag frigate:latest \ --load +debug: version + docker buildx build --target=frigate --file docker/main/Dockerfile . \ + --build-arg DEBUG=true \ + --tag frigate:latest \ + --load + amd64: docker buildx build --target=frigate --file docker/main/Dockerfile . \ --tag $(IMAGE_REPO):$(VERSION)-$(COMMIT_HASH) \ diff --git a/benchmark.py b/benchmark.py index 1f39302a7..46adc59df 100755 --- a/benchmark.py +++ b/benchmark.py @@ -4,13 +4,13 @@ from statistics import mean import numpy as np -import frigate.util as util from frigate.config import DetectorTypeEnum from frigate.object_detection.base import ( ObjectDetectProcess, RemoteObjectDetector, load_labels, ) +from frigate.util.process import FrigateProcess my_frame = np.expand_dims(np.full((300, 300, 3), 1, np.uint8), axis=0) labels = load_labels("/labelmap.txt") @@ -91,7 +91,7 @@ edgetpu_process_2 = ObjectDetectProcess( ) for x in range(0, 10): - camera_process = util.Process( + camera_process = FrigateProcess( target=start, args=(x, 300, detection_queue, events[str(x)]) ) camera_process.daemon = True diff --git a/docker/main/Dockerfile b/docker/main/Dockerfile index f58e3d5a9..ca93f4a3b 100644 --- a/docker/main/Dockerfile +++ b/docker/main/Dockerfile @@ -148,11 +148,12 @@ RUN --mount=type=bind,source=docker/main/install_s6_overlay.sh,target=/deps/inst FROM base AS wheels ARG DEBIAN_FRONTEND ARG TARGETARCH +ARG DEBUG=false # Use a separate container to build wheels to prevent build dependencies in final image RUN apt-get -qq update \ && apt-get -qq install -y \ - apt-transport-https wget \ + apt-transport-https wget unzip \ && apt-get -qq update \ && apt-get -qq install -y \ python3.11 \ @@ -177,6 +178,8 @@ RUN wget -q https://bootstrap.pypa.io/get-pip.py -O get-pip.py \ && python3 get-pip.py "pip" COPY docker/main/requirements.txt /requirements.txt +COPY docker/main/requirements-dev.txt /requirements-dev.txt + RUN pip3 install -r /requirements.txt # Build pysqlite3 from source @@ -184,7 +187,10 @@ COPY docker/main/build_pysqlite3.sh /build_pysqlite3.sh RUN /build_pysqlite3.sh COPY docker/main/requirements-wheels.txt /requirements-wheels.txt -RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt +RUN pip3 wheel --wheel-dir=/wheels -r /requirements-wheels.txt && \ + if [ "$DEBUG" = "true" ]; then \ + pip3 wheel --wheel-dir=/wheels -r /requirements-dev.txt; \ + fi # Install HailoRT & Wheels RUN --mount=type=bind,source=docker/main/install_hailort.sh,target=/deps/install_hailort.sh \ @@ -224,9 +230,15 @@ ENV TRANSFORMERS_NO_ADVISORY_WARNINGS=1 # Set OpenCV ffmpeg loglevel to fatal: https://ffmpeg.org/doxygen/trunk/log_8h.html ENV OPENCV_FFMPEG_LOGLEVEL=8 +# Set NumPy to 
ignore getlimits warning +ENV PYTHONWARNINGS="ignore:::numpy.core.getlimits" + # Set HailoRT to disable logging ENV HAILORT_LOGGER_PATH=NONE +# TensorFlow error only +ENV TF_CPP_MIN_LOG_LEVEL=3 + ENV PATH="/usr/local/go2rtc/bin:/usr/local/tempio/bin:/usr/local/nginx/sbin:${PATH}" # Install dependencies diff --git a/docker/main/build_pysqlite3.sh b/docker/main/build_pysqlite3.sh index 6375b33fa..c84c6fcf7 100755 --- a/docker/main/build_pysqlite3.sh +++ b/docker/main/build_pysqlite3.sh @@ -2,18 +2,25 @@ set -euxo pipefail -SQLITE3_VERSION="96c92aba00c8375bc32fafcdf12429c58bd8aabfcadab6683e35bbb9cdebf19e" # 3.46.0 +SQLITE3_VERSION="3.46.1" PYSQLITE3_VERSION="0.5.3" -# Fetch the source code for the latest release of Sqlite. +# Fetch the pre-built sqlite amalgamation instead of building from source if [[ ! -d "sqlite" ]]; then - wget https://www.sqlite.org/src/tarball/sqlite.tar.gz?r=${SQLITE3_VERSION} -O sqlite.tar.gz - tar xzf sqlite.tar.gz - cd sqlite/ - LIBS="-lm" ./configure --disable-tcl --enable-tempstore=always - make sqlite3.c + mkdir sqlite + cd sqlite + + # Download the pre-built amalgamation from sqlite.org + # For SQLite 3.46.1, the amalgamation version is 3460100 + SQLITE_AMALGAMATION_VERSION="3460100" + + wget https://www.sqlite.org/2024/sqlite-amalgamation-${SQLITE_AMALGAMATION_VERSION}.zip -O sqlite-amalgamation.zip + unzip sqlite-amalgamation.zip + mv sqlite-amalgamation-${SQLITE_AMALGAMATION_VERSION}/* . + rmdir sqlite-amalgamation-${SQLITE_AMALGAMATION_VERSION} + rm sqlite-amalgamation.zip + cd ../ - rm sqlite.tar.gz fi # Grab the pysqlite3 source code. diff --git a/docker/main/install_deps.sh b/docker/main/install_deps.sh index 9684199f8..bd9f363e9 100755 --- a/docker/main/install_deps.sh +++ b/docker/main/install_deps.sh @@ -31,6 +31,14 @@ unset DEBIAN_FRONTEND yes | dpkg -i /tmp/libedgetpu1-max.deb && export DEBIAN_FRONTEND=noninteractive rm /tmp/libedgetpu1-max.deb +# install mesa-teflon-delegate from bookworm-backports +# Only available for arm64 at the moment +if [[ "${TARGETARCH}" == "arm64" ]]; then + echo "deb http://deb.debian.org/debian bookworm-backports main" | tee /etc/apt/sources.list.d/bookworm-backports.list + apt-get -qq update + apt-get -qq install --no-install-recommends --no-install-suggests -y mesa-teflon-delegate/bookworm-backports +fi + # ffmpeg -> amd64 if [[ "${TARGETARCH}" == "amd64" ]]; then mkdir -p /usr/lib/ffmpeg/5.0 @@ -71,11 +79,33 @@ if [[ "${TARGETARCH}" == "amd64" ]]; then echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy client" | tee /etc/apt/sources.list.d/intel-gpu-jammy.list apt-get -qq update apt-get -qq install --no-install-recommends --no-install-suggests -y \ - intel-opencl-icd=24.35.30872.31-996~22.04 intel-level-zero-gpu=1.3.29735.27-914~22.04 intel-media-va-driver-non-free=24.3.3-996~22.04 \ - libmfx1=23.2.2-880~22.04 libmfxgen1=24.2.4-914~22.04 libvpl2=1:2.13.0.0-996~22.04 + intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 + + apt-get -qq install -y ocl-icd-libopencl1 rm -f /usr/share/keyrings/intel-graphics.gpg rm -f /etc/apt/sources.list.d/intel-gpu-jammy.list + + # install legacy and standard intel icd and level-zero-gpu + # see https://github.com/intel/compute-runtime/blob/master/LEGACY_PLATFORMS.md for more info + # needed core package + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/libigdgmm12_22.5.5_amd64.deb + dpkg -i libigdgmm12_22.5.5_amd64.deb + rm libigdgmm12_22.5.5_amd64.deb + + # legacy packages + wget 
https://github.com/intel/compute-runtime/releases/download/24.35.30872.22/intel-opencl-icd-legacy1_24.35.30872.22_amd64.deb + wget https://github.com/intel/compute-runtime/releases/download/24.35.30872.22/intel-level-zero-gpu-legacy1_1.3.30872.22_amd64.deb + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.20/intel-igc-opencl_1.0.17537.20_amd64.deb + wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.20/intel-igc-core_1.0.17537.20_amd64.deb + # standard packages + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-opencl-icd_24.52.32224.5_amd64.deb + wget https://github.com/intel/compute-runtime/releases/download/24.52.32224.5/intel-level-zero-gpu_1.6.32224.5_amd64.deb + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-opencl-2_2.5.6+18417_amd64.deb + wget https://github.com/intel/intel-graphics-compiler/releases/download/v2.5.6/intel-igc-core-2_2.5.6+18417_amd64.deb + + dpkg -i *.deb + rm *.deb fi if [[ "${TARGETARCH}" == "arm64" ]]; then diff --git a/docker/main/requirements-dev.txt b/docker/main/requirements-dev.txt index af3ee5763..ac9d35758 100644 --- a/docker/main/requirements-dev.txt +++ b/docker/main/requirements-dev.txt @@ -1 +1,4 @@ ruff + +# types +types-peewee == 3.17.* diff --git a/docker/main/requirements-wheels.txt b/docker/main/requirements-wheels.txt index 2764eca43..7a2e2d6df 100644 --- a/docker/main/requirements-wheels.txt +++ b/docker/main/requirements-wheels.txt @@ -11,6 +11,9 @@ joserfc == 1.0.* pathvalidate == 3.2.* markupsafe == 3.0.* python-multipart == 0.0.12 +# Classification Model Training +tensorflow == 2.19.* ; platform_machine == 'aarch64' +tensorflow-cpu == 2.19.* ; platform_machine == 'x86_64' # General mypy == 1.6.1 onvif-zeep-async == 3.1.* @@ -38,14 +41,14 @@ opencv-python-headless == 4.11.0.* opencv-contrib-python == 4.11.0.* scipy == 1.14.* # OpenVino & ONNX -openvino == 2024.4.* -onnxruntime-openvino == 1.20.* ; platform_machine == 'x86_64' -onnxruntime == 1.20.* ; platform_machine == 'aarch64' +openvino == 2025.1.* +onnxruntime-openvino == 1.22.* ; platform_machine == 'x86_64' +onnxruntime == 1.22.* ; platform_machine == 'aarch64' # Embeddings transformers == 4.45.* # Generative AI google-generativeai == 0.8.* -ollama == 0.3.* +ollama == 0.5.* openai == 1.65.* # push notifications py-vapid == 1.9.* @@ -71,3 +74,8 @@ prometheus-client == 0.21.* # TFLite tflite_runtime @ https://github.com/frigate-nvr/TFlite-builds/releases/download/v2.17.1/tflite_runtime-2.17.1-cp311-cp311-linux_x86_64.whl; platform_machine == 'x86_64' tflite_runtime @ https://github.com/feranick/TFlite-builds/releases/download/v2.17.1/tflite_runtime-2.17.1-cp311-cp311-linux_aarch64.whl; platform_machine == 'aarch64' +# audio transcription +sherpa-onnx==1.12.* +faster-whisper==1.1.* +librosa==0.11.* +soundfile==0.13.* \ No newline at end of file diff --git a/docker/rocm/Dockerfile b/docker/rocm/Dockerfile index 7cac69eef..edd6b50c2 100644 --- a/docker/rocm/Dockerfile +++ b/docker/rocm/Dockerfile @@ -2,7 +2,7 @@ # https://askubuntu.com/questions/972516/debian-frontend-environment-variable ARG DEBIAN_FRONTEND=noninteractive -ARG ROCM=6.3.3 +ARG ROCM=1 ARG AMDGPU=gfx900 ARG HSA_OVERRIDE_GFX_VERSION ARG HSA_OVERRIDE @@ -13,12 +13,12 @@ FROM wget AS rocm ARG ROCM ARG AMDGPU -RUN apt update && \ +RUN apt update -qq && \ apt install -y wget gpg && \ - wget -O rocm.deb 
https://repo.radeon.com/amdgpu-install/$ROCM/ubuntu/jammy/amdgpu-install_6.3.60303-1_all.deb && \ + wget -O rocm.deb https://repo.radeon.com/amdgpu-install/6.4.1/ubuntu/jammy/amdgpu-install_6.4.60401-1_all.deb && \ apt install -y ./rocm.deb && \ apt update && \ - apt install -y rocm + apt install -qq -y rocm RUN mkdir -p /opt/rocm-dist/opt/rocm-$ROCM/lib RUN cd /opt/rocm-$ROCM/lib && \ @@ -33,7 +33,10 @@ RUN echo /opt/rocm/lib|tee /opt/rocm-dist/etc/ld.so.conf.d/rocm.conf ####################################################################### FROM deps AS deps-prelim -RUN apt-get update && apt-get install -y libnuma1 +COPY docker/rocm/debian-backports.sources /etc/apt/sources.list.d/debian-backports.sources +RUN apt-get update && \ + apt-get install -y libnuma1 && \ + apt-get install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers WORKDIR /opt/frigate COPY --from=rootfs / / @@ -62,7 +65,6 @@ COPY --from=rocm /opt/rocm-dist/ / ####################################################################### FROM deps-prelim AS rocm-prelim-hsa-override0 ENV HSA_ENABLE_SDMA=0 -ENV MIGRAPHX_ENABLE_NHWC=1 ENV TF_ROCM_USE_IMMEDIATE_MODE=1 COPY --from=rocm-dist / / diff --git a/docker/rocm/debian-backports.sources b/docker/rocm/debian-backports.sources new file mode 100644 index 000000000..fc51f4eeb --- /dev/null +++ b/docker/rocm/debian-backports.sources @@ -0,0 +1,6 @@ +Types: deb +URIs: http://deb.debian.org/debian +Suites: bookworm-backports +Components: main +Enabled: yes +Signed-By: /usr/share/keyrings/debian-archive-keyring.gpg diff --git a/docker/rocm/requirements-wheels-rocm.txt b/docker/rocm/requirements-wheels-rocm.txt index 85450768e..21aebf4bd 100644 --- a/docker/rocm/requirements-wheels-rocm.txt +++ b/docker/rocm/requirements-wheels-rocm.txt @@ -1 +1 @@ -onnxruntime-rocm @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v6.3.3/onnxruntime_rocm-1.20.1-cp311-cp311-linux_x86_64.whl \ No newline at end of file +onnxruntime-rocm @ https://github.com/NickM-27/frigate-onnxruntime-rocm/releases/download/v6.4.1/onnxruntime_rocm-1.21.1-cp311-cp311-linux_x86_64.whl \ No newline at end of file diff --git a/docker/rocm/rocm.hcl b/docker/rocm/rocm.hcl index 6a84b350d..0745a9f3d 100644 --- a/docker/rocm/rocm.hcl +++ b/docker/rocm/rocm.hcl @@ -2,7 +2,7 @@ variable "AMDGPU" { default = "gfx900" } variable "ROCM" { - default = "6.3.3" + default = "6.4.1" } variable "HSA_OVERRIDE_GFX_VERSION" { default = "" diff --git a/docker/tensorrt/requirements-amd64.txt b/docker/tensorrt/requirements-amd64.txt index be4aaa066..a7853aeec 100644 --- a/docker/tensorrt/requirements-amd64.txt +++ b/docker/tensorrt/requirements-amd64.txt @@ -13,6 +13,7 @@ nvidia_cusolver_cu12==11.6.3.*; platform_machine == 'x86_64' nvidia_cusparse_cu12==12.5.1.*; platform_machine == 'x86_64' nvidia_nccl_cu12==2.23.4; platform_machine == 'x86_64' nvidia_nvjitlink_cu12==12.5.82; platform_machine == 'x86_64' +tensorflow==2.19.*; platform_machine == 'x86_64' onnx==1.16.*; platform_machine == 'x86_64' -onnxruntime-gpu==1.20.*; platform_machine == 'x86_64' +onnxruntime-gpu==1.22.*; platform_machine == 'x86_64' protobuf==3.20.3; platform_machine == 'x86_64' diff --git a/docs/docs/configuration/audio_detectors.md b/docs/docs/configuration/audio_detectors.md index b783daa69..2f4d43a6a 100644 --- a/docs/docs/configuration/audio_detectors.md +++ b/docs/docs/configuration/audio_detectors.md @@ -72,3 +72,77 @@ audio: - speech - yell ``` + +### Audio Transcription + +Frigate supports fully local audio transcription 
using either `sherpa-onnx` or OpenAI’s open-source Whisper models via `faster-whisper`. To enable transcription, configure the feature at the global level, then enable it per camera at the camera level.
+
+```yaml
+audio_transcription:
+  enabled: False
+  device: ...
+  model_size: ...
+```
+
+Enable audio transcription for select cameras at the camera level:
+
+```yaml
+cameras:
+  back_yard:
+    ...
+    audio_transcription:
+      enabled: True
+```
+
+:::note
+
+Audio detection must be enabled and configured as described above in order to use audio transcription features.
+
+:::
+
+The optional config parameters that can be set at the global level include:
+
+- **`enabled`**: Enable or disable the audio transcription feature.
+  - Default: `False`
+  - It is recommended to only configure the feature at the global level and enable it at the individual camera level.
+- **`device`**: Device to use to run transcription and translation models.
+  - Default: `CPU`
+  - This can be `CPU` or `GPU`. The `sherpa-onnx` models are lightweight and run on the CPU only. The `whisper` models can run on GPU but are only supported on CUDA hardware.
+- **`model_size`**: The size of the model used for live transcription.
+  - Default: `small`
+  - This can be `small` or `large`. The `small` setting uses `sherpa-onnx` models that are fast, lightweight, and always run on the CPU but are not as accurate as the `whisper` model.
+  - This config option applies to **live transcription only**. Recorded `speech` events will always use a different `whisper` model (which can be accelerated on CUDA hardware if available with `device: GPU`).
+- **`language`**: Defines the language used by `whisper` to translate `speech` audio events (and live audio only if using the `large` model).
+  - Default: `en`
+  - You must use a valid [language code](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10).
+  - Transcriptions for `speech` events are translated.
+  - Live audio is translated only if you are using the `large` model. The `small` `sherpa-onnx` model is English-only.
+
+The only field that is valid at the camera level is `enabled`.
+
+#### Live transcription
+
+The single camera Live view in the Frigate UI supports live transcription of audio for streams defined with the `audio` role. Use the Enable/Disable Live Audio Transcription button/switch to toggle transcription processing. When speech is heard, the UI will display a black box over the top of the camera stream with text. The MQTT topic `frigate/<camera_name>/audio/transcription` will also be updated in real time with transcribed text.
+
+Results can be error-prone due to a number of factors, including:
+
+- Poor quality camera microphone
+- Distance of the audio source to the camera microphone
+- Low audio bitrate setting in the camera
+- Background noise
+- Using the `small` model - it's fast, but not accurate for poor-quality audio
+
+For speech sources close to the camera with minimal background noise, use the `small` model.
+
+If you have CUDA hardware, you can experiment with the `large` `whisper` model on GPU. Performance is not quite as fast as the `sherpa-onnx` `small` model, but live transcription is far more accurate. Using the `large` model with CPU will likely be too slow for real-time transcription.
+
+#### Transcription and translation of `speech` audio events
+
+Any `speech` events in Explore can be transcribed and/or translated through the Transcribe button in the Tracked Object Details pane. 
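+As a point of reference, the sketch below combines the options described above for transcribing and translating `speech` events; the camera name and language value are illustrative, not defaults:
+
+```yaml
+audio:
+  enabled: True
+  listen:
+    - speech
+
+audio_transcription:
+  enabled: False
+  device: CPU
+  model_size: small
+  language: en
+
+cameras:
+  back_yard:
+    audio_transcription:
+      enabled: True
+```
+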
+ +In order to use transcription and translation for past events, you must enable audio detection and define `speech` as an audio type to listen for in your config. To have `speech` events translated into the language of your choice, set the `language` config parameter with the correct [language code](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10). + +The transcribed/translated speech will appear in the description box in the Tracked Object Details pane. If Semantic Search is enabled, embeddings are generated for the transcription text and are fully searchable using the description search type. + +Recorded `speech` events will always use a `whisper` model, regardless of the `model_size` config setting. Without a GPU, generating transcriptions for longer `speech` events may take a fair amount of time, so be patient. diff --git a/docs/docs/configuration/genai.md b/docs/docs/configuration/genai.md index 931de46c4..957922dbd 100644 --- a/docs/docs/configuration/genai.md +++ b/docs/docs/configuration/genai.md @@ -9,35 +9,38 @@ Requests for a description are sent off automatically to your AI provider at the ## Configuration -Generative AI can be enabled for all cameras or only for specific cameras. There are currently 3 native providers available to integrate with Frigate. Other providers that support the OpenAI standard API can also be used. See the OpenAI section below. +Generative AI can be enabled for all cameras or only for specific cameras. If GenAI is disabled for a camera, you can still manually generate descriptions for events using the HTTP API. There are currently 3 native providers available to integrate with Frigate. Other providers that support the OpenAI standard API can also be used. See the OpenAI section below. To use Generative AI, you must define a single provider at the global level of your Frigate configuration. If the provider you choose requires an API key, you may either directly paste it in your configuration, or store it in an environment variable prefixed with `FRIGATE_`. ```yaml genai: - enabled: True provider: gemini api_key: "{FRIGATE_GEMINI_API_KEY}" model: gemini-1.5-flash cameras: - front_camera: + front_camera: + objects: genai: - enabled: True # <- enable GenAI for your front camera - use_snapshot: True - objects: - - person - required_zones: - - steps + enabled: True # <- enable GenAI for your front camera + use_snapshot: True + objects: + - person + required_zones: + - steps indoor_camera: - genai: - enabled: False # <- disable GenAI for your indoor camera + objects: + genai: + enabled: False # <- disable GenAI for your indoor camera ``` By default, descriptions will be generated for all tracked objects and all zones. But you can also optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones. Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the uncompressed images from the `detect` stream collected over the object's lifetime to the model. Once the object lifecycle ends, only a single compressed and cropped thumbnail is saved with the tracked object. Using a snapshot might be useful when you want to _regenerate_ a tracked object's description as it will provide the AI with a higher-quality image (typically downscaled by the AI itself) than the cropped/compressed thumbnail. 
Using a snapshot otherwise has a trade-off in that only a single image is sent to your provider, which will limit the model's ability to determine object movement or direction.

+Generative AI can also be toggled dynamically for a camera via MQTT with the topic `frigate/<camera_name>/object_descriptions/set`. See the [MQTT documentation](/integrations/mqtt/#frigatecamera_nameobjectdescriptionsset).
+
## Ollama

:::warning
@@ -66,7 +69,6 @@ You should have at least 8 GB of RAM available (or VRAM if running on GPU) to ru

```yaml
genai:
-  enabled: True
  provider: ollama
  base_url: http://localhost:11434
  model: llava:7b
@@ -93,12 +95,17 @@ To start using Gemini, you must first get an API key from [Google AI Studio](htt

```yaml
genai:
-  enabled: True
  provider: gemini
  api_key: "{FRIGATE_GEMINI_API_KEY}"
  model: gemini-1.5-flash
```

+:::note
+
+To use a different Gemini-compatible API endpoint, set the `GEMINI_BASE_URL` environment variable to your provider's API URL.
+
+:::
+
## OpenAI

OpenAI does not have a free tier for their API. With the release of gpt-4o, pricing has been reduced and each generation should cost fractions of a cent if you choose to go this route.
@@ -115,7 +122,6 @@ To start using OpenAI, you must first [create an API key](https://platform.opena

```yaml
genai:
-  enabled: True
  provider: openai
  api_key: "{FRIGATE_OPENAI_API_KEY}"
  model: gpt-4o
@@ -143,7 +149,6 @@ To start using Azure OpenAI, you must first [create a resource](https://learn.mi

```yaml
genai:
-  enabled: True
  provider: azure_openai
  base_url: https://example-endpoint.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2023-03-15-preview
  api_key: "{FRIGATE_OPENAI_API_KEY}"
@@ -186,32 +191,35 @@ You are also able to define custom prompts in your configuration.

```yaml
genai:
-  enabled: True
  provider: ollama
  base_url: http://localhost:11434
  model: llava
-  prompt: "Analyze the {label} in these images from the {camera} security camera. Focus on the actions, behavior, and potential intent of the {label}, rather than just describing its appearance."
-  object_prompts:
-    person: "Examine the main person in these images. What are they doing and what might their actions suggest about their intent (e.g., approaching a door, leaving an area, standing still)? Do not describe the surroundings or static details."
-    car: "Observe the primary vehicle in these images. Focus on its movement, direction, or purpose (e.g., parking, approaching, circling). If it's a delivery vehicle, mention the company."
+
+objects:
+  prompt: "Analyze the {label} in these images from the {camera} security camera. Focus on the actions, behavior, and potential intent of the {label}, rather than just describing its appearance."
+  object_prompts:
+    person: "Examine the main person in these images. What are they doing and what might their actions suggest about their intent (e.g., approaching a door, leaving an area, standing still)? Do not describe the surroundings or static details."
+    car: "Observe the primary vehicle in these images. Focus on its movement, direction, or purpose (e.g., parking, approaching, circling). If it's a delivery vehicle, mention the company."
```

-Prompts can also be overriden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire.
+Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. 
```yaml
cameras:
  front_door:
-    genai:
-      use_snapshot: True
-      prompt: "Analyze the {label} in these images from the {camera} security camera at the front door. Focus on the actions and potential intent of the {label}."
-      object_prompts:
-        person: "Examine the person in these images. What are they doing, and how might their actions suggest their purpose (e.g., delivering something, approaching, leaving)? If they are carrying or interacting with a package, include details about its source or destination."
-        cat: "Observe the cat in these images. Focus on its movement and intent (e.g., wandering, hunting, interacting with objects). If the cat is near the flower pots or engaging in any specific actions, mention it."
-      objects:
-        - person
-        - cat
-      required_zones:
-        - steps
+    objects:
+      genai:
+        enabled: True
+        use_snapshot: True
+        prompt: "Analyze the {label} in these images from the {camera} security camera at the front door. Focus on the actions and potential intent of the {label}."
+        object_prompts:
+          person: "Examine the person in these images. What are they doing, and how might their actions suggest their purpose (e.g., delivering something, approaching, leaving)? If they are carrying or interacting with a package, include details about its source or destination."
+          cat: "Observe the cat in these images. Focus on its movement and intent (e.g., wandering, hunting, interacting with objects). If the cat is near the flower pots or engaging in any specific actions, mention it."
+        objects:
+          - person
+          - cat
+        required_zones:
+          - steps
```

### Experiment with prompts
diff --git a/docs/docs/configuration/genai/config.md b/docs/docs/configuration/genai/config.md
new file mode 100644
index 000000000..bb4d213e1
--- /dev/null
+++ b/docs/docs/configuration/genai/config.md
@@ -0,0 +1,127 @@
+---
+id: genai_config
+title: Configuring Generative AI
+---
+
+## Configuration
+
+A Generative AI provider can be configured in the global config, which will make the Generative AI features available for use. There are currently 3 native providers available to integrate with Frigate. Other providers that support the OpenAI standard API can also be used. See the OpenAI section below.
+
+To use Generative AI, you must define a single provider at the global level of your Frigate configuration. If the provider you choose requires an API key, you may either directly paste it in your configuration, or store it in an environment variable prefixed with `FRIGATE_`.
+
+## Ollama
+
+:::warning
+
+Using Ollama on CPU is not recommended; high inference times make using Generative AI impractical.
+
+:::
+
+[Ollama](https://ollama.com/) allows you to self-host large language models and keep everything running locally. It provides a nice API over [llama.cpp](https://github.com/ggerganov/llama.cpp). It is highly recommended to host this server on a machine with an Nvidia graphics card, or on an Apple Silicon Mac for best performance.
+
+Most of the 7b parameter 4-bit vision models will fit inside 8GB of VRAM. There is also a [Docker container](https://hub.docker.com/r/ollama/ollama) available.
+
+Parallel requests also come with some caveats. You will need to set `OLLAMA_NUM_PARALLEL=1` and choose `OLLAMA_MAX_QUEUE` and `OLLAMA_MAX_LOADED_MODELS` values that are appropriate for your hardware and preferences. See the [Ollama documentation](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-does-ollama-handle-concurrent-requests).
+
+### Supported Models
+
+You must use a vision capable model with Frigate. 
Current model variants can be found [in their model library](https://ollama.com/library). Note that Frigate will not automatically download the model you specify in your config. Ollama will try to download it, but this may take longer than the timeout, so it is recommended to pull the model beforehand by running `ollama pull your_model` on your Ollama server/Docker container. Note that the model specified in Frigate's config must match the downloaded model tag.
+
+The following models are recommended:
+
+| Model             | Size   | Notes                                                       |
+| ----------------- | ------ | ----------------------------------------------------------- |
+| `gemma3:4b`       | 3.3 GB | Strong frame-to-frame understanding, slower inference times |
+| `qwen2.5vl:3b`    | 3.2 GB | Fast but capable model with good vision comprehension       |
+| `llava-phi3:3.8b` | 2.9 GB | Lightweight and fast model with vision comprehension        |
+
+:::note
+
+You should have at least 8 GB of RAM available (or VRAM if running on GPU) to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
+
+:::
+
+### Configuration
+
+```yaml
+genai:
+  provider: ollama
+  base_url: http://localhost:11434
+  model: minicpm-v:8b
+  provider_options: # other Ollama client options can be defined
+    keep_alive: -1
+```
+
+## Google Gemini
+
+Google Gemini has a free tier allowing [15 queries per minute](https://ai.google.dev/pricing) to the API, which is more than sufficient for standard Frigate usage.
+
+### Supported Models
+
+You must use a vision capable model with Frigate. Current model variants can be found [in their documentation](https://ai.google.dev/gemini-api/docs/models/gemini). At the time of writing, this includes `gemini-1.5-pro` and `gemini-1.5-flash`.
+
+### Get API Key
+
+To start using Gemini, you must first get an API key from [Google AI Studio](https://aistudio.google.com).
+
+1. Accept the Terms of Service
+2. Click "Get API Key" from the right hand navigation
+3. Click "Create API key in new project"
+4. Copy the API key for use in your config
+
+### Configuration
+
+```yaml
+genai:
+  provider: gemini
+  api_key: "{FRIGATE_GEMINI_API_KEY}"
+  model: gemini-1.5-flash
+```
+
+## OpenAI
+
+OpenAI does not have a free tier for their API. With the release of gpt-4o, pricing has been reduced and each generation should cost fractions of a cent if you choose to go this route.
+
+### Supported Models
+
+You must use a vision capable model with Frigate. Current model variants can be found [in their documentation](https://platform.openai.com/docs/models). At the time of writing, this includes `gpt-4o` and `gpt-4-turbo`.
+
+### Get API Key
+
+To start using OpenAI, you must first [create an API key](https://platform.openai.com/api-keys) and [configure billing](https://platform.openai.com/settings/organization/billing/overview).
+
+### Configuration
+
+```yaml
+genai:
+  provider: openai
+  api_key: "{FRIGATE_OPENAI_API_KEY}"
+  model: gpt-4o
+```
+
+:::note
+
+To use a different OpenAI-compatible API endpoint, set the `OPENAI_BASE_URL` environment variable to your provider's API URL.
+
+:::
+
+## Azure OpenAI
+
+Microsoft offers several vision models through Azure OpenAI. A subscription is required.
+
+### Supported Models
+
+You must use a vision capable model with Frigate. Current model variants can be found [in their documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models). At the time of writing, this includes `gpt-4o` and `gpt-4-turbo`.
+
+### Create Resource and Get API Key
+
+To start using Azure OpenAI, you must first [create a resource](https://learn.microsoft.com/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource). You'll need your API key and resource URL, which must include the `api-version` parameter (see the example below). The model field is not required in your configuration as the model is part of the deployment name you chose when deploying the resource.
+
+### Configuration
+
+```yaml
+genai:
+  provider: azure_openai
+  base_url: https://example-endpoint.openai.azure.com/openai/deployments/gpt-4o/chat/completions?api-version=2023-03-15-preview
+  api_key: "{FRIGATE_OPENAI_API_KEY}"
+```
\ No newline at end of file
diff --git a/docs/docs/configuration/genai/objects.md b/docs/docs/configuration/genai/objects.md
new file mode 100644
index 000000000..e5aa92cc0
--- /dev/null
+++ b/docs/docs/configuration/genai/objects.md
@@ -0,0 +1,77 @@
+---
+id: genai_objects
+title: Object Descriptions
+---
+
+Generative AI can be used to automatically generate descriptive text based on the thumbnails of your tracked objects. This helps with [Semantic Search](/configuration/semantic_search) in Frigate to provide more context about your tracked objects. Descriptions are accessed via the _Explore_ view in the Frigate UI by clicking on a tracked object's thumbnail.
+
+Requests for a description are sent off automatically to your AI provider at the end of the tracked object's lifecycle, or can optionally be sent earlier after a number of significantly changed frames, for example for use in more real-time notifications. Descriptions can also be regenerated manually via the Frigate UI. Note that if you are manually entering a description for a tracked object prior to its end, this will be overwritten by the generated response.
+
+By default, descriptions will be generated for all tracked objects and all zones. But you can also optionally specify `objects` and `required_zones` to only generate descriptions for certain tracked objects or zones.
+
+Optionally, you can generate the description using a snapshot (if enabled) by setting `use_snapshot` to `True`. By default, this is set to `False`, which sends the uncompressed images from the `detect` stream collected over the object's lifetime to the model. Once the object lifecycle ends, only a single compressed and cropped thumbnail is saved with the tracked object. Using a snapshot might be useful when you want to _regenerate_ a tracked object's description as it will provide the AI with a higher-quality image (typically downscaled by the AI itself) than the cropped/compressed thumbnail. Using a snapshot otherwise has a trade-off in that only a single image is sent to your provider, which will limit the model's ability to determine object movement or direction.
+
+Generative AI object descriptions can also be toggled dynamically for a camera via MQTT with the topic `frigate/<camera_name>/object_descriptions/set`. See the [MQTT documentation](/integrations/mqtt/#frigatecamera_nameobjectdescriptionsset).
+
+## Usage and Best Practices
+
+Frigate's thumbnail search excels at identifying specific details about tracked objects – for example, using an "image caption" approach to find a "person wearing a yellow vest," "a white dog running across the lawn," or "a red car on a residential street." To enhance this further, Frigate’s default prompts are designed to ask your AI provider about the intent behind the object's actions, rather than just describing its appearance. 
+ +While generating simple descriptions of detected objects is useful, understanding intent provides a deeper layer of insight. Instead of just recognizing "what" is in a scene, Frigate’s default prompts aim to infer "why" it might be there or "what" it could do next. Descriptions tell you what’s happening, but intent gives context. For instance, a person walking toward a door might seem like a visitor, but if they’re moving quickly after hours, you can infer a potential break-in attempt. Detecting a person loitering near a door at night can trigger an alert sooner than simply noting "a person standing by the door," helping you respond based on the situation’s context. + +## Custom Prompts + +Frigate sends multiple frames from the tracked object along with a prompt to your Generative AI provider asking it to generate a description. The default prompt is as follows: + +``` +Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next. +``` + +:::tip + +Prompts can use variable replacements `{label}`, `{sub_label}`, and `{camera}` to substitute information from the tracked object as part of the prompt. + +::: + +You are also able to define custom prompts in your configuration. + +```yaml +genai: + provider: ollama + base_url: http://localhost:11434 + model: llava + +objects: + prompt: "Analyze the {label} in these images from the {camera} security camera. Focus on the actions, behavior, and potential intent of the {label}, rather than just describing its appearance." + object_prompts: + person: "Examine the main person in these images. What are they doing and what might their actions suggest about their intent (e.g., approaching a door, leaving an area, standing still)? Do not describe the surroundings or static details." + car: "Observe the primary vehicle in these images. Focus on its movement, direction, or purpose (e.g., parking, approaching, circling). If it's a delivery vehicle, mention the company." +``` + +Prompts can also be overridden at the camera level to provide a more detailed prompt to the model about your specific camera, if you desire. + +```yaml +cameras: + front_door: + objects: + genai: + enabled: True + use_snapshot: True + prompt: "Analyze the {label} in these images from the {camera} security camera at the front door. Focus on the actions and potential intent of the {label}." + object_prompts: + person: "Examine the person in these images. What are they doing, and how might their actions suggest their purpose (e.g., delivering something, approaching, leaving)? If they are carrying or interacting with a package, include details about its source or destination." + cat: "Observe the cat in these images. Focus on its movement and intent (e.g., wandering, hunting, interacting with objects). If the cat is near the flower pots or engaging in any specific actions, mention it." + objects: + - person + - cat + required_zones: + - steps +``` + +### Experiment with prompts + +Many providers also have a public facing chat interface for their models. Download a couple of different thumbnails or snapshots from Frigate and try new things in the playground to get descriptions to your liking before updating the prompt in Frigate. 
+- OpenAI - [ChatGPT](https://chatgpt.com)
+- Gemini - [Google AI Studio](https://aistudio.google.com)
+- Ollama - [Open WebUI](https://docs.openwebui.com/)
diff --git a/docs/docs/configuration/genai/review_summaries.md b/docs/docs/configuration/genai/review_summaries.md
new file mode 100644
index 000000000..4acb5a70c
--- /dev/null
+++ b/docs/docs/configuration/genai/review_summaries.md
@@ -0,0 +1,44 @@
+---
+id: genai_review
+title: Review Summaries
+---
+
+Generative AI can be used to automatically generate structured summaries of review items. These summaries will show up in Frigate's native notifications as well as in the UI. Generative AI can also take a collection of summaries over a period of time and produce a report, which can be useful for a quick overview of everything that happened while you were away.
+
+Summaries are requested automatically from your AI provider for alert review items when the activity has ended; they can optionally be enabled for detections as well.
+
+Generative AI review summaries can also be toggled dynamically for a camera via MQTT with the topic `frigate/<camera_name>/review_descriptions/set`. See the [MQTT documentation](/integrations/mqtt/#frigatecamera_namereviewdescriptionsset).
+
+## Review Summary Usage and Best Practices
+
+Review summaries provide structured JSON responses that are saved for each review item:
+
+```
+- `scene` (string): A full description including setting, entities, actions, and any plausible supported inferences.
+- `confidence` (float): 0-1 confidence in the analysis.
+- `other_concerns` (list): List of user-defined concerns that may need additional investigation.
+- `potential_threat_level` (integer): 0, 1, or 2 as defined below.
+
+Threat-level definitions:
+- 0 — Typical or expected activity for this location/time (includes residents, guests, or known animals engaged in normal activities, even if they glance around or scan surroundings).
+- 1 — Unusual or suspicious activity: At least one security-relevant behavior is present **and not explainable by a normal residential activity**.
+- 2 — Active or immediate threat: Breaking in, vandalism, aggression, weapon display.
+```
+
+This will show in the UI as a list of concerns that each review item has along with the general description.
+
+### Additional Concerns
+
+Along with the concern of suspicious activity or immediate threat, you may have concerns such as animals in your garden or a gate being left open. These concerns can be configured so that the review summaries will make note of them if the activity requires additional review. For example:
+
+```yaml
+review:
+  genai:
+    enabled: true
+    additional_concerns:
+      - animals in the garden
+```
+
+## Review Reports
+
+Along with individual review item summaries, Generative AI provides the ability to request a report for a given time period. For example, while on vacation you can get a daily report of any suspicious activity or other concerns that may require review.
\ No newline at end of file
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index e048e0ec5..01b6f1932 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -19,6 +19,10 @@ Frigate supports multiple different detectors that work on different types of ha

- [ROCm](#amdrocm-gpu-detector): ROCm can run on AMD Discrete GPUs to provide efficient object detection. 
- [ONNX](#onnx): ROCm will automatically be detected and used as a detector in the `-rocm` Frigate image when a supported ONNX model is configured.

+**Apple Silicon**
+
+- [Apple Silicon](#apple-silicon-detector): Apple Silicon can run on M1 and newer Apple Silicon devices.
+
**Intel**

- [OpenVino](#openvino-detector): OpenVino can run on Intel Arc GPUs, Intel integrated GPUs, and Intel CPUs to provide efficient object detection.
@@ -264,7 +268,7 @@

:::

-### Supported Models
+### OpenVINO Supported Models

#### SSDLite MobileNet v2
@@ -402,6 +406,59 @@ model:

Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.

+## Apple Silicon detector
+
+The NPU in Apple Silicon can't be accessed from within a container, so the [Apple Silicon detector client](https://github.com/frigate-nvr/apple-silicon-detector) must first be set up. It is recommended to use the Frigate docker image with the `-standard-arm64` suffix, for example `ghcr.io/blakeblackshear/frigate:stable-standard-arm64`.
+
+### Setup
+
+1. Set up the [Apple Silicon detector client](https://github.com/frigate-nvr/apple-silicon-detector) and run it
+2. Configure the detector in Frigate and start Frigate
+
+### Configuration
+
+Using the detector config below will connect to the client:
+
+```yaml
+detectors:
+  apple-silicon:
+    type: zmq
+    endpoint: tcp://host.docker.internal:5555
+```
+
+### Apple Silicon Supported Models
+
+There is no default model provided; the following formats are supported:
+
+#### YOLO (v3, v4, v7, v9)
+
+YOLOv3, YOLOv4, YOLOv7, and [YOLOv9](https://github.com/WongKinYiu/yolov9) models are supported, but not included by default.
+
+:::tip
+
+The YOLO detector has been designed to support YOLOv3, YOLOv4, YOLOv7, and YOLOv9 models, but may support other YOLO model architectures as well. See [the models section](#downloading-yolo-models) for more information on downloading YOLO models for use in Frigate.
+
+:::
+
+After placing the downloaded onnx model in your config folder, you can use the following configuration, reusing the `zmq` detector defined above:
+
+```yaml
+detectors:
+  apple-silicon:
+    type: zmq
+    endpoint: tcp://host.docker.internal:5555
+
+model:
+  model_type: yolo-generic
+  width: 320 # <--- should match the imgsize set during model export
+  height: 320 # <--- should match the imgsize set during model export
+  input_tensor: nchw
+  input_dtype: float
+  path: /config/model_cache/yolo.onnx
+  labelmap_path: /labelmap/coco-80.txt
+```
+
+Note that the labelmap uses a subset of the complete COCO label set that has only 80 objects.
+
## AMD/ROCm GPU detector

### Setup
@@ -483,7 +540,7 @@ We unset the `HSA_OVERRIDE_GFX_VERSION` to prevent an existing override from mes
$ docker exec -it frigate /bin/bash -c '(unset HSA_OVERRIDE_GFX_VERSION && /opt/rocm/bin/rocminfo |grep gfx)'
```

-### Supported Models
+### ROCm Supported Models

See [ONNX supported models](#onnx-supported-models) for supported models, there are some caveats:
@@ -526,7 +583,7 @@ detectors:

:::

-### Supported Models
+### ONNX Supported Models

There is no default model provided, the following formats are supported:
@@ -824,7 +881,7 @@ $ cat /sys/kernel/debug/rknpu/load

:::

-### Supported Models
+### RockChip Supported Models

This `config.yml` shows all relevant options to configure the detector and explains them. All values shown are the default values (except for two). Lines that are required at least to use the detector are labeled as required; all other lines are optional. 
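As a quick orientation before the full annotated reference, a minimal RKNN detector sketch might look like the following; the `num_cores` value is illustrative and the annotated `config.yml` covers the complete option set:

```yaml
detectors:
  rknn:
    type: rknn
    # number of NPU cores to use (illustrative value; see the annotated config for defaults)
    num_cores: 3
```
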
diff --git a/docs/docs/configuration/record.md b/docs/docs/configuration/record.md
index 52c0f0c88..2745ef27d 100644
--- a/docs/docs/configuration/record.md
+++ b/docs/docs/configuration/record.md
@@ -13,14 +13,15 @@ H265 recordings can be viewed in Chrome 108+, Edge and Safari only. All other br

### Most conservative: Ensure all video is saved

-For users deploying Frigate in environments where it is important to have contiguous video stored even if there was no detectable motion, the following config will store all video for 3 days. After 3 days, only video containing motion and overlapping with alerts or detections will be retained until 30 days have passed.
+For users deploying Frigate in environments where it is important to have contiguous video stored even if there was no detectable motion, the following config will store all video for 3 days. After 3 days, only video containing motion will be kept until 7 days have passed. After 7 days, only video containing motion and overlapping with alerts or detections will be retained until 30 days have passed.

```yaml
record:
  enabled: True
-  retain:
+  continuous:
    days: 3
-    mode: all
+  motion:
+    days: 7
  alerts:
    retain:
      days: 30
@@ -38,9 +39,8 @@ In order to reduce storage requirements, you can adjust your config to only reta

```yaml
record:
  enabled: True
-  retain:
+  motion:
    days: 3
-    mode: motion
  alerts:
    retain:
      days: 30
@@ -58,7 +58,7 @@ If you only want to retain video that occurs during a tracked object, this confi

```yaml
record:
  enabled: True
-  retain:
+  continuous:
    days: 0
  alerts:
    retain:
@@ -80,15 +80,17 @@ Retention configs support decimals meaning they can be configured to retain `0.5

:::

-### Continuous Recording
+### Continuous and Motion Recording

-The number of days to retain continuous recordings can be set via the following config where X is a number, by default continuous recording is disabled.
+The number of days to retain continuous and motion recordings can be set via the following config, where X is a number. By default, continuous and motion recording are disabled.

```yaml
record:
  enabled: True
-  retain:
+  continuous:
    days: 1 # <- number of days to keep continuous recordings
+  motion:
+    days: 2 # <- number of days to keep motion recordings
```

-Continuous recording supports different retention modes [which are described below](#what-do-the-different-retain-modes-mean)

@@ -112,38 +114,6 @@ This configuration will retain recording segments that overlap with alerts and d

**WARNING**: Recordings still must be enabled in the config. If a camera has recordings disabled in the config, enabling via the methods listed above will have no effect.

-## What do the different retain modes mean?
-
-Frigate saves from the stream with the `record` role in 10 second segments. These options determine which recording segments are kept for continuous recording (but can also affect tracked objects).
-
-Let's say you have Frigate configured so that your doorbell camera would retain the last **2** days of continuous recording.
-
-- With the `all` option all 48 hours of those two days would be kept and viewable.
-- With the `motion` option the only parts of those 48 hours would be segments that Frigate detected motion. This is the middle ground option that won't keep all 48 hours, but will likely keep all segments of interest along with the potential for some extra segments.
-- With the `active_objects` option the only segments that would be kept are those where there was a true positive object that was not considered stationary. 
- -The same options are available with alerts and detections, except it will only save the recordings when it overlaps with a review item of that type. - -A configuration example of the above retain modes where all `motion` segments are stored for 7 days and `active objects` are stored for 14 days would be as follows: - -```yaml -record: - enabled: True - retain: - days: 7 - mode: motion - alerts: - retain: - days: 14 - mode: active_objects - detections: - retain: - days: 14 - mode: active_objects -``` - -The above configuration example can be added globally or on a per camera basis. - ## Can I have "continuous" recordings, but only at certain times? Using Frigate UI, Home Assistant, or MQTT, cameras can be automated to only record in certain situations or at certain times. diff --git a/docs/docs/configuration/reference.md b/docs/docs/configuration/reference.md index 2de72417c..e54f07a2b 100644 --- a/docs/docs/configuration/reference.md +++ b/docs/docs/configuration/reference.md @@ -339,6 +339,33 @@ objects: # Optional: mask to prevent this object type from being detected in certain areas (default: no mask) # Checks based on the bottom center of the bounding box of the object mask: 0.000,0.000,0.781,0.000,0.781,0.278,0.000,0.278 + # Optional: Configuration for AI generated tracked object descriptions + genai: + # Optional: Enable AI object description generation (default: shown below) + enabled: False + # Optional: Use the object snapshot instead of thumbnails for description generation (default: shown below) + use_snapshot: False + # Optional: The default prompt for generating descriptions. Can use replacement + # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below) + prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background." + # Optional: Object specific prompts to customize description results + # Format: {label}: {prompt} + object_prompts: + person: "My special person prompt." + # Optional: objects to generate descriptions for (default: all objects that are tracked) + objects: + - person + - cat + # Optional: Restrict generation to objects that entered any of the listed zones (default: none, all zones qualify) + required_zones: [] + # Optional: What triggers to use to send frames for a tracked object to generative AI (default: shown below) + send_triggers: + # Once the object is no longer tracked + tracked_object_end: True + # Optional: After X many significant updates are received (default: shown below) + after_significant_updates: None + # Optional: Save thumbnails sent to generative AI for review/debugging purposes (default: shown below) + debug_save_thumbnails: False # Optional: Review configuration # NOTE: Can be overridden at the camera level @@ -371,6 +398,19 @@ review: # should be configured at the camera level. 
required_zones:
      - driveway
+  # Optional: GenAI Review Summary Configuration
+  genai:
+    # Optional: Enable the GenAI review summary feature (default: shown below)
+    enabled: False
+    # Optional: Enable GenAI review summaries for alerts (default: shown below)
+    alerts: True
+    # Optional: Enable GenAI review summaries for detections (default: shown below)
+    detections: False
+    # Optional: Additional concerns that the GenAI should make note of (default: None)
+    additional_concerns:
+      - Animals in the garden
+    # Optional: Preferred response language (default: English)
+    preferred_language: English

# Optional: Motion configuration
# NOTE: Can be overridden at the camera level
@@ -438,20 +478,20 @@ record:
  # Optional: Number of minutes to wait between cleanup runs (default: shown below)
  # This can be used to reduce the frequency of deleting recording segments from disk if you want to minimize i/o
  expire_interval: 60
-  # Optional: Sync recordings with disk on startup and once a day (default: shown below).
+  # Optional: Two-way sync recordings database with disk on startup and once a day (default: shown below).
  sync_recordings: False
-  # Optional: Retention settings for recording
-  retain:
+  # Optional: Continuous retention settings
+  continuous:
+    # Optional: Number of days to retain recordings regardless of tracked objects or motion (default: shown below)
+    # NOTE: This should be set to 0 and retention should be defined in alerts and detections section below
+    # if you only want to retain recordings of alerts and detections.
+    days: 0
+  # Optional: Motion retention settings
+  motion:
    # Optional: Number of days to retain recordings regardless of tracked objects (default: shown below)
    # NOTE: This should be set to 0 and retention should be defined in alerts and detections section below
    # if you only want to retain recordings of alerts and detections.
    days: 0
-  # Optional: Mode for retention. Available options are: all, motion, and active_objects
-  # all - save all recording segments regardless of activity
-  # motion - save all recordings segments with any detected motion
-  # active_objects - save all recording segments with active/moving objects
-  # NOTE: this mode only applies when the days setting above is greater than 0
-  mode: all
  # Optional: Recording Export Settings
  export:
    # Optional: Timelapse Output Args (default: shown below).
@@ -612,13 +652,22 @@ genai:
  base_url: http://localhost:11434
  # Required if gemini or openai
  api_key: "{FRIGATE_GENAI_API_KEY}"
-  # Optional: The default prompt for generating descriptions. Can use replacement
-  # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below)
-  prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background."
-  # Optional: Object specific prompts to customize description results
-  # Format: {label}: {prompt}
-  object_prompts:
-    person: "My special person prompt."
+  # Optional additional args to pass to the GenAI Provider (default: None)
+  provider_options:
+    keep_alive: -1
+
+# Optional: Configuration for audio transcription
+# NOTE: only the enabled option can be overridden at the camera level
+audio_transcription:
+  # Optional: Enable audio transcription (default: shown below)
+  enabled: False
+  # Optional: The device to run the models on (default: shown below)
+  device: CPU
+  # Optional: Set the model size used for transcription. (default: shown below)
+  model_size: small
+  # Optional: Set the language used for transcription translation. 
(default: shown below) + # List of language codes: https://github.com/openai/whisper/blob/main/whisper/tokenizer.py#L10 + language: en # Optional: Restream configuration # Uses https://github.com/AlexxIT/go2rtc (v1.9.9) @@ -827,33 +876,22 @@ cameras: # By default the cameras are sorted alphabetically. order: 0 - # Optional: Configuration for AI generated tracked object descriptions - genai: - # Optional: Enable AI description generation (default: shown below) - enabled: False - # Optional: Use the object snapshot instead of thumbnails for description generation (default: shown below) - use_snapshot: False - # Optional: The default prompt for generating descriptions. Can use replacement - # variables like "label", "sub_label", "camera" to make more dynamic. (default: shown below) - prompt: "Describe the {label} in the sequence of images with as much detail as possible. Do not describe the background." - # Optional: Object specific prompts to customize description results - # Format: {label}: {prompt} - object_prompts: - person: "My special person prompt." - # Optional: objects to generate descriptions for (default: all objects that are tracked) - objects: - - person - - cat - # Optional: Restrict generation to objects that entered any of the listed zones (default: none, all zones qualify) - required_zones: [] - # Optional: What triggers to use to send frames for a tracked object to generative AI (default: shown below) - send_triggers: - # Once the object is no longer tracked - tracked_object_end: True - # Optional: After X many significant updates are received (default: shown below) - after_significant_updates: None - # Optional: Save thumbnails sent to generative AI for review/debugging purposes (default: shown below) - debug_save_thumbnails: False + # Optional: Configuration for triggers to automate actions based on semantic search results. + triggers: + # Required: Unique identifier for the trigger (generated automatically from nickname if not specified). + trigger_name: + # Required: Enable or disable the trigger. (default: shown below) + enabled: true + # Type of trigger, either `thumbnail` for image-based matching or `description` for text-based matching. (default: none) + type: thumbnail + # Reference data for matching, either an event ID for `thumbnail` or a text string for `description`. (default: none) + data: 1751565549.853251-b69j73 + # Similarity threshold for triggering. (default: none) + threshold: 0.7 + # List of actions to perform when the trigger fires. (default: none) + # Available options: `notification` (send a webpush notification) + actions: + - notification # Optional ui: diff --git a/docs/docs/configuration/semantic_search.md b/docs/docs/configuration/semantic_search.md index d9fcb5006..fc85ef259 100644 --- a/docs/docs/configuration/semantic_search.md +++ b/docs/docs/configuration/semantic_search.md @@ -39,7 +39,7 @@ If you are enabling Semantic Search for the first time, be advised that Frigate The [V1 model from Jina](https://huggingface.co/jinaai/jina-clip-v1) has a vision model which is able to embed both images and text into the same vector space, which allows `image -> image` and `text -> image` similarity searches. Frigate uses this model on tracked objects to encode the thumbnail image and store it in the database. When searching for tracked objects via text in the search box, Frigate will perform a `text -> image` similarity search against this embedding. 
When clicking "Find Similar" in the tracked object detail pane, Frigate will perform an `image -> image` similarity search to retrieve the closest matching thumbnails. -The V1 text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Explore page when clicking on thumbnail of a tracked object. See [the Generative AI docs](/configuration/genai.md) for more information on how to automatically generate tracked object descriptions. +The V1 text model is used to embed tracked object descriptions and perform searches against them. Descriptions can be created, viewed, and modified on the Explore page when clicking on thumbnail of a tracked object. See [the object description docs](/configuration/genai/objects.md) for more information on how to automatically generate tracked object descriptions. Differently weighted versions of the Jina models are available and can be selected by setting the `model_size` config option as `small` or `large`: @@ -102,3 +102,41 @@ See the [Hardware Accelerated Enrichments](/configuration/hardware_acceleration_ 4. Make your search language and tone closely match exactly what you're looking for. If you are using thumbnail search, **phrase your query as an image caption**. Searching for "red car" may not work as well as "red sedan driving down a residential street on a sunny day". 5. Semantic search on thumbnails tends to return better results when matching large subjects that take up most of the frame. Small things like "cat" tend to not work well. 6. Experiment! Find a tracked object you want to test and start typing keywords and phrases to see what works for you. + +## Triggers + +Triggers utilize semantic search to automate actions when a tracked object matches a specified image or description. Triggers can be configured so that Frigate executes a specific actions when a tracked object's image or description matches a predefined image or text, based on a similarity threshold. Triggers are managed per camera and can be configured via the Frigate UI in the Settings page under the Triggers tab. + +### Configuration + +Triggers are defined within the `semantic_search` configuration for each camera in your Frigate configuration file or through the UI. Each trigger consists of a `type` (either `thumbnail` or `description`), a `data` field (the reference image event ID or text), a `threshold` for similarity matching, and a list of `actions` to perform when the trigger fires. + +#### Managing Triggers in the UI + +1. Navigate to the **Settings** page and select the **Triggers** tab. +2. Choose a camera from the dropdown menu to view or manage its triggers. +3. Click **Add Trigger** to create a new trigger or use the pencil icon to edit an existing one. +4. In the **Create Trigger** dialog: + - Enter a **Name** for the trigger (e.g., "red_car_alert"). + - Select the **Type** (`Thumbnail` or `Description`). + - For `Thumbnail`, select an image to trigger this action when a similar thumbnail image is detected, based on the threshold. + - For `Description`, enter text to trigger this action when a similar tracked object description is detected. + - Set the **Threshold** for similarity matching. + - Select **Actions** to perform when the trigger fires. +5. Save the trigger to update the configuration and store the embedding in the database. + +When a trigger fires, the UI highlights the trigger with a blue outline for 3 seconds for easy identification. + +### Usage and Best Practices + +1. 
**Thumbnail Triggers**: Select a representative image (event ID) from the Explore page that closely matches the object you want to detect. For best results, choose images where the object is prominent and fills most of the frame. +2. **Description Triggers**: Write concise, specific text descriptions (e.g., "Person in a red jacket") that align with the tracked object’s description. Avoid vague terms to improve matching accuracy. +3. **Threshold Tuning**: Adjust the threshold to balance sensitivity and specificity. A higher threshold (e.g., 0.8) requires closer matches, reducing false positives but potentially missing similar objects. A lower threshold (e.g., 0.6) is more inclusive but may trigger more often. +4. **Using Explore**: Use the context menu or right-click / long-press on a tracked object in the Grid View in Explore to quickly add a trigger based on the tracked object's thumbnail. +5. **Editing triggers**: For the best experience, triggers should be edited via the UI. However, Frigate will ensure triggers edited in the config will be synced with triggers created and edited in the UI. + +### Notes + +- Triggers rely on the same Jina AI CLIP models (V1 or V2) used for semantic search. Ensure `semantic_search` is enabled and properly configured. +- Reindexing embeddings (via the UI or `reindex: True`) does not affect trigger configurations but may update the embeddings used for matching. +- For optimal performance, use a system with sufficient RAM (8GB minimum, 16GB recommended) and a GPU for `large` model configurations, as described in the Semantic Search requirements. diff --git a/docs/docs/configuration/zones.md b/docs/docs/configuration/zones.md index d2a1083e6..025384a8b 100644 --- a/docs/docs/configuration/zones.md +++ b/docs/docs/configuration/zones.md @@ -88,7 +88,9 @@ Sometimes objects are expected to be passing through a zone, but an object loite :::note -When using loitering zones, a review item will remain active until the object leaves. Loitering zones are only meant to be used in areas where loitering is not expected behavior. +When using loitering zones, a review item will behave in the following way: +- When a person is in a loitering zone, the review item will remain active until the person leaves the loitering zone, regardless of if they are stationary. +- When any other object is in a loitering zone, the review item will remain active until the loitering time is met. Then if the object is stationary the review item will end. 
:::

diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md
index 8a9454e2c..dbc2c7d87 100644
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@@ -61,19 +61,26 @@ Frigate supports multiple different detectors that work on different types of ha
 **AMD**

 - [ROCm](#rocm---amd-gpu): ROCm can run on AMD Discrete GPUs to provide efficient object detection
-  - [Supports limited model architectures](../../configuration/object_detectors#supported-models-1)
+  - [Supports limited model architectures](../../configuration/object_detectors#rocm-supported-models)
   - Runs best on discrete AMD GPUs

+**Apple Silicon**
+
+- [Apple Silicon](#apple-silicon): Apple Silicon can be used on all M1 and newer devices to provide efficient and fast object detection
+  - [Supports primarily ssdlite and mobilenet model architectures](../../configuration/object_detectors#apple-silicon-supported-models)
+  - Runs well with any size models including large
+  - Runs via a ZMQ proxy, which adds some latency; only recommended for local connections
+
 **Intel**

 - [OpenVino](#openvino---intel): OpenVino can run on Intel Arc GPUs, Intel integrated GPUs, and Intel CPUs to provide efficient object detection.
-  - [Supports majority of model architectures](../../configuration/object_detectors#supported-models)
+  - [Supports majority of model architectures](../../configuration/object_detectors#openvino-supported-models)
   - Runs best with tiny, small, or medium models

 **Nvidia**

 - [TensortRT](#tensorrt---nvidia-gpu): TensorRT can run on Nvidia GPUs and Jetson devices.
-  - [Supports majority of model architectures via ONNX](../../configuration/object_detectors#supported-models-2)
+  - [Supports majority of model architectures via ONNX](../../configuration/object_detectors#onnx-supported-models)
   - Runs well with any size models including large

 **Rockchip**

@@ -173,14 +180,28 @@ Inference speeds will vary greatly depending on the GPU and the model used.
 | RTX A4000 |  | 320: ~ 15 ms |  |
 | Tesla P40 |  | 320: ~ 105 ms |  |

+### Apple Silicon
+
+With the [Apple Silicon](../configuration/object_detectors.md#apple-silicon-detector) detector Frigate can take advantage of the NPU in M1 and newer Apple Silicon.
+
+:::warning
+
+Apple Silicon cannot run within a container, so a ZMQ proxy is utilized to communicate with [the Apple Silicon Frigate detector](https://github.com/frigate-nvr/apple-silicon-detector) which runs on the host. This should add minimal latency when run on the same device.
+
+:::
+
+| Name   | YOLOv9 Inference Time   |
+| ------ | ----------------------- |
+| M3 Pro | t-320: 6 ms s-320: 8 ms |
+| M1     | s-320: 9 ms             |
+
 ### ROCm - AMD GPU

-With the [rocm](../configuration/object_detectors.md#amdrocm-gpu-detector) detector Frigate can take advantage of many discrete AMD GPUs.
+With the [ROCm](../configuration/object_detectors.md#amdrocm-gpu-detector) detector Frigate can take advantage of many discrete AMD GPUs.
| Name      | YOLOv9 Inference Time | YOLO-NAS Inference Time   |
| --------- | --------------------- | ------------------------- |
| AMD 780M  | ~ 14 ms               | 320: ~ 25 ms 640: ~ 50 ms |
-| AMD 8700G |                       | 320: ~ 20 ms 640: ~ 40 ms |

 ## Community Supported Detectors

diff --git a/docs/docs/frigate/planning_setup.md b/docs/docs/frigate/planning_setup.md
new file mode 100644
index 000000000..cddd50265
--- /dev/null
+++ b/docs/docs/frigate/planning_setup.md
@@ -0,0 +1,74 @@
+---
+id: planning_setup
+title: Planning a New Installation
+---
+
+Choosing the right hardware for your Frigate NVR setup is important for optimal performance and a smooth experience. This guide will walk you through the key considerations, focusing on the number of cameras and the hardware required for efficient object detection.
+
+## Key Considerations
+
+### Number of Cameras and Simultaneous Activity
+
+The most fundamental factor in your hardware decision is the number of cameras you plan to use. However, it's not just about the raw count; it's also about how many of those cameras are likely to see activity and require object detection simultaneously.
+
+When motion is detected in a camera's feed, regions of that frame are sent to your chosen [object detection hardware](/configuration/object_detectors).
+
+- **Low Simultaneous Activity (1-6 cameras with occasional motion)**: If you have a few cameras in areas with infrequent activity (e.g., a seldom-used backyard, a quiet interior), the demand on your object detection hardware will be lower. A single, entry-level AI accelerator will suffice.
+- **Moderate Simultaneous Activity (6-12 cameras with some overlapping motion)**: For setups with more cameras, especially in areas like a busy street or a property with multiple access points, it's more likely that several cameras will capture activity at the same time. This increases the load on your object detection hardware, requiring more processing power.
+- **High Simultaneous Activity (12+ cameras or highly active zones)**: Large installations or scenarios where many cameras frequently capture activity (e.g., a busy street with overview, identification, and dedicated LPR cameras) will necessitate robust object detection capabilities. You'll likely need multiple entry-level AI accelerators or a more powerful single unit such as a discrete GPU.
+- **Commercial Installations (40+ cameras)**: Commercial installations or scenarios where a substantial number of cameras capture activity (e.g., a commercial property, an active public space) will necessitate robust object detection capabilities. You'll likely need a modern discrete GPU.
+
+### Video Decoding
+
+Modern CPUs with integrated GPUs (Intel Quick Sync, AMD VCN) or dedicated GPUs can significantly offload video decoding from the main CPU, freeing up resources. This is highly recommended, especially for multiple cameras.
+
+:::tip
+
+For commercial installations, it is important to verify the number of supported concurrent streams on your GPU; many consumer GPUs max out at ~20 concurrent camera streams.
+
+:::
+
+## Hardware Considerations
+
+### Object Detection
+
+There are many different hardware options for object detection depending on priorities and available hardware. See [the recommended hardware page](./hardware.md#detectors) for more specifics on what hardware is recommended for object detection.
+
+### Storage
+
+Storage is an important consideration when planning a new installation.
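As a rough starting point, continuous recording disk usage is approximately `cameras × bitrate × retention time`. The sketch below illustrates that arithmetic; the 4 Mbps figure is an assumed value for a typical 1080p H.264 stream, not a measured one, so substitute your cameras' actual stream settings.

```python
# Rough continuous-recording storage estimate. The 4 Mbps figure is an
# assumption for a typical 1080p H.264 stream; substitute your own bitrates.

def storage_gb(cameras: int, bitrate_mbps: float, retention_days: int) -> float:
    """Approximate disk usage in GB for continuous recording."""
    seconds = retention_days * 24 * 3600
    total_bits = cameras * bitrate_mbps * 1_000_000 * seconds
    return total_bits / 8 / 1_000_000_000  # bits -> bytes -> GB

# e.g. 8 cameras at ~4 Mbps kept for 14 days:
print(f"{storage_gb(8, 4, 14):,.0f} GB")  # ~4,838 GB, i.e. about 5 TB
```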
To get a more precise estimate of your storage requirements, you can use an IP camera storage calculator. Websites like [IPConfigure Storage Calculator](https://calculator.ipconfigure.com/) can help you determine the necessary disk space based on your camera settings.
+
+#### SSDs (Solid State Drives)
+
+SSDs are an excellent choice for Frigate, offering high speed and responsiveness. The older concern that SSDs would quickly "wear out" from constant video recording is largely no longer valid for modern consumer and enterprise-grade SSDs.
+
+- Longevity: Modern SSDs are designed with advanced wear-leveling algorithms and significantly higher "Terabytes Written" (TBW) ratings than earlier models. For typical home NVR use, a good quality SSD will likely outlast the useful life of your NVR hardware itself.
+- Performance: SSDs excel at handling the numerous small write operations that occur during continuous video recording and can significantly improve the responsiveness of the Frigate UI and clip retrieval.
+- Silence and Efficiency: SSDs produce no noise and consume less power than traditional HDDs.
+
+#### HDDs (Hard Disk Drives)
+
+Traditional Hard Disk Drives (HDDs) remain a great and often more cost-effective option for long-term video storage, especially for larger setups where raw capacity is prioritized.
+
+- Cost-Effectiveness: HDDs offer the best cost per gigabyte, making them ideal for storing many days, weeks, or months of continuous footage.
+- Capacity: HDDs are available in much larger capacities than most consumer SSDs, which is beneficial for extensive video archives.
+- NVR-Rated Drives: If choosing an HDD, consider drives specifically designed for surveillance (NVR) use, such as Western Digital Purple or Seagate SkyHawk. These drives are engineered for 24/7 operation and continuous write workloads, offering improved reliability compared to standard desktop drives.
+
+#### Determining Your Storage Needs
+
+The amount of storage you need will depend on several factors:
+
+- Number of Cameras: More cameras naturally require more space.
+- Resolution and Framerate: Higher resolution (e.g., 4K) and higher framerate (e.g., 30fps) streams consume significantly more storage.
+- Recording Method: Continuous recording uses the most space. Motion-only or object-triggered recording can save space, but may miss some footage.
+- Retention Period: How many days, weeks, or months of footage do you want to keep?
+
+#### Network Storage (NFS/SMB)
+
+While supported, using network-attached storage (NAS) for recordings can introduce latency and network dependency considerations. For optimal performance and reliability, it is generally recommended to have local storage for your Frigate recordings. If using a NAS, ensure your network connection to it is robust and fast (Gigabit Ethernet at minimum) and that the NAS itself can handle the continuous write load.
+
+### RAM (Memory)
+
+- **Basic Minimum: 4GB RAM**: This is generally sufficient for a very basic Frigate setup with a few cameras and a dedicated object detection accelerator, without running any enrichments. Performance might be tight, especially with higher resolution streams or numerous detections.
+- **Minimum for Enrichments: 8GB RAM**: If you plan to utilize Frigate's enrichment features (e.g., facial recognition, license plate recognition, or other AI models that run alongside standard object detection), 8GB of RAM should be considered the minimum.
Enrichments require additional memory to load and process their respective models and data. +- **Recommended: 16GB RAM**: For most users, especially those with many cameras (8+) or who plan to heavily leverage enrichments, 16GB of RAM is highly recommended. This provides ample headroom for smooth operation, reduces the likelihood of swapping to disk (which can impact performance), and allows for future expansion. \ No newline at end of file diff --git a/docs/docs/frigate/updating.md b/docs/docs/frigate/updating.md index 95ba5b67c..12c8eb0a3 100644 --- a/docs/docs/frigate/updating.md +++ b/docs/docs/frigate/updating.md @@ -5,7 +5,7 @@ title: Updating # Updating Frigate -The current stable version of Frigate is **0.15.0**. The release notes and any breaking changes for this version can be found on the [Frigate GitHub releases page](https://github.com/blakeblackshear/frigate/releases/tag/v0.15.0). +The current stable version of Frigate is **0.16.0**. The release notes and any breaking changes for this version can be found on the [Frigate GitHub releases page](https://github.com/blakeblackshear/frigate/releases/tag/v0.16.0). Keeping Frigate up to date ensures you benefit from the latest features, performance improvements, and bug fixes. The update process varies slightly depending on your installation method (Docker, Home Assistant Addon, etc.). Below are instructions for the most common setups. @@ -33,21 +33,21 @@ If you’re running Frigate via Docker (recommended method), follow these steps: 2. **Update and Pull the Latest Image**: - If using Docker Compose: - - Edit your `docker-compose.yml` file to specify the desired version tag (e.g., `0.15.0` instead of `0.14.1`). For example: + - Edit your `docker-compose.yml` file to specify the desired version tag (e.g., `0.16.0` instead of `0.15.2`). For example: ```yaml services: frigate: - image: ghcr.io/blakeblackshear/frigate:0.15.0 + image: ghcr.io/blakeblackshear/frigate:0.16.0 ``` - Then pull the image: ```bash - docker pull ghcr.io/blakeblackshear/frigate:0.15.0 + docker pull ghcr.io/blakeblackshear/frigate:0.16.0 ``` - **Note for `stable` Tag Users**: If your `docker-compose.yml` uses the `stable` tag (e.g., `ghcr.io/blakeblackshear/frigate:stable`), you don’t need to update the tag manually. The `stable` tag always points to the latest stable release after pulling. - If using `docker run`: - - Pull the image with the appropriate tag (e.g., `0.15.0`, `0.15.0-tensorrt`, or `stable`): + - Pull the image with the appropriate tag (e.g., `0.16.0`, `0.16.0-tensorrt`, or `stable`): ```bash - docker pull ghcr.io/blakeblackshear/frigate:0.15.0 + docker pull ghcr.io/blakeblackshear/frigate:0.16.0 ``` 3. **Start the Container**: @@ -105,8 +105,8 @@ If an update causes issues: 1. Stop Frigate. 2. Restore your backed-up config file and database. 3. Revert to the previous image version: - - For Docker: Specify an older tag (e.g., `ghcr.io/blakeblackshear/frigate:0.14.1`) in your `docker run` command. - - For Docker Compose: Edit your `docker-compose.yml`, specify the older version tag (e.g., `ghcr.io/blakeblackshear/frigate:0.14.1`), and re-run `docker compose up -d`. + - For Docker: Specify an older tag (e.g., `ghcr.io/blakeblackshear/frigate:0.15.2`) in your `docker run` command. + - For Docker Compose: Edit your `docker-compose.yml`, specify the older version tag (e.g., `ghcr.io/blakeblackshear/frigate:0.15.2`), and re-run `docker compose up -d`. 
- For Home Assistant: Reinstall the previous addon version manually via the repository if needed and restart the addon.

4. Verify the old version is running again.

diff --git a/docs/docs/integrations/mqtt.md b/docs/docs/integrations/mqtt.md
index afbc78e99..ba1e1302f 100644
--- a/docs/docs/integrations/mqtt.md
+++ b/docs/docs/integrations/mqtt.md
@@ -139,7 +139,7 @@ Message published for updates to tracked object metadata, for example:
   "name": "John",
   "score": 0.95,
   "camera": "front_door_cam",
-  "timestamp": 1607123958.748393,
+  "timestamp": 1607123958.748393
 }
 ```

@@ -153,7 +153,7 @@ Message published for updates to tracked object metadata, for example:
   "plate": "123ABC",
   "score": 0.95,
   "camera": "driveway_cam",
-  "timestamp": 1607123958.748393,
+  "timestamp": 1607123958.748393
 }
 ```

@@ -206,6 +206,20 @@ Message published for each changed review item. The first message is published w
 }
 ```

+### `frigate/triggers`
+
+Message published when a trigger defined in a camera's `semantic_search` configuration fires.
+
+```json
+{
+  "name": "car_trigger",
+  "camera": "driveway",
+  "event_id": "1751565549.853251-b69j73",
+  "type": "thumbnail",
+  "score": 0.85
+}
+```
+
 ### `frigate/stats`

 Same data available at `/api/stats` published at a configurable interval.

@@ -269,6 +283,12 @@ Publishes the rms value for audio detected on this camera.

 **NOTE:** Requires audio detection to be enabled

+### `frigate/<camera_name>/audio/transcription`
+
+Publishes transcribed text for audio detected on this camera.
+
+**NOTE:** Requires audio detection and transcription to be enabled
+
 ### `frigate/<camera_name>/enabled/set`

 Topic to turn Frigate's processing of a camera on and off. Expected values are `ON` and `OFF`.

@@ -391,6 +411,22 @@ Topic to turn review detections for a camera on or off. Expected values are `ON`

 Topic with current state of review detections for a camera. Published values are `ON` and `OFF`.

+### `frigate/<camera_name>/object_descriptions/set`
+
+Topic to turn generative AI object descriptions for a camera on or off. Expected values are `ON` and `OFF`.
+
+### `frigate/<camera_name>/object_descriptions/state`
+
+Topic with current state of generative AI object descriptions for a camera. Published values are `ON` and `OFF`.
+
+### `frigate/<camera_name>/review_descriptions/set`
+
+Topic to turn generative AI review descriptions for a camera on or off. Expected values are `ON` and `OFF`.
+
+### `frigate/<camera_name>/review_descriptions/state`
+
+Topic with current state of generative AI review descriptions for a camera. Published values are `ON` and `OFF`.
+
 ### `frigate/<camera_name>/birdseye/set`

 Topic to turn Birdseye for a camera on and off. Expected values are `ON` and `OFF`.
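Putting a few of these topics together, here is a minimal sketch of a client that listens for trigger events and toggles Birdseye. It assumes paho-mqtt 2.x, a broker on `localhost`, and a camera named `driveway`; adjust for your setup.

```python
# Minimal sketch using paho-mqtt 2.x: react to semantic search triggers and
# toggle Birdseye. Broker address and camera name are assumptions.
import json

import paho.mqtt.client as mqtt


def on_message(client, userdata, msg):
    payload = json.loads(msg.payload)
    print(f"Trigger {payload['name']} fired on {payload['camera']} (score {payload['score']:.2f})")


client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2)
client.on_message = on_message
client.connect("localhost", 1883)

client.subscribe("frigate/triggers")                   # trigger events documented above
client.publish("frigate/driveway/birdseye/set", "ON")  # `driveway` is an example camera
client.loop_forever()
```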
Birdseye mode diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 545fc9731..06e7b51d6 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -7,6 +7,7 @@ const sidebars: SidebarsConfig = { Frigate: [ 'frigate/index', 'frigate/hardware', + 'frigate/planning_setup', 'frigate/installation', 'frigate/updating', 'frigate/camera_setup', @@ -36,10 +37,23 @@ const sidebars: SidebarsConfig = { ], Enrichments: [ "configuration/semantic_search", - "configuration/genai", "configuration/face_recognition", "configuration/license_plate_recognition", "configuration/bird_classification", + { + type: "category", + label: "Generative AI", + link: { + type: "generated-index", + title: "Generative AI", + description: "Generative AI Features", + }, + items: [ + "configuration/genai/genai_config", + "configuration/genai/genai_review", + "configuration/genai/genai_objects", + ], + }, ], Cameras: [ "configuration/cameras", diff --git a/frigate/__main__.py b/frigate/__main__.py index 4143f7ae6..f3181e494 100644 --- a/frigate/__main__.py +++ b/frigate/__main__.py @@ -1,5 +1,6 @@ import argparse import faulthandler +import multiprocessing as mp import signal import sys import threading @@ -15,12 +16,17 @@ from frigate.util.config import find_config_file def main() -> None: + manager = mp.Manager() faulthandler.enable() # Setup the logging thread - setup_logging() + setup_logging(manager) threading.current_thread().name = "frigate" + stop_event = mp.Event() + + # send stop event on SIGINT + signal.signal(signal.SIGINT, lambda sig, frame: stop_event.set()) # Make sure we exit cleanly on SIGTERM. signal.signal(signal.SIGTERM, lambda sig, frame: sys.exit()) @@ -93,7 +99,14 @@ def main() -> None: print("*************************************************************") print("*** End Config Validation Errors ***") print("*************************************************************") - sys.exit(1) + + # attempt to start Frigate in recovery mode + try: + config = FrigateConfig.load(install=True, safe_load=True) + print("Starting Frigate in safe mode.") + except ValidationError: + print("Unable to start Frigate in safe mode.") + sys.exit(1) if args.validate_config: print("*************************************************************") print("*** Your config file is valid. ***") @@ -101,8 +114,23 @@ def main() -> None: sys.exit(0) # Run the main application. 
- FrigateApp(config).start() + FrigateApp(config, manager, stop_event).start() if __name__ == "__main__": + mp.set_forkserver_preload( + [ + # Standard library and core dependencies + "sqlite3", + # Third-party libraries commonly used in Frigate + "numpy", + "cv2", + "peewee", + "zmq", + "ruamel.yaml", + # Frigate core modules + "frigate.camera.maintainer", + ] + ) + mp.set_start_method("forkserver", force=True) main() diff --git a/frigate/api/app.py b/frigate/api/app.py index 5860377e7..d9e573d29 100644 --- a/frigate/api/app.py +++ b/frigate/api/app.py @@ -6,6 +6,7 @@ import json import logging import os import traceback +import urllib from datetime import datetime, timedelta from functools import reduce from io import StringIO @@ -20,7 +21,7 @@ from fastapi.encoders import jsonable_encoder from fastapi.params import Depends from fastapi.responses import JSONResponse, PlainTextResponse, StreamingResponse from markupsafe import escape -from peewee import operator +from peewee import SQL, operator from pydantic import ValidationError from frigate.api.auth import require_role @@ -28,12 +29,18 @@ from frigate.api.defs.query.app_query_parameters import AppTimelineHourlyQueryPa from frigate.api.defs.request.app_body import AppConfigSetBody from frigate.api.defs.tags import Tags from frigate.config import FrigateConfig +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateTopic, +) from frigate.models import Event, Timeline from frigate.stats.prometheus import get_metrics, update_metrics from frigate.util.builtin import ( clean_camera_user_pass, + flatten_config_data, get_tz_modifiers, - update_yaml_from_url, + process_config_query_string, + update_yaml_file_bulk, ) from frigate.util.config import find_config_file from frigate.util.services import ( @@ -354,14 +361,37 @@ def config_set(request: Request, body: AppConfigSetBody): with open(config_file, "r") as f: old_raw_config = f.read() - f.close() try: - update_yaml_from_url(config_file, str(request.url)) + updates = {} + + # process query string parameters (takes precedence over body.config_data) + parsed_url = urllib.parse.urlparse(str(request.url)) + query_string = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True) + + # Filter out empty keys but keep blank values for non-empty keys + query_string = {k: v for k, v in query_string.items() if k} + + if query_string: + updates = process_config_query_string(query_string) + elif body.config_data: + updates = flatten_config_data(body.config_data) + + if not updates: + return JSONResponse( + content=( + {"success": False, "message": "No configuration data provided"} + ), + status_code=400, + ) + + # apply all updates in a single operation + update_yaml_file_bulk(config_file, updates) + + # validate the updated config with open(config_file, "r") as f: new_raw_config = f.read() - f.close() - # Validate the config schema + try: config = FrigateConfig.parse(new_raw_config) except Exception: @@ -385,8 +415,25 @@ def config_set(request: Request, body: AppConfigSetBody): status_code=500, ) - if body.requires_restart == 0: + if body.requires_restart == 0 or body.update_topic: + old_config: FrigateConfig = request.app.frigate_config request.app.frigate_config = config + + if body.update_topic and body.update_topic.startswith("config/cameras/"): + _, _, camera, field = body.update_topic.split("/") + + if field == "add": + settings = config.cameras[camera] + elif field == "remove": + settings = old_config.cameras[camera] + else: + settings = 
config.get_nested_object(body.update_topic) + + request.app.config_publisher.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum[field], camera), + settings, + ) + return JSONResponse( content=( { @@ -685,7 +732,14 @@ def plusModels(request: Request, filterByCurrentModelDetector: bool = False): @router.get("/recognized_license_plates") def get_recognized_license_plates(split_joined: Optional[int] = None): try: - events = Event.select(Event.data).distinct() + query = ( + Event.select( + SQL("json_extract(data, '$.recognized_license_plate') AS plate") + ) + .where(SQL("json_extract(data, '$.recognized_license_plate') IS NOT NULL")) + .distinct() + ) + recognized_license_plates = [row[0] for row in query.tuples()] except Exception: return JSONResponse( content=( @@ -694,14 +748,6 @@ def get_recognized_license_plates(split_joined: Optional[int] = None): status_code=404, ) - recognized_license_plates = [] - for e in events: - if e.data is not None and "recognized_license_plate" in e.data: - recognized_license_plates.append(e.data["recognized_license_plate"]) - - while None in recognized_license_plates: - recognized_license_plates.remove(None) - if split_joined: original_recognized_license_plates = recognized_license_plates.copy() for recognized_license_plate in original_recognized_license_plates: diff --git a/frigate/api/classification.py b/frigate/api/classification.py index e33d81e81..fd6326e2a 100644 --- a/frigate/api/classification.py +++ b/frigate/api/classification.py @@ -14,10 +14,14 @@ from peewee import DoesNotExist from playhouse.shortcuts import model_to_dict from frigate.api.auth import require_role -from frigate.api.defs.request.classification_body import RenameFaceBody +from frigate.api.defs.request.classification_body import ( + AudioTranscriptionBody, + RenameFaceBody, +) from frigate.api.defs.tags import Tags +from frigate.config import FrigateConfig from frigate.config.camera import DetectConfig -from frigate.const import FACE_DIR +from frigate.const import CLIPS_DIR, FACE_DIR from frigate.embeddings import EmbeddingsContext from frigate.models import Event from frigate.util.path import get_event_snapshot @@ -384,3 +388,255 @@ def reindex_embeddings(request: Request): }, status_code=500, ) + + +@router.put("/audio/transcribe") +def transcribe_audio(request: Request, body: AudioTranscriptionBody): + event_id = body.event_id + + try: + event = Event.get(Event.id == event_id) + except DoesNotExist: + message = f"Event {event_id} not found" + logger.error(message) + return JSONResponse( + content=({"success": False, "message": message}), status_code=404 + ) + + if not request.app.frigate_config.cameras[event.camera].audio_transcription.enabled: + message = f"Audio transcription is not enabled for {event.camera}." + logger.error(message) + return JSONResponse( + content=( + { + "success": False, + "message": message, + } + ), + status_code=400, + ) + + context: EmbeddingsContext = request.app.embeddings + response = context.transcribe_audio(model_to_dict(event)) + + if response == "started": + return JSONResponse( + content={ + "success": True, + "message": "Audio transcription has started.", + }, + status_code=202, # 202 Accepted + ) + elif response == "in_progress": + return JSONResponse( + content={ + "success": False, + "message": "Audio transcription for a speech event is currently in progress. 
Try again later.", + }, + status_code=409, # 409 Conflict + ) + else: + return JSONResponse( + content={ + "success": False, + "message": "Failed to transcribe audio.", + }, + status_code=500, + ) + + +# custom classification training + + +@router.get("/classification/{name}/dataset") +def get_classification_dataset(name: str): + dataset_dict: dict[str, list[str]] = {} + + dataset_dir = os.path.join(CLIPS_DIR, sanitize_filename(name), "dataset") + + if not os.path.exists(dataset_dir): + return JSONResponse(status_code=200, content={}) + + for name in os.listdir(dataset_dir): + category_dir = os.path.join(dataset_dir, name) + + if not os.path.isdir(category_dir): + continue + + dataset_dict[name] = [] + + for file in filter( + lambda f: (f.lower().endswith((".webp", ".png", ".jpg", ".jpeg"))), + os.listdir(category_dir), + ): + dataset_dict[name].append(file) + + return JSONResponse(status_code=200, content=dataset_dict) + + +@router.get("/classification/{name}/train") +def get_classification_images(name: str): + train_dir = os.path.join(CLIPS_DIR, sanitize_filename(name), "train") + + if not os.path.exists(train_dir): + return JSONResponse(status_code=200, content=[]) + + return JSONResponse( + status_code=200, + content=list( + filter( + lambda f: (f.lower().endswith((".webp", ".png", ".jpg", ".jpeg"))), + os.listdir(train_dir), + ) + ), + ) + + +@router.post("/classification/{name}/train") +async def train_configured_model(request: Request, name: str): + config: FrigateConfig = request.app.frigate_config + + if name not in config.classification.custom: + return JSONResponse( + content=( + { + "success": False, + "message": f"{name} is not a known classification model.", + } + ), + status_code=404, + ) + + context: EmbeddingsContext = request.app.embeddings + context.start_classification_training(name) + return JSONResponse( + content={"success": True, "message": "Started classification model training."}, + status_code=200, + ) + + +@router.post( + "/classification/{name}/dataset/{category}/delete", + dependencies=[Depends(require_role(["admin"]))], +) +def delete_classification_dataset_images( + request: Request, name: str, category: str, body: dict = None +): + config: FrigateConfig = request.app.frigate_config + + if name not in config.classification.custom: + return JSONResponse( + content=( + { + "success": False, + "message": f"{name} is not a known classification model.", + } + ), + status_code=404, + ) + + json: dict[str, Any] = body or {} + list_of_ids = json.get("ids", "") + folder = os.path.join( + CLIPS_DIR, sanitize_filename(name), "dataset", sanitize_filename(category) + ) + + for id in list_of_ids: + file_path = os.path.join(folder, sanitize_filename(id)) + + if os.path.isfile(file_path): + os.unlink(file_path) + + return JSONResponse( + content=({"success": True, "message": "Successfully deleted faces."}), + status_code=200, + ) + + +@router.post( + "/classification/{name}/dataset/categorize", + dependencies=[Depends(require_role(["admin"]))], +) +def categorize_classification_image(request: Request, name: str, body: dict = None): + config: FrigateConfig = request.app.frigate_config + + if name not in config.classification.custom: + return JSONResponse( + content=( + { + "success": False, + "message": f"{name} is not a known classification model.", + } + ), + status_code=404, + ) + + json: dict[str, Any] = body or {} + category = sanitize_filename(json.get("category", "")) + training_file_name = sanitize_filename(json.get("training_file", "")) + training_file = 
os.path.join( + CLIPS_DIR, sanitize_filename(name), "train", training_file_name + ) + + if training_file_name and not os.path.isfile(training_file): + return JSONResponse( + content=( + { + "success": False, + "message": f"Invalid filename or no file exists: {training_file_name}", + } + ), + status_code=404, + ) + + new_name = f"{category}-{datetime.datetime.now().timestamp()}.png" + new_file_folder = os.path.join( + CLIPS_DIR, sanitize_filename(name), "dataset", category + ) + + if not os.path.exists(new_file_folder): + os.mkdir(new_file_folder) + + # use opencv because webp images can not be used to train + img = cv2.imread(training_file) + cv2.imwrite(os.path.join(new_file_folder, new_name), img) + os.unlink(training_file) + + return JSONResponse( + content=({"success": True, "message": "Successfully deleted faces."}), + status_code=200, + ) + + +@router.post( + "/classification/{name}/train/delete", + dependencies=[Depends(require_role(["admin"]))], +) +def delete_classification_train_images(request: Request, name: str, body: dict = None): + config: FrigateConfig = request.app.frigate_config + + if name not in config.classification.custom: + return JSONResponse( + content=( + { + "success": False, + "message": f"{name} is not a known classification model.", + } + ), + status_code=404, + ) + + json: dict[str, Any] = body or {} + list_of_ids = json.get("ids", "") + folder = os.path.join(CLIPS_DIR, sanitize_filename(name), "train") + + for id in list_of_ids: + file_path = os.path.join(folder, sanitize_filename(id)) + + if os.path.isfile(file_path): + os.unlink(file_path) + + return JSONResponse( + content=({"success": True, "message": "Successfully deleted faces."}), + status_code=200, + ) diff --git a/frigate/api/defs/query/media_query_parameters.py b/frigate/api/defs/query/media_query_parameters.py index 4750d3277..fd53af49a 100644 --- a/frigate/api/defs/query/media_query_parameters.py +++ b/frigate/api/defs/query/media_query_parameters.py @@ -1,7 +1,8 @@ from enum import Enum -from typing import Optional +from typing import Optional, Union from pydantic import BaseModel +from pydantic.json_schema import SkipJsonSchema class Extension(str, Enum): @@ -17,6 +18,7 @@ class MediaLatestFrameQueryParams(BaseModel): zones: Optional[int] = None mask: Optional[int] = None motion: Optional[int] = None + paths: Optional[int] = None regions: Optional[int] = None quality: Optional[int] = 70 height: Optional[int] = None @@ -46,3 +48,10 @@ class MediaMjpegFeedQueryParams(BaseModel): class MediaRecordingsSummaryQueryParams(BaseModel): timezone: str = "utc" cameras: Optional[str] = "all" + + +class MediaRecordingsAvailabilityQueryParams(BaseModel): + cameras: str = "all" + before: Union[float, SkipJsonSchema[None]] = None + after: Union[float, SkipJsonSchema[None]] = None + scale: int = 30 diff --git a/frigate/api/defs/query/regenerate_query_parameters.py b/frigate/api/defs/query/regenerate_query_parameters.py index bcce47b1b..af50ada2c 100644 --- a/frigate/api/defs/query/regenerate_query_parameters.py +++ b/frigate/api/defs/query/regenerate_query_parameters.py @@ -1,9 +1,13 @@ from typing import Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field from frigate.events.types import RegenerateDescriptionEnum class RegenerateQueryParameters(BaseModel): source: Optional[RegenerateDescriptionEnum] = RegenerateDescriptionEnum.thumbnails + force: Optional[bool] = Field( + default=False, + description="Force (re)generating the description even if GenAI is disabled for this camera.", + 
) diff --git a/frigate/api/defs/request/app_body.py b/frigate/api/defs/request/app_body.py index 1fc05db2f..7f8ca40ec 100644 --- a/frigate/api/defs/request/app_body.py +++ b/frigate/api/defs/request/app_body.py @@ -1,10 +1,12 @@ -from typing import Optional +from typing import Any, Dict, Optional from pydantic import BaseModel class AppConfigSetBody(BaseModel): requires_restart: int = 1 + update_topic: str | None = None + config_data: Optional[Dict[str, Any]] = None class AppPutPasswordBody(BaseModel): diff --git a/frigate/api/defs/request/classification_body.py b/frigate/api/defs/request/classification_body.py index c4a32c332..31c5688bf 100644 --- a/frigate/api/defs/request/classification_body.py +++ b/frigate/api/defs/request/classification_body.py @@ -3,3 +3,7 @@ from pydantic import BaseModel class RenameFaceBody(BaseModel): new_name: str + + +class AudioTranscriptionBody(BaseModel): + event_id: str diff --git a/frigate/api/defs/request/events_body.py b/frigate/api/defs/request/events_body.py index 0883d066f..dd18ff8f7 100644 --- a/frigate/api/defs/request/events_body.py +++ b/frigate/api/defs/request/events_body.py @@ -2,6 +2,8 @@ from typing import List, Optional, Union from pydantic import BaseModel, Field +from frigate.config.classification import TriggerType + class EventsSubLabelBody(BaseModel): subLabel: str = Field(title="Sub label", max_length=100) @@ -45,3 +47,9 @@ class EventsDeleteBody(BaseModel): class SubmitPlusBody(BaseModel): include_annotation: int = Field(default=1) + + +class TriggerEmbeddingBody(BaseModel): + type: TriggerType + data: str + threshold: float = Field(default=0.5, ge=0.0, le=1.0) diff --git a/frigate/api/event.py b/frigate/api/event.py index 4287e829a..9b65f9826 100644 --- a/frigate/api/event.py +++ b/frigate/api/event.py @@ -1,5 +1,6 @@ """Event apis.""" +import base64 import datetime import logging import os @@ -10,9 +11,11 @@ from pathlib import Path from urllib.parse import unquote import cv2 +import numpy as np from fastapi import APIRouter, Request from fastapi.params import Depends from fastapi.responses import JSONResponse +from pathvalidate import sanitize_filename from peewee import JOIN, DoesNotExist, fn, operator from playhouse.shortcuts import model_to_dict @@ -34,6 +37,7 @@ from frigate.api.defs.request.events_body import ( EventsLPRBody, EventsSubLabelBody, SubmitPlusBody, + TriggerEmbeddingBody, ) from frigate.api.defs.response.event_response import ( EventCreateResponse, @@ -44,11 +48,12 @@ from frigate.api.defs.response.event_response import ( from frigate.api.defs.response.generic_response import GenericResponse from frigate.api.defs.tags import Tags from frigate.comms.event_metadata_updater import EventMetadataTypeEnum -from frigate.const import CLIPS_DIR +from frigate.const import CLIPS_DIR, TRIGGER_DIR from frigate.embeddings import EmbeddingsContext -from frigate.models import Event, ReviewSegment, Timeline +from frigate.models import Event, ReviewSegment, Timeline, Trigger from frigate.track.object_processing import TrackedObject from frigate.util.builtin import get_tz_modifiers +from frigate.util.path import get_event_thumbnail_bytes logger = logging.getLogger(__name__) @@ -724,15 +729,24 @@ def events_search(request: Request, params: EventsSearchQueryParams = Depends()) if (sort is None or sort == "relevance") and search_results: processed_events.sort(key=lambda x: x.get("search_distance", float("inf"))) - elif min_score is not None and max_score is not None and sort == "score_asc": + elif sort == "score_asc": 
processed_events.sort(key=lambda x: x["data"]["score"]) - elif min_score is not None and max_score is not None and sort == "score_desc": + elif sort == "score_desc": processed_events.sort(key=lambda x: x["data"]["score"], reverse=True) - elif min_speed is not None and max_speed is not None and sort == "speed_asc": - processed_events.sort(key=lambda x: x["data"]["average_estimated_speed"]) - elif min_speed is not None and max_speed is not None and sort == "speed_desc": + elif sort == "speed_asc": processed_events.sort( - key=lambda x: x["data"]["average_estimated_speed"], reverse=True + key=lambda x: ( + x["data"].get("average_estimated_speed") is None, + x["data"].get("average_estimated_speed"), + ) + ) + elif sort == "speed_desc": + processed_events.sort( + key=lambda x: ( + x["data"].get("average_estimated_speed") is None, + x["data"].get("average_estimated_speed", float("-inf")), + ), + reverse=True, ) elif sort == "date_asc": processed_events.sort(key=lambda x: x["start_time"]) @@ -1090,7 +1104,7 @@ def set_sub_label( new_score = None request.app.event_metadata_updater.publish( - EventMetadataTypeEnum.sub_label, (event_id, new_sub_label, new_score) + (event_id, new_sub_label, new_score), EventMetadataTypeEnum.sub_label.value ) return JSONResponse( @@ -1144,7 +1158,8 @@ def set_plate( new_score = None request.app.event_metadata_updater.publish( - EventMetadataTypeEnum.recognized_license_plate, (event_id, new_plate, new_score) + (event_id, "recognized_license_plate", new_plate, new_score), + EventMetadataTypeEnum.attribute.value, ) return JSONResponse( @@ -1225,9 +1240,10 @@ def regenerate_description( camera_config = request.app.frigate_config.cameras[event.camera] - if camera_config.genai.enabled: + if camera_config.objects.genai.enabled or params.force: request.app.event_metadata_updater.publish( - EventMetadataTypeEnum.regenerate_description, (event.id, params.source) + (event.id, params.source, params.force), + EventMetadataTypeEnum.regenerate_description.value, ) return JSONResponse( @@ -1254,6 +1270,38 @@ def regenerate_description( ) +@router.post( + "/description/generate", + response_model=GenericResponse, + # dependencies=[Depends(require_role(["admin"]))], +) +def generate_description_embedding( + request: Request, + body: EventsDescriptionBody, +): + new_description = body.description + + # If semantic search is enabled, update the index + if request.app.frigate_config.semantic_search.enabled: + context: EmbeddingsContext = request.app.embeddings + if len(new_description) > 0: + result = context.generate_description_embedding( + new_description, + ) + + return JSONResponse( + content=( + { + "success": True, + "message": f"Embedding for description is {result}" + if result + else "Failed to generate embedding", + } + ), + status_code=200, + ) + + def delete_single_event(event_id: str, request: Request) -> dict: try: event = Event.get(Event.id == event_id) @@ -1352,7 +1400,6 @@ def create_event( event_id = f"{now}-{rand_id}" request.app.event_metadata_updater.publish( - EventMetadataTypeEnum.manual_event_create, ( now, camera_name, @@ -1365,6 +1412,7 @@ def create_event( body.source_type, body.draw, ), + EventMetadataTypeEnum.manual_event_create.value, ) return JSONResponse( @@ -1388,7 +1436,7 @@ def end_event(request: Request, event_id: str, body: EventsEndBody): try: end_time = body.end_time or datetime.datetime.now().timestamp() request.app.event_metadata_updater.publish( - EventMetadataTypeEnum.manual_event_end, (event_id, end_time) + (event_id, end_time), 
EventMetadataTypeEnum.manual_event_end.value ) except Exception: return JSONResponse( @@ -1402,3 +1450,423 @@ def end_event(request: Request, event_id: str, body: EventsEndBody): content=({"success": True, "message": "Event successfully ended."}), status_code=200, ) + + +@router.post( + "/trigger/embedding", + response_model=dict, + dependencies=[Depends(require_role(["admin"]))], +) +def create_trigger_embedding( + request: Request, + body: TriggerEmbeddingBody, + camera: str, + name: str, +): + try: + if not request.app.frigate_config.semantic_search.enabled: + return JSONResponse( + content={ + "success": False, + "message": "Semantic search is not enabled", + }, + status_code=400, + ) + + # Check if trigger already exists + if ( + Trigger.select() + .where(Trigger.camera == camera, Trigger.name == name) + .exists() + ): + return JSONResponse( + content={ + "success": False, + "message": f"Trigger {camera}:{name} already exists", + }, + status_code=400, + ) + + context: EmbeddingsContext = request.app.embeddings + # Generate embedding based on type + embedding = None + if body.type == "description": + embedding = context.generate_description_embedding(body.data) + elif body.type == "thumbnail": + try: + event: Event = Event.get(Event.id == body.data) + except DoesNotExist: + # TODO: check triggers directory for image + return JSONResponse( + content={ + "success": False, + "message": f"Failed to fetch event for {body.type} trigger", + }, + status_code=400, + ) + + # Skip the event if not an object + if event.data.get("type") != "object": + return + + if thumbnail := get_event_thumbnail_bytes(event): + cursor = context.db.execute_sql( + """ + SELECT thumbnail_embedding FROM vec_thumbnails WHERE id = ? + """, + [body.data], + ) + + row = cursor.fetchone() if cursor else None + + if row: + query_embedding = row[0] + embedding = np.frombuffer(query_embedding, dtype=np.float32) + else: + # Extract valid thumbnail + thumbnail = get_event_thumbnail_bytes(event) + + if thumbnail is None: + return JSONResponse( + content={ + "success": False, + "message": f"Failed to get thumbnail for {body.data} for {body.type} trigger", + }, + status_code=400, + ) + + embedding = context.generate_image_embedding( + body.data, (base64.b64encode(thumbnail).decode("ASCII")) + ) + + if embedding is None: + return JSONResponse( + content={ + "success": False, + "message": f"Failed to generate embedding for {body.type} trigger", + }, + status_code=400, + ) + + if body.type == "thumbnail": + # Save image to the triggers directory + try: + os.makedirs( + os.path.join(TRIGGER_DIR, sanitize_filename(camera)), exist_ok=True + ) + with open( + os.path.join( + TRIGGER_DIR, + sanitize_filename(camera), + f"{sanitize_filename(body.data)}.webp", + ), + "wb", + ) as f: + f.write(thumbnail) + logger.debug( + f"Writing thumbnail for trigger with data {body.data} in {camera}." 
+ ) + except Exception as e: + logger.error(e.with_traceback()) + logger.error( + f"Failed to write thumbnail for trigger with data {body.data} in {camera}" + ) + + Trigger.create( + camera=camera, + name=name, + type=body.type, + data=body.data, + threshold=body.threshold, + model=request.app.frigate_config.semantic_search.model, + embedding=np.array(embedding, dtype=np.float32).tobytes(), + triggering_event_id="", + last_triggered=None, + ) + + return JSONResponse( + content={ + "success": True, + "message": f"Trigger created successfully for {camera}:{name}", + }, + status_code=200, + ) + + except Exception as e: + logger.error(e.with_traceback()) + return JSONResponse( + content={ + "success": False, + "message": "Error creating trigger embedding", + }, + status_code=500, + ) + + +@router.put( + "/trigger/embedding/{camera}/{name}", + response_model=dict, + dependencies=[Depends(require_role(["admin"]))], +) +def update_trigger_embedding( + request: Request, + camera: str, + name: str, + body: TriggerEmbeddingBody, +): + try: + if not request.app.frigate_config.semantic_search.enabled: + return JSONResponse( + content={ + "success": False, + "message": "Semantic search is not enabled", + }, + status_code=400, + ) + + context: EmbeddingsContext = request.app.embeddings + # Generate embedding based on type + embedding = None + if body.type == "description": + embedding = context.generate_description_embedding(body.data) + elif body.type == "thumbnail": + webp_file = sanitize_filename(body.data) + ".webp" + webp_path = os.path.join(TRIGGER_DIR, sanitize_filename(camera), webp_file) + + try: + event: Event = Event.get(Event.id == body.data) + # Skip the event if not an object + if event.data.get("type") != "object": + return JSONResponse( + content={ + "success": False, + "message": f"Event {body.data} is not a tracked object for {body.type} trigger", + }, + status_code=400, + ) + # Extract valid thumbnail + thumbnail = get_event_thumbnail_bytes(event) + + with open(webp_path, "wb") as f: + f.write(thumbnail) + except DoesNotExist: + # check triggers directory for image + if not os.path.exists(webp_path): + return JSONResponse( + content={ + "success": False, + "message": f"Failed to fetch event for {body.type} trigger", + }, + status_code=400, + ) + else: + # Load the image from the triggers directory + with open(webp_path, "rb") as f: + thumbnail = f.read() + + embedding = context.generate_image_embedding( + body.data, (base64.b64encode(thumbnail).decode("ASCII")) + ) + + if embedding is None: + return JSONResponse( + content={ + "success": False, + "message": f"Failed to generate embedding for {body.type} trigger", + }, + status_code=400, + ) + + # Check if trigger exists for upsert + trigger = Trigger.get_or_none(Trigger.camera == camera, Trigger.name == name) + + if trigger: + # Update existing trigger + if trigger.data != body.data: # Delete old thumbnail only if data changes + try: + os.remove( + os.path.join( + TRIGGER_DIR, + sanitize_filename(camera), + f"{trigger.data}.webp", + ) + ) + logger.debug( + f"Deleted thumbnail for trigger with data {trigger.data} in {camera}." 
+ ) + except Exception as e: + logger.error(e.with_traceback()) + logger.error( + f"Failed to delete thumbnail for trigger with data {trigger.data} in {camera}" + ) + + Trigger.update( + data=body.data, + model=request.app.frigate_config.semantic_search.model, + embedding=np.array(embedding, dtype=np.float32).tobytes(), + threshold=body.threshold, + triggering_event_id="", + last_triggered=None, + ).where(Trigger.camera == camera, Trigger.name == name).execute() + else: + # Create new trigger (for rename case) + Trigger.create( + camera=camera, + name=name, + type=body.type, + data=body.data, + threshold=body.threshold, + model=request.app.frigate_config.semantic_search.model, + embedding=np.array(embedding, dtype=np.float32).tobytes(), + triggering_event_id="", + last_triggered=None, + ) + + if body.type == "thumbnail": + # Save image to the triggers directory + try: + camera_path = os.path.join(TRIGGER_DIR, sanitize_filename(camera)) + os.makedirs(camera_path, exist_ok=True) + with open( + os.path.join(camera_path, f"{sanitize_filename(body.data)}.webp"), + "wb", + ) as f: + f.write(thumbnail) + logger.debug( + f"Writing thumbnail for trigger with data {body.data} in {camera}." + ) + except Exception as e: + logger.error(e.with_traceback()) + logger.error( + f"Failed to write thumbnail for trigger with data {body.data} in {camera}" + ) + + return JSONResponse( + content={ + "success": True, + "message": f"Trigger updated successfully for {camera}:{name}", + }, + status_code=200, + ) + + except Exception as e: + logger.error(e.with_traceback()) + return JSONResponse( + content={ + "success": False, + "message": "Error updating trigger embedding", + }, + status_code=500, + ) + + +@router.delete( + "/trigger/embedding/{camera}/{name}", + response_model=dict, + dependencies=[Depends(require_role(["admin"]))], +) +def delete_trigger_embedding( + request: Request, + camera: str, + name: str, +): + try: + trigger = Trigger.get_or_none(Trigger.camera == camera, Trigger.name == name) + if trigger is None: + return JSONResponse( + content={ + "success": False, + "message": f"Trigger {camera}:{name} not found", + }, + status_code=500, + ) + + deleted = ( + Trigger.delete() + .where(Trigger.camera == camera, Trigger.name == name) + .execute() + ) + if deleted == 0: + return JSONResponse( + content={ + "success": False, + "message": f"Error deleting trigger {camera}:{name}", + }, + status_code=401, + ) + + try: + os.remove( + os.path.join( + TRIGGER_DIR, sanitize_filename(camera), f"{trigger.data}.webp" + ) + ) + logger.debug( + f"Deleted thumbnail for trigger with data {trigger.data} in {camera}." 
+ ) + except Exception as e: + logger.error(e.with_traceback()) + logger.error( + f"Failed to delete thumbnail for trigger with data {trigger.data} in {camera}" + ) + + return JSONResponse( + content={ + "success": True, + "message": f"Trigger deleted successfully for {camera}:{name}", + }, + status_code=200, + ) + + except Exception as e: + logger.error(e.with_traceback()) + return JSONResponse( + content={ + "success": False, + "message": "Error deleting trigger embedding", + }, + status_code=500, + ) + + +@router.get( + "/triggers/status/{camera_name}", + response_model=dict, + dependencies=[Depends(require_role(["admin"]))], +) +def get_triggers_status( + camera_name: str, +): + try: + # Fetch all triggers for the specified camera + triggers = Trigger.select().where(Trigger.camera == camera_name) + + # Prepare the response with trigger status + status = { + trigger.name: { + "last_triggered": trigger.last_triggered.timestamp() + if trigger.last_triggered + else None, + "triggering_event_id": trigger.triggering_event_id + if trigger.triggering_event_id + else None, + } + for trigger in triggers + } + + if not status: + return JSONResponse( + content={ + "success": False, + "message": f"No triggers found for camera {camera_name}", + }, + status_code=404, + ) + + return {"success": True, "triggers": status} + except Exception as ex: + logger.exception(ex) + return JSONResponse( + content=({"success": False, "message": "Error fetching trigger status"}), + status_code=400, + ) diff --git a/frigate/api/fastapi_app.py b/frigate/api/fastapi_app.py index 0657752dc..1265f3af9 100644 --- a/frigate/api/fastapi_app.py +++ b/frigate/api/fastapi_app.py @@ -26,6 +26,7 @@ from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, ) from frigate.config import FrigateConfig +from frigate.config.camera.updater import CameraConfigUpdatePublisher from frigate.embeddings import EmbeddingsContext from frigate.ptz.onvif import OnvifController from frigate.stats.emitter import StatsEmitter @@ -57,6 +58,7 @@ def create_fastapi_app( onvif: OnvifController, stats_emitter: StatsEmitter, event_metadata_updater: EventMetadataPublisher, + config_publisher: CameraConfigUpdatePublisher, ): logger.info("Starting FastAPI app") app = FastAPI( @@ -127,6 +129,7 @@ def create_fastapi_app( app.onvif = onvif app.stats_emitter = stats_emitter app.event_metadata_updater = event_metadata_updater + app.config_publisher = config_publisher app.jwt_token = get_jwt_secret() if frigate_config.auth.enabled else None return app diff --git a/frigate/api/media.py b/frigate/api/media.py index b4db46d38..a1ab6648f 100644 --- a/frigate/api/media.py +++ b/frigate/api/media.py @@ -8,6 +8,7 @@ import os import subprocess as sp import time from datetime import datetime, timedelta, timezone +from functools import reduce from pathlib import Path as FilePath from typing import Any from urllib.parse import unquote @@ -19,7 +20,7 @@ from fastapi import APIRouter, Path, Query, Request, Response from fastapi.params import Depends from fastapi.responses import FileResponse, JSONResponse, StreamingResponse from pathvalidate import sanitize_filename -from peewee import DoesNotExist, fn +from peewee import DoesNotExist, fn, operator from tzlocal import get_localzone_name from frigate.api.defs.query.media_query_parameters import ( @@ -27,6 +28,7 @@ from frigate.api.defs.query.media_query_parameters import ( MediaEventsSnapshotQueryParams, MediaLatestFrameQueryParams, MediaMjpegFeedQueryParams, + MediaRecordingsAvailabilityQueryParams, 
diff --git a/frigate/api/media.py b/frigate/api/media.py
index b4db46d38..a1ab6648f 100644
--- a/frigate/api/media.py
+++ b/frigate/api/media.py
@@ -8,6 +8,7 @@ import os
 import subprocess as sp
 import time
 from datetime import datetime, timedelta, timezone
+from functools import reduce
 from pathlib import Path as FilePath
 from typing import Any
 from urllib.parse import unquote
@@ -19,7 +20,7 @@ from fastapi import APIRouter, Path, Query, Request, Response
 from fastapi.params import Depends
 from fastapi.responses import FileResponse, JSONResponse, StreamingResponse
 from pathvalidate import sanitize_filename
-from peewee import DoesNotExist, fn
+from peewee import DoesNotExist, fn, operator
 from tzlocal import get_localzone_name
 
 from frigate.api.defs.query.media_query_parameters import (
@@ -27,6 +28,7 @@
     MediaEventsSnapshotQueryParams,
     MediaLatestFrameQueryParams,
     MediaMjpegFeedQueryParams,
+    MediaRecordingsAvailabilityQueryParams,
     MediaRecordingsSummaryQueryParams,
 )
 from frigate.api.defs.tags import Tags
@@ -139,6 +141,7 @@ def latest_frame(
         "zones": params.zones,
         "mask": params.mask,
         "motion_boxes": params.motion,
+        "paths": params.paths,
         "regions": params.regions,
     }
     quality = params.quality
@@ -542,6 +545,66 @@ def recordings(
     return JSONResponse(content=list(recordings))
 
 
+@router.get("/recordings/unavailable", response_model=list[dict])
+def no_recordings(params: MediaRecordingsAvailabilityQueryParams = Depends()):
+    """Get time ranges with no recordings."""
+    cameras = params.cameras
+    before = params.before or datetime.now().timestamp()
+    after = params.after or (datetime.now() - timedelta(hours=1)).timestamp()
+    scale = params.scale
+
+    clauses = [(Recordings.start_time > after) & (Recordings.end_time < before)]
+    if cameras != "all":
+        camera_list = cameras.split(",")
+        clauses.append((Recordings.camera << camera_list))
+
+    # Get recording start times
+    data: list[Recordings] = (
+        Recordings.select(Recordings.start_time, Recordings.end_time)
+        .where(reduce(operator.and_, clauses))
+        .order_by(Recordings.start_time.asc())
+        .dicts()
+        .iterator()
+    )
+
+    # Convert recordings to list of (start, end) tuples
+    recordings = [(r["start_time"], r["end_time"]) for r in data]
+
+    # Generate all time segments
+    current = after
+    no_recording_segments = []
+    current_start = None
+
+    while current < before:
+        segment_end = current + scale
+        # Check if segment overlaps with any recording
+        has_recording = any(
+            start <= segment_end and end >= current for start, end in recordings
+        )
+        if not has_recording:
+            if current_start is None:
+                current_start = current  # Start a new gap
+        else:
+            if current_start is not None:
+                # End the current gap and append it
+                no_recording_segments.append(
+                    {"start_time": int(current_start), "end_time": int(current)}
+                )
+                current_start = None
+        current = segment_end
+
+    # Append the last gap if it exists
+    if current_start is not None:
+        no_recording_segments.append(
+            {"start_time": int(current_start), "end_time": int(before)}
+        )
+
+    return JSONResponse(content=no_recording_segments)
+
+
 @router.get(
     "/{camera_name}/start/{start_ts}/end/{end_ts}/clip.mp4",
     description="For iOS devices, use the master.m3u8 HLS link instead of clip.mp4.
Safari does not reliably process progressive mp4 files.", diff --git a/frigate/api/review.py b/frigate/api/review.py index e6d010db7..2ff97eeea 100644 --- a/frigate/api/review.py +++ b/frigate/api/review.py @@ -6,7 +6,7 @@ from functools import reduce from pathlib import Path import pandas as pd -from fastapi import APIRouter +from fastapi import APIRouter, Request from fastapi.params import Depends from fastapi.responses import JSONResponse from peewee import Case, DoesNotExist, IntegrityError, fn, operator @@ -26,6 +26,8 @@ from frigate.api.defs.response.review_response import ( ReviewSummaryResponse, ) from frigate.api.defs.tags import Tags +from frigate.config import FrigateConfig +from frigate.embeddings import EmbeddingsContext from frigate.models import Recordings, ReviewSegment, UserReviewStatus from frigate.review.types import SeverityEnum from frigate.util.builtin import get_tz_modifiers @@ -606,3 +608,35 @@ async def set_not_reviewed( content=({"success": True, "message": f"Set Review {review_id} as not viewed"}), status_code=200, ) + + +@router.post( + "/review/summarize/start/{start_ts}/end/{end_ts}", + description="Use GenAI to summarize review items over a period of time.", +) +def generate_review_summary(request: Request, start_ts: float, end_ts: float): + config: FrigateConfig = request.app.frigate_config + + if not config.genai.provider: + return JSONResponse( + content=( + { + "success": False, + "message": "GenAI must be configured to use this feature.", + } + ), + status_code=400, + ) + + context: EmbeddingsContext = request.app.embeddings + summary = context.generate_review_summary(start_ts, end_ts) + + if summary: + return JSONResponse( + content=({"success": True, "summary": summary}), status_code=200 + ) + else: + return JSONResponse( + content=({"success": False, "message": "Failed to create summary."}), + status_code=500, + ) diff --git a/frigate/app.py b/frigate/app.py index cc596a98a..858247866 100644 --- a/frigate/app.py +++ b/frigate/app.py @@ -5,6 +5,7 @@ import os import secrets import shutil from multiprocessing import Queue +from multiprocessing.managers import DictProxy, SyncManager from multiprocessing.synchronize import Event as MpEvent from pathlib import Path from typing import Optional @@ -14,19 +15,20 @@ import uvicorn from peewee_migrate import Router from playhouse.sqlite_ext import SqliteExtDatabase -import frigate.util as util from frigate.api.auth import hash_password from frigate.api.fastapi_app import create_fastapi_app from frigate.camera import CameraMetrics, PTZMetrics +from frigate.camera.maintainer import CameraMaintainer from frigate.comms.base_communicator import Communicator -from frigate.comms.config_updater import ConfigPublisher from frigate.comms.dispatcher import Dispatcher from frigate.comms.event_metadata_updater import EventMetadataPublisher from frigate.comms.inter_process import InterProcessCommunicator from frigate.comms.mqtt import MqttClient +from frigate.comms.object_detector_signaler import DetectorProxy from frigate.comms.webpush import WebPushClient from frigate.comms.ws import WebSocketClient from frigate.comms.zmq_proxy import ZmqProxy +from frigate.config.camera.updater import CameraConfigUpdatePublisher from frigate.config.config import FrigateConfig from frigate.const import ( CACHE_DIR, @@ -36,12 +38,12 @@ from frigate.const import ( FACE_DIR, MODEL_CACHE_DIR, RECORD_DIR, - SHM_FRAMES_VAR, THUMB_DIR, + TRIGGER_DIR, ) from frigate.data_processing.types import DataProcessorMetrics from frigate.db.sqlitevecq 
import SqliteVecQueueDatabase -from frigate.embeddings import EmbeddingsContext, manage_embeddings +from frigate.embeddings import EmbeddingProcess, EmbeddingsContext from frigate.events.audio import AudioProcessor from frigate.events.cleanup import EventCleanup from frigate.events.maintainer import EventProcessor @@ -55,56 +57,58 @@ from frigate.models import ( Regions, ReviewSegment, Timeline, + Trigger, User, ) from frigate.object_detection.base import ObjectDetectProcess -from frigate.output.output import output_frames +from frigate.output.output import OutputProcess from frigate.ptz.autotrack import PtzAutoTrackerThread from frigate.ptz.onvif import OnvifController from frigate.record.cleanup import RecordingCleanup from frigate.record.export import migrate_exports -from frigate.record.record import manage_recordings -from frigate.review.review import manage_review_segments +from frigate.record.record import RecordProcess +from frigate.review.review import ReviewProcess from frigate.stats.emitter import StatsEmitter from frigate.stats.util import stats_init from frigate.storage import StorageMaintainer from frigate.timeline import TimelineProcessor from frigate.track.object_processing import TrackedObjectProcessor from frigate.util.builtin import empty_and_close_queue -from frigate.util.image import SharedMemoryFrameManager, UntrackedSharedMemory -from frigate.util.object import get_camera_regions_grid +from frigate.util.image import UntrackedSharedMemory from frigate.util.services import set_file_limit from frigate.version import VERSION -from frigate.video import capture_camera, track_camera from frigate.watchdog import FrigateWatchdog logger = logging.getLogger(__name__) class FrigateApp: - def __init__(self, config: FrigateConfig) -> None: + def __init__( + self, config: FrigateConfig, manager: SyncManager, stop_event: MpEvent + ) -> None: + self.metrics_manager = manager self.audio_process: Optional[mp.Process] = None - self.stop_event: MpEvent = mp.Event() + self.stop_event = stop_event self.detection_queue: Queue = mp.Queue() self.detectors: dict[str, ObjectDetectProcess] = {} - self.detection_out_events: dict[str, MpEvent] = {} self.detection_shms: list[mp.shared_memory.SharedMemory] = [] self.log_queue: Queue = mp.Queue() - self.camera_metrics: dict[str, CameraMetrics] = {} + self.camera_metrics: DictProxy = self.metrics_manager.dict() self.embeddings_metrics: DataProcessorMetrics | None = ( - DataProcessorMetrics() + DataProcessorMetrics( + self.metrics_manager, list(config.classification.custom.keys()) + ) if ( config.semantic_search.enabled or config.lpr.enabled or config.face_recognition.enabled + or len(config.classification.custom) > 0 ) else None ) self.ptz_metrics: dict[str, PTZMetrics] = {} self.processes: dict[str, int] = {} self.embeddings: Optional[EmbeddingsContext] = None - self.region_grids: dict[str, list[list[dict[str, int]]]] = {} - self.frame_manager = SharedMemoryFrameManager() self.config = config def ensure_dirs(self) -> None: @@ -121,6 +125,9 @@ class FrigateApp: if self.config.face_recognition.enabled: dirs.append(FACE_DIR) + if self.config.semantic_search.enabled: + dirs.append(TRIGGER_DIR) + for d in dirs: if not os.path.exists(d) and not os.path.islink(d): logger.info(f"Creating directory: {d}") @@ -131,7 +138,7 @@ class FrigateApp: def init_camera_metrics(self) -> None: # create camera_metrics for camera_name in self.config.cameras.keys(): - self.camera_metrics[camera_name] = CameraMetrics() + self.camera_metrics[camera_name] = 
CameraMetrics(self.metrics_manager) self.ptz_metrics[camera_name] = PTZMetrics( autotracker_enabled=self.config.cameras[ camera_name @@ -140,8 +147,16 @@ class FrigateApp: def init_queues(self) -> None: # Queue for cameras to push tracked objects to + # leaving room for 2 extra cameras to be added self.detected_frames_queue: Queue = mp.Queue( - maxsize=sum(camera.enabled for camera in self.config.cameras.values()) * 2 + maxsize=( + sum( + camera.enabled_in_config == True + for camera in self.config.cameras.values() + ) + + 2 + ) + * 2 ) # Queue for timeline events @@ -217,51 +232,24 @@ class FrigateApp: self.processes["go2rtc"] = proc.info["pid"] def init_recording_manager(self) -> None: - recording_process = util.Process( - target=manage_recordings, - name="recording_manager", - args=(self.config,), - ) - recording_process.daemon = True + recording_process = RecordProcess(self.config, self.stop_event) self.recording_process = recording_process recording_process.start() self.processes["recording"] = recording_process.pid or 0 logger.info(f"Recording process started: {recording_process.pid}") def init_review_segment_manager(self) -> None: - review_segment_process = util.Process( - target=manage_review_segments, - name="review_segment_manager", - args=(self.config,), - ) - review_segment_process.daemon = True + review_segment_process = ReviewProcess(self.config, self.stop_event) self.review_segment_process = review_segment_process review_segment_process.start() self.processes["review_segment"] = review_segment_process.pid or 0 logger.info(f"Review process started: {review_segment_process.pid}") def init_embeddings_manager(self) -> None: - genai_cameras = [ - c for c in self.config.cameras.values() if c.enabled and c.genai.enabled - ] - - if ( - not self.config.semantic_search.enabled - and not genai_cameras - and not self.config.lpr.enabled - and not self.config.face_recognition.enabled - ): - return - - embedding_process = util.Process( - target=manage_embeddings, - name="embeddings_manager", - args=( - self.config, - self.embeddings_metrics, - ), + # always start the embeddings process + embedding_process = EmbeddingProcess( + self.config, self.embeddings_metrics, self.stop_event ) - embedding_process.daemon = True self.embedding_process = embedding_process embedding_process.start() self.processes["embeddings"] = embedding_process.pid or 0 @@ -278,7 +266,9 @@ class FrigateApp: "synchronous": "NORMAL", # Safe when using WAL https://www.sqlite.org/pragma.html#pragma_synchronous }, timeout=max( - 60, 10 * len([c for c in self.config.cameras.values() if c.enabled]) + 60, + 10 + * len([c for c in self.config.cameras.values() if c.enabled_in_config]), ), load_vec_extension=self.config.semantic_search.enabled, ) @@ -292,6 +282,7 @@ class FrigateApp: ReviewSegment, Timeline, User, + Trigger, ] self.db.bind(models) @@ -307,24 +298,15 @@ class FrigateApp: migrate_exports(self.config.ffmpeg, list(self.config.cameras.keys())) def init_embeddings_client(self) -> None: - genai_cameras = [ - c for c in self.config.cameras.values() if c.enabled and c.genai.enabled - ] - - if ( - self.config.semantic_search.enabled - or self.config.lpr.enabled - or genai_cameras - or self.config.face_recognition.enabled - ): - # Create a client for other processes to use - self.embeddings = EmbeddingsContext(self.db) + # Create a client for other processes to use + self.embeddings = EmbeddingsContext(self.db) def init_inter_process_communicator(self) -> None: self.inter_process_communicator = InterProcessCommunicator() - 
self.inter_config_updater = ConfigPublisher() + self.inter_config_updater = CameraConfigUpdatePublisher() self.event_metadata_updater = EventMetadataPublisher() self.inter_zmq_proxy = ZmqProxy() + self.detection_proxy = DetectorProxy() def init_onvif(self) -> None: self.onvif_controller = OnvifController(self.config, self.ptz_metrics) @@ -357,8 +339,6 @@ class FrigateApp: def start_detectors(self) -> None: for name in self.config.cameras.keys(): - self.detection_out_events[name] = mp.Event() - try: largest_frame = max( [ @@ -390,8 +370,10 @@ class FrigateApp: self.detectors[name] = ObjectDetectProcess( name, self.detection_queue, - self.detection_out_events, + list(self.config.cameras.keys()), + self.config, detector_config, + self.stop_event, ) def start_ptz_autotracker(self) -> None: @@ -415,79 +397,22 @@ class FrigateApp: self.detected_frames_processor.start() def start_video_output_processor(self) -> None: - output_processor = util.Process( - target=output_frames, - name="output_processor", - args=(self.config,), - ) - output_processor.daemon = True + output_processor = OutputProcess(self.config, self.stop_event) self.output_processor = output_processor output_processor.start() logger.info(f"Output process started: {output_processor.pid}") - def init_historical_regions(self) -> None: - # delete region grids for removed or renamed cameras - cameras = list(self.config.cameras.keys()) - Regions.delete().where(~(Regions.camera << cameras)).execute() - - # create or update region grids for each camera - for camera in self.config.cameras.values(): - assert camera.name is not None - self.region_grids[camera.name] = get_camera_regions_grid( - camera.name, - camera.detect, - max(self.config.model.width, self.config.model.height), - ) - - def start_camera_processors(self) -> None: - for name, config in self.config.cameras.items(): - if not self.config.cameras[name].enabled_in_config: - logger.info(f"Camera processor not started for disabled camera {name}") - continue - - camera_process = util.Process( - target=track_camera, - name=f"camera_processor:{name}", - args=( - name, - config, - self.config.model, - self.config.model.merged_labelmap, - self.detection_queue, - self.detection_out_events[name], - self.detected_frames_queue, - self.camera_metrics[name], - self.ptz_metrics[name], - self.region_grids[name], - ), - daemon=True, - ) - self.camera_metrics[name].process = camera_process - camera_process.start() - logger.info(f"Camera processor started for {name}: {camera_process.pid}") - - def start_camera_capture_processes(self) -> None: - shm_frame_count = self.shm_frame_count() - - for name, config in self.config.cameras.items(): - if not self.config.cameras[name].enabled_in_config: - logger.info(f"Capture process not started for disabled camera {name}") - continue - - # pre-create shms - for i in range(shm_frame_count): - frame_size = config.frame_shape_yuv[0] * config.frame_shape_yuv[1] - self.frame_manager.create(f"{config.name}_frame{i}", frame_size) - - capture_process = util.Process( - target=capture_camera, - name=f"camera_capture:{name}", - args=(name, config, shm_frame_count, self.camera_metrics[name]), - ) - capture_process.daemon = True - self.camera_metrics[name].capture_process = capture_process - capture_process.start() - logger.info(f"Capture process started for {name}: {capture_process.pid}") + def start_camera_processor(self) -> None: + self.camera_maintainer = CameraMaintainer( + self.config, + self.detection_queue, + self.detected_frames_queue, + self.camera_metrics, + 
self.ptz_metrics, + self.stop_event, + self.metrics_manager, + ) + self.camera_maintainer.start() def start_audio_processor(self) -> None: audio_cameras = [ @@ -497,7 +422,9 @@ class FrigateApp: ] if audio_cameras: - self.audio_process = AudioProcessor(audio_cameras, self.camera_metrics) + self.audio_process = AudioProcessor( + self.config, audio_cameras, self.camera_metrics, self.stop_event + ) self.audio_process.start() self.processes["audio_detector"] = self.audio_process.pid or 0 @@ -545,45 +472,6 @@ class FrigateApp: self.frigate_watchdog = FrigateWatchdog(self.detectors, self.stop_event) self.frigate_watchdog.start() - def shm_frame_count(self) -> int: - total_shm = round(shutil.disk_usage("/dev/shm").total / pow(2, 20), 1) - - # required for log files + nginx cache - min_req_shm = 40 + 10 - - if self.config.birdseye.restream: - min_req_shm += 8 - - available_shm = total_shm - min_req_shm - cam_total_frame_size = 0.0 - - for camera in self.config.cameras.values(): - if camera.enabled and camera.detect.width and camera.detect.height: - cam_total_frame_size += round( - (camera.detect.width * camera.detect.height * 1.5 + 270480) - / 1048576, - 1, - ) - - if cam_total_frame_size == 0.0: - return 0 - - shm_frame_count = min( - int(os.environ.get(SHM_FRAMES_VAR, "50")), - int(available_shm / (cam_total_frame_size)), - ) - - logger.debug( - f"Calculated total camera size {available_shm} / {cam_total_frame_size} :: {shm_frame_count} frames for each camera in SHM" - ) - - if shm_frame_count < 20: - logger.warning( - f"The current SHM size of {total_shm}MB is too small, recommend increasing it to at least {round(min_req_shm + cam_total_frame_size * 20)}MB." - ) - - return shm_frame_count - def init_auth(self) -> None: if self.config.auth.enabled: if User.select().count() == 0: @@ -644,19 +532,17 @@ class FrigateApp: self.init_recording_manager() self.init_review_segment_manager() self.init_go2rtc() - self.start_detectors() self.init_embeddings_manager() self.bind_database() self.check_db_data_migrations() self.init_inter_process_communicator() + self.start_detectors() self.init_dispatcher() self.init_embeddings_client() self.start_video_output_processor() self.start_ptz_autotracker() - self.init_historical_regions() self.start_detected_frames_processor() - self.start_camera_processors() - self.start_camera_capture_processes() + self.start_camera_processor() self.start_audio_processor() self.start_storage_maintainer() self.start_stats_emitter() @@ -679,6 +565,7 @@ class FrigateApp: self.onvif_controller, self.stats_emitter, self.event_metadata_updater, + self.inter_config_updater, ), host="127.0.0.1", port=5001, @@ -712,24 +599,6 @@ class FrigateApp: if self.onvif_controller: self.onvif_controller.close() - # ensure the capture processes are done - for camera, metrics in self.camera_metrics.items(): - capture_process = metrics.capture_process - if capture_process is not None: - logger.info(f"Waiting for capture process for {camera} to stop") - capture_process.terminate() - capture_process.join() - - # ensure the camera processors are done - for camera, metrics in self.camera_metrics.items(): - camera_process = metrics.process - if camera_process is not None: - logger.info(f"Waiting for process for {camera} to stop") - camera_process.terminate() - camera_process.join() - logger.info(f"Closing frame queue for {camera}") - empty_and_close_queue(metrics.frame_queue) - # ensure the detectors are done for detector in self.detectors.values(): detector.stop() @@ -773,14 +642,12 @@ class FrigateApp: 
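+        # stop the IPC publishers and ZMQ proxies before the detector SHM is unlinked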
self.inter_config_updater.stop() self.event_metadata_updater.stop() self.inter_zmq_proxy.stop() + self.detection_proxy.stop() - self.frame_manager.cleanup() while len(self.detection_shms) > 0: shm = self.detection_shms.pop() shm.close() shm.unlink() - # exit the mp Manager process _stop_logging() - - os._exit(os.EX_OK) + self.metrics_manager.shutdown() diff --git a/frigate/camera/__init__.py b/frigate/camera/__init__.py index 456751c52..77b1fd424 100644 --- a/frigate/camera/__init__.py +++ b/frigate/camera/__init__.py @@ -1,7 +1,7 @@ import multiprocessing as mp +from multiprocessing.managers import SyncManager from multiprocessing.sharedctypes import Synchronized from multiprocessing.synchronize import Event -from typing import Optional class CameraMetrics: @@ -16,25 +16,25 @@ class CameraMetrics: frame_queue: mp.Queue - process: Optional[mp.Process] - capture_process: Optional[mp.Process] + process_pid: Synchronized + capture_process_pid: Synchronized ffmpeg_pid: Synchronized - def __init__(self): - self.camera_fps = mp.Value("d", 0) - self.detection_fps = mp.Value("d", 0) - self.detection_frame = mp.Value("d", 0) - self.process_fps = mp.Value("d", 0) - self.skipped_fps = mp.Value("d", 0) - self.read_start = mp.Value("d", 0) - self.audio_rms = mp.Value("d", 0) - self.audio_dBFS = mp.Value("d", 0) + def __init__(self, manager: SyncManager): + self.camera_fps = manager.Value("d", 0) + self.detection_fps = manager.Value("d", 0) + self.detection_frame = manager.Value("d", 0) + self.process_fps = manager.Value("d", 0) + self.skipped_fps = manager.Value("d", 0) + self.read_start = manager.Value("d", 0) + self.audio_rms = manager.Value("d", 0) + self.audio_dBFS = manager.Value("d", 0) - self.frame_queue = mp.Queue(maxsize=2) + self.frame_queue = manager.Queue(maxsize=2) - self.process = None - self.capture_process = None - self.ffmpeg_pid = mp.Value("i", 0) + self.process_pid = manager.Value("i", 0) + self.capture_process_pid = manager.Value("i", 0) + self.ffmpeg_pid = manager.Value("i", 0) class PTZMetrics: diff --git a/frigate/camera/activity_manager.py b/frigate/camera/activity_manager.py index 6039a07f6..e10730931 100644 --- a/frigate/camera/activity_manager.py +++ b/frigate/camera/activity_manager.py @@ -3,7 +3,7 @@ from collections import Counter from typing import Any, Callable -from frigate.config.config import FrigateConfig +from frigate.config import CameraConfig, FrigateConfig class CameraActivityManager: @@ -23,26 +23,33 @@ class CameraActivityManager: if not camera_config.enabled_in_config: continue - self.last_camera_activity[camera_config.name] = {} - self.camera_all_object_counts[camera_config.name] = Counter() - self.camera_active_object_counts[camera_config.name] = Counter() + self.__init_camera(camera_config) - for zone, zone_config in camera_config.zones.items(): - if zone not in self.all_zone_labels: - self.zone_all_object_counts[zone] = Counter() - self.zone_active_object_counts[zone] = Counter() - self.all_zone_labels[zone] = set() + def __init_camera(self, camera_config: CameraConfig) -> None: + self.last_camera_activity[camera_config.name] = {} + self.camera_all_object_counts[camera_config.name] = Counter() + self.camera_active_object_counts[camera_config.name] = Counter() - self.all_zone_labels[zone].update( - zone_config.objects - if zone_config.objects - else camera_config.objects.track - ) + for zone, zone_config in camera_config.zones.items(): + if zone not in self.all_zone_labels: + self.zone_all_object_counts[zone] = Counter() + 
self.zone_active_object_counts[zone] = Counter() + self.all_zone_labels[zone] = set() + + self.all_zone_labels[zone].update( + zone_config.objects + if zone_config.objects + else camera_config.objects.track + ) def update_activity(self, new_activity: dict[str, dict[str, Any]]) -> None: all_objects: list[dict[str, Any]] = [] for camera in new_activity.keys(): + # handle cameras that were added dynamically + if camera not in self.camera_all_object_counts: + self.__init_camera(self.config.cameras[camera]) + new_objects = new_activity[camera].get("objects", []) all_objects.extend(new_objects) diff --git a/frigate/camera/maintainer.py b/frigate/camera/maintainer.py new file mode 100644 index 000000000..5bd97136c --- /dev/null +++ b/frigate/camera/maintainer.py @@ -0,0 +1,250 @@ +"""Create and maintain camera processes / management.""" + +import logging +import multiprocessing as mp +import os +import shutil +import threading +from multiprocessing import Queue +from multiprocessing.managers import DictProxy, SyncManager +from multiprocessing.synchronize import Event as MpEvent + +from frigate.camera import CameraMetrics, PTZMetrics +from frigate.config import FrigateConfig +from frigate.config.camera import CameraConfig +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) +from frigate.const import SHM_FRAMES_VAR +from frigate.models import Regions +from frigate.util.builtin import empty_and_close_queue +from frigate.util.image import SharedMemoryFrameManager, UntrackedSharedMemory +from frigate.util.object import get_camera_regions_grid +from frigate.video import CameraCapture, CameraTracker + +logger = logging.getLogger(__name__) + + +class CameraMaintainer(threading.Thread): + def __init__( + self, + config: FrigateConfig, + detection_queue: Queue, + detected_frames_queue: Queue, + camera_metrics: DictProxy, + ptz_metrics: dict[str, PTZMetrics], + stop_event: MpEvent, + metrics_manager: SyncManager, + ): + super().__init__(name="camera_processor") + self.config = config + self.detection_queue = detection_queue + self.detected_frames_queue = detected_frames_queue + self.stop_event = stop_event + self.camera_metrics = camera_metrics + self.ptz_metrics = ptz_metrics + self.frame_manager = SharedMemoryFrameManager() + self.region_grids: dict[str, list[list[dict[str, int]]]] = {} + self.update_subscriber = CameraConfigUpdateSubscriber( + self.config, + {}, + [ + CameraConfigUpdateEnum.add, + CameraConfigUpdateEnum.remove, + ], + ) + self.shm_count = self.__calculate_shm_frame_count() + self.camera_processes: dict[str, mp.Process] = {} + self.capture_processes: dict[str, mp.Process] = {} + self.metrics_manager = metrics_manager + + def __init_historical_regions(self) -> None: + # delete region grids for removed or renamed cameras + cameras = list(self.config.cameras.keys()) + Regions.delete().where(~(Regions.camera << cameras)).execute() + + # create or update region grids for each camera + for camera in self.config.cameras.values(): + assert camera.name is not None + self.region_grids[camera.name] = get_camera_regions_grid( + camera.name, + camera.detect, + max(self.config.model.width, self.config.model.height), + ) + + def __calculate_shm_frame_count(self) -> int: + total_shm = round(shutil.disk_usage("/dev/shm").total / pow(2, 20), 1) + + # required for log files + nginx cache + min_req_shm = 40 + 10 + + if self.config.birdseye.restream: + min_req_shm += 8 + + available_shm = total_shm - min_req_shm + cam_total_frame_size = 0.0 + + for camera 
in self.config.cameras.values(): + if ( + camera.enabled_in_config + and camera.detect.width + and camera.detect.height + ): + cam_total_frame_size += round( + (camera.detect.width * camera.detect.height * 1.5 + 270480) + / 1048576, + 1, + ) + + # leave room for 2 cameras that are added dynamically, if a user wants to add more cameras they may need to increase the SHM size and restart after adding them. + cam_total_frame_size += 2 * round( + (1280 * 720 * 1.5 + 270480) / 1048576, + 1, + ) + + if cam_total_frame_size == 0.0: + return 0 + + shm_frame_count = min( + int(os.environ.get(SHM_FRAMES_VAR, "50")), + int(available_shm / (cam_total_frame_size)), + ) + + logger.debug( + f"Calculated total camera size {available_shm} / {cam_total_frame_size} :: {shm_frame_count} frames for each camera in SHM" + ) + + if shm_frame_count < 20: + logger.warning( + f"The current SHM size of {total_shm}MB is too small, recommend increasing it to at least {round(min_req_shm + cam_total_frame_size * 20)}MB." + ) + + return shm_frame_count + + def __start_camera_processor( + self, name: str, config: CameraConfig, runtime: bool = False + ) -> None: + if not config.enabled_in_config: + logger.info(f"Camera processor not started for disabled camera {name}") + return + + if runtime: + self.camera_metrics[name] = CameraMetrics(self.metrics_manager) + self.ptz_metrics[name] = PTZMetrics(autotracker_enabled=False) + self.region_grids[name] = get_camera_regions_grid( + name, + config.detect, + max(self.config.model.width, self.config.model.height), + ) + + try: + largest_frame = max( + [ + det.model.height * det.model.width * 3 + if det.model is not None + else 320 + for det in self.config.detectors.values() + ] + ) + UntrackedSharedMemory(name=f"out-{name}", create=True, size=20 * 6 * 4) + UntrackedSharedMemory( + name=name, + create=True, + size=largest_frame, + ) + except FileExistsError: + pass + + camera_process = CameraTracker( + config, + self.config.model, + self.config.model.merged_labelmap, + self.detection_queue, + self.detected_frames_queue, + self.camera_metrics[name], + self.ptz_metrics[name], + self.region_grids[name], + self.stop_event, + ) + self.camera_processes[config.name] = camera_process + camera_process.start() + self.camera_metrics[config.name].process_pid.value = camera_process.pid + logger.info(f"Camera processor started for {config.name}: {camera_process.pid}") + + def __start_camera_capture( + self, name: str, config: CameraConfig, runtime: bool = False + ) -> None: + if not config.enabled_in_config: + logger.info(f"Capture process not started for disabled camera {name}") + return + + # pre-create shms + count = 10 if runtime else self.shm_count + for i in range(count): + frame_size = config.frame_shape_yuv[0] * config.frame_shape_yuv[1] + self.frame_manager.create(f"{config.name}_frame{i}", frame_size) + + capture_process = CameraCapture( + config, count, self.camera_metrics[name], self.stop_event + ) + capture_process.daemon = True + self.capture_processes[name] = capture_process + capture_process.start() + self.camera_metrics[name].capture_process_pid.value = capture_process.pid + logger.info(f"Capture process started for {name}: {capture_process.pid}") + + def __stop_camera_capture_process(self, camera: str) -> None: + capture_process = self.capture_processes[camera] + if capture_process is not None: + logger.info(f"Waiting for capture process for {camera} to stop") + capture_process.terminate() + capture_process.join() + + def __stop_camera_process(self, camera: str) -> None: + 
+        camera_process = self.camera_processes[camera]
+        if camera_process is not None:
+            logger.info(f"Waiting for process for {camera} to stop")
+            camera_process.terminate()
+            camera_process.join()
+            logger.info(f"Closing frame queue for {camera}")
+            empty_and_close_queue(self.camera_metrics[camera].frame_queue)
+
+    def run(self):
+        self.__init_historical_regions()
+
+        # start camera processes
+        for camera, config in self.config.cameras.items():
+            self.__start_camera_processor(camera, config)
+            self.__start_camera_capture(camera, config)
+
+        while not self.stop_event.wait(1):
+            updates = self.update_subscriber.check_for_updates()
+
+            for update_type, updated_cameras in updates.items():
+                if update_type == CameraConfigUpdateEnum.add.name:
+                    for camera in updated_cameras:
+                        self.__start_camera_processor(
+                            camera,
+                            self.update_subscriber.camera_configs[camera],
+                            runtime=True,
+                        )
+                        self.__start_camera_capture(
+                            camera,
+                            self.update_subscriber.camera_configs[camera],
+                            runtime=True,
+                        )
+                elif update_type == CameraConfigUpdateEnum.remove.name:
+                    for camera in updated_cameras:
+                        self.__stop_camera_capture_process(camera)
+                        self.__stop_camera_process(camera)
+
+        # ensure the capture processes are done
+        for camera in self.capture_processes.keys():
+            self.__stop_camera_capture_process(camera)
+
+        # ensure the camera processors are done
+        for camera in self.camera_processes.keys():
+            self.__stop_camera_process(camera)
+
+        self.update_subscriber.stop()
+        self.frame_manager.cleanup()
diff --git a/frigate/camera/state.py b/frigate/camera/state.py
index 06564bce2..328c7bd23 100644
--- a/frigate/camera/state.py
+++ b/frigate/camera/state.py
@@ -54,7 +54,7 @@ class CameraState:
         self.ptz_autotracker_thread = ptz_autotracker_thread
         self.prev_enabled = self.camera_config.enabled
 
-    def get_current_frame(self, draw_options: dict[str, Any] = {}):
+    def get_current_frame(self, draw_options: dict[str, Any] = {}) -> np.ndarray:
         with self.current_frame_lock:
             frame_copy = np.copy(self._current_frame)
             frame_time = self.current_frame_time
@@ -228,12 +228,51 @@
                 position=self.camera_config.timestamp_style.position,
             )
 
+        if draw_options.get("paths"):
+            for obj in tracked_objects.values():
+                if obj["frame_time"] == frame_time and obj["path_data"]:
+                    color = self.config.model.colormap.get(
+                        obj["label"], (255, 255, 255)
+                    )
+
+                    path_points = [
+                        (
+                            int(point[0][0] * self.camera_config.detect.width),
+                            int(point[0][1] * self.camera_config.detect.height),
+                        )
+                        for point in obj["path_data"]
+                    ]
+
+                    for point in path_points:
+                        cv2.circle(frame_copy, point, 5, color, -1)
+
+                    for i in range(1, len(path_points)):
+                        cv2.line(
+                            frame_copy,
+                            path_points[i - 1],
+                            path_points[i],
+                            color,
+                            2,
+                        )
+
+                    bottom_center = (
+                        int((obj["box"][0] + obj["box"][2]) / 2),
+                        int(obj["box"][3]),
+                    )
+                    cv2.line(
+                        frame_copy,
+                        path_points[-1],
+                        bottom_center,
+                        color,
+                        2,
+                    )
+
         return frame_copy
 
     def finished(self, obj_id):
         del self.tracked_objects[obj_id]
 
-    def on(self, event_type: str, callback: Callable[[dict], None]):
+    def on(self, event_type: str, callback: Callable):
         self.callbacks[event_type].append(callback)
 
     def update(
diff --git a/frigate/comms/config_updater.py b/frigate/comms/config_updater.py
index 06b870c62..447089a94 100644
--- a/frigate/comms/config_updater.py
+++ b/frigate/comms/config_updater.py
@@ -1,8 +1,9 @@
 """Facilitates communication between processes."""
 
 import multiprocessing as mp
+from _pickle import UnpicklingError
 from multiprocessing.synchronize import Event as MpEvent
-from typing import Any, Optional
+from typing import
Any import zmq @@ -32,7 +33,7 @@ class ConfigPublisher: class ConfigSubscriber: """Simplifies receiving an updated config.""" - def __init__(self, topic: str, exact=False) -> None: + def __init__(self, topic: str, exact: bool = False) -> None: self.topic = topic self.exact = exact self.context = zmq.Context() @@ -40,7 +41,7 @@ class ConfigSubscriber: self.socket.setsockopt_string(zmq.SUBSCRIBE, topic) self.socket.connect(SOCKET_PUB_SUB) - def check_for_update(self) -> Optional[tuple[str, Any]]: + def check_for_update(self) -> tuple[str, Any] | tuple[None, None]: """Returns updated config or None if no update.""" try: topic = self.socket.recv_string(flags=zmq.NOBLOCK) @@ -50,7 +51,7 @@ class ConfigSubscriber: return (topic, obj) else: return (None, None) - except zmq.ZMQError: + except (zmq.ZMQError, UnicodeDecodeError, UnpicklingError): return (None, None) def stop(self) -> None: diff --git a/frigate/comms/detections_updater.py b/frigate/comms/detections_updater.py index 1718d1347..dff61c8a2 100644 --- a/frigate/comms/detections_updater.py +++ b/frigate/comms/detections_updater.py @@ -1,7 +1,7 @@ """Facilitates communication between processes.""" from enum import Enum -from typing import Any, Optional +from typing import Any from .zmq_proxy import Publisher, Subscriber @@ -19,8 +19,7 @@ class DetectionPublisher(Publisher): topic_base = "detection/" - def __init__(self, topic: DetectionTypeEnum) -> None: - topic = topic.value + def __init__(self, topic: str) -> None: super().__init__(topic) @@ -29,16 +28,15 @@ class DetectionSubscriber(Subscriber): topic_base = "detection/" - def __init__(self, topic: DetectionTypeEnum) -> None: - topic = topic.value + def __init__(self, topic: str) -> None: super().__init__(topic) def check_for_update( - self, timeout: float = None - ) -> Optional[tuple[DetectionTypeEnum, Any]]: + self, timeout: float | None = None + ) -> tuple[str, Any] | tuple[None, None] | None: return super().check_for_update(timeout) def _return_object(self, topic: str, payload: Any) -> Any: if payload is None: return (None, None) - return (DetectionTypeEnum[topic[len(self.topic_base) :]], payload) + return (topic[len(self.topic_base) :], payload) diff --git a/frigate/comms/dispatcher.py b/frigate/comms/dispatcher.py index 87891ec88..93956068c 100644 --- a/frigate/comms/dispatcher.py +++ b/frigate/comms/dispatcher.py @@ -3,24 +3,30 @@ import datetime import json import logging -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, cast from frigate.camera import PTZMetrics from frigate.camera.activity_manager import CameraActivityManager from frigate.comms.base_communicator import Communicator -from frigate.comms.config_updater import ConfigPublisher from frigate.comms.webpush import WebPushClient from frigate.config import BirdseyeModeEnum, FrigateConfig +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdatePublisher, + CameraConfigUpdateTopic, +) from frigate.const import ( CLEAR_ONGOING_REVIEW_SEGMENTS, INSERT_MANY_RECORDINGS, INSERT_PREVIEW, NOTIFICATION_TEST, REQUEST_REGION_GRID, + UPDATE_BIRDSEYE_LAYOUT, UPDATE_CAMERA_ACTIVITY, UPDATE_EMBEDDINGS_REINDEX_PROGRESS, UPDATE_EVENT_DESCRIPTION, UPDATE_MODEL_STATE, + UPDATE_REVIEW_DESCRIPTION, UPSERT_REVIEW_SEGMENT, ) from frigate.models import Event, Previews, Recordings, ReviewSegment @@ -38,7 +44,7 @@ class Dispatcher: def __init__( self, config: FrigateConfig, - config_updater: ConfigPublisher, + config_updater: CameraConfigUpdatePublisher, onvif: 
OnvifController, ptz_metrics: dict[str, PTZMetrics], communicators: list[Communicator], @@ -49,11 +55,12 @@ class Dispatcher: self.ptz_metrics = ptz_metrics self.comms = communicators self.camera_activity = CameraActivityManager(config, self.publish) - self.model_state = {} - self.embeddings_reindex = {} - + self.model_state: dict[str, ModelStatusTypesEnum] = {} + self.embeddings_reindex: dict[str, Any] = {} + self.birdseye_layout: dict[str, Any] = {} self._camera_settings_handlers: dict[str, Callable] = { "audio": self._on_audio_command, + "audio_transcription": self._on_audio_transcription_command, "detect": self._on_detect_command, "enabled": self._on_enabled_command, "improve_contrast": self._on_motion_improve_contrast_command, @@ -68,6 +75,8 @@ class Dispatcher: "birdseye_mode": self._on_birdseye_mode_command, "review_alerts": self._on_alerts_command, "review_detections": self._on_detections_command, + "object_descriptions": self._on_object_description_command, + "review_descriptions": self._on_review_description_command, } self._global_settings_handlers: dict[str, Callable] = { "notifications": self._on_global_notification_command, @@ -80,10 +89,12 @@ class Dispatcher: (comm for comm in communicators if isinstance(comm, WebPushClient)), None ) - def _receive(self, topic: str, payload: str) -> Optional[Any]: + def _receive(self, topic: str, payload: Any) -> Optional[Any]: """Handle receiving of payload from communicators.""" - def handle_camera_command(command_type, camera_name, command, payload): + def handle_camera_command( + command_type: str, camera_name: str, command: str, payload: str + ) -> None: try: if command_type == "set": self._camera_settings_handlers[command](camera_name, payload) @@ -92,13 +103,13 @@ class Dispatcher: except KeyError: logger.error(f"Invalid command type or handler: {command_type}") - def handle_restart(): + def handle_restart() -> None: restart_frigate() - def handle_insert_many_recordings(): + def handle_insert_many_recordings() -> None: Recordings.insert_many(payload).execute() - def handle_request_region_grid(): + def handle_request_region_grid() -> Any: camera = payload grid = get_camera_regions_grid( camera, @@ -107,26 +118,26 @@ class Dispatcher: ) return grid - def handle_insert_preview(): + def handle_insert_preview() -> None: Previews.insert(payload).execute() - def handle_upsert_review_segment(): + def handle_upsert_review_segment() -> None: ReviewSegment.insert(payload).on_conflict( conflict_target=[ReviewSegment.id], update=payload, ).execute() - def handle_clear_ongoing_review_segments(): + def handle_clear_ongoing_review_segments() -> None: ReviewSegment.update(end_time=datetime.datetime.now().timestamp()).where( ReviewSegment.end_time.is_null(True) ).execute() - def handle_update_camera_activity(): + def handle_update_camera_activity() -> None: self.camera_activity.update_activity(payload) - def handle_update_event_description(): + def handle_update_event_description() -> None: event: Event = Event.get(Event.id == payload["id"]) - event.data["description"] = payload["description"] + cast(dict, event.data)["description"] = payload["description"] event.save() self.publish( "tracked_object_update", @@ -140,30 +151,46 @@ class Dispatcher: ), ) - def handle_update_model_state(): + def handle_update_review_description() -> None: + final_data = payload["after"] + ReviewSegment.insert(final_data).on_conflict( + conflict_target=[ReviewSegment.id], + update=final_data, + ).execute() + self.publish("reviews", json.dumps(payload)) + + def 
handle_update_model_state() -> None: if payload: model = payload["model"] state = payload["state"] self.model_state[model] = ModelStatusTypesEnum[state] self.publish("model_state", json.dumps(self.model_state)) - def handle_model_state(): + def handle_model_state() -> None: self.publish("model_state", json.dumps(self.model_state.copy())) - def handle_update_embeddings_reindex_progress(): + def handle_update_embeddings_reindex_progress() -> None: self.embeddings_reindex = payload self.publish( "embeddings_reindex_progress", json.dumps(payload), ) - def handle_embeddings_reindex_progress(): + def handle_embeddings_reindex_progress() -> None: self.publish( "embeddings_reindex_progress", json.dumps(self.embeddings_reindex.copy()), ) - def handle_on_connect(): + def handle_update_birdseye_layout() -> None: + if payload: + self.birdseye_layout = payload + self.publish("birdseye_layout", json.dumps(self.birdseye_layout)) + + def handle_birdseye_layout() -> None: + self.publish("birdseye_layout", json.dumps(self.birdseye_layout.copy())) + + def handle_on_connect() -> None: camera_status = self.camera_activity.last_camera_activity.copy() cameras_with_status = camera_status.keys() @@ -177,6 +204,9 @@ class Dispatcher: "snapshots": self.config.cameras[camera].snapshots.enabled, "record": self.config.cameras[camera].record.enabled, "audio": self.config.cameras[camera].audio.enabled, + "audio_transcription": self.config.cameras[ + camera + ].audio_transcription.live_enabled, "notifications": self.config.cameras[camera].notifications.enabled, "notifications_suspended": int( self.web_push_client.suspended_cameras.get(camera, 0) @@ -189,6 +219,12 @@ class Dispatcher: ].onvif.autotracking.enabled, "alerts": self.config.cameras[camera].review.alerts.enabled, "detections": self.config.cameras[camera].review.detections.enabled, + "object_descriptions": self.config.cameras[ + camera + ].objects.genai.enabled, + "review_descriptions": self.config.cameras[ + camera + ].review.genai.enabled, } self.publish("camera_activity", json.dumps(camera_status)) @@ -197,8 +233,9 @@ class Dispatcher: "embeddings_reindex_progress", json.dumps(self.embeddings_reindex.copy()), ) + self.publish("birdseye_layout", json.dumps(self.birdseye_layout.copy())) - def handle_notification_test(): + def handle_notification_test() -> None: self.publish("notification_test", "Test notification") # Dictionary mapping topic to handlers @@ -210,12 +247,15 @@ class Dispatcher: CLEAR_ONGOING_REVIEW_SEGMENTS: handle_clear_ongoing_review_segments, UPDATE_CAMERA_ACTIVITY: handle_update_camera_activity, UPDATE_EVENT_DESCRIPTION: handle_update_event_description, + UPDATE_REVIEW_DESCRIPTION: handle_update_review_description, UPDATE_MODEL_STATE: handle_update_model_state, UPDATE_EMBEDDINGS_REINDEX_PROGRESS: handle_update_embeddings_reindex_progress, + UPDATE_BIRDSEYE_LAYOUT: handle_update_birdseye_layout, NOTIFICATION_TEST: handle_notification_test, "restart": handle_restart, "embeddingsReindexProgress": handle_embeddings_reindex_progress, "modelState": handle_model_state, + "birdseyeLayout": handle_birdseye_layout, "onConnect": handle_on_connect, } @@ -243,11 +283,12 @@ class Dispatcher: logger.error( f"Received invalid {topic.split('/')[-1]} command: {topic}" ) - return + return None elif topic in topic_handlers: return topic_handlers[topic]() else: self.publish(topic, payload, retain=False) + return None def publish(self, topic: str, payload: Any, retain: bool = False) -> None: """Handle publishing to communicators.""" @@ -273,8 +314,11 @@ class 
Dispatcher: f"Turning on motion for {camera_name} due to detection being enabled." ) motion_settings.enabled = True - self.config_updater.publish( - f"config/motion/{camera_name}", motion_settings + self.config_updater.publish_update( + CameraConfigUpdateTopic( + CameraConfigUpdateEnum.motion, camera_name + ), + motion_settings, ) self.publish(f"{camera_name}/motion/state", payload, retain=True) elif payload == "OFF": @@ -282,7 +326,10 @@ class Dispatcher: logger.info(f"Turning off detection for {camera_name}") detect_settings.enabled = False - self.config_updater.publish(f"config/detect/{camera_name}", detect_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.detect, camera_name), + detect_settings, + ) self.publish(f"{camera_name}/detect/state", payload, retain=True) def _on_enabled_command(self, camera_name: str, payload: str) -> None: @@ -303,7 +350,10 @@ class Dispatcher: logger.info(f"Turning off camera {camera_name}") camera_settings.enabled = False - self.config_updater.publish(f"config/enabled/{camera_name}", camera_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.enabled, camera_name), + camera_settings.enabled, + ) self.publish(f"{camera_name}/enabled/state", payload, retain=True) def _on_motion_command(self, camera_name: str, payload: str) -> None: @@ -326,7 +376,10 @@ class Dispatcher: logger.info(f"Turning off motion for {camera_name}") motion_settings.enabled = False - self.config_updater.publish(f"config/motion/{camera_name}", motion_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.motion, camera_name), + motion_settings, + ) self.publish(f"{camera_name}/motion/state", payload, retain=True) def _on_motion_improve_contrast_command( @@ -338,13 +391,16 @@ class Dispatcher: if payload == "ON": if not motion_settings.improve_contrast: logger.info(f"Turning on improve contrast for {camera_name}") - motion_settings.improve_contrast = True # type: ignore[union-attr] + motion_settings.improve_contrast = True elif payload == "OFF": if motion_settings.improve_contrast: logger.info(f"Turning off improve contrast for {camera_name}") - motion_settings.improve_contrast = False # type: ignore[union-attr] + motion_settings.improve_contrast = False - self.config_updater.publish(f"config/motion/{camera_name}", motion_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.motion, camera_name), + motion_settings, + ) self.publish(f"{camera_name}/improve_contrast/state", payload, retain=True) def _on_ptz_autotracker_command(self, camera_name: str, payload: str) -> None: @@ -383,8 +439,11 @@ class Dispatcher: motion_settings = self.config.cameras[camera_name].motion logger.info(f"Setting motion contour area for {camera_name}: {payload}") - motion_settings.contour_area = payload # type: ignore[union-attr] - self.config_updater.publish(f"config/motion/{camera_name}", motion_settings) + motion_settings.contour_area = payload + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.motion, camera_name), + motion_settings, + ) self.publish(f"{camera_name}/motion_contour_area/state", payload, retain=True) def _on_motion_threshold_command(self, camera_name: str, payload: int) -> None: @@ -397,8 +456,11 @@ class Dispatcher: motion_settings = self.config.cameras[camera_name].motion logger.info(f"Setting motion threshold for {camera_name}: {payload}") - motion_settings.threshold = payload # type: 
ignore[union-attr] - self.config_updater.publish(f"config/motion/{camera_name}", motion_settings) + motion_settings.threshold = payload + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.motion, camera_name), + motion_settings, + ) self.publish(f"{camera_name}/motion_threshold/state", payload, retain=True) def _on_global_notification_command(self, payload: str) -> None: @@ -409,9 +471,9 @@ class Dispatcher: notification_settings = self.config.notifications logger.info(f"Setting all notifications: {payload}") - notification_settings.enabled = payload == "ON" # type: ignore[union-attr] - self.config_updater.publish( - "config/notifications", {"_global_notifications": notification_settings} + notification_settings.enabled = payload == "ON" + self.config_updater.publisher.publish( + "config/notifications", notification_settings ) self.publish("notifications/state", payload, retain=True) @@ -434,9 +496,43 @@ class Dispatcher: logger.info(f"Turning off audio detection for {camera_name}") audio_settings.enabled = False - self.config_updater.publish(f"config/audio/{camera_name}", audio_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.audio, camera_name), + audio_settings, + ) self.publish(f"{camera_name}/audio/state", payload, retain=True) + def _on_audio_transcription_command(self, camera_name: str, payload: str) -> None: + """Callback for live audio transcription topic.""" + audio_transcription_settings = self.config.cameras[ + camera_name + ].audio_transcription + + if payload == "ON": + if not self.config.cameras[ + camera_name + ].audio_transcription.enabled_in_config: + logger.error( + "Audio transcription must be enabled in the config to be turned on via MQTT." + ) + return + + if not audio_transcription_settings.live_enabled: + logger.info(f"Turning on live audio transcription for {camera_name}") + audio_transcription_settings.live_enabled = True + elif payload == "OFF": + if audio_transcription_settings.live_enabled: + logger.info(f"Turning off live audio transcription for {camera_name}") + audio_transcription_settings.live_enabled = False + + self.config_updater.publish_update( + CameraConfigUpdateTopic( + CameraConfigUpdateEnum.audio_transcription, camera_name + ), + audio_transcription_settings, + ) + self.publish(f"{camera_name}/audio_transcription/state", payload, retain=True) + def _on_recordings_command(self, camera_name: str, payload: str) -> None: """Callback for recordings topic.""" record_settings = self.config.cameras[camera_name].record @@ -456,7 +552,10 @@ class Dispatcher: logger.info(f"Turning off recordings for {camera_name}") record_settings.enabled = False - self.config_updater.publish(f"config/record/{camera_name}", record_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.record, camera_name), + record_settings, + ) self.publish(f"{camera_name}/recordings/state", payload, retain=True) def _on_snapshots_command(self, camera_name: str, payload: str) -> None: @@ -472,6 +571,10 @@ class Dispatcher: logger.info(f"Turning off snapshots for {camera_name}") snapshots_settings.enabled = False + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.snapshots, camera_name), + snapshots_settings, + ) self.publish(f"{camera_name}/snapshots/state", payload, retain=True) def _on_ptz_command(self, camera_name: str, payload: str) -> None: @@ -506,7 +609,10 @@ class Dispatcher: logger.info(f"Turning off birdseye for 
{camera_name}") birdseye_settings.enabled = False - self.config_updater.publish(f"config/birdseye/{camera_name}", birdseye_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.birdseye, camera_name), + birdseye_settings, + ) self.publish(f"{camera_name}/birdseye/state", payload, retain=True) def _on_birdseye_mode_command(self, camera_name: str, payload: str) -> None: @@ -527,7 +633,10 @@ class Dispatcher: f"Setting birdseye mode for {camera_name} to {birdseye_settings.mode}" ) - self.config_updater.publish(f"config/birdseye/{camera_name}", birdseye_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.birdseye, camera_name), + birdseye_settings, + ) self.publish(f"{camera_name}/birdseye_mode/state", payload, retain=True) def _on_camera_notification_command(self, camera_name: str, payload: str) -> None: @@ -559,8 +668,9 @@ class Dispatcher: ): self.web_push_client.suspended_cameras[camera_name] = 0 - self.config_updater.publish( - "config/notifications", {camera_name: notification_settings} + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.notifications, camera_name), + notification_settings, ) self.publish(f"{camera_name}/notifications/state", payload, retain=True) self.publish(f"{camera_name}/notifications/suspended", "0", retain=True) @@ -617,7 +727,10 @@ class Dispatcher: logger.info(f"Turning off alerts for {camera_name}") review_settings.alerts.enabled = False - self.config_updater.publish(f"config/review/{camera_name}", review_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.review, camera_name), + review_settings, + ) self.publish(f"{camera_name}/review_alerts/state", payload, retain=True) def _on_detections_command(self, camera_name: str, payload: str) -> None: @@ -639,5 +752,58 @@ class Dispatcher: logger.info(f"Turning off detections for {camera_name}") review_settings.detections.enabled = False - self.config_updater.publish(f"config/review/{camera_name}", review_settings) + self.config_updater.publish_update( + CameraConfigUpdateTopic(CameraConfigUpdateEnum.review, camera_name), + review_settings, + ) self.publish(f"{camera_name}/review_detections/state", payload, retain=True) + + def _on_object_description_command(self, camera_name: str, payload: str) -> None: + """Callback for object description topic.""" + genai_settings = self.config.cameras[camera_name].objects.genai + + if payload == "ON": + if not self.config.cameras[camera_name].objects.genai.enabled_in_config: + logger.error( + "GenAI must be enabled in the config to be turned on via MQTT." 
+                )
+                return
+
+            if not genai_settings.enabled:
+                logger.info(f"Turning on object descriptions for {camera_name}")
+                genai_settings.enabled = True
+        elif payload == "OFF":
+            if genai_settings.enabled:
+                logger.info(f"Turning off object descriptions for {camera_name}")
+                genai_settings.enabled = False
+
+        self.config_updater.publish_update(
+            CameraConfigUpdateTopic(CameraConfigUpdateEnum.object_genai, camera_name),
+            genai_settings,
+        )
+        self.publish(f"{camera_name}/object_descriptions/state", payload, retain=True)
+
+    def _on_review_description_command(self, camera_name: str, payload: str) -> None:
+        """Callback for review description topic."""
+        genai_settings = self.config.cameras[camera_name].review.genai
+
+        if payload == "ON":
+            if not self.config.cameras[camera_name].review.genai.enabled_in_config:
+                logger.error(
+                    "GenAI Alerts or Detections must be enabled in the config to be turned on via MQTT."
+                )
+                return
+
+            if not genai_settings.enabled:
+                logger.info(f"Turning on review descriptions for {camera_name}")
+                genai_settings.enabled = True
+        elif payload == "OFF":
+            if genai_settings.enabled:
+                logger.info(f"Turning off review descriptions for {camera_name}")
+                genai_settings.enabled = False
+
+        self.config_updater.publish_update(
+            CameraConfigUpdateTopic(CameraConfigUpdateEnum.review_genai, camera_name),
+            genai_settings,
+        )
+        self.publish(f"{camera_name}/review_descriptions/state", payload, retain=True)
diff --git a/frigate/comms/embeddings_updater.py b/frigate/comms/embeddings_updater.py
index 74a87e60f..f7fd9c2bf 100644
--- a/frigate/comms/embeddings_updater.py
+++ b/frigate/comms/embeddings_updater.py
@@ -1,23 +1,36 @@
 """Facilitates communication between processes."""
 
+import logging
 from enum import Enum
 from typing import Any, Callable
 
 import zmq
 
+logger = logging.getLogger(__name__)
+
+
 SOCKET_REP_REQ = "ipc:///tmp/cache/embeddings"
 
 
 class EmbeddingsRequestEnum(Enum):
+    # audio
+    transcribe_audio = "transcribe_audio"
+    # custom classification
+    reload_classification_model = "reload_classification_model"
+    # face
     clear_face_classifier = "clear_face_classifier"
-    embed_description = "embed_description"
-    embed_thumbnail = "embed_thumbnail"
-    generate_search = "generate_search"
     recognize_face = "recognize_face"
     register_face = "register_face"
     reprocess_face = "reprocess_face"
-    reprocess_plate = "reprocess_plate"
+    # semantic search
+    embed_description = "embed_description"
+    embed_thumbnail = "embed_thumbnail"
+    generate_search = "generate_search"
     reindex = "reindex"
+    # LPR
+    reprocess_plate = "reprocess_plate"
+    # Review Descriptions
+    summarize_review = "summarize_review"
 
 
 class EmbeddingsResponder:
@@ -34,9 +47,16 @@ class EmbeddingsResponder:
                 break
 
             try:
-                (topic, value) = self.socket.recv_json(flags=zmq.NOBLOCK)
+                raw = self.socket.recv_json(flags=zmq.NOBLOCK)
 
-                response = process(topic, value)
+                if isinstance(raw, list):
+                    (topic, value) = raw
+                    response = process(topic, value)
+                else:
+                    logger.warning(
+                        f"Received unexpected data type in ZMQ recv_json: {type(raw)}"
+                    )
+                    response = None
 
                 if response is not None:
                     self.socket.send_json(response)
@@ -58,7 +78,7 @@ class EmbeddingsRequestor:
         self.socket = self.context.socket(zmq.REQ)
         self.socket.connect(SOCKET_REP_REQ)
 
-    def send_data(self, topic: str, data: Any) -> str:
+    def send_data(self, topic: str, data: Any) -> Any:
         """Sends data and then waits for reply."""
         try:
             self.socket.send_json((topic, data))
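A note on the isinstance(raw, list) guard added above (and mirrored in inter_process.py below): JSON has no tuple type, so the (topic, data) tuples passed to send_json() arrive at the other end as two-element lists. A quick standalone illustration (not part of the patch):

    import json

    # tuples survive a JSON round trip as lists, never as tuples
    json.loads(json.dumps(("embed_description", {"id": "abc123"})))
    # -> ['embed_description', {'id': 'abc123'}]

Anything that deserializes to a non-list (for example a bare string) is now logged and skipped instead of raising an unpacking error.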
6305de5a1..897778832 100644 --- a/frigate/comms/event_metadata_updater.py +++ b/frigate/comms/event_metadata_updater.py @@ -15,7 +15,7 @@ class EventMetadataTypeEnum(str, Enum): manual_event_end = "manual_event_end" regenerate_description = "regenerate_description" sub_label = "sub_label" - recognized_license_plate = "recognized_license_plate" + attribute = "attribute" lpr_event_create = "lpr_event_create" save_lpr_snapshot = "save_lpr_snapshot" @@ -28,8 +28,8 @@ class EventMetadataPublisher(Publisher): def __init__(self) -> None: super().__init__() - def publish(self, topic: EventMetadataTypeEnum, payload: Any) -> None: - super().publish(payload, topic.value) + def publish(self, payload: Any, sub_topic: str = "") -> None: + super().publish(payload, sub_topic) class EventMetadataSubscriber(Subscriber): @@ -40,9 +40,10 @@ class EventMetadataSubscriber(Subscriber): def __init__(self, topic: EventMetadataTypeEnum) -> None: super().__init__(topic.value) - def _return_object(self, topic: str, payload: tuple) -> tuple: + def _return_object( + self, topic: str, payload: tuple | None + ) -> tuple[str, Any] | tuple[None, None]: if payload is None: return (None, None) - topic = EventMetadataTypeEnum[topic[len(self.topic_base) :]] return (topic, payload) diff --git a/frigate/comms/events_updater.py b/frigate/comms/events_updater.py index b1d7a6328..cfd958d2c 100644 --- a/frigate/comms/events_updater.py +++ b/frigate/comms/events_updater.py @@ -7,7 +7,9 @@ from frigate.events.types import EventStateEnum, EventTypeEnum from .zmq_proxy import Publisher, Subscriber -class EventUpdatePublisher(Publisher): +class EventUpdatePublisher( + Publisher[tuple[EventTypeEnum, EventStateEnum, str | None, str, dict[str, Any]]] +): """Publishes events (objects, audio, manual).""" topic_base = "event/" @@ -16,9 +18,11 @@ class EventUpdatePublisher(Publisher): super().__init__("update") def publish( - self, payload: tuple[EventTypeEnum, EventStateEnum, str, str, dict[str, Any]] + self, + payload: tuple[EventTypeEnum, EventStateEnum, str | None, str, dict[str, Any]], + sub_topic: str = "", ) -> None: - super().publish(payload) + super().publish(payload, sub_topic) class EventUpdateSubscriber(Subscriber): @@ -30,7 +34,9 @@ class EventUpdateSubscriber(Subscriber): super().__init__("update") -class EventEndPublisher(Publisher): +class EventEndPublisher( + Publisher[tuple[EventTypeEnum, EventStateEnum, str, dict[str, Any]]] +): """Publishes events that have ended.""" topic_base = "event/" @@ -39,9 +45,11 @@ class EventEndPublisher(Publisher): super().__init__("finalized") def publish( - self, payload: tuple[EventTypeEnum, EventStateEnum, str, dict[str, Any]] + self, + payload: tuple[EventTypeEnum, EventStateEnum, str, dict[str, Any]], + sub_topic: str = "", ) -> None: - super().publish(payload) + super().publish(payload, sub_topic) class EventEndSubscriber(Subscriber): diff --git a/frigate/comms/inter_process.py b/frigate/comms/inter_process.py index ee1a78efc..e4aad9107 100644 --- a/frigate/comms/inter_process.py +++ b/frigate/comms/inter_process.py @@ -1,5 +1,6 @@ """Facilitates communication between processes.""" +import logging import multiprocessing as mp import threading from multiprocessing.synchronize import Event as MpEvent @@ -9,6 +10,8 @@ import zmq from frigate.comms.base_communicator import Communicator +logger = logging.getLogger(__name__) + SOCKET_REP_REQ = "ipc:///tmp/cache/comms" @@ -19,7 +22,7 @@ class InterProcessCommunicator(Communicator): self.socket.bind(SOCKET_REP_REQ) self.stop_event: MpEvent = 
mp.Event() - def publish(self, topic: str, payload: str, retain: bool) -> None: + def publish(self, topic: str, payload: Any, retain: bool = False) -> None: """There is no communication back to the processes.""" pass @@ -37,9 +40,16 @@ break try: - (topic, value) = self.socket.recv_json(flags=zmq.NOBLOCK) + raw = self.socket.recv_json(flags=zmq.NOBLOCK) - response = self._dispatcher(topic, value) + if isinstance(raw, list): + (topic, value) = raw + response = self._dispatcher(topic, value) + else: + logger.warning( + f"Received unexpected data type in ZMQ recv_json: {type(raw)}" + ) + response = None if response is not None: self.socket.send_json(response) diff --git a/frigate/comms/mqtt.py b/frigate/comms/mqtt.py index e487b30ee..0af56e259 100644 --- a/frigate/comms/mqtt.py +++ b/frigate/comms/mqtt.py @@ -11,7 +11,7 @@ from frigate.config import FrigateConfig logger = logging.getLogger(__name__) -class MqttClient(Communicator): # type: ignore[misc] +class MqttClient(Communicator): """Frigate wrapper for mqtt client.""" def __init__(self, config: FrigateConfig) -> None: @@ -75,7 +75,7 @@ class MqttClient(Communicator): # type: ignore[misc] ) self.publish( f"{camera_name}/improve_contrast/state", - "ON" if camera.motion.improve_contrast else "OFF", # type: ignore[union-attr] + "ON" if camera.motion.improve_contrast else "OFF", retain=True, ) self.publish( @@ -85,12 +85,12 @@ ) self.publish( f"{camera_name}/motion_threshold/state", - camera.motion.threshold, # type: ignore[union-attr] + camera.motion.threshold, retain=True, ) self.publish( f"{camera_name}/motion_contour_area/state", - camera.motion.contour_area, # type: ignore[union-attr] + camera.motion.contour_area, retain=True, ) self.publish( @@ -122,6 +122,16 @@ "ON" if camera.review.detections.enabled_in_config else "OFF", retain=True, ) + self.publish( + f"{camera_name}/object_descriptions/state", + "ON" if camera.objects.genai.enabled_in_config else "OFF", + retain=True, + ) + self.publish( + f"{camera_name}/review_descriptions/state", + "ON" if camera.review.genai.enabled_in_config else "OFF", + retain=True, + ) if self.config.notifications.enabled_in_config: self.publish( @@ -145,7 +155,7 @@ client: mqtt.Client, userdata: Any, flags: Any, - reason_code: mqtt.ReasonCode, + reason_code: mqtt.ReasonCode, # type: ignore[name-defined] properties: Any, ) -> None: """Mqtt connection callback.""" @@ -177,7 +187,7 @@ client: mqtt.Client, userdata: Any, flags: Any, - reason_code: mqtt.ReasonCode, + reason_code: mqtt.ReasonCode, # type: ignore[name-defined] properties: Any, ) -> None: """Mqtt disconnection callback.""" @@ -215,6 +225,7 @@ "birdseye_mode", "review_alerts", "review_detections", + "genai", ] for name in self.config.cameras.keys(): diff --git a/frigate/comms/object_detector_signaler.py b/frigate/comms/object_detector_signaler.py new file mode 100644 index 000000000..e8871db1a --- /dev/null +++ b/frigate/comms/object_detector_signaler.py @@ -0,0 +1,92 @@ +"""Facilitates communication between processes for object detection signals.""" + +import threading + +import zmq + +SOCKET_PUB = "ipc:///tmp/cache/detector_pub" +SOCKET_SUB = "ipc:///tmp/cache/detector_sub" + + +class ZmqProxyRunner(threading.Thread): + def __init__(self,
context: zmq.Context[zmq.Socket]) -> None: + super().__init__(name="detector_proxy") + self.context = context + + def run(self) -> None: + """Run the proxy.""" + incoming = self.context.socket(zmq.XSUB) + incoming.bind(SOCKET_PUB) + outgoing = self.context.socket(zmq.XPUB) + outgoing.bind(SOCKET_SUB) + + # Blocking: This will unblock (via exception) when we destroy the context + # The incoming and outgoing sockets will be closed automatically + # when the context is destroyed as well. + try: + zmq.proxy(incoming, outgoing) + except zmq.ZMQError: + pass + + +class DetectorProxy: + """Proxies object detection signals.""" + + def __init__(self) -> None: + self.context = zmq.Context() + self.runner = ZmqProxyRunner(self.context) + self.runner.start() + + def stop(self) -> None: + # destroying the context will tell the proxy to stop + self.context.destroy() + self.runner.join() + + +class ObjectDetectorPublisher: + """Publishes signal for object detection to different processes.""" + + topic_base = "object_detector/" + + def __init__(self, topic: str = "") -> None: + self.topic = f"{self.topic_base}{topic}" + self.context = zmq.Context() + self.socket = self.context.socket(zmq.PUB) + self.socket.connect(SOCKET_PUB) + + def publish(self, sub_topic: str = "") -> None: + """Publish message.""" + self.socket.send_string(f"{self.topic}{sub_topic}/") + + def stop(self) -> None: + self.socket.close() + self.context.destroy() + + +class ObjectDetectorSubscriber: + """Simplifies receiving a signal for object detection.""" + + topic_base = "object_detector/" + + def __init__(self, topic: str = "") -> None: + self.topic = f"{self.topic_base}{topic}/" + self.context = zmq.Context() + self.socket = self.context.socket(zmq.SUB) + self.socket.setsockopt_string(zmq.SUBSCRIBE, self.topic) + self.socket.connect(SOCKET_SUB) + + def check_for_update(self, timeout: float = 5) -> str | None: + """Returns message or None if no update.""" + try: + has_update, _, _ = zmq.select([self.socket], [], [], timeout) + + if has_update: + return self.socket.recv_string(flags=zmq.NOBLOCK) + except zmq.ZMQError: + pass + + return None + + def stop(self) -> None: + self.socket.close() + self.context.destroy() diff --git a/frigate/comms/recordings_updater.py b/frigate/comms/recordings_updater.py index 862ec1041..0db4ad289 100644 --- a/frigate/comms/recordings_updater.py +++ b/frigate/comms/recordings_updater.py @@ -13,17 +13,16 @@ class RecordingsDataTypeEnum(str, Enum): recordings_available_through = "recordings_available_through" -class RecordingsDataPublisher(Publisher): +class RecordingsDataPublisher(Publisher[tuple[str, float]]): """Publishes latest recording data.""" topic_base = "recordings/" def __init__(self, topic: RecordingsDataTypeEnum) -> None: - topic = topic.value - super().__init__(topic) + super().__init__(topic.value) - def publish(self, payload: tuple[str, float]) -> None: - super().publish(payload) + def publish(self, payload: tuple[str, float], sub_topic: str = "") -> None: + super().publish(payload, sub_topic) class RecordingsDataSubscriber(Subscriber): @@ -32,5 +31,4 @@ class RecordingsDataSubscriber(Subscriber): topic_base = "recordings/" def __init__(self, topic: RecordingsDataTypeEnum) -> None: - topic = topic.value - super().__init__(topic) + super().__init__(topic.value) diff --git a/frigate/comms/review_updater.py b/frigate/comms/review_updater.py new file mode 100644 index 000000000..2b3a5b3aa --- /dev/null +++ b/frigate/comms/review_updater.py @@ -0,0 +1,30 @@ +"""Facilitates communication between 
processes.""" + +import logging + +from .zmq_proxy import Publisher, Subscriber + +logger = logging.getLogger(__name__) + + +class ReviewDataPublisher( + Publisher +): # update when typing improvement is added Publisher[tuple[str, float]] + """Publishes review item data.""" + + topic_base = "review/" + + def __init__(self, topic: str) -> None: + super().__init__(topic) + + def publish(self, payload: tuple[str, float], sub_topic: str = "") -> None: + super().publish(payload, sub_topic) + + +class ReviewDataSubscriber(Subscriber): + """Receives review item data.""" + + topic_base = "review/" + + def __init__(self, topic: str) -> None: + super().__init__(topic) diff --git a/frigate/comms/webpush.py b/frigate/comms/webpush.py index c5986d45c..7eb2cd0c2 100644 --- a/frigate/comms/webpush.py +++ b/frigate/comms/webpush.py @@ -17,6 +17,10 @@ from titlecase import titlecase from frigate.comms.base_communicator import Communicator from frigate.comms.config_updater import ConfigSubscriber from frigate.config import FrigateConfig +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) from frigate.const import CONFIG_DIR from frigate.models import User @@ -35,7 +39,7 @@ class PushNotification: ttl: int = 0 -class WebPushClient(Communicator): # type: ignore[misc] +class WebPushClient(Communicator): """Frigate wrapper for webpush client.""" def __init__(self, config: FrigateConfig, stop_event: MpEvent) -> None: @@ -46,10 +50,12 @@ class WebPushClient(Communicator): # type: ignore[misc] self.web_pushers: dict[str, list[WebPusher]] = {} self.expired_subs: dict[str, list[str]] = {} self.suspended_cameras: dict[str, int] = { - c.name: 0 for c in self.config.cameras.values() + c.name: 0 # type: ignore[misc] + for c in self.config.cameras.values() } self.last_camera_notification_time: dict[str, float] = { - c.name: 0 for c in self.config.cameras.values() + c.name: 0 # type: ignore[misc] + for c in self.config.cameras.values() } self.last_notification_time: float = 0 self.notification_queue: queue.Queue[PushNotification] = queue.Queue() @@ -64,7 +70,7 @@ class WebPushClient(Communicator): # type: ignore[misc] # Pull keys from PEM or generate if they do not exist self.vapid = Vapid01.from_file(os.path.join(CONFIG_DIR, "notifications.pem")) - users: list[User] = ( + users: list[dict[str, Any]] = ( User.select(User.username, User.notification_tokens).dicts().iterator() ) for user in users: @@ -73,7 +79,12 @@ class WebPushClient(Communicator): # type: ignore[misc] self.web_pushers[user["username"]].append(WebPusher(sub)) # notification config updater - self.config_subscriber = ConfigSubscriber("config/notifications") + self.global_config_subscriber = ConfigSubscriber( + "config/notifications", exact=True + ) + self.config_subscriber = CameraConfigUpdateSubscriber( + self.config, self.config.cameras, [CameraConfigUpdateEnum.notifications] + ) def subscribe(self, receiver: Callable) -> None: """Wrapper for allowing dispatcher to subscribe.""" @@ -154,15 +165,19 @@ class WebPushClient(Communicator): # type: ignore[misc] def publish(self, topic: str, payload: Any, retain: bool = False) -> None: """Wrapper for publishing when client is in valid state.""" # check for updated notification config - _, updated_notification_config = self.config_subscriber.check_for_update() + _, updated_notification_config = ( + self.global_config_subscriber.check_for_update() + ) if updated_notification_config: - for key, value in updated_notification_config.items(): - if key == 
"_global_notifications": - self.config.notifications = value + self.config.notifications = updated_notification_config - elif key in self.config.cameras: - self.config.cameras[key].notifications = value + updates = self.config_subscriber.check_for_updates() + + if "add" in updates: + for camera in updates["add"]: + self.suspended_cameras[camera] = 0 + self.last_camera_notification_time[camera] = 0 if topic == "reviews": decoded = json.loads(payload) @@ -173,6 +188,28 @@ class WebPushClient(Communicator): # type: ignore[misc] logger.debug(f"Notifications for {camera} are currently suspended.") return self.send_alert(decoded) + if topic == "triggers": + decoded = json.loads(payload) + + camera = decoded["camera"] + name = decoded["name"] + + # ensure notifications are enabled and the specific trigger has + # notification action enabled + if ( + not self.config.cameras[camera].notifications.enabled + or name not in self.config.cameras[camera].semantic_search.triggers + or "notification" + not in self.config.cameras[camera] + .semantic_search.triggers[name] + .actions + ): + return + + if self.is_camera_suspended(camera): + logger.debug(f"Notifications for {camera} are currently suspended.") + return + self.send_trigger(decoded) elif topic == "notification_test": if not self.config.notifications.enabled and not any( cam.notifications.enabled for cam in self.config.cameras.values() @@ -254,6 +291,23 @@ class WebPushClient(Communicator): # type: ignore[misc] except Exception as e: logger.error(f"Error processing notification: {str(e)}") + def _within_cooldown(self, camera: str) -> bool: + now = datetime.datetime.now().timestamp() + if now - self.last_notification_time < self.config.notifications.cooldown: + logger.debug( + f"Skipping notification for {camera} - in global cooldown period" + ) + return True + if ( + now - self.last_camera_notification_time[camera] + < self.config.cameras[camera].notifications.cooldown + ): + logger.debug( + f"Skipping notification for {camera} - in camera-specific cooldown period" + ) + return True + return False + def send_notification_test(self) -> None: if not self.config.notifications.email: return @@ -282,24 +336,7 @@ class WebPushClient(Communicator): # type: ignore[misc] camera: str = payload["after"]["camera"] current_time = datetime.datetime.now().timestamp() - # Check global cooldown period - if ( - current_time - self.last_notification_time - < self.config.notifications.cooldown - ): - logger.debug( - f"Skipping notification for {camera} - in global cooldown period" - ) - return - - # Check camera-specific cooldown period - if ( - current_time - self.last_camera_notification_time[camera] - < self.config.cameras[camera].notifications.cooldown - ): - logger.debug( - f"Skipping notification for {camera} - in camera-specific cooldown period" - ) + if self._within_cooldown(camera): return self.check_registrations() @@ -332,12 +369,22 @@ class WebPushClient(Communicator): # type: ignore[misc] sorted_objects.update(payload["after"]["data"]["sub_labels"]) title = f"{titlecase(', '.join(sorted_objects).replace('_', ' '))}{' was' if state == 'end' else ''} detected in {titlecase(', '.join(payload['after']['data']['zones']).replace('_', ' '))}" - message = f"Detected on {titlecase(camera.replace('_', ' '))}" image = f"{payload['after']['thumb_path'].replace('/media/frigate', '')}" + ended = state == "end" or state == "genai" + + if state == "genai" and payload["after"]["data"]["metadata"]: + message = payload["after"]["data"]["metadata"]["scene"] + else: + message 
= f"Detected on {titlecase(camera.replace('_', ' '))}" + + if ended: + logger.debug( + f"Sending a notification with state {state} and message {message}" + ) # if event is ongoing open to live view otherwise open to recordings view - direct_url = f"/review?id={reviewId}" if state == "end" else f"/#{camera}" - ttl = 3600 if state == "end" else 0 + direct_url = f"/review?id={reviewId}" if ended else f"/#{camera}" + ttl = 3600 if ended else 0 logger.debug(f"Sending push notification for {camera}, review ID {reviewId}") @@ -354,6 +401,48 @@ class WebPushClient(Communicator): # type: ignore[misc] self.cleanup_registrations() + def send_trigger(self, payload: dict[str, Any]) -> None: + if not self.config.notifications.email: + return + + camera: str = payload["camera"] + current_time = datetime.datetime.now().timestamp() + + if self._within_cooldown(camera): + return + + self.check_registrations() + + self.last_camera_notification_time[camera] = current_time + self.last_notification_time = current_time + + trigger_type = payload["type"] + event_id = payload["event_id"] + name = payload["name"] + score = payload["score"] + + title = f"{name.replace('_', ' ')} triggered on {titlecase(camera.replace('_', ' '))}" + message = f"{titlecase(trigger_type)} trigger fired for {titlecase(camera.replace('_', ' '))} with score {score:.2f}" + image = f"clips/triggers/{camera}/{event_id}.webp" + + direct_url = f"/explore?event_id={event_id}" + ttl = 0 + + logger.debug(f"Sending push notification for {camera}, trigger name {name}") + + for user in self.web_pushers: + self.send_push_notification( + user=user, + payload=payload, + title=title, + message=message, + direct_url=direct_url, + image=image, + ttl=ttl, + ) + + self.cleanup_registrations() + def stop(self) -> None: logger.info("Closing notification queue") self.notification_thread.join() diff --git a/frigate/comms/ws.py b/frigate/comms/ws.py index 1eed290f7..6cfe4ecc0 100644 --- a/frigate/comms/ws.py +++ b/frigate/comms/ws.py @@ -4,7 +4,7 @@ import errno import json import logging import threading -from typing import Callable +from typing import Any, Callable from wsgiref.simple_server import make_server from ws4py.server.wsgirefserver import ( @@ -21,8 +21,8 @@ from frigate.config import FrigateConfig logger = logging.getLogger(__name__) -class WebSocket(WebSocket_): - def unhandled_error(self, error): +class WebSocket(WebSocket_): # type: ignore[misc] + def unhandled_error(self, error: Any) -> None: """ Handles the unfriendly socket closures on the server side without showing a confusing error message @@ -33,12 +33,12 @@ class WebSocket(WebSocket_): logging.getLogger("ws4py").exception("Failed to receive data") -class WebSocketClient(Communicator): # type: ignore[misc] +class WebSocketClient(Communicator): """Frigate wrapper for ws client.""" def __init__(self, config: FrigateConfig) -> None: self.config = config - self.websocket_server = None + self.websocket_server: WSGIServer | None = None def subscribe(self, receiver: Callable) -> None: self._dispatcher = receiver @@ -47,10 +47,10 @@ class WebSocketClient(Communicator): # type: ignore[misc] def start(self) -> None: """Start the websocket client.""" - class _WebSocketHandler(WebSocket): # type: ignore[misc] + class _WebSocketHandler(WebSocket): receiver = self._dispatcher - def received_message(self, message: WebSocket.received_message) -> None: + def received_message(self, message: WebSocket.received_message) -> None: # type: ignore[name-defined] try: json_message = 
json.loads(message.data.decode("utf-8")) json_message = { @@ -86,7 +86,7 @@ class WebSocketClient(Communicator): # type: ignore[misc] ) self.websocket_thread.start() - def publish(self, topic: str, payload: str, _: bool) -> None: + def publish(self, topic: str, payload: Any, _: bool = False) -> None: try: ws_message = json.dumps( { @@ -109,9 +109,11 @@ class WebSocketClient(Communicator): # type: ignore[misc] pass def stop(self) -> None: - self.websocket_server.manager.close_all() - self.websocket_server.manager.stop() - self.websocket_server.manager.join() - self.websocket_server.shutdown() + if self.websocket_server is not None: + self.websocket_server.manager.close_all() + self.websocket_server.manager.stop() + self.websocket_server.manager.join() + self.websocket_server.shutdown() + self.websocket_thread.join() logger.info("Exiting websocket client...") diff --git a/frigate/comms/zmq_proxy.py b/frigate/comms/zmq_proxy.py index d26da3312..29329ec59 100644 --- a/frigate/comms/zmq_proxy.py +++ b/frigate/comms/zmq_proxy.py @@ -2,7 +2,7 @@ import json import threading -from typing import Any, Optional +from typing import Generic, TypeVar import zmq @@ -47,7 +47,10 @@ class ZmqProxy: self.runner.join() -class Publisher: +T = TypeVar("T") + + +class Publisher(Generic[T]): """Publishes messages.""" topic_base: str = "" @@ -58,7 +61,7 @@ class Publisher: self.socket = self.context.socket(zmq.PUB) self.socket.connect(SOCKET_PUB) - def publish(self, payload: Any, sub_topic: str = "") -> None: + def publish(self, payload: T, sub_topic: str = "") -> None: """Publish message.""" self.socket.send_string(f"{self.topic}{sub_topic} {json.dumps(payload)}") @@ -67,7 +70,7 @@ class Publisher: self.context.destroy() -class Subscriber: +class Subscriber(Generic[T]): """Receives messages.""" topic_base: str = "" @@ -79,9 +82,7 @@ class Subscriber: self.socket.setsockopt_string(zmq.SUBSCRIBE, self.topic) self.socket.connect(SOCKET_SUB) - def check_for_update( - self, timeout: float = FAST_QUEUE_TIMEOUT - ) -> Optional[tuple[str, Any]]: + def check_for_update(self, timeout: float | None = FAST_QUEUE_TIMEOUT) -> T | None: """Returns message or None if no update.""" try: has_update, _, _ = zmq.select([self.socket], [], [], timeout) @@ -98,5 +99,5 @@ class Subscriber: self.socket.close() self.context.destroy() - def _return_object(self, topic: str, payload: Any) -> Any: + def _return_object(self, topic: str, payload: T | None) -> T | None: return payload diff --git a/frigate/config/base.py b/frigate/config/base.py index 068a68acd..1e369e293 100644 --- a/frigate/config/base.py +++ b/frigate/config/base.py @@ -1,5 +1,29 @@ +from typing import Any + from pydantic import BaseModel, ConfigDict class FrigateBaseModel(BaseModel): model_config = ConfigDict(extra="forbid", protected_namespaces=()) + + def get_nested_object(self, path: str) -> Any: + parts = path.split("/") + obj = self + for part in parts: + if part == "config": + continue + + if isinstance(obj, BaseModel): + try: + obj = getattr(obj, part) + except AttributeError: + return None + elif isinstance(obj, dict): + try: + obj = obj[part] + except KeyError: + return None + else: + return None + + return obj diff --git a/frigate/config/camera/camera.py b/frigate/config/camera/camera.py index 3b24dabac..a3c9733ff 100644 --- a/frigate/config/camera/camera.py +++ b/frigate/config/camera/camera.py @@ -19,14 +19,15 @@ from frigate.util.builtin import ( from ..base import FrigateBaseModel from ..classification import ( + AudioTranscriptionConfig, 
CameraFaceRecognitionConfig, CameraLicensePlateRecognitionConfig, + CameraSemanticSearchConfig, ) from .audio import AudioConfig from .birdseye import BirdseyeCameraConfig from .detect import DetectConfig from .ffmpeg import CameraFfmpegConfig, CameraInput -from .genai import GenAICameraConfig from .live import CameraLiveConfig from .motion import MotionConfig from .mqtt import CameraMqttConfig @@ -56,6 +57,9 @@ class CameraConfig(FrigateBaseModel): audio: AudioConfig = Field( default_factory=AudioConfig, title="Audio events configuration." ) + audio_transcription: AudioTranscriptionConfig = Field( + default_factory=AudioTranscriptionConfig, title="Audio transcription config." + ) birdseye: BirdseyeCameraConfig = Field( default_factory=BirdseyeCameraConfig, title="Birdseye camera configuration." ) @@ -66,18 +70,13 @@ class CameraConfig(FrigateBaseModel): default_factory=CameraFaceRecognitionConfig, title="Face recognition config." ) ffmpeg: CameraFfmpegConfig = Field(title="FFmpeg configuration for the camera.") - genai: GenAICameraConfig = Field( - default_factory=GenAICameraConfig, title="Generative AI configuration." - ) live: CameraLiveConfig = Field( default_factory=CameraLiveConfig, title="Live playback settings." ) lpr: CameraLicensePlateRecognitionConfig = Field( default_factory=CameraLicensePlateRecognitionConfig, title="LPR config." ) - motion: Optional[MotionConfig] = Field( - None, title="Motion detection configuration." - ) + motion: MotionConfig = Field(None, title="Motion detection configuration.") objects: ObjectConfig = Field( default_factory=ObjectConfig, title="Object configuration." ) @@ -87,6 +86,10 @@ class CameraConfig(FrigateBaseModel): review: ReviewConfig = Field( default_factory=ReviewConfig, title="Review configuration." ) + semantic_search: CameraSemanticSearchConfig = Field( + default_factory=CameraSemanticSearchConfig, + title="Semantic search configuration.", + ) snapshots: SnapshotsConfig = Field( default_factory=SnapshotsConfig, title="Snapshot configuration." ) diff --git a/frigate/config/camera/genai.py b/frigate/config/camera/genai.py index 6ef93682b..3c6baeb15 100644 --- a/frigate/config/camera/genai.py +++ b/frigate/config/camera/genai.py @@ -1,12 +1,12 @@ from enum import Enum -from typing import Optional, Union +from typing import Any, Optional -from pydantic import BaseModel, Field, field_validator +from pydantic import Field from ..base import FrigateBaseModel from ..env import EnvString -__all__ = ["GenAIConfig", "GenAICameraConfig", "GenAIProviderEnum"] +__all__ = ["GenAIConfig", "GenAIProviderEnum"] class GenAIProviderEnum(str, Enum): @@ -16,70 +16,13 @@ class GenAIProviderEnum(str, Enum): ollama = "ollama" -class GenAISendTriggersConfig(BaseModel): - tracked_object_end: bool = Field( - default=True, title="Send once the object is no longer tracked." - ) - after_significant_updates: Optional[int] = Field( - default=None, - title="Send an early request to generative AI when X frames accumulated.", - ge=1, - ) - - -# uses BaseModel because some global attributes are not available at the camera level -class GenAICameraConfig(BaseModel): - enabled: bool = Field(default=False, title="Enable GenAI for camera.") - use_snapshot: bool = Field( - default=False, title="Use snapshots for generating descriptions." - ) - prompt: str = Field( - default="Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. 
Consider what the {label} is doing, why, and what it might do next.", - title="Default caption prompt.", - ) - object_prompts: dict[str, str] = Field( - default_factory=dict, title="Object specific prompts." - ) - - objects: Union[str, list[str]] = Field( - default_factory=list, - title="List of objects to run generative AI for.", - ) - required_zones: Union[str, list[str]] = Field( - default_factory=list, - title="List of required zones to be entered in order to run generative AI.", - ) - debug_save_thumbnails: bool = Field( - default=False, - title="Save thumbnails sent to generative AI for debugging purposes.", - ) - send_triggers: GenAISendTriggersConfig = Field( - default_factory=GenAISendTriggersConfig, - title="What triggers to use to send frames to generative AI for a tracked object.", - ) - - @field_validator("required_zones", mode="before") - @classmethod - def validate_required_zones(cls, v): - if isinstance(v, str) and "," not in v: - return [v] - - return v - - class GenAIConfig(FrigateBaseModel): - enabled: bool = Field(default=False, title="Enable GenAI.") - prompt: str = Field( - default="Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next.", - title="Default caption prompt.", - ) - object_prompts: dict[str, str] = Field( - default_factory=dict, title="Object specific prompts." - ) + """Primary GenAI Config to define GenAI Provider.""" api_key: Optional[EnvString] = Field(default=None, title="Provider API key.") base_url: Optional[str] = Field(default=None, title="Provider base url.") model: str = Field(default="gpt-4o", title="GenAI model.") - provider: GenAIProviderEnum = Field( - default=GenAIProviderEnum.openai, title="GenAI provider." + provider: GenAIProviderEnum | None = Field(default=None, title="GenAI provider.") + provider_options: dict[str, Any] = Field( + default={}, title="GenAI Provider extra options." ) diff --git a/frigate/config/camera/notification.py b/frigate/config/camera/notification.py index b0d7cebf9..ce1ac8223 100644 --- a/frigate/config/camera/notification.py +++ b/frigate/config/camera/notification.py @@ -10,7 +10,7 @@ __all__ = ["NotificationConfig"] class NotificationConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable notifications") email: Optional[str] = Field(default=None, title="Email required for push.") - cooldown: Optional[int] = Field( + cooldown: int = Field( default=0, ge=0, title="Cooldown period for notifications (time in seconds)." ) enabled_in_config: Optional[bool] = Field( diff --git a/frigate/config/camera/objects.py b/frigate/config/camera/objects.py index 0d559b6ce..7b6317dd0 100644 --- a/frigate/config/camera/objects.py +++ b/frigate/config/camera/objects.py @@ -1,10 +1,10 @@ from typing import Any, Optional, Union -from pydantic import Field, PrivateAttr, field_serializer +from pydantic import Field, PrivateAttr, field_serializer, field_validator from ..base import FrigateBaseModel -__all__ = ["ObjectConfig", "FilterConfig"] +__all__ = ["ObjectConfig", "GenAIObjectConfig", "FilterConfig"] DEFAULT_TRACKED_OBJECTS = ["person"] @@ -49,12 +49,69 @@ class FilterConfig(FrigateBaseModel): return None +class GenAIObjectTriggerConfig(FrigateBaseModel): + tracked_object_end: bool = Field( + default=True, title="Send once the object is no longer tracked." 
+ ) + after_significant_updates: Optional[int] = Field( + default=None, + title="Send an early request to generative AI when X frames accumulated.", + ge=1, + ) + + +class GenAIObjectConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable GenAI for camera.") + use_snapshot: bool = Field( + default=False, title="Use snapshots for generating descriptions." + ) + prompt: str = Field( + default="Analyze the sequence of images containing the {label}. Focus on the likely intent or behavior of the {label} based on its actions and movement, rather than describing its appearance or the surroundings. Consider what the {label} is doing, why, and what it might do next.", + title="Default caption prompt.", + ) + object_prompts: dict[str, str] = Field( + default_factory=dict, title="Object specific prompts." + ) + + objects: Union[str, list[str]] = Field( + default_factory=list, + title="List of objects to run generative AI for.", + ) + required_zones: Union[str, list[str]] = Field( + default_factory=list, + title="List of required zones to be entered in order to run generative AI.", + ) + debug_save_thumbnails: bool = Field( + default=False, + title="Save thumbnails sent to generative AI for debugging purposes.", + ) + send_triggers: GenAIObjectTriggerConfig = Field( + default_factory=GenAIObjectTriggerConfig, + title="What triggers to use to send frames to generative AI for a tracked object.", + ) + enabled_in_config: Optional[bool] = Field( + default=None, title="Keep track of original state of generative AI." + ) + + @field_validator("required_zones", mode="before") + @classmethod + def validate_required_zones(cls, v): + if isinstance(v, str) and "," not in v: + return [v] + + return v + + class ObjectConfig(FrigateBaseModel): track: list[str] = Field(default=DEFAULT_TRACKED_OBJECTS, title="Objects to track.") filters: dict[str, FilterConfig] = Field( default_factory=dict, title="Object filters." ) mask: Union[str, list[str]] = Field(default="", title="Object mask.") + genai: GenAIObjectConfig = Field( + default_factory=GenAIObjectConfig, + title="Config for using genai to analyze objects.", + ) _all_objects: list[str] = PrivateAttr() @property diff --git a/frigate/config/camera/record.py b/frigate/config/camera/record.py index 52d11e2a5..09a7a84d5 100644 --- a/frigate/config/camera/record.py +++ b/frigate/config/camera/record.py @@ -22,27 +22,31 @@ __all__ = [ DEFAULT_TIME_LAPSE_FFMPEG_ARGS = "-vf setpts=0.04*PTS -r 30" +class RecordRetainConfig(FrigateBaseModel): + days: float = Field(default=0, ge=0, title="Default retention period.") + + class RetainModeEnum(str, Enum): all = "all" motion = "motion" active_objects = "active_objects" -class RecordRetainConfig(FrigateBaseModel): - days: float = Field(default=0, title="Default retention period.") - mode: RetainModeEnum = Field(default=RetainModeEnum.all, title="Retain mode.") - - class ReviewRetainConfig(FrigateBaseModel): - days: float = Field(default=10, title="Default retention period.") + days: float = Field(default=10, ge=0, title="Default retention period.") mode: RetainModeEnum = Field(default=RetainModeEnum.motion, title="Retain mode.") class EventsConfig(FrigateBaseModel): pre_capture: int = Field( - default=5, title="Seconds to retain before event starts.", le=MAX_PRE_CAPTURE + default=5, + title="Seconds to retain before event starts.", + le=MAX_PRE_CAPTURE, + ge=0, + ) + post_capture: int = Field( + default=5, ge=0, title="Seconds to retain after event ends." 
) - post_capture: int = Field(default=5, title="Seconds to retain after event ends.") retain: ReviewRetainConfig = Field( default_factory=ReviewRetainConfig, title="Event retention settings." ) @@ -77,8 +81,12 @@ class RecordConfig(FrigateBaseModel): default=60, title="Number of minutes to wait between cleanup runs.", ) - retain: RecordRetainConfig = Field( - default_factory=RecordRetainConfig, title="Record retention settings." + continuous: RecordRetainConfig = Field( + default_factory=RecordRetainConfig, + title="Continuous recording retention settings.", + ) + motion: RecordRetainConfig = Field( + default_factory=RecordRetainConfig, title="Motion recording retention settings." ) detections: EventsConfig = Field( default_factory=EventsConfig, title="Detection specific retention settings." diff --git a/frigate/config/camera/review.py b/frigate/config/camera/review.py index d8d26edb9..51268339b 100644 --- a/frigate/config/camera/review.py +++ b/frigate/config/camera/review.py @@ -62,6 +62,30 @@ class DetectionsConfig(FrigateBaseModel): return v +class GenAIReviewConfig(FrigateBaseModel): + enabled: bool = Field( + default=False, + title="Enable GenAI descriptions for review items.", + ) + alerts: bool = Field(default=True, title="Enable GenAI for alerts.") + detections: bool = Field(default=False, title="Enable GenAI for detections.") + additional_concerns: list[str] = Field( + default=[], + title="Additional concerns that GenAI should make note of on this camera.", + ) + debug_save_thumbnails: bool = Field( + default=False, + title="Save thumbnails sent to generative AI for debugging purposes.", + ) + enabled_in_config: Optional[bool] = Field( + default=None, title="Keep track of original state of generative AI." + ) + preferred_language: str | None = Field( + title="Preferred language for GenAI Response", + default=None, + ) + + class ReviewConfig(FrigateBaseModel): """Configure reviews""" @@ -71,3 +95,6 @@ class ReviewConfig(FrigateBaseModel): detections: DetectionsConfig = Field( default_factory=DetectionsConfig, title="Review detections config." ) + genai: GenAIReviewConfig = Field( + default_factory=GenAIReviewConfig, title="Review description genai config." 
+ ) diff --git a/frigate/config/camera/updater.py b/frigate/config/camera/updater.py new file mode 100644 index 000000000..125094f10 --- /dev/null +++ b/frigate/config/camera/updater.py @@ -0,0 +1,147 @@ +"""Convenience classes for updating configurations dynamically.""" + +from dataclasses import dataclass +from enum import Enum +from typing import Any + +from frigate.comms.config_updater import ConfigPublisher, ConfigSubscriber +from frigate.config import CameraConfig, FrigateConfig + + +class CameraConfigUpdateEnum(str, Enum): + """Supported camera config update types.""" + + add = "add" # for adding a camera + audio = "audio" + audio_transcription = "audio_transcription" + birdseye = "birdseye" + detect = "detect" + enabled = "enabled" + motion = "motion" # includes motion and motion masks + notifications = "notifications" + objects = "objects" + object_genai = "object_genai" + record = "record" + remove = "remove" # for removing a camera + review = "review" + review_genai = "review_genai" + semantic_search = "semantic_search" # for semantic search triggers + snapshots = "snapshots" + zones = "zones" + + +@dataclass +class CameraConfigUpdateTopic: + update_type: CameraConfigUpdateEnum + camera: str + + @property + def topic(self) -> str: + return f"config/cameras/{self.camera}/{self.update_type.name}" + + +class CameraConfigUpdatePublisher: + def __init__(self): + self.publisher = ConfigPublisher() + + def publish_update(self, topic: CameraConfigUpdateTopic, config: Any) -> None: + self.publisher.publish(topic.topic, config) + + def stop(self) -> None: + self.publisher.stop() + + +class CameraConfigUpdateSubscriber: + def __init__( + self, + config: FrigateConfig | None, + camera_configs: dict[str, CameraConfig], + topics: list[CameraConfigUpdateEnum], + ): + self.config = config + self.camera_configs = camera_configs + self.topics = topics + + base_topic = "config/cameras" + + if len(self.camera_configs) == 1: + base_topic += f"/{list(self.camera_configs.keys())[0]}" + + self.subscriber = ConfigSubscriber( + base_topic, + exact=False, + ) + + def __update_config( + self, camera: str, update_type: CameraConfigUpdateEnum, updated_config: Any + ) -> None: + if update_type == CameraConfigUpdateEnum.add: + self.config.cameras[camera] = updated_config + self.camera_configs[camera] = updated_config + return + elif update_type == CameraConfigUpdateEnum.remove: + self.config.cameras.pop(camera) + self.camera_configs.pop(camera) + return + + config = self.camera_configs.get(camera) + + if not config: + return + + if update_type == CameraConfigUpdateEnum.audio: + config.audio = updated_config + elif update_type == CameraConfigUpdateEnum.audio_transcription: + config.audio_transcription = updated_config + elif update_type == CameraConfigUpdateEnum.birdseye: + config.birdseye = updated_config + elif update_type == CameraConfigUpdateEnum.detect: + config.detect = updated_config + elif update_type == CameraConfigUpdateEnum.enabled: + config.enabled = updated_config + elif update_type == CameraConfigUpdateEnum.object_genai: + config.objects.genai = updated_config + elif update_type == CameraConfigUpdateEnum.motion: + config.motion = updated_config + elif update_type == CameraConfigUpdateEnum.notifications: + config.notifications = updated_config + elif update_type == CameraConfigUpdateEnum.objects: + config.objects = updated_config + elif update_type == CameraConfigUpdateEnum.record: + config.record = updated_config + elif update_type == CameraConfigUpdateEnum.review: + config.review = updated_config 
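+ # like object_genai above, review_genai swaps in only the nested genai block, leaving the rest of the camera's review config untouched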
+ elif update_type == CameraConfigUpdateEnum.review_genai: + config.review.genai = updated_config + elif update_type == CameraConfigUpdateEnum.semantic_search: + config.semantic_search = updated_config + elif update_type == CameraConfigUpdateEnum.snapshots: + config.snapshots = updated_config + elif update_type == CameraConfigUpdateEnum.zones: + config.zones = updated_config + + def check_for_updates(self) -> dict[str, list[str]]: + updated_topics: dict[str, list[str]] = {} + + # get all updates available + while True: + update_topic, update_config = self.subscriber.check_for_update() + + if update_topic is None or update_config is None: + break + + _, _, camera, raw_type = update_topic.split("/") + update_type = CameraConfigUpdateEnum[raw_type] + + if update_type in self.topics: + if update_type.name in updated_topics: + updated_topics[update_type.name].append(camera) + else: + updated_topics[update_type.name] = [camera] + + self.__update_config(camera, update_type, update_config) + + return updated_topics + + def stop(self) -> None: + self.subscriber.stop() diff --git a/frigate/config/classification.py b/frigate/config/classification.py index 06e69a774..234113dd2 100644 --- a/frigate/config/classification.py +++ b/frigate/config/classification.py @@ -10,6 +10,7 @@ __all__ = [ "CameraLicensePlateRecognitionConfig", "FaceRecognitionConfig", "SemanticSearchConfig", + "CameraSemanticSearchConfig", "LicensePlateRecognitionConfig", ] @@ -19,11 +20,46 @@ class SemanticSearchModelEnum(str, Enum): jinav2 = "jinav2" -class LPRDeviceEnum(str, Enum): +class EnrichmentsDeviceEnum(str, Enum): GPU = "GPU" CPU = "CPU" +class TriggerType(str, Enum): + THUMBNAIL = "thumbnail" + DESCRIPTION = "description" + + +class TriggerAction(str, Enum): + NOTIFICATION = "notification" + + +class ObjectClassificationType(str, Enum): + sub_label = "sub_label" + attribute = "attribute" + + +class AudioTranscriptionConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable audio transcription.") + language: str = Field( + default="en", + title="Language abbreviation to use for audio event transcription/translation.", + ) + device: Optional[EnrichmentsDeviceEnum] = Field( + default=EnrichmentsDeviceEnum.CPU, + title="The device used for audio transcription.", + ) + model_size: str = Field( + default="small", title="The size of the audio transcription model used." + ) + enabled_in_config: Optional[bool] = Field( + default=None, title="Keep track of original state of camera." + ) + live_enabled: Optional[bool] = Field( + default=False, title="Enable live transcriptions." + ) + + class BirdClassificationConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable bird classification.") threshold: float = Field( @@ -34,10 +70,52 @@ class BirdClassificationConfig(FrigateBaseModel): ) +class CustomClassificationStateCameraConfig(FrigateBaseModel): + crop: list[int, int, int, int] = Field( + title="Crop of image frame on this camera to run classification on." + ) + + +class CustomClassificationStateConfig(FrigateBaseModel): + cameras: Dict[str, CustomClassificationStateCameraConfig] = Field( + title="Cameras to run classification on."
+ ) + motion: bool = Field( + default=False, + title="If classification should be run when motion is detected in the crop.", + ) + interval: int | None = Field( + default=None, + title="Interval to run classification on in seconds.", + gt=0, + ) + + +class CustomClassificationObjectConfig(FrigateBaseModel): + objects: list[str] = Field(title="Object types to classify.") + classification_type: ObjectClassificationType = Field( + default=ObjectClassificationType.sub_label, + title="Type of classification that is applied.", + ) + + +class CustomClassificationConfig(FrigateBaseModel): + enabled: bool = Field(default=True, title="Enable running the model.") + name: str | None = Field(default=None, title="Name of classification model.") + threshold: float = Field( + default=0.8, title="Classification score threshold to change the state." + ) + object_config: CustomClassificationObjectConfig | None = Field(default=None) + state_config: CustomClassificationStateConfig | None = Field(default=None) + + class ClassificationConfig(FrigateBaseModel): bird: BirdClassificationConfig = Field( default_factory=BirdClassificationConfig, title="Bird classification config." ) + custom: Dict[str, CustomClassificationConfig] = Field( + default={}, title="Custom Classification Model Configs." + ) class SemanticSearchConfig(FrigateBaseModel): @@ -54,6 +132,32 @@ class SemanticSearchConfig(FrigateBaseModel): ) +class TriggerConfig(FrigateBaseModel): + enabled: bool = Field(default=True, title="Enable this trigger") + type: TriggerType = Field(default=TriggerType.DESCRIPTION, title="Type of trigger") + data: str = Field(title="Trigger content (text phrase or image ID)") + threshold: float = Field( + title="Confidence score required to run the trigger", + default=0.8, + gt=0.0, + le=1.0, + ) + actions: List[TriggerAction] = Field( + default=[], title="Actions to perform when trigger is matched" + ) + + model_config = ConfigDict(extra="forbid", protected_namespaces=()) + + +class CameraSemanticSearchConfig(FrigateBaseModel): + triggers: Dict[str, TriggerConfig] = Field( + default={}, + title="Trigger actions on tracked objects that match existing thumbnails or descriptions", + ) + + model_config = ConfigDict(extra="forbid", protected_namespaces=()) + + class FaceRecognitionConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable face recognition.") model_size: str = Field( @@ -105,8 +209,8 @@ class CameraFaceRecognitionConfig(FrigateBaseModel): class LicensePlateRecognitionConfig(FrigateBaseModel): enabled: bool = Field(default=False, title="Enable license plate recognition.") - device: Optional[LPRDeviceEnum] = Field( - default=LPRDeviceEnum.CPU, + device: Optional[EnrichmentsDeviceEnum] = Field( + default=EnrichmentsDeviceEnum.CPU, title="The device used for license plate recognition.", ) model_size: str = Field( diff --git a/frigate/config/config.py b/frigate/config/config.py index 206a71072..95683b151 100644 --- a/frigate/config/config.py +++ b/frigate/config/config.py @@ -48,12 +48,13 @@ from .camera.genai import GenAIConfig from .camera.motion import MotionConfig from .camera.notification import NotificationConfig from .camera.objects import FilterConfig, ObjectConfig -from .camera.record import RecordConfig, RetainModeEnum +from .camera.record import RecordConfig from .camera.review import ReviewConfig from .camera.snapshots import SnapshotsConfig from .camera.timestamp import TimestampStyleConfig from .camera_group import CameraGroupConfig from .classification import ( + 
AudioTranscriptionConfig, ClassificationConfig, FaceRecognitionConfig, LicensePlateRecognitionConfig, @@ -204,33 +205,6 @@ def verify_valid_live_stream_names( ) -def verify_recording_retention(camera_config: CameraConfig) -> None: - """Verify that recording retention modes are ranked correctly.""" - rank_map = { - RetainModeEnum.all: 0, - RetainModeEnum.motion: 1, - RetainModeEnum.active_objects: 2, - } - - if ( - camera_config.record.retain.days != 0 - and rank_map[camera_config.record.retain.mode] - > rank_map[camera_config.record.alerts.retain.mode] - ): - logger.warning( - f"{camera_config.name}: Recording retention is configured for {camera_config.record.retain.mode} and alert retention is configured for {camera_config.record.alerts.retain.mode}. The more restrictive retention policy will be applied." - ) - - if ( - camera_config.record.retain.days != 0 - and rank_map[camera_config.record.retain.mode] - > rank_map[camera_config.record.detections.retain.mode] - ): - logger.warning( - f"{camera_config.name}: Recording retention is configured for {camera_config.record.retain.mode} and detection retention is configured for {camera_config.record.detections.retain.mode}. The more restrictive retention policy will be applied." - ) - - def verify_recording_segments_setup_with_reasonable_time( camera_config: CameraConfig, ) -> None: @@ -335,6 +309,9 @@ def verify_lpr_and_face( class FrigateConfig(FrigateBaseModel): version: Optional[str] = Field(default=None, title="Current config version.") + safe_mode: bool = Field( + default=False, title="If Frigate should be started in safe mode." + ) # Fields that install global state should be defined first, so that their validators run first. environment_vars: EnvVars = Field( @@ -382,6 +359,11 @@ class FrigateConfig(FrigateBaseModel): default_factory=ModelConfig, title="Detection model configuration." ) + # GenAI config + genai: GenAIConfig = Field( + default_factory=GenAIConfig, title="Generative AI configuration." + ) + # Camera config cameras: Dict[str, CameraConfig] = Field(title="Camera configuration.") audio: AudioConfig = Field( @@ -396,9 +378,6 @@ class FrigateConfig(FrigateBaseModel): ffmpeg: FfmpegConfig = Field( default_factory=FfmpegConfig, title="Global FFmpeg configuration." ) - genai: GenAIConfig = Field( - default_factory=GenAIConfig, title="Generative AI configuration." - ) live: CameraLiveConfig = Field( default_factory=CameraLiveConfig, title="Live playback settings." ) @@ -423,6 +402,9 @@ class FrigateConfig(FrigateBaseModel): ) # Classification Config + audio_transcription: AudioTranscriptionConfig = Field( + default_factory=AudioTranscriptionConfig, title="Audio transcription config." + ) classification: ClassificationConfig = Field( default_factory=ClassificationConfig, title="Object classification config." 
) @@ -476,6 +458,7 @@ class FrigateConfig(FrigateBaseModel): global_config = self.model_dump( include={ "audio": ..., + "audio_transcription": ..., "birdseye": ..., "face_recognition": ..., "lpr": ..., @@ -484,7 +467,6 @@ class FrigateConfig(FrigateBaseModel): "live": ..., "objects": ..., "review": ..., - "genai": ..., "motion": ..., "notifications": ..., "detect": ..., @@ -513,7 +495,9 @@ class FrigateConfig(FrigateBaseModel): model_config["path"] = detector_config.model_path if "path" not in model_config: - if detector_config.type == "cpu": + if detector_config.type == "cpu" or detector_config.type.endswith( + "_tfl" + ): model_config["path"] = "/cpu_model.tflite" elif detector_config.type == "edgetpu": model_config["path"] = "/edgetpu_model.tflite" @@ -532,6 +516,7 @@ class FrigateConfig(FrigateBaseModel): allowed_fields_map = { "face_recognition": ["enabled", "min_area"], "lpr": ["enabled", "expire_time", "min_area", "enhancement"], + "audio_transcription": ["enabled", "live_enabled"], } for section in allowed_fields_map: @@ -613,6 +598,9 @@ class FrigateConfig(FrigateBaseModel): # set config pre-value camera_config.enabled_in_config = camera_config.enabled camera_config.audio.enabled_in_config = camera_config.audio.enabled + camera_config.audio_transcription.enabled_in_config = ( + camera_config.audio_transcription.enabled + ) camera_config.record.enabled_in_config = camera_config.record.enabled camera_config.notifications.enabled_in_config = ( camera_config.notifications.enabled @@ -626,6 +614,12 @@ class FrigateConfig(FrigateBaseModel): camera_config.review.detections.enabled_in_config = ( camera_config.review.detections.enabled ) + camera_config.objects.genai.enabled_in_config = ( + camera_config.objects.genai.enabled + ) + camera_config.review.genai.enabled_in_config = ( + camera_config.review.genai.enabled + ) # Add default filters object_keys = camera_config.objects.track @@ -692,7 +686,6 @@ class FrigateConfig(FrigateBaseModel): verify_config_roles(camera_config) verify_valid_live_stream_names(self, camera_config) - verify_recording_retention(camera_config) verify_recording_segments_setup_with_reasonable_time(camera_config) verify_zone_objects_are_tracked(camera_config) verify_required_zones_exist(camera_config) @@ -701,10 +694,29 @@ class FrigateConfig(FrigateBaseModel): verify_objects_track(camera_config, labelmap_objects) verify_lpr_and_face(self, camera_config) + # set names on classification configs + for name, config in self.classification.custom.items(): + config.name = name + self.objects.parse_all_objects(self.cameras) self.model.create_colormap(sorted(self.objects.all_objects)) self.model.check_and_load_plus_model(self.plus_api) + # Check audio transcription and audio detection requirements + if self.audio_transcription.enabled: + # If audio transcription is enabled globally, at least one camera must have audio detection enabled + if not any(camera.audio.enabled for camera in self.cameras.values()): + raise ValueError( + "Audio transcription is enabled globally, but no cameras have audio detection enabled. At least one camera must have audio detection enabled." + ) + else: + # If audio transcription is disabled globally, check each camera with audio_transcription enabled + for camera in self.cameras.values(): + if camera.audio_transcription.enabled and not camera.audio.enabled: + raise ValueError( + f"Camera {camera.name} has audio transcription enabled, but audio detection is not enabled for this camera. 
Audio detection must be enabled for cameras with audio transcription when it is disabled globally." + ) + if self.plus_api and not self.snapshots.clean_copy: logger.warning( "Frigate+ is configured but clean snapshots are not enabled, submissions to Frigate+ will not be possible./" @@ -723,6 +735,7 @@ class FrigateConfig(FrigateBaseModel): @classmethod def load(cls, **kwargs): + """Loads the Frigate config file, runs migrations, and creates the config object.""" config_path = find_config_file() # No configuration file found, create one. @@ -750,7 +763,7 @@ class FrigateConfig(FrigateBaseModel): return FrigateConfig.parse(f, **kwargs) @classmethod - def parse(cls, config, *, is_json=None, **context): + def parse(cls, config, *, is_json=None, safe_load=False, **context): # If config is a file, read its contents. if hasattr(config, "read"): fname = getattr(config, "name", None) @@ -774,6 +787,15 @@ class FrigateConfig(FrigateBaseModel): else: config = yaml.load(config) + # load minimal Frigate config after the full config did not validate + if safe_load: + safe_config = {"safe_mode": True, "cameras": {}, "mqtt": {"enabled": False}} + + # copy over auth and proxy config in case auth needs to be enforced + safe_config["auth"] = config.get("auth", {}) + safe_config["proxy"] = config.get("proxy", {}) + return cls.parse_object(safe_config, **context) + # Validate and return the config dict. return cls.parse_object(config, **context) diff --git a/frigate/config/logger.py b/frigate/config/logger.py index e6e1c06d3..0ba3e6972 100644 --- a/frigate/config/logger.py +++ b/frigate/config/logger.py @@ -1,20 +1,11 @@ -import logging -from enum import Enum - from pydantic import Field, ValidationInfo, model_validator from typing_extensions import Self +from frigate.log import LogLevel, apply_log_levels + from .base import FrigateBaseModel -__all__ = ["LoggerConfig", "LogLevel"] - - -class LogLevel(str, Enum): - debug = "debug" - info = "info" - warning = "warning" - error = "error" - critical = "critical" +__all__ = ["LoggerConfig"] class LoggerConfig(FrigateBaseModel): @@ -26,16 +17,6 @@ class LoggerConfig(FrigateBaseModel): @model_validator(mode="after") def post_validation(self, info: ValidationInfo) -> Self: if isinstance(info.context, dict) and info.context.get("install", False): - logging.getLogger().setLevel(self.default.value.upper()) - - log_levels = { - "httpx": LogLevel.error, - "werkzeug": LogLevel.error, - "ws4py": LogLevel.error, - **self.logs, - } - - for log, level in log_levels.items(): - logging.getLogger(log).setLevel(level.value.upper()) + apply_log_levels(self.default.value.upper(), self.logs) return self diff --git a/frigate/config/mqtt.py b/frigate/config/mqtt.py index cedd53734..a760d0a1f 100644 --- a/frigate/config/mqtt.py +++ b/frigate/config/mqtt.py @@ -30,7 +30,7 @@ class MqttConfig(FrigateBaseModel): ) tls_client_key: Optional[str] = Field(default=None, title="MQTT TLS Client Key") tls_insecure: Optional[bool] = Field(default=None, title="MQTT TLS Insecure") - qos: Optional[int] = Field(default=0, title="MQTT QoS") + qos: int = Field(default=0, title="MQTT QoS") @model_validator(mode="after") def user_requires_pass(self, info: ValidationInfo) -> Self: diff --git a/frigate/const.py b/frigate/const.py index 699a194ac..67f2fd907 100644 --- a/frigate/const.py +++ b/frigate/const.py @@ -11,6 +11,7 @@ EXPORT_DIR = f"{BASE_DIR}/exports" FACE_DIR = f"{CLIPS_DIR}/faces" THUMB_DIR = f"{CLIPS_DIR}/thumbs" RECORD_DIR = f"{BASE_DIR}/recordings" +TRIGGER_DIR = f"{CLIPS_DIR}/triggers" 
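+# trigger snapshots are stored here; WebPushClient.send_trigger references them as clips/triggers/<camera>/<event_id>.webp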
BIRDSEYE_PIPE = "/tmp/cache/birdseye" CACHE_DIR = "/tmp/cache" FRIGATE_LOCALHOST = "http://127.0.0.1:5000" @@ -110,10 +111,18 @@ UPSERT_REVIEW_SEGMENT = "upsert_review_segment" CLEAR_ONGOING_REVIEW_SEGMENTS = "clear_ongoing_review_segments" UPDATE_CAMERA_ACTIVITY = "update_camera_activity" UPDATE_EVENT_DESCRIPTION = "update_event_description" +UPDATE_REVIEW_DESCRIPTION = "update_review_description" UPDATE_MODEL_STATE = "update_model_state" UPDATE_EMBEDDINGS_REINDEX_PROGRESS = "handle_embeddings_reindex_progress" +UPDATE_BIRDSEYE_LAYOUT = "update_birdseye_layout" NOTIFICATION_TEST = "notification_test" +# IO Nice Values + +PROCESS_PRIORITY_HIGH = 0 +PROCESS_PRIORITY_MED = 10 +PROCESS_PRIORITY_LOW = 19 + # Stats Values FREQUENCY_STATS_POINTS = 15 diff --git a/frigate/data_processing/common/audio_transcription/model.py b/frigate/data_processing/common/audio_transcription/model.py new file mode 100644 index 000000000..0fe5ddb5c --- /dev/null +++ b/frigate/data_processing/common/audio_transcription/model.py @@ -0,0 +1,81 @@ +"""Set up audio transcription models based on model size.""" + +import logging +import os + +import sherpa_onnx +from faster_whisper.utils import download_model + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.const import MODEL_CACHE_DIR +from frigate.data_processing.types import AudioTranscriptionModel +from frigate.util.downloader import ModelDownloader + +logger = logging.getLogger(__name__) + + +class AudioTranscriptionModelRunner: + def __init__( + self, + device: str = "CPU", + model_size: str = "small", + ): + self.model: AudioTranscriptionModel = None + self.requestor = InterProcessRequestor() + + if model_size == "large": + # use the Whisper download function instead of our own + logger.debug("Downloading Whisper audio transcription model") + download_model( + size_or_id="small" if device == "cuda" else "tiny", + local_files_only=False, + cache_dir=os.path.join(MODEL_CACHE_DIR, "whisper"), + ) + logger.debug("Whisper audio transcription model downloaded") + + else: + # small model as default + download_path = os.path.join(MODEL_CACHE_DIR, "sherpa-onnx") + HF_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co") + self.model_files = { + "encoder.onnx": f"{HF_ENDPOINT}/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/encoder-epoch-99-avg-1-chunk-16-left-128.onnx", + "decoder.onnx": f"{HF_ENDPOINT}/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/decoder-epoch-99-avg-1-chunk-16-left-128.onnx", + "joiner.onnx": f"{HF_ENDPOINT}/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/joiner-epoch-99-avg-1-chunk-16-left-128.onnx", + "tokens.txt": f"{HF_ENDPOINT}/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26/resolve/main/tokens.txt", + } + + if not all( + os.path.exists(os.path.join(download_path, n)) + for n in self.model_files.keys() + ): + self.downloader = ModelDownloader( + model_name="sherpa-onnx", + download_path=download_path, + file_names=self.model_files.keys(), + download_func=self.__download_models, + ) + self.downloader.ensure_model_files() + self.downloader.wait_for_download() + + self.model = sherpa_onnx.OnlineRecognizer.from_transducer( + tokens=os.path.join(MODEL_CACHE_DIR, "sherpa-onnx/tokens.txt"), + encoder=os.path.join(MODEL_CACHE_DIR, "sherpa-onnx/encoder.onnx"), + decoder=os.path.join(MODEL_CACHE_DIR, "sherpa-onnx/decoder.onnx"), + joiner=os.path.join(MODEL_CACHE_DIR, "sherpa-onnx/joiner.onnx"), + num_threads=2, + sample_rate=16000, + 
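+                # 16 kHz mono float32 input is assumed throughout the audio pipeline;
+                # the endpoint rules below decide when a live segment is finalized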
feature_dim=80, + enable_endpoint_detection=True, + rule1_min_trailing_silence=2.4, + rule2_min_trailing_silence=1.2, + rule3_min_utterance_length=300, + decoding_method="greedy_search", + provider="cpu", + ) + + def __download_models(self, path: str) -> None: + try: + file_name = os.path.basename(path) + ModelDownloader.download_from_url(self.model_files[file_name], path) + except Exception as e: + logger.error(f"Failed to download {path}: {e}") diff --git a/frigate/data_processing/common/face/model.py b/frigate/data_processing/common/face/model.py index aea6751a0..f230a1b2c 100644 --- a/frigate/data_processing/common/face/model.py +++ b/frigate/data_processing/common/face/model.py @@ -11,6 +11,7 @@ from scipy import stats from frigate.config import FrigateConfig from frigate.const import MODEL_CACHE_DIR from frigate.embeddings.onnx.face_embedding import ArcfaceEmbedding, FaceNetEmbedding +from frigate.log import redirect_output_to_logger logger = logging.getLogger(__name__) @@ -37,6 +38,7 @@ class FaceRecognizer(ABC): def classify(self, face_image: np.ndarray) -> tuple[str, float] | None: pass + @redirect_output_to_logger(logger, logging.DEBUG) def init_landmark_detector(self) -> None: landmark_model = os.path.join(MODEL_CACHE_DIR, "facedet/landmarkdet.yaml") diff --git a/frigate/data_processing/common/license_plate/mixin.py b/frigate/data_processing/common/license_plate/mixin.py index 2c68ce374..eae0f2045 100644 --- a/frigate/data_processing/common/license_plate/mixin.py +++ b/frigate/data_processing/common/license_plate/mixin.py @@ -22,7 +22,7 @@ from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, EventMetadataTypeEnum, ) -from frigate.const import CLIPS_DIR +from frigate.const import CLIPS_DIR, MODEL_CACHE_DIR from frigate.embeddings.onnx.lpr_embedding import LPR_EMBEDDING_SIZE from frigate.types import TrackedObjectUpdateTypesEnum from frigate.util.builtin import EventsPerSecond, InferenceSpeed @@ -43,7 +43,11 @@ class LicensePlateProcessingMixin: self.plates_det_second = EventsPerSecond() self.plates_det_second.start() self.event_metadata_publisher = EventMetadataPublisher() - self.ctc_decoder = CTCDecoder() + self.ctc_decoder = CTCDecoder( + character_dict_path=os.path.join( + MODEL_CACHE_DIR, "paddleocr-onnx", "ppocr_keys_v1.txt" + ) + ) self.batch_size = 6 # Detection specific parameters @@ -1168,7 +1172,6 @@ class LicensePlateProcessingMixin: event_id = f"{now}-{rand_id}" self.event_metadata_publisher.publish( - EventMetadataTypeEnum.lpr_event_create, ( now, camera, @@ -1179,6 +1182,7 @@ class LicensePlateProcessingMixin: None, plate, ), + EventMetadataTypeEnum.lpr_event_create.value, ) return event_id @@ -1522,7 +1526,7 @@ class LicensePlateProcessingMixin: # If it's a known plate, publish to sub_label if sub_label is not None: self.sub_label_publisher.publish( - EventMetadataTypeEnum.sub_label, (id, sub_label, avg_confidence) + (id, sub_label, avg_confidence), EventMetadataTypeEnum.sub_label.value ) # always publish to recognized_license_plate field @@ -1541,8 +1545,8 @@ class LicensePlateProcessingMixin: ), ) self.sub_label_publisher.publish( - EventMetadataTypeEnum.recognized_license_plate, - (id, top_plate, avg_confidence), + (id, "recognized_license_plate", top_plate, avg_confidence), + EventMetadataTypeEnum.attribute.value, ) # save the best snapshot for dedicated lpr cams not using frigate+ @@ -1556,8 +1560,8 @@ class LicensePlateProcessingMixin: frame_bgr = cv2.cvtColor(frame, cv2.COLOR_YUV2BGR_I420) _, encoded_img = cv2.imencode(".jpg", 
frame_bgr) self.sub_label_publisher.publish( - EventMetadataTypeEnum.save_lpr_snapshot, (base64.b64encode(encoded_img).decode("ASCII"), id, camera), + EventMetadataTypeEnum.save_lpr_snapshot.value, ) if id not in self.detected_license_plates: @@ -1595,113 +1599,121 @@ class CTCDecoder: for each decoded character sequence. """ - def __init__(self): + def __init__(self, character_dict_path=None): """ - Initialize the CTCDecoder with a list of characters and a character map. + Initializes the CTCDecoder. + :param character_dict_path: Path to the character dictionary file. + If None, a default (English-focused) list is used. + For Chinese models, this should point to the correct + character dictionary file provided with the model. + """ + self.characters = [] + if character_dict_path and os.path.exists(character_dict_path): + with open(character_dict_path, "r", encoding="utf-8") as f: + self.characters = ( + ["blank"] + [line.strip() for line in f if line.strip()] + [" "] + ) + else: + self.characters = [ + "blank", + "0", + "1", + "2", + "3", + "4", + "5", + "6", + "7", + "8", + "9", + ":", + ";", + "<", + "=", + ">", + "?", + "@", + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "H", + "I", + "J", + "K", + "L", + "M", + "N", + "O", + "P", + "Q", + "R", + "S", + "T", + "U", + "V", + "W", + "X", + "Y", + "Z", + "[", + "\\", + "]", + "^", + "_", + "`", + "a", + "b", + "c", + "d", + "e", + "f", + "g", + "h", + "i", + "j", + "k", + "l", + "m", + "n", + "o", + "p", + "q", + "r", + "s", + "t", + "u", + "v", + "w", + "x", + "y", + "z", + "{", + "|", + "}", + "~", + "!", + '"', + "#", + "$", + "%", + "&", + "'", + "(", + ")", + "*", + "+", + ",", + "-", + ".", + "/", + " ", + " ", + ] - The character set includes digits, letters, special characters, and a "blank" token - (used by the CTC model for decoding purposes). A character map is created to map - indices to characters. - """ - self.characters = [ - "blank", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - ":", - ";", - "<", - "=", - ">", - "?", - "@", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "[", - "\\", - "]", - "^", - "_", - "`", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "{", - "|", - "}", - "~", - "!", - '"', - "#", - "$", - "%", - "&", - "'", - "(", - ")", - "*", - "+", - ",", - "-", - ".", - "/", - " ", - " ", - ] self.char_map = {i: char for i, char in enumerate(self.characters)} def __call__( @@ -1735,7 +1747,7 @@ class CTCDecoder: merged_path.append(char_index) merged_probs.append(seq_log_probs[t, char_index]) - result = "".join(self.char_map[idx] for idx in merged_path) + result = "".join(self.char_map.get(idx, "") for idx in merged_path) results.append(result) confidence = np.exp(merged_probs).tolist() diff --git a/frigate/data_processing/post/api.py b/frigate/data_processing/post/api.py index cd6dda128..c341bd8ef 100644 --- a/frigate/data_processing/post/api.py +++ b/frigate/data_processing/post/api.py @@ -39,7 +39,9 @@ class PostProcessorApi(ABC): pass @abstractmethod - def handle_request(self, request_data: dict[str, Any]) -> dict[str, Any] | None: + def handle_request( + self, topic: str, request_data: dict[str, Any] + ) -> dict[str, Any] | None: """Handle metadata requests. 
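+        The topic argument identifies which type of request is being handled.
+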
Args: request_data (dict): containing data about requested change to process. diff --git a/frigate/data_processing/post/audio_transcription.py b/frigate/data_processing/post/audio_transcription.py new file mode 100644 index 000000000..146b4e0f1 --- /dev/null +++ b/frigate/data_processing/post/audio_transcription.py @@ -0,0 +1,212 @@ +"""Handle post-processing for audio transcription.""" + +import logging +import os +import threading +import time +from typing import Optional + +from faster_whisper import WhisperModel +from peewee import DoesNotExist + +from frigate.comms.embeddings_updater import EmbeddingsRequestEnum +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import FrigateConfig +from frigate.const import ( + CACHE_DIR, + MODEL_CACHE_DIR, + UPDATE_EVENT_DESCRIPTION, +) +from frigate.data_processing.types import PostProcessDataEnum +from frigate.types import TrackedObjectUpdateTypesEnum +from frigate.util.audio import get_audio_from_recording + +from ..types import DataProcessorMetrics +from .api import PostProcessorApi + +logger = logging.getLogger(__name__) + + +class AudioTranscriptionPostProcessor(PostProcessorApi): + def __init__( + self, + config: FrigateConfig, + requestor: InterProcessRequestor, + metrics: DataProcessorMetrics, + ): + super().__init__(config, metrics, None) + self.config = config + self.requestor = requestor + self.recognizer = None + self.transcription_lock = threading.Lock() + self.transcription_thread = None + self.transcription_running = False + + # faster-whisper handles model downloading automatically + self.model_path = os.path.join(MODEL_CACHE_DIR, "whisper") + os.makedirs(self.model_path, exist_ok=True) + + self.__build_recognizer() + + def __build_recognizer(self) -> None: + try: + self.recognizer = WhisperModel( + model_size_or_path="small", + device="cuda" + if self.config.audio_transcription.device == "GPU" + else "cpu", + download_root=self.model_path, + local_files_only=False, # Allow downloading if not cached + compute_type="int8", + ) + logger.debug("Audio transcription (recordings) initialized") + except Exception as e: + logger.error(f"Failed to initialize recordings audio transcription: {e}") + self.recognizer = None + + def process_data( + self, data: dict[str, any], data_type: PostProcessDataEnum + ) -> None: + """Transcribe audio from a recording. + + Args: + data (dict): Contains data about the input (event_id, camera, etc.). + data_type (enum): Describes the data being processed (recording or tracked_object). 
+
+        Returns:
+            None
+        """
+        event_id = data["event_id"]
+        camera_name = data["camera"]
+
+        if data_type == PostProcessDataEnum.recording:
+            start_ts = data["frame_time"]
+            recordings_available_through = data["recordings_available"]
+            end_ts = min(recordings_available_through, start_ts + 60)  # Default 60s
+
+        elif data_type == PostProcessDataEnum.tracked_object:
+            obj_data = data["event"]["data"]
+            obj_data["id"] = data["event"]["id"]
+            obj_data["camera"] = data["event"]["camera"]
+            start_ts = data["event"]["start_time"]
+            end_ts = data["event"].get(
+                "end_time", start_ts + 60
+            )  # Use end_time if available
+
+        else:
+            logger.error("No data type passed to audio transcription post-processing")
+            return
+
+        try:
+            audio_data = get_audio_from_recording(
+                self.config.cameras[camera_name].ffmpeg,
+                camera_name,
+                start_ts,
+                end_ts,
+                sample_rate=16000,
+            )
+
+            if not audio_data:
+                logger.debug(f"No audio data extracted for {event_id}")
+                return
+
+            transcription = self.__transcribe_audio(audio_data)
+            if not transcription:
+                logger.debug("No transcription generated from audio")
+                return
+
+            logger.debug(f"Transcribed audio for {event_id}: '{transcription}'")
+
+            self.requestor.send_data(
+                UPDATE_EVENT_DESCRIPTION,
+                {
+                    "type": TrackedObjectUpdateTypesEnum.description,
+                    "id": event_id,
+                    "description": transcription,
+                    "camera": camera_name,
+                },
+            )
+
+            # Embed the description
+            self.requestor.send_data(
+                EmbeddingsRequestEnum.embed_description.value,
+                {"id": event_id, "description": transcription},
+            )
+
+        except DoesNotExist:
+            logger.debug("No recording found for audio transcription post-processing")
+            return
+        except Exception as e:
+            logger.error(f"Error in audio transcription post-processing: {e}")
+
+    def __transcribe_audio(self, audio_data: bytes) -> Optional[str]:
+        """Transcribe WAV audio data using faster-whisper."""
+        if not self.recognizer:
+            logger.debug("Recognizer not initialized")
+            return None
+
+        try:
+            # Save audio data to a temporary wav (faster-whisper expects a file)
+            temp_wav = os.path.join(CACHE_DIR, f"temp_audio_{int(time.time())}.wav")
+            with open(temp_wav, "wb") as f:
+                f.write(audio_data)
+
+            segments, info = self.recognizer.transcribe(
+                temp_wav,
+                language=self.config.audio_transcription.language,
+                beam_size=5,
+            )
+
+            os.remove(temp_wav)
+
+            # Combine all segment texts
+            text = " ".join(segment.text.strip() for segment in segments)
+            if not text:
+                return None
+
+            logger.debug(
+                "Detected language '%s' with probability %f"
+                % (info.language, info.language_probability)
+            )
+
+            return text
+        except Exception as e:
+            logger.error(f"Error transcribing audio: {e}")
+            return None
+
+    def _transcription_wrapper(self, event: dict[str, any]) -> None:
+        """Wrapper to run transcription and reset running flag when done."""
+        try:
+            self.process_data(
+                {
+                    "event_id": event["id"],
+                    "camera": event["camera"],
+                    "event": event,
+                },
+                PostProcessDataEnum.tracked_object,
+            )
+        finally:
+            with self.transcription_lock:
+                self.transcription_running = False
+                self.transcription_thread = None
+
+    def handle_request(self, topic: str, request_data: dict[str, any]) -> str | None:
+        if topic == "transcribe_audio":
+            event = request_data["event"]
+
+            with self.transcription_lock:
+                if self.transcription_running:
+                    logger.warning(
+                        "Audio transcription for a speech event is already running."
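+                        # only one recordings transcription runs at a time; callers
+                        # get "in_progress" back instead of queueing a second thread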
+ ) + return "in_progress" + + # Mark as running and start the thread + self.transcription_running = True + self.transcription_thread = threading.Thread( + target=self._transcription_wrapper, args=(event,), daemon=True + ) + self.transcription_thread.start() + return "started" + + return None diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py new file mode 100644 index 000000000..6293241b8 --- /dev/null +++ b/frigate/data_processing/post/review_descriptions.py @@ -0,0 +1,266 @@ +"""Post processor for review items to get descriptions.""" + +import copy +import datetime +import logging +import os +import shutil +import threading +from pathlib import Path +from typing import Any + +import cv2 + +from frigate.comms.embeddings_updater import EmbeddingsRequestEnum +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import FrigateConfig +from frigate.config.camera.review import GenAIReviewConfig +from frigate.const import CACHE_DIR, CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION +from frigate.data_processing.types import PostProcessDataEnum +from frigate.genai import GenAIClient +from frigate.models import ReviewSegment +from frigate.util.builtin import EventsPerSecond, InferenceSpeed + +from ..post.api import PostProcessorApi +from ..types import DataProcessorMetrics + +logger = logging.getLogger(__name__) + + +class ReviewDescriptionProcessor(PostProcessorApi): + def __init__( + self, + config: FrigateConfig, + requestor: InterProcessRequestor, + metrics: DataProcessorMetrics, + client: GenAIClient, + ): + super().__init__(config, metrics, None) + self.requestor = requestor + self.metrics = metrics + self.genai_client = client + self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed) + self.review_descs_dps = EventsPerSecond() + self.review_descs_dps.start() + + def process_data(self, data, data_type): + self.metrics.review_desc_dps.value = self.review_descs_dps.eps() + + if data_type != PostProcessDataEnum.review: + return + + camera = data["after"]["camera"] + camera_config = self.config.cameras[camera] + + if not camera_config.review.genai.enabled: + return + + id = data["after"]["id"] + + if data["type"] == "new" or data["type"] == "update": + return + else: + final_data = data["after"] + + if ( + final_data["severity"] == "alert" + and not camera_config.review.genai.alerts + ): + return + elif ( + final_data["severity"] == "detection" + and not camera_config.review.genai.detections + ): + return + + frames = self.get_cache_frames( + camera, final_data["start_time"], final_data["end_time"] + ) + + if not frames: + frames = [final_data["thumb_path"]] + + thumbs = [] + + for idx, thumb_path in enumerate(frames): + thumb_data = cv2.imread(thumb_path) + ret, jpg = cv2.imencode( + ".jpg", thumb_data, [int(cv2.IMWRITE_JPEG_QUALITY), 100] + ) + + if ret: + thumbs.append(jpg.tobytes()) + + if camera_config.review.genai.debug_save_thumbnails: + id = data["after"]["id"] + Path(os.path.join(CLIPS_DIR, f"genai-requests/{id}")).mkdir( + parents=True, exist_ok=True + ) + shutil.copy( + thumb_path, + os.path.join( + CLIPS_DIR, + f"genai-requests/{id}/{idx}.webp", + ), + ) + + # kickoff analysis + self.review_descs_dps.update() + threading.Thread( + target=run_analysis, + args=( + self.requestor, + self.genai_client, + self.review_desc_speed, + camera, + final_data, + thumbs, + camera_config.review.genai, + list(self.config.model.merged_labelmap.values()), + ), + ).start() + + def handle_request(self, 
topic, request_data): + if topic == EmbeddingsRequestEnum.summarize_review.value: + start_ts = request_data["start_ts"] + end_ts = request_data["end_ts"] + items: list[dict[str, Any]] = [ + r["data"]["metadata"] + for r in ( + ReviewSegment.select(ReviewSegment.data) + .where( + (ReviewSegment.data["metadata"].is_null(False)) + & (ReviewSegment.start_time < end_ts) + & (ReviewSegment.end_time > start_ts) + ) + .order_by(ReviewSegment.start_time.asc()) + .dicts() + .iterator() + ) + ] + + if len(items) == 0: + logger.debug("No review items with metadata found during time period") + return None + + important_items = list( + filter( + lambda item: item.get("potential_threat_level", 0) > 0 + or item.get("other_concerns"), + items, + ) + ) + + if not important_items: + return "No concerns were found during this time period." + + return self.genai_client.generate_review_summary( + start_ts, end_ts, important_items + ) + else: + return None + + def get_cache_frames( + self, + camera: str, + start_time: float, + end_time: float, + desired_frame_count: int = 12, + ) -> list[str]: + preview_dir = os.path.join(CACHE_DIR, "preview_frames") + file_start = f"preview_{camera}" + start_file = f"{file_start}-{start_time}.webp" + end_file = f"{file_start}-{end_time}.webp" + all_frames = [] + + for file in sorted(os.listdir(preview_dir)): + if not file.startswith(file_start): + continue + + if file < start_file: + if len(all_frames): + all_frames[0] = os.path.join(preview_dir, file) + else: + all_frames.append(os.path.join(preview_dir, file)) + + continue + + if file > end_file: + all_frames.append(os.path.join(preview_dir, file)) + break + + all_frames.append(os.path.join(preview_dir, file)) + + frame_count = len(all_frames) + if frame_count <= desired_frame_count: + return all_frames + + selected_frames = [] + step_size = (frame_count - 1) / (desired_frame_count - 1) + + for i in range(desired_frame_count): + index = round(i * step_size) + selected_frames.append(all_frames[index]) + + return selected_frames + + +@staticmethod +def run_analysis( + requestor: InterProcessRequestor, + genai_client: GenAIClient, + review_inference_speed: InferenceSpeed, + camera: str, + final_data: dict[str, str], + thumbs: list[bytes], + genai_config: GenAIReviewConfig, + labelmap_objects: list[str], +) -> None: + start = datetime.datetime.now().timestamp() + analytics_data = { + "id": final_data["id"], + "camera": camera, + "zones": final_data["data"]["zones"], + "start": datetime.datetime.fromtimestamp(final_data["start_time"]).strftime( + "%A, %I:%M %p" + ), + "duration": final_data["end_time"] - final_data["start_time"], + } + + objects = [] + verified_objects = [] + + for label in set(final_data["data"]["objects"] + final_data["data"]["sub_labels"]): + if "-verified" in label: + continue + + if label in labelmap_objects: + objects.append(label.replace("_", " ").title()) + else: + verified_objects.append(label.replace("_", " ").title()) + + analytics_data["objects"] = objects + analytics_data["recognized_objects"] = verified_objects + + metadata = genai_client.generate_review_description( + analytics_data, + thumbs, + genai_config.additional_concerns, + genai_config.preferred_language, + genai_config.debug_save_thumbnails, + ) + review_inference_speed.update(datetime.datetime.now().timestamp() - start) + + if not metadata: + return None + + prev_data = copy.deepcopy(final_data) + final_data["data"]["metadata"] = metadata.model_dump() + requestor.send_data( + UPDATE_REVIEW_DESCRIPTION, + { + "type": "genai", + "before": 
{k: v for k, v in prev_data.items()}, + "after": {k: v for k, v in final_data.items()}, + }, + ) diff --git a/frigate/data_processing/post/semantic_trigger.py b/frigate/data_processing/post/semantic_trigger.py new file mode 100644 index 000000000..baa47ba1c --- /dev/null +++ b/frigate/data_processing/post/semantic_trigger.py @@ -0,0 +1,233 @@ +"""Post time processor to trigger actions based on similar embeddings.""" + +import datetime +import json +import logging +import os +from typing import Any + +import cv2 +import numpy as np +from peewee import DoesNotExist + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import FrigateConfig +from frigate.const import CONFIG_DIR +from frigate.data_processing.types import PostProcessDataEnum +from frigate.db.sqlitevecq import SqliteVecQueueDatabase +from frigate.embeddings.util import ZScoreNormalization +from frigate.models import Event, Trigger +from frigate.util.builtin import cosine_distance +from frigate.util.path import get_event_thumbnail_bytes + +from ..post.api import PostProcessorApi +from ..types import DataProcessorMetrics + +logger = logging.getLogger(__name__) + +WRITE_DEBUG_IMAGES = False + + +class SemanticTriggerProcessor(PostProcessorApi): + def __init__( + self, + db: SqliteVecQueueDatabase, + config: FrigateConfig, + requestor: InterProcessRequestor, + metrics: DataProcessorMetrics, + embeddings, + ): + super().__init__(config, metrics, None) + self.db = db + self.embeddings = embeddings + self.requestor = requestor + self.trigger_embeddings: list[np.ndarray] = [] + + self.thumb_stats = ZScoreNormalization() + self.desc_stats = ZScoreNormalization() + + # load stats from disk + try: + with open(os.path.join(CONFIG_DIR, ".search_stats.json"), "r") as f: + data = json.loads(f.read()) + self.thumb_stats.from_dict(data["thumb_stats"]) + self.desc_stats.from_dict(data["desc_stats"]) + except FileNotFoundError: + pass + + def process_data( + self, data: dict[str, Any], data_type: PostProcessDataEnum + ) -> None: + event_id = data["event_id"] + camera = data["camera"] + process_type = data["type"] + + if self.config.cameras[camera].semantic_search.triggers is None: + return + + triggers = ( + Trigger.select( + Trigger.camera, + Trigger.name, + Trigger.data, + Trigger.type, + Trigger.embedding, + Trigger.threshold, + ) + .where(Trigger.camera == camera) + .dicts() + .iterator() + ) + + for trigger in triggers: + if ( + trigger["name"] + not in self.config.cameras[camera].semantic_search.triggers + or not self.config.cameras[camera] + .semantic_search.triggers[trigger["name"]] + .enabled + ): + logger.debug( + f"Trigger {trigger['name']} is disabled for camera {camera}" + ) + continue + + logger.debug( + f"Processing {trigger['type']} trigger for {event_id} on {trigger['camera']}: {trigger['name']}" + ) + + trigger_embedding = np.frombuffer(trigger["embedding"], dtype=np.float32) + + # Get embeddings based on type + thumbnail_embedding = None + description_embedding = None + + if process_type == "image": + cursor = self.db.execute_sql( + """ + SELECT thumbnail_embedding FROM vec_thumbnails WHERE id = ? + """, + [event_id], + ) + row = cursor.fetchone() if cursor else None + if row: + thumbnail_embedding = np.frombuffer(row[0], dtype=np.float32) + + if process_type == "text": + cursor = self.db.execute_sql( + """ + SELECT description_embedding FROM vec_descriptions WHERE id = ? 
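+                    -- vec_descriptions stores one description embedding per event id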
+ """, + [event_id], + ) + row = cursor.fetchone() if cursor else None + if row: + description_embedding = np.frombuffer(row[0], dtype=np.float32) + + # Skip processing if we don't have any embeddings + if thumbnail_embedding is None and description_embedding is None: + logger.debug(f"No embeddings found for {event_id}") + return + + # Determine which embedding to compare based on trigger type + if ( + trigger["type"] in ["text", "thumbnail"] + and thumbnail_embedding is not None + ): + data_embedding = thumbnail_embedding + normalized_distance = self.thumb_stats.normalize( + [cosine_distance(data_embedding, trigger_embedding)], + save_stats=False, + )[0] + elif trigger["type"] == "description" and description_embedding is not None: + data_embedding = description_embedding + normalized_distance = self.desc_stats.normalize( + [cosine_distance(data_embedding, trigger_embedding)], + save_stats=False, + )[0] + + else: + continue + + similarity = 1 - normalized_distance + + logger.debug( + f"Trigger {trigger['name']} ({trigger['data'] if trigger['type'] == 'text' or trigger['type'] == 'description' else 'image'}): " + f"normalized distance: {normalized_distance:.4f}, " + f"similarity: {similarity:.4f}, threshold: {trigger['threshold']}" + ) + + # Check if similarity meets threshold + if similarity >= trigger["threshold"]: + logger.info( + f"Trigger {trigger['name']} activated with similarity {similarity:.4f}" + ) + + # Update the trigger's last_triggered and triggering_event_id + Trigger.update( + last_triggered=datetime.datetime.now(), triggering_event_id=event_id + ).where( + Trigger.camera == camera, Trigger.name == trigger["name"] + ).execute() + + # Always publish MQTT message + self.requestor.send_data( + "triggers", + json.dumps( + { + "name": trigger["name"], + "camera": camera, + "event_id": event_id, + "type": trigger["type"], + "score": similarity, + } + ), + ) + + if ( + self.config.cameras[camera] + .semantic_search.triggers[trigger["name"]] + .actions + ): + # TODO: handle actions for the trigger + # notifications already handled by webpush + pass + + if WRITE_DEBUG_IMAGES: + try: + event: Event = Event.get(Event.id == event_id) + except DoesNotExist: + return + + # Skip the event if not an object + if event.data.get("type") != "object": + return + + thumbnail_bytes = get_event_thumbnail_bytes(event) + + nparr = np.frombuffer(thumbnail_bytes, np.uint8) + thumbnail = cv2.imdecode(nparr, cv2.IMREAD_COLOR) + + font_scale = 0.5 + font = cv2.FONT_HERSHEY_SIMPLEX + cv2.putText( + thumbnail, + f"{similarity:.4f}", + (10, 30), + font, + fontScale=font_scale, + color=(0, 255, 0), + thickness=2, + ) + + current_time = int(datetime.datetime.now().timestamp()) + cv2.imwrite( + f"debug/frames/trigger-{event_id}_{current_time}.jpg", + thumbnail, + ) + + def handle_request(self, topic, request_data): + return None + + def expire_object(self, object_id, camera): + pass diff --git a/frigate/data_processing/post/types.py b/frigate/data_processing/post/types.py new file mode 100644 index 000000000..9810d1947 --- /dev/null +++ b/frigate/data_processing/post/types.py @@ -0,0 +1,21 @@ +from pydantic import BaseModel, ConfigDict, Field + + +class ReviewMetadata(BaseModel): + model_config = ConfigDict(extra="ignore", protected_namespaces=()) + + scene: str = Field( + description="A comprehensive description of the setting and entities, including relevant context and plausible inferences if supported by visual evidence." 
+ ) + confidence: float = Field( + description="A float between 0 and 1 representing your overall confidence in this analysis." + ) + potential_threat_level: int = Field( + ge=0, + le=3, + description="An integer representing the potential threat level (1-3). 1: Minor anomaly. 2: Moderate concern. 3: High threat. Only include this field if a clear security concern is observable; otherwise, omit it.", + ) + other_concerns: list[str] | None = Field( + default=None, + description="Other concerns highlighted by the user that are observed.", + ) diff --git a/frigate/data_processing/real_time/audio_transcription.py b/frigate/data_processing/real_time/audio_transcription.py new file mode 100644 index 000000000..2e6d599eb --- /dev/null +++ b/frigate/data_processing/real_time/audio_transcription.py @@ -0,0 +1,281 @@ +"""Handle processing audio for speech transcription using sherpa-onnx with FFmpeg pipe.""" + +import logging +import os +import queue +import threading +from typing import Optional + +import numpy as np + +from frigate.comms.inter_process import InterProcessRequestor +from frigate.config import CameraConfig, FrigateConfig +from frigate.const import MODEL_CACHE_DIR +from frigate.data_processing.common.audio_transcription.model import ( + AudioTranscriptionModelRunner, +) +from frigate.data_processing.real_time.whisper_online import ( + FasterWhisperASR, + OnlineASRProcessor, +) + +from ..types import DataProcessorMetrics +from .api import RealTimeProcessorApi + +logger = logging.getLogger(__name__) + + +class AudioTranscriptionRealTimeProcessor(RealTimeProcessorApi): + def __init__( + self, + config: FrigateConfig, + camera_config: CameraConfig, + requestor: InterProcessRequestor, + model_runner: AudioTranscriptionModelRunner, + metrics: DataProcessorMetrics, + stop_event: threading.Event, + ): + super().__init__(config, metrics) + self.config = config + self.camera_config = camera_config + self.requestor = requestor + self.stream = None + self.whisper_model = None + self.model_runner = model_runner + self.transcription_segments = [] + self.audio_queue = queue.Queue() + self.stop_event = stop_event + + def __build_recognizer(self) -> None: + try: + if self.config.audio_transcription.model_size == "large": + # Whisper models need to be per-process and can only run one stream at a time + # TODO: try parallel: https://github.com/SYSTRAN/faster-whisper/issues/100 + logger.debug(f"Loading Whisper model for {self.camera_config.name}") + self.whisper_model = FasterWhisperASR( + modelsize="tiny", + device="cuda" + if self.config.audio_transcription.device == "GPU" + else "cpu", + lan=self.config.audio_transcription.language, + model_dir=os.path.join(MODEL_CACHE_DIR, "whisper"), + ) + self.whisper_model.use_vad() + self.stream = OnlineASRProcessor( + asr=self.whisper_model, + ) + else: + logger.debug(f"Loading sherpa stream for {self.camera_config.name}") + self.stream = self.model_runner.model.create_stream() + logger.debug( + f"Audio transcription (live) initialized for {self.camera_config.name}" + ) + except Exception as e: + logger.error( + f"Failed to initialize live streaming audio transcription: {e}" + ) + + def __process_audio_stream( + self, audio_data: np.ndarray + ) -> Optional[tuple[str, bool]]: + if ( + self.model_runner.model is None + and self.config.audio_transcription.model_size == "small" + ): + logger.debug("Audio transcription (live) model not initialized") + return None + + if not self.stream: + self.__build_recognizer() + + try: + if audio_data.dtype != np.float32: + 
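+                # both backends expect float32 PCM in [-1, 1]; int16 samples from
+                # the audio pipe are converted here and rescaled just below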
audio_data = audio_data.astype(np.float32) + + if audio_data.max() > 1.0 or audio_data.min() < -1.0: + audio_data = audio_data / 32768.0 # Normalize from int16 + + rms = float(np.sqrt(np.mean(np.absolute(np.square(audio_data))))) + logger.debug(f"Audio chunk size: {audio_data.size}, RMS: {rms:.4f}") + + if self.config.audio_transcription.model_size == "large": + # large model + self.stream.insert_audio_chunk(audio_data) + output = self.stream.process_iter() + text = output[2].strip() + is_endpoint = ( + text.endswith((".", "!", "?")) + and sum(len(str(lines)) for lines in self.transcription_segments) + > 300 + ) + + if text: + self.transcription_segments.append(text) + concatenated_text = " ".join(self.transcription_segments) + logger.debug(f"Concatenated transcription: '{concatenated_text}'") + text = concatenated_text + + else: + # small model + self.stream.accept_waveform(16000, audio_data) + + while self.model_runner.model.is_ready(self.stream): + self.model_runner.model.decode_stream(self.stream) + + text = self.model_runner.model.get_result(self.stream).strip() + is_endpoint = self.model_runner.model.is_endpoint(self.stream) + + logger.debug(f"Transcription result: '{text}'") + + if not text: + logger.debug("No transcription, returning") + return None + + logger.debug(f"Endpoint detected: {is_endpoint}") + + if is_endpoint and self.config.audio_transcription.model_size == "small": + # reset sherpa if we've reached an endpoint + self.model_runner.model.reset(self.stream) + + return text, is_endpoint + except Exception as e: + logger.error(f"Error processing audio stream: {e}") + return None + + def process_frame(self, obj_data: dict[str, any], frame: np.ndarray) -> None: + pass + + def process_audio(self, obj_data: dict[str, any], audio: np.ndarray) -> bool | None: + if audio is None or audio.size == 0: + logger.debug("No audio data provided for transcription") + return None + + # enqueue audio data for processing in the thread + self.audio_queue.put((obj_data, audio)) + return None + + def run(self) -> None: + """Run method for the transcription thread to process queued audio data.""" + logger.debug( + f"Starting audio transcription thread for {self.camera_config.name}" + ) + + # start with an empty transcription + self.requestor.send_data( + f"{self.camera_config.name}/audio/transcription", + "", + ) + + while not self.stop_event.is_set(): + try: + # Get audio data from queue with a timeout to check stop_event + _, audio = self.audio_queue.get(timeout=0.1) + result = self.__process_audio_stream(audio) + + if not result: + continue + + text, is_endpoint = result + logger.debug(f"Transcribed audio: '{text}', Endpoint: {is_endpoint}") + + self.requestor.send_data( + f"{self.camera_config.name}/audio/transcription", text + ) + + self.audio_queue.task_done() + + if is_endpoint: + self.reset() + + except queue.Empty: + continue + except Exception as e: + logger.error(f"Error processing audio in thread: {e}") + self.audio_queue.task_done() + + logger.debug( + f"Stopping audio transcription thread for {self.camera_config.name}" + ) + + def clear_audio_queue(self) -> None: + # Clear the audio queue + while not self.audio_queue.empty(): + try: + self.audio_queue.get_nowait() + self.audio_queue.task_done() + except queue.Empty: + break + + def reset(self) -> None: + if self.config.audio_transcription.model_size == "large": + # get final output from whisper + output = self.stream.finish() + self.transcription_segments = [] + + self.requestor.send_data( + 
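+                # publish the final Whisper text before the stream is re-initialized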
f"{self.camera_config.name}/audio/transcription", + (output[2].strip() + " "), + ) + + # reset whisper + self.stream.init() + self.transcription_segments = [] + else: + # reset sherpa + self.model_runner.model.reset(self.stream) + + logger.debug("Stream reset") + + def check_unload_model(self) -> None: + # regularly called in the loop in audio maintainer + if ( + self.config.audio_transcription.model_size == "large" + and self.whisper_model is not None + ): + logger.debug(f"Unloading Whisper model for {self.camera_config.name}") + self.clear_audio_queue() + self.transcription_segments = [] + self.stream = None + self.whisper_model = None + + self.requestor.send_data( + f"{self.camera_config.name}/audio/transcription", + "", + ) + if ( + self.config.audio_transcription.model_size == "small" + and self.stream is not None + ): + logger.debug(f"Clearing sherpa stream for {self.camera_config.name}") + self.stream = None + + self.requestor.send_data( + f"{self.camera_config.name}/audio/transcription", + "", + ) + + def stop(self) -> None: + """Stop the transcription thread and clean up.""" + self.stop_event.set() + # Clear the queue to prevent processing stale data + while not self.audio_queue.empty(): + try: + self.audio_queue.get_nowait() + self.audio_queue.task_done() + except queue.Empty: + break + logger.debug( + f"Transcription thread stop signaled for {self.camera_config.name}" + ) + + def handle_request( + self, topic: str, request_data: dict[str, any] + ) -> dict[str, any] | None: + if topic == "clear_audio_recognizer": + self.stream = None + self.__build_recognizer() + return {"message": "Audio recognizer cleared and rebuilt", "success": True} + return None + + def expire_object(self, object_id: str) -> None: + pass diff --git a/frigate/data_processing/real_time/bird.py b/frigate/data_processing/real_time/bird.py index 8d2c598fc..ed2496b90 100644 --- a/frigate/data_processing/real_time/bird.py +++ b/frigate/data_processing/real_time/bird.py @@ -13,6 +13,7 @@ from frigate.comms.event_metadata_updater import ( ) from frigate.config import FrigateConfig from frigate.const import MODEL_CACHE_DIR +from frigate.log import redirect_output_to_logger from frigate.util.object import calculate_region from ..types import DataProcessorMetrics @@ -76,6 +77,7 @@ class BirdRealTimeProcessor(RealTimeProcessorApi): except Exception as e: logger.error(f"Failed to download {path}: {e}") + @redirect_output_to_logger(logger, logging.DEBUG) def __build_detector(self) -> None: self.interpreter = Interpreter( model_path=os.path.join(MODEL_CACHE_DIR, "bird/bird.tflite"), @@ -154,8 +156,8 @@ class BirdRealTimeProcessor(RealTimeProcessorApi): return self.sub_label_publisher.publish( - EventMetadataTypeEnum.sub_label, (obj_data["id"], self.labelmap[best_id], score), + EventMetadataTypeEnum.sub_label.value, ) self.detected_birds[obj_data["id"]] = score diff --git a/frigate/data_processing/real_time/custom_classification.py b/frigate/data_processing/real_time/custom_classification.py new file mode 100644 index 000000000..b62b29882 --- /dev/null +++ b/frigate/data_processing/real_time/custom_classification.py @@ -0,0 +1,352 @@ +"""Real time processor that works with classification tflite models.""" + +import datetime +import logging +import os +from typing import Any + +import cv2 +import numpy as np + +from frigate.comms.embeddings_updater import EmbeddingsRequestEnum +from frigate.comms.event_metadata_updater import ( + EventMetadataPublisher, + EventMetadataTypeEnum, +) +from frigate.comms.inter_process import 
InterProcessRequestor +from frigate.config import FrigateConfig +from frigate.config.classification import ( + CustomClassificationConfig, + ObjectClassificationType, +) +from frigate.const import CLIPS_DIR, MODEL_CACHE_DIR +from frigate.log import redirect_output_to_logger +from frigate.util.builtin import EventsPerSecond, InferenceSpeed, load_labels +from frigate.util.object import box_overlaps, calculate_region + +from ..types import DataProcessorMetrics +from .api import RealTimeProcessorApi + +try: + from tflite_runtime.interpreter import Interpreter +except ModuleNotFoundError: + from tensorflow.lite.python.interpreter import Interpreter + +logger = logging.getLogger(__name__) + + +class CustomStateClassificationProcessor(RealTimeProcessorApi): + def __init__( + self, + config: FrigateConfig, + model_config: CustomClassificationConfig, + requestor: InterProcessRequestor, + metrics: DataProcessorMetrics, + ): + super().__init__(config, metrics) + self.model_config = model_config + self.requestor = requestor + self.model_dir = os.path.join(MODEL_CACHE_DIR, self.model_config.name) + self.train_dir = os.path.join(CLIPS_DIR, self.model_config.name, "train") + self.interpreter: Interpreter = None + self.tensor_input_details: dict[str, Any] = None + self.tensor_output_details: dict[str, Any] = None + self.labelmap: dict[int, str] = {} + self.classifications_per_second = EventsPerSecond() + self.inference_speed = InferenceSpeed( + self.metrics.classification_speeds[self.model_config.name] + ) + self.last_run = datetime.datetime.now().timestamp() + self.__build_detector() + + @redirect_output_to_logger(logger, logging.DEBUG) + def __build_detector(self) -> None: + self.interpreter = Interpreter( + model_path=os.path.join(self.model_dir, "model.tflite"), + num_threads=2, + ) + self.interpreter.allocate_tensors() + self.tensor_input_details = self.interpreter.get_input_details() + self.tensor_output_details = self.interpreter.get_output_details() + self.labelmap = load_labels( + os.path.join(self.model_dir, "labelmap.txt"), + prefill=0, + ) + self.classifications_per_second.start() + + def __update_metrics(self, duration: float) -> None: + self.classifications_per_second.update() + self.inference_speed.update(duration) + + def process_frame(self, frame_data: dict[str, Any], frame: np.ndarray): + self.metrics.classification_cps[ + self.model_config.name + ].value = self.classifications_per_second.eps() + camera = frame_data.get("camera") + + if camera not in self.model_config.state_config.cameras: + return + + camera_config = self.model_config.state_config.cameras[camera] + crop = [ + camera_config.crop[0], + camera_config.crop[1], + camera_config.crop[2], + camera_config.crop[3], + ] + should_run = False + + now = datetime.datetime.now().timestamp() + if ( + self.model_config.state_config.interval + and now > self.last_run + self.model_config.state_config.interval + ): + self.last_run = now + should_run = True + + if ( + not should_run + and self.model_config.state_config.motion + and any([box_overlaps(crop, mb) for mb in frame_data.get("motion", [])]) + ): + # classification should run at most once per second + if now > self.last_run + 1: + self.last_run = now + should_run = True + + if not should_run: + return + + x, y, x2, y2 = calculate_region( + frame.shape, + crop[0], + crop[1], + crop[2], + crop[3], + 224, + 1.0, + ) + + rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) + frame = rgb[ + y:y2, + x:x2, + ] + + if frame.shape != (224, 224): + frame = cv2.resize(frame, (224, 224)) + + input 
= np.expand_dims(frame, axis=0) + self.interpreter.set_tensor(self.tensor_input_details[0]["index"], input) + self.interpreter.invoke() + res: np.ndarray = self.interpreter.get_tensor( + self.tensor_output_details[0]["index"] + )[0] + probs = res / res.sum(axis=0) + best_id = np.argmax(probs) + score = round(probs[best_id], 2) + self.__update_metrics(datetime.datetime.now().timestamp() - now) + + write_classification_attempt( + self.train_dir, + cv2.cvtColor(frame, cv2.COLOR_RGB2BGR), + now, + self.labelmap[best_id], + score, + ) + + if score >= self.model_config.threshold: + self.requestor.send_data( + f"{camera}/classification/{self.model_config.name}", + self.labelmap[best_id], + ) + + def handle_request(self, topic, request_data): + if topic == EmbeddingsRequestEnum.reload_classification_model.value: + if request_data.get("model_name") == self.model_config.name: + self.__build_detector() + logger.info( + f"Successfully loaded updated model for {self.model_config.name}" + ) + return { + "success": True, + "message": f"Loaded {self.model_config.name} model.", + } + else: + return None + else: + return None + + def expire_object(self, object_id, camera): + pass + + +class CustomObjectClassificationProcessor(RealTimeProcessorApi): + def __init__( + self, + config: FrigateConfig, + model_config: CustomClassificationConfig, + sub_label_publisher: EventMetadataPublisher, + metrics: DataProcessorMetrics, + ): + super().__init__(config, metrics) + self.model_config = model_config + self.model_dir = os.path.join(MODEL_CACHE_DIR, self.model_config.name) + self.train_dir = os.path.join(CLIPS_DIR, self.model_config.name, "train") + self.interpreter: Interpreter = None + self.sub_label_publisher = sub_label_publisher + self.tensor_input_details: dict[str, Any] = None + self.tensor_output_details: dict[str, Any] = None + self.detected_objects: dict[str, float] = {} + self.labelmap: dict[int, str] = {} + self.classifications_per_second = EventsPerSecond() + self.inference_speed = InferenceSpeed( + self.metrics.classification_speeds[self.model_config.name] + ) + self.__build_detector() + + @redirect_output_to_logger(logger, logging.DEBUG) + def __build_detector(self) -> None: + self.interpreter = Interpreter( + model_path=os.path.join(self.model_dir, "model.tflite"), + num_threads=2, + ) + self.interpreter.allocate_tensors() + self.tensor_input_details = self.interpreter.get_input_details() + self.tensor_output_details = self.interpreter.get_output_details() + self.labelmap = load_labels( + os.path.join(self.model_dir, "labelmap.txt"), + prefill=0, + ) + + def __update_metrics(self, duration: float) -> None: + self.classifications_per_second.update() + self.inference_speed.update(duration) + + def process_frame(self, obj_data, frame): + self.metrics.classification_cps[ + self.model_config.name + ].value = self.classifications_per_second.eps() + + if obj_data["false_positive"]: + return + + if obj_data["label"] not in self.model_config.object_config.objects: + return + + now = datetime.datetime.now().timestamp() + x, y, x2, y2 = calculate_region( + frame.shape, + obj_data["box"][0], + obj_data["box"][1], + obj_data["box"][2], + obj_data["box"][3], + max( + obj_data["box"][1] - obj_data["box"][0], + obj_data["box"][3] - obj_data["box"][2], + ), + 1.0, + ) + + rgb = cv2.cvtColor(frame, cv2.COLOR_YUV2RGB_I420) + crop = rgb[ + y:y2, + x:x2, + ] + + if crop.shape != (224, 224): + crop = cv2.resize(crop, (224, 224)) + + input = np.expand_dims(crop, axis=0) + 
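+        # NOTE: probs = res / res.sum(axis=0) below assumes the tflite head emits
+        # non-negative class scores (e.g. a trailing softmax layer); raw logits
+        # would need an explicit softmax first, e.g.:
+        #   e = np.exp(res - res.max()); probs = e / e.sum()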
self.interpreter.set_tensor(self.tensor_input_details[0]["index"], input) + self.interpreter.invoke() + res: np.ndarray = self.interpreter.get_tensor( + self.tensor_output_details[0]["index"] + )[0] + probs = res / res.sum(axis=0) + best_id = np.argmax(probs) + score = round(probs[best_id], 2) + previous_score = self.detected_objects.get(obj_data["id"], 0.0) + self.__update_metrics(datetime.datetime.now().timestamp() - now) + + write_classification_attempt( + self.train_dir, + cv2.cvtColor(crop, cv2.COLOR_RGB2BGR), + now, + self.labelmap[best_id], + score, + ) + + if score < self.model_config.threshold: + logger.debug(f"Score {score} is less than threshold.") + return + + if score <= previous_score: + logger.debug(f"Score {score} is worse than previous score {previous_score}") + return + + sub_label = self.labelmap[best_id] + self.detected_objects[obj_data["id"]] = score + + if ( + self.model_config.object_config.classification_type + == ObjectClassificationType.sub_label + ): + if sub_label != "none": + self.sub_label_publisher.publish( + (obj_data["id"], sub_label, score), + EventMetadataTypeEnum.sub_label, + ) + elif ( + self.model_config.object_config.classification_type + == ObjectClassificationType.attribute + ): + self.sub_label_publisher.publish( + (obj_data["id"], self.model_config.name, sub_label, score), + EventMetadataTypeEnum.attribute.value, + ) + + def handle_request(self, topic, request_data): + if topic == EmbeddingsRequestEnum.reload_classification_model.value: + if request_data.get("model_name") == self.model_config.name: + logger.info( + f"Successfully loaded updated model for {self.model_config.name}" + ) + return { + "success": True, + "message": f"Loaded {self.model_config.name} model.", + } + else: + return None + else: + return None + + def expire_object(self, object_id, camera): + if object_id in self.detected_objects: + self.detected_objects.pop(object_id) + + +@staticmethod +def write_classification_attempt( + folder: str, + frame: np.ndarray, + timestamp: float, + label: str, + score: float, +) -> None: + if "-" in label: + label = label.replace("-", "_") + + file = os.path.join(folder, f"{timestamp}-{label}-{score}.webp") + os.makedirs(folder, exist_ok=True) + cv2.imwrite(file, frame) + + files = sorted( + filter(lambda f: (f.endswith(".webp")), os.listdir(folder)), + key=lambda f: os.path.getctime(os.path.join(folder, f)), + reverse=True, + ) + + # delete oldest face image if maximum is reached + if len(files) > 100: + os.unlink(os.path.join(folder, files[-1])) diff --git a/frigate/data_processing/real_time/face.py b/frigate/data_processing/real_time/face.py index 144ec42d4..a9e94ac92 100644 --- a/frigate/data_processing/real_time/face.py +++ b/frigate/data_processing/real_time/face.py @@ -319,8 +319,8 @@ class FaceRealTimeProcessor(RealTimeProcessorApi): if weighted_score >= self.face_config.recognition_threshold: self.sub_label_publisher.publish( - EventMetadataTypeEnum.sub_label, (id, weighted_sub_label, weighted_score), + EventMetadataTypeEnum.sub_label.value, ) self.__update_metrics(datetime.datetime.now().timestamp() - start) diff --git a/frigate/data_processing/real_time/whisper_online.py b/frigate/data_processing/real_time/whisper_online.py new file mode 100644 index 000000000..9b81d7fbe --- /dev/null +++ b/frigate/data_processing/real_time/whisper_online.py @@ -0,0 +1,1158 @@ +# imported to Frigate from https://github.com/ufal/whisper_streaming +# with only minor modifications +import io +import logging +import math +import sys +import time +from 
functools import lru_cache
+
+import librosa
+import numpy as np
+import soundfile as sf
+
+logger = logging.getLogger(__name__)
+
+
+@lru_cache(10**6)
+def load_audio(fname):
+    a, _ = librosa.load(fname, sr=16000, dtype=np.float32)
+    return a
+
+
+def load_audio_chunk(fname, beg, end):
+    audio = load_audio(fname)
+    beg_s = int(beg * 16000)
+    end_s = int(end * 16000)
+    return audio[beg_s:end_s]
+
+
+# Whisper backend
+
+
+class ASRBase:
+    sep = ""  # join transcribe words with this character (" " for whisper_timestamped,
+    # "" for faster-whisper because it emits the spaces when needed)
+
+    def __init__(
+        self,
+        lan,
+        modelsize=None,
+        cache_dir=None,
+        model_dir=None,
+        logfile=sys.stderr,
+        device="cpu",
+    ):
+        self.logfile = logfile
+
+        self.transcribe_kargs = {}
+        if lan == "auto":
+            self.original_language = None
+        else:
+            self.original_language = lan
+
+        self.model = self.load_model(modelsize, cache_dir, model_dir, device)
+
+    def load_model(self, modelsize, cache_dir, model_dir=None, device="cpu"):
+        raise NotImplementedError("must be implemented in the child class")
+
+    def transcribe(self, audio, init_prompt=""):
+        raise NotImplementedError("must be implemented in the child class")
+
+    def use_vad(self):
+        raise NotImplementedError("must be implemented in the child class")
+
+
+class WhisperTimestampedASR(ASRBase):
+    """Uses whisper_timestamped library as the backend. Initially, we tested the code on this backend. It worked, but slower than faster-whisper.
+    On the other hand, the installation for GPU could be easier.
+    """
+
+    sep = " "
+
+    def load_model(self, modelsize=None, cache_dir=None, model_dir=None, device="cpu"):
+        import whisper
+        from whisper_timestamped import transcribe_timestamped
+
+        self.transcribe_timestamped = transcribe_timestamped
+        if model_dir is not None:
+            logger.debug("ignoring model_dir, not implemented")
+        return whisper.load_model(modelsize, download_root=cache_dir)
+
+    def transcribe(self, audio, init_prompt=""):
+        result = self.transcribe_timestamped(
+            self.model,
+            audio,
+            language=self.original_language,
+            initial_prompt=init_prompt,
+            verbose=None,
+            condition_on_previous_text=True,
+            **self.transcribe_kargs,
+        )
+        return result
+
+    def ts_words(self, r):
+        # return: transcribe result object to [(beg,end,"word1"), ...]
+        o = []
+        for s in r["segments"]:
+            for w in s["words"]:
+                t = (w["start"], w["end"], w["text"])
+                o.append(t)
+        return o
+
+    def segments_end_ts(self, res):
+        return [s["end"] for s in res["segments"]]
+
+    def use_vad(self):
+        self.transcribe_kargs["vad"] = True
+
+    def set_translate_task(self):
+        self.transcribe_kargs["task"] = "translate"
+
+
+class FasterWhisperASR(ASRBase):
+    """Uses faster-whisper library as the backend. Works much faster, appx 4-times (in offline mode).
For GPU, it requires installation with a specific CUDNN version.""" + + sep = "" + + def load_model(self, modelsize=None, cache_dir=None, model_dir=None, device="cpu"): + from faster_whisper import WhisperModel + + logging.getLogger("faster_whisper").setLevel(logging.WARNING) + + # this worked fast and reliably on NVIDIA L40 + model = WhisperModel( + model_size_or_path="small" if device == "cuda" else "tiny", + device=device, + compute_type="float16" if device == "cuda" else "int8", + local_files_only=False, + download_root=model_dir, + ) + + # or run on GPU with INT8 + # tested: the transcripts were different, probably worse than with FP16, and it was slightly (appx 20%) slower + # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16") + + # or run on CPU with INT8 + # tested: works, but slow, appx 10-times than cuda FP16 + # model = WhisperModel(modelsize, device="cpu", compute_type="int8") #, download_root="faster-disk-cache-dir/") + return model + + def transcribe(self, audio, init_prompt=""): + from faster_whisper import BatchedInferencePipeline + + # tested: beam_size=5 is faster and better than 1 (on one 200 second document from En ESIC, min chunk 0.01) + batched_model = BatchedInferencePipeline(model=self.model) + segments, info = batched_model.transcribe( + audio, + language=self.original_language, + initial_prompt=init_prompt, + beam_size=5, + word_timestamps=True, + condition_on_previous_text=True, + **self.transcribe_kargs, + ) + # print(info) # info contains language detection result + + return list(segments) + + def ts_words(self, segments): + o = [] + for segment in segments: + for word in segment.words: + if segment.no_speech_prob > 0.9: + continue + # not stripping the spaces -- should not be merged with them! + w = word.word + t = (word.start, word.end, w) + o.append(t) + return o + + def segments_end_ts(self, res): + return [s.end for s in res] + + def use_vad(self): + self.transcribe_kargs["vad_filter"] = True + + def set_translate_task(self): + self.transcribe_kargs["task"] = "translate" + + +class MLXWhisper(ASRBase): + """ + Uses MLX Whisper library as the backend, optimized for Apple Silicon. + Models available: https://huggingface.co/collections/mlx-community/whisper-663256f9964fbb1177db93dc + Significantly faster than faster-whisper (without CUDA) on Apple M1. + """ + + sep = " " + + def load_model(self, modelsize=None, cache_dir=None, model_dir=None): + """ + Loads the MLX-compatible Whisper model. + + Args: + modelsize (str, optional): The size or name of the Whisper model to load. + If provided, it will be translated to an MLX-compatible model path using the `translate_model_name` method. + Example: "large-v3-turbo" -> "mlx-community/whisper-large-v3-turbo". + cache_dir (str, optional): Path to the directory for caching models. + **Note**: This is not supported by MLX Whisper and will be ignored. + model_dir (str, optional): Direct path to a custom model directory. + If specified, it overrides the `modelsize` parameter. + """ + import mlx.core as mx # Is installed with mlx-whisper + from mlx_whisper.transcribe import ModelHolder, transcribe + + if model_dir is not None: + logger.debug( + f"Loading whisper model from model_dir {model_dir}. modelsize parameter is not used." + ) + model_size_or_path = model_dir + elif modelsize is not None: + model_size_or_path = self.translate_model_name(modelsize) + logger.debug( + f"Loading whisper model {modelsize}. You use mlx whisper, so {model_size_or_path} will be used." 
+            )
+        else:
+            raise ValueError("Either modelsize or model_dir must be provided.")
+
+        self.model_size_or_path = model_size_or_path
+
+        # Note: ModelHolder.get_model loads the model into a static class variable,
+        # making it a global resource. This means:
+        # - Only one model can be loaded at a time; switching models requires reloading.
+        # - This approach may not be suitable for scenarios requiring multiple models simultaneously,
+        #   such as using whisper-streaming as a module with varying model sizes.
+        dtype = mx.float16  # Default to mx.float16. In mlx_whisper.transcribe: dtype = mx.float16 if decode_options.get("fp16", True) else mx.float32
+        ModelHolder.get_model(
+            model_size_or_path, dtype
+        )  # Model is preloaded to avoid reloading during transcription
+
+        return transcribe
+
+    def translate_model_name(self, model_name):
+        """
+        Translates a given model name to its corresponding MLX-compatible model path.
+
+        Args:
+            model_name (str): The name of the model to translate.
+
+        Returns:
+            str: The MLX-compatible model path.
+        """
+        # Dictionary mapping model names to MLX-compatible paths
+        model_mapping = {
+            "tiny.en": "mlx-community/whisper-tiny.en-mlx",
+            "tiny": "mlx-community/whisper-tiny-mlx",
+            "base.en": "mlx-community/whisper-base.en-mlx",
+            "base": "mlx-community/whisper-base-mlx",
+            "small.en": "mlx-community/whisper-small.en-mlx",
+            "small": "mlx-community/whisper-small-mlx",
+            "medium.en": "mlx-community/whisper-medium.en-mlx",
+            "medium": "mlx-community/whisper-medium-mlx",
+            "large-v1": "mlx-community/whisper-large-v1-mlx",
+            "large-v2": "mlx-community/whisper-large-v2-mlx",
+            "large-v3": "mlx-community/whisper-large-v3-mlx",
+            "large-v3-turbo": "mlx-community/whisper-large-v3-turbo",
+            "large": "mlx-community/whisper-large-mlx",
+        }
+
+        # Retrieve the corresponding MLX model path
+        mlx_model_path = model_mapping.get(model_name)
+
+        if mlx_model_path:
+            return mlx_model_path
+        else:
+            raise ValueError(
+                f"Model name '{model_name}' is not recognized or not supported."
+            )
+
+    def transcribe(self, audio, init_prompt=""):
+        segments = self.model(
+            audio,
+            language=self.original_language,
+            initial_prompt=init_prompt,
+            word_timestamps=True,
+            condition_on_previous_text=True,
+            path_or_hf_repo=self.model_size_or_path,
+            **self.transcribe_kargs,
+        )
+        return segments.get("segments", [])
+
+    def ts_words(self, segments):
+        """
+        Extracts timestamped words from transcription segments, skipping words with a high no-speech probability.
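+
+        A minimal sketch of the expected shapes (the values are illustrative,
+        not taken from a real transcription):
+
+            segments = [{"no_speech_prob": 0.1,
+                         "words": [{"start": 0.0, "end": 0.4, "word": " Hi"}]}]
+            ts_words(segments)  # -> [(0.0, 0.4, " Hi")]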
+ """ + return [ + (word["start"], word["end"], word["word"]) + for segment in segments + for word in segment.get("words", []) + if segment.get("no_speech_prob", 0) <= 0.9 + ] + + def segments_end_ts(self, res): + return [s["end"] for s in res] + + def use_vad(self): + self.transcribe_kargs["vad_filter"] = True + + def set_translate_task(self): + self.transcribe_kargs["task"] = "translate" + + +class OpenaiApiASR(ASRBase): + """Uses OpenAI's Whisper API for audio transcription.""" + + def __init__(self, lan=None, temperature=0, logfile=sys.stderr): + self.logfile = logfile + + self.modelname = "whisper-1" + self.original_language = ( + None if lan == "auto" else lan + ) # ISO-639-1 language code + self.response_format = "verbose_json" + self.temperature = temperature + + self.load_model() + + self.use_vad_opt = False + + # reset the task in set_translate_task + self.task = "transcribe" + + def load_model(self, *args, **kwargs): + from openai import OpenAI + + self.client = OpenAI() + + self.transcribed_seconds = ( + 0 # for logging how many seconds were processed by API, to know the cost + ) + + def ts_words(self, segments): + no_speech_segments = [] + if self.use_vad_opt: + for segment in segments.segments: + # TODO: threshold can be set from outside + if segment["no_speech_prob"] > 0.8: + no_speech_segments.append( + (segment.get("start"), segment.get("end")) + ) + + o = [] + for word in segments.words: + start = word.start + end = word.end + if any(s[0] <= start <= s[1] for s in no_speech_segments): + # print("Skipping word", word.get("word"), "because it's in a no-speech segment") + continue + o.append((start, end, word.word)) + return o + + def segments_end_ts(self, res): + return [s.end for s in res.words] + + def transcribe(self, audio_data, prompt=None, *args, **kwargs): + # Write the audio data to a buffer + buffer = io.BytesIO() + buffer.name = "temp.wav" + sf.write(buffer, audio_data, samplerate=16000, format="WAV", subtype="PCM_16") + buffer.seek(0) # Reset buffer's position to the beginning + + self.transcribed_seconds += math.ceil( + len(audio_data) / 16000 + ) # it rounds up to the whole seconds + + params = { + "model": self.modelname, + "file": buffer, + "response_format": self.response_format, + "temperature": self.temperature, + "timestamp_granularities": ["word", "segment"], + } + if self.task != "translate" and self.original_language: + params["language"] = self.original_language + if prompt: + params["prompt"] = prompt + + if self.task == "translate": + proc = self.client.audio.translations + else: + proc = self.client.audio.transcriptions + + # Process transcription/translation + transcript = proc.create(**params) + logger.debug( + f"OpenAI API processed accumulated {self.transcribed_seconds} seconds" + ) + + return transcript + + def use_vad(self): + self.use_vad_opt = True + + def set_translate_task(self): + self.task = "translate" + + +class HypothesisBuffer: + def __init__(self, logfile=sys.stderr): + self.commited_in_buffer = [] + self.buffer = [] + self.new = [] + + self.last_commited_time = 0 + self.last_commited_word = None + + self.logfile = logfile + + def insert(self, new, offset): + # compare self.commited_in_buffer and new. 
+
+        new = [(a + offset, b + offset, t) for a, b, t in new]
+        self.new = [(a, b, t) for a, b, t in new if a > self.last_commited_time - 0.1]
+
+        if len(self.new) >= 1:
+            a, b, t = self.new[0]
+            if abs(a - self.last_commited_time) < 1:
+                if self.commited_in_buffer:
+                    # it searches for 1, 2, ..., 5 consecutive words (n-grams) that are identical in commited and new. If found, they are dropped.
+                    cn = len(self.commited_in_buffer)
+                    nn = len(self.new)
+                    for i in range(1, min(min(cn, nn), 5) + 1):  # 5 is the maximum
+                        c = " ".join(
+                            [self.commited_in_buffer[-j][2] for j in range(1, i + 1)][
+                                ::-1
+                            ]
+                        )
+                        tail = " ".join(self.new[j - 1][2] for j in range(1, i + 1))
+                        if c == tail:
+                            words = []
+                            for j in range(i):
+                                words.append(repr(self.new.pop(0)))
+                            words_msg = " ".join(words)
+                            logger.debug(f"removing last {i} words: {words_msg}")
+                            break
+
+    def flush(self):
+        # returns the committed chunk = the longest common prefix of the 2 last inserts.
+
+        commit = []
+        while self.new:
+            na, nb, nt = self.new[0]
+
+            if len(self.buffer) == 0:
+                break
+
+            if nt == self.buffer[0][2]:
+                commit.append((na, nb, nt))
+                self.last_commited_word = nt
+                self.last_commited_time = nb
+                self.buffer.pop(0)
+                self.new.pop(0)
+            else:
+                break
+        self.buffer = self.new
+        self.new = []
+        self.commited_in_buffer.extend(commit)
+        return commit
+
+    def pop_commited(self, time):
+        while self.commited_in_buffer and self.commited_in_buffer[0][1] <= time:
+            self.commited_in_buffer.pop(0)
+
+    def complete(self):
+        return self.buffer
+
+
+class OnlineASRProcessor:
+    SAMPLING_RATE = 16000
+
+    def __init__(
+        self, asr, tokenizer=None, buffer_trimming=("segment", 15), logfile=sys.stderr
+    ):
+        """asr: WhisperASR object
+        tokenizer: sentence tokenizer object for the target language. Must have a method *split* that behaves like the one of MosesTokenizer. It can be None if the "segment" buffer trimming option is used; then the tokenizer is not used at all.
+        buffer_trimming: a pair of (option, seconds), where option is either "sentence" or "segment", and seconds is a number. The buffer is trimmed if it is longer than the "seconds" threshold. The default, ("segment", 15), is the most recommended option.
+        logfile: where to store the log.
+        """
+        self.asr = asr
+        self.tokenizer = tokenizer
+        self.logfile = logfile
+
+        self.init()
+
+        self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
+
+    def init(self, offset=None):
+        """run this when starting or restarting processing"""
+        self.audio_buffer = np.array([], dtype=np.float32)
+        self.transcript_buffer = HypothesisBuffer(logfile=self.logfile)
+        self.buffer_time_offset = 0
+        if offset is not None:
+            self.buffer_time_offset = offset
+        self.transcript_buffer.last_commited_time = self.buffer_time_offset
+        self.commited = []
+
+    def insert_audio_chunk(self, audio):
+        self.audio_buffer = np.append(self.audio_buffer, audio)
+
+    def prompt(self):
+        """Returns a tuple: (prompt, context), where "prompt" is a 200-character suffix of committed text that has already scrolled out of the audio buffer.
+        "context" is the committed text that is still inside the audio buffer. It is transcribed again and skipped. It is returned only for debugging and logging reasons.
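+
+        A minimal illustration (invented values):
+
+            self.commited = [(0.0, 0.5, "Hello"), (0.5, 1.0, "world")]
+            self.buffer_time_offset = 0.7
+            self.prompt()  # -> ("Hello", "world")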
+ """ + k = max(0, len(self.commited) - 1) + while k > 0 and self.commited[k - 1][1] > self.buffer_time_offset: + k -= 1 + + p = self.commited[:k] + p = [t for _, _, t in p] + prompt = [] + y = 0 + while p and y < 200: # 200 characters prompt size + x = p.pop(-1) + y += len(x) + 1 + prompt.append(x) + non_prompt = self.commited[k:] + return self.asr.sep.join(prompt[::-1]), self.asr.sep.join( + t for _, _, t in non_prompt + ) + + def process_iter(self): + """Runs on the current audio buffer. + Returns: a tuple (beg_timestamp, end_timestamp, "text"), or (None, None, ""). + The non-emty text is confirmed (committed) partial transcript. + """ + + prompt, non_prompt = self.prompt() + logger.debug(f"PROMPT: {prompt}") + logger.debug(f"CONTEXT: {non_prompt}") + logger.debug( + f"transcribing {len(self.audio_buffer) / self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}" + ) + res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt) + + # transform to [(beg,end,"word1"), ...] + tsw = self.asr.ts_words(res) + + self.transcript_buffer.insert(tsw, self.buffer_time_offset) + o = self.transcript_buffer.flush() + self.commited.extend(o) + completed = self.to_flush(o) + logger.debug(f">>>>COMPLETE NOW: {completed}") + the_rest = self.to_flush(self.transcript_buffer.complete()) + logger.debug(f"INCOMPLETE: {the_rest}") + + # there is a newly confirmed text + + if o and self.buffer_trimming_way == "sentence": # trim the completed sentences + if ( + len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec + ): # longer than this + self.chunk_completed_sentence() + + if self.buffer_trimming_way == "segment": + s = self.buffer_trimming_sec # trim the completed segments longer than s, + else: + s = 30 # if the audio buffer is longer than 30s, trim it + + if len(self.audio_buffer) / self.SAMPLING_RATE > s: + self.chunk_completed_segment(res) + + # alternative: on any word + # l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10 + # let's find commited word that is less + # k = len(self.commited)-1 + # while k>0 and self.commited[k][1] > l: + # k -= 1 + # t = self.commited[k][1] + logger.debug("chunking segment") + # self.chunk_at(t) + + logger.debug( + f"len of buffer now: {len(self.audio_buffer) / self.SAMPLING_RATE:2.2f}" + ) + return self.to_flush(o) + + def chunk_completed_sentence(self): + if self.commited == []: + return + logger.debug(self.commited) + sents = self.words_to_sentences(self.commited) + for s in sents: + logger.debug(f"\t\tSENT: {s}") + if len(sents) < 2: + return + while len(sents) > 2: + sents.pop(0) + # we will continue with audio processing at this timestamp + chunk_at = sents[-2][1] + + logger.debug(f"--- sentence chunked at {chunk_at:2.2f}") + self.chunk_at(chunk_at) + + def chunk_completed_segment(self, res): + if self.commited == []: + return + + ends = self.asr.segments_end_ts(res) + + t = self.commited[-1][1] + + if len(ends) > 1: + e = ends[-2] + self.buffer_time_offset + while len(ends) > 2 and e > t: + ends.pop(-1) + e = ends[-2] + self.buffer_time_offset + if e <= t: + logger.debug(f"--- segment chunked at {e:2.2f}") + self.chunk_at(e) + else: + logger.debug("--- last segment not within commited area") + else: + logger.debug("--- not enough segments to chunk") + + def chunk_at(self, time): + """trims the hypothesis and audio buffer at "time" """ + self.transcript_buffer.pop_commited(time) + cut_seconds = time - self.buffer_time_offset + self.audio_buffer = self.audio_buffer[int(cut_seconds * self.SAMPLING_RATE) :] + 
+        """
+
+        prompt, non_prompt = self.prompt()
+        logger.debug(f"PROMPT: {prompt}")
+        logger.debug(f"CONTEXT: {non_prompt}")
+        logger.debug(
+            f"transcribing {len(self.audio_buffer) / self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}"
+        )
+        res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt)
+
+        # transform to [(beg,end,"word1"), ...]
+        tsw = self.asr.ts_words(res)
+
+        self.transcript_buffer.insert(tsw, self.buffer_time_offset)
+        o = self.transcript_buffer.flush()
+        self.commited.extend(o)
+        completed = self.to_flush(o)
+        logger.debug(f">>>>COMPLETE NOW: {completed}")
+        the_rest = self.to_flush(self.transcript_buffer.complete())
+        logger.debug(f"INCOMPLETE: {the_rest}")
+
+        # there is newly confirmed text
+
+        if o and self.buffer_trimming_way == "sentence":  # trim the completed sentences
+            if (
+                len(self.audio_buffer) / self.SAMPLING_RATE > self.buffer_trimming_sec
+            ):  # longer than this
+                self.chunk_completed_sentence()
+
+        if self.buffer_trimming_way == "segment":
+            s = self.buffer_trimming_sec  # trim the completed segments longer than s,
+        else:
+            s = 30  # if the audio buffer is longer than 30s, trim it
+
+        if len(self.audio_buffer) / self.SAMPLING_RATE > s:
+            self.chunk_completed_segment(res)
+
+            # alternative: on any word
+            # l = self.buffer_time_offset + len(self.audio_buffer)/self.SAMPLING_RATE - 10
+            # let's find commited word that is less
+            # k = len(self.commited)-1
+            # while k>0 and self.commited[k][1] > l:
+            #    k -= 1
+            # t = self.commited[k][1]
+            logger.debug("chunking segment")
+            # self.chunk_at(t)
+
+        logger.debug(
+            f"len of buffer now: {len(self.audio_buffer) / self.SAMPLING_RATE:2.2f}"
+        )
+        return self.to_flush(o)
+
+    def chunk_completed_sentence(self):
+        if self.commited == []:
+            return
+        logger.debug(self.commited)
+        sents = self.words_to_sentences(self.commited)
+        for s in sents:
+            logger.debug(f"\t\tSENT: {s}")
+        if len(sents) < 2:
+            return
+        while len(sents) > 2:
+            sents.pop(0)
+        # we will continue with audio processing at this timestamp
+        chunk_at = sents[-2][1]
+
+        logger.debug(f"--- sentence chunked at {chunk_at:2.2f}")
+        self.chunk_at(chunk_at)
+
+    def chunk_completed_segment(self, res):
+        if self.commited == []:
+            return
+
+        ends = self.asr.segments_end_ts(res)
+
+        t = self.commited[-1][1]
+
+        if len(ends) > 1:
+            e = ends[-2] + self.buffer_time_offset
+            while len(ends) > 2 and e > t:
+                ends.pop(-1)
+                e = ends[-2] + self.buffer_time_offset
+            if e <= t:
+                logger.debug(f"--- segment chunked at {e:2.2f}")
+                self.chunk_at(e)
+            else:
+                logger.debug("--- last segment not within commited area")
+        else:
+            logger.debug("--- not enough segments to chunk")
+
+    def chunk_at(self, time):
+        """trims the hypothesis and audio buffer at "time" """
+        self.transcript_buffer.pop_commited(time)
+        cut_seconds = time - self.buffer_time_offset
+        self.audio_buffer = self.audio_buffer[int(cut_seconds * self.SAMPLING_RATE) :]
+        self.buffer_time_offset = time
+
+    def words_to_sentences(self, words):
+        """Uses self.tokenizer for sentence segmentation of words.
+        Returns: [(beg,end,"sentence 1"),...]
+        """
+
+        cwords = [w for w in words]
+        t = " ".join(o[2] for o in cwords)
+        s = self.tokenizer.split(t)
+        out = []
+        while s:
+            beg = None
+            end = None
+            sent = s.pop(0).strip()
+            fsent = sent
+            while cwords:
+                b, e, w = cwords.pop(0)
+                w = w.strip()
+                if beg is None and sent.startswith(w):
+                    beg = b
+                elif end is None and sent == w:
+                    end = e
+                    out.append((beg, end, fsent))
+                    break
+                sent = sent[len(w) :].strip()
+        return out
+
+    def finish(self):
+        """Flush the incomplete text when the whole processing ends.
+        Returns: the same format as self.process_iter()
+        """
+        o = self.transcript_buffer.complete()
+        f = self.to_flush(o)
+        logger.debug(f"last, noncommited: {f}")
+        self.buffer_time_offset += len(self.audio_buffer) / self.SAMPLING_RATE
+        return f
+
+    def to_flush(
+        self,
+        sents,
+        sep=None,
+        offset=0,
+    ):
+        # concatenates the timestamped words or sentences into one sequence that is flushed in one line
+        # sents: [(beg1, end1, "sentence1"), ...] or [] if empty
+        # return: (beg1, end-of-last-sentence, "concatenation of sentences") or (None, None, "") if empty
+        if sep is None:
+            sep = self.asr.sep
+        t = sep.join(s[2] for s in sents)
+        if len(sents) == 0:
+            b = None
+            e = None
+        else:
+            b = offset + sents[0][0]
+            e = offset + sents[-1][1]
+        return (b, e, t)
+
+
+class VACOnlineASRProcessor(OnlineASRProcessor):
+    """Wraps OnlineASRProcessor with VAC (Voice Activity Controller).
+
+    It works the same way as OnlineASRProcessor: it receives chunks of audio (e.g. 0.04 seconds),
+    it runs VAD and continuously detects whether there is speech or not.
+    When it detects the end of speech (non-voice for 500 ms), it makes OnlineASRProcessor end the utterance immediately.
+    """
+
+    def __init__(self, online_chunk_size, *a, **kw):
+        self.online_chunk_size = online_chunk_size
+
+        self.online = OnlineASRProcessor(*a, **kw)
+
+        # VAC:
+        import torch
+
+        model, _ = torch.hub.load(repo_or_dir="snakers4/silero-vad", model="silero_vad")
+        from silero_vad_iterator import FixedVADIterator
+
+        self.vac = FixedVADIterator(
+            model
+        )  # we use the default options there: 500ms silence, 100ms padding, etc.
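+        # FixedVADIterator wraps silero-vad, which consumes fixed 512-sample
+        # windows at 16 kHz; the wrapper buffers arbitrary chunk sizes and, on
+        # a detected boundary, returns e.g. {"start": 93184} or {"end": 127488}
+        # (sample indices), otherwise None. The values here are illustrative.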
+
+        self.logfile = self.online.logfile
+        self.init()
+
+    def init(self):
+        self.online.init()
+        self.vac.reset_states()
+        self.current_online_chunk_buffer_size = 0
+
+        self.is_currently_final = False
+
+        self.status = None  # or "voice" or "nonvoice"
+        self.audio_buffer = np.array([], dtype=np.float32)
+        self.buffer_offset = 0  # in frames
+
+    def clear_buffer(self):
+        self.buffer_offset += len(self.audio_buffer)
+        self.audio_buffer = np.array([], dtype=np.float32)
+
+    def insert_audio_chunk(self, audio):
+        res = self.vac(audio)
+        self.audio_buffer = np.append(self.audio_buffer, audio)
+
+        if res is not None:
+            frame = list(res.values())[0] - self.buffer_offset
+            if "start" in res and "end" not in res:
+                self.status = "voice"
+                send_audio = self.audio_buffer[frame:]
+                self.online.init(
+                    offset=(frame + self.buffer_offset) / self.SAMPLING_RATE
+                )
+                self.online.insert_audio_chunk(send_audio)
+                self.current_online_chunk_buffer_size += len(send_audio)
+                self.clear_buffer()
+            elif "end" in res and "start" not in res:
+                self.status = "nonvoice"
+                send_audio = self.audio_buffer[:frame]
+                self.online.insert_audio_chunk(send_audio)
+                self.current_online_chunk_buffer_size += len(send_audio)
+                self.is_currently_final = True
+                self.clear_buffer()
+            else:
+                beg = res["start"] - self.buffer_offset
+                end = res["end"] - self.buffer_offset
+                self.status = "nonvoice"
+                send_audio = self.audio_buffer[beg:end]
+                self.online.init(offset=(beg + self.buffer_offset) / self.SAMPLING_RATE)
+                self.online.insert_audio_chunk(send_audio)
+                self.current_online_chunk_buffer_size += len(send_audio)
+                self.is_currently_final = True
+                self.clear_buffer()
+        else:
+            if self.status == "voice":
+                self.online.insert_audio_chunk(self.audio_buffer)
+                self.current_online_chunk_buffer_size += len(self.audio_buffer)
+                self.clear_buffer()
+            else:
+                # We keep 1 second because VAD may later find the start of voice in it.
+                # But we trim it to prevent OOM.
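+                # A worked example (illustrative): with a 24000-sample buffer
+                # at 16 kHz, buffer_offset advances by 8000 and only the newest
+                # 16000 samples (1 s) are kept for the VAD to re-inspect.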
+ self.buffer_offset += max( + 0, len(self.audio_buffer) - self.SAMPLING_RATE + ) + self.audio_buffer = self.audio_buffer[-self.SAMPLING_RATE :] + + def process_iter(self): + if self.is_currently_final: + return self.finish() + elif ( + self.current_online_chunk_buffer_size + > self.SAMPLING_RATE * self.online_chunk_size + ): + self.current_online_chunk_buffer_size = 0 + ret = self.online.process_iter() + return ret + else: + print("no online update, only VAD", self.status, file=self.logfile) + return (None, None, "") + + def finish(self): + ret = self.online.finish() + self.current_online_chunk_buffer_size = 0 + self.is_currently_final = False + return ret + + +WHISPER_LANG_CODES = "af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,ur,uz,vi,yi,yo,zh".split( + "," +) + + +def create_tokenizer(lan): + """returns an object that has split function that works like the one of MosesTokenizer""" + + assert lan in WHISPER_LANG_CODES, ( + "language must be Whisper's supported lang code: " + + " ".join(WHISPER_LANG_CODES) + ) + + if lan == "uk": + import tokenize_uk + + class UkrainianTokenizer: + def split(self, text): + return tokenize_uk.tokenize_sents(text) + + return UkrainianTokenizer() + + # supported by fast-mosestokenizer + if ( + lan + in "as bn ca cs de el en es et fi fr ga gu hi hu is it kn lt lv ml mni mr nl or pa pl pt ro ru sk sl sv ta te yue zh".split() + ): + from mosestokenizer import MosesTokenizer + + return MosesTokenizer(lan) + + # the following languages are in Whisper, but not in wtpsplit: + if ( + lan + in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split() + ): + logger.debug( + f"{lan} code is not supported by wtpsplit. Going to use None lang_code option." + ) + lan = None + + from wtpsplit import WtP + + # downloads the model from huggingface on the first use + wtp = WtP("wtp-canine-s-12l-no-adapters") + + class WtPtok: + def split(self, sent): + return wtp.split(sent, lang_code=lan) + + return WtPtok() + + +def add_shared_args(parser): + """shared args for simulation (this entry point) and server + parser: argparse.ArgumentParser object + """ + parser.add_argument( + "--min-chunk-size", + type=float, + default=1.0, + help="Minimum audio chunk size in seconds. It waits up to this time to do processing. If the processing takes shorter time, it waits, otherwise it processes the whole segment that was received by this time.", + ) + parser.add_argument( + "--model", + type=str, + default="large-v2", + choices="tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large-v3,large,large-v3-turbo".split( + "," + ), + help="Name size of the Whisper model to use (default: large-v2). The model is automatically downloaded from the model hub if not present in model cache dir.", + ) + parser.add_argument( + "--model_cache_dir", + type=str, + default=None, + help="Overriding the default model cache dir where models downloaded from the hub are saved", + ) + parser.add_argument( + "--model_dir", + type=str, + default=None, + help="Dir where Whisper model.bin and other files are saved. This option overrides --model and --model_cache_dir parameter.", + ) + parser.add_argument( + "--lan", + "--language", + type=str, + default="auto", + help="Source language code, e.g. 
en,de,cs, or 'auto' for language detection.", + ) + parser.add_argument( + "--task", + type=str, + default="transcribe", + choices=["transcribe", "translate"], + help="Transcribe or translate.", + ) + parser.add_argument( + "--backend", + type=str, + default="faster-whisper", + choices=["faster-whisper", "whisper_timestamped", "mlx-whisper", "openai-api"], + help="Load only this backend for Whisper processing.", + ) + parser.add_argument( + "--vac", + action="store_true", + default=False, + help="Use VAC = voice activity controller. Recommended. Requires torch.", + ) + parser.add_argument( + "--vac-chunk-size", type=float, default=0.04, help="VAC sample size in seconds." + ) + parser.add_argument( + "--vad", + action="store_true", + default=False, + help="Use VAD = voice activity detection, with the default parameters.", + ) + parser.add_argument( + "--buffer_trimming", + type=str, + default="segment", + choices=["sentence", "segment"], + help='Buffer trimming strategy -- trim completed sentences marked with punctuation mark and detected by sentence segmenter, or the completed segments returned by Whisper. Sentence segmenter must be installed for "sentence" option.', + ) + parser.add_argument( + "--buffer_trimming_sec", + type=float, + default=15, + help="Buffer trimming length threshold in seconds. If buffer length is longer, trimming sentence/segment is triggered.", + ) + parser.add_argument( + "-l", + "--log-level", + dest="log_level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set the log level", + default="DEBUG", + ) + + +def asr_factory(args, logfile=sys.stderr): + """ + Creates and configures an ASR and ASR Online instance based on the specified backend and arguments. + """ + backend = args.backend + if backend == "openai-api": + logger.debug("Using OpenAI API.") + asr = OpenaiApiASR(lan=args.lan) + else: + if backend == "faster-whisper": + asr_cls = FasterWhisperASR + elif backend == "mlx-whisper": + asr_cls = MLXWhisper + else: + asr_cls = WhisperTimestampedASR + + # Only for FasterWhisperASR and WhisperTimestampedASR + size = args.model + t = time.time() + logger.info(f"Loading Whisper {size} model for {args.lan}...") + asr = asr_cls( + modelsize=size, + lan=args.lan, + cache_dir=args.model_cache_dir, + model_dir=args.model_dir, + ) + e = time.time() + logger.info(f"done. 
It took {round(e - t, 2)} seconds.") + + # Apply common configurations + if getattr(args, "vad", False): # Checks if VAD argument is present and True + logger.info("Setting VAD filter") + asr.use_vad() + + language = args.lan + if args.task == "translate": + asr.set_translate_task() + tgt_language = "en" # Whisper translates into English + else: + tgt_language = language # Whisper transcribes in this language + + # Create the tokenizer + if args.buffer_trimming == "sentence": + tokenizer = create_tokenizer(tgt_language) + else: + tokenizer = None + + # Create the OnlineASRProcessor + if args.vac: + online = VACOnlineASRProcessor( + args.min_chunk_size, + asr, + tokenizer, + logfile=logfile, + buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec), + ) + else: + online = OnlineASRProcessor( + asr, + tokenizer, + logfile=logfile, + buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec), + ) + + return asr, online + + +def set_logging(args, logger, other="_server"): + logging.basicConfig( # format='%(name)s + format="%(levelname)s\t%(message)s" + ) + logger.setLevel(args.log_level) + logging.getLogger("whisper_online" + other).setLevel(args.log_level) + + +# logging.getLogger("whisper_online_server").setLevel(args.log_level) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "audio_path", + type=str, + help="Filename of 16kHz mono channel wav, on which live streaming is simulated.", + ) + add_shared_args(parser) + parser.add_argument( + "--start_at", + type=float, + default=0.0, + help="Start processing audio at this time.", + ) + parser.add_argument( + "--offline", action="store_true", default=False, help="Offline mode." + ) + parser.add_argument( + "--comp_unaware", + action="store_true", + default=False, + help="Computationally unaware simulation.", + ) + + args = parser.parse_args() + + # reset to store stderr to different file stream, e.g. open(os.devnull,"w") + logfile = sys.stderr + + if args.offline and args.comp_unaware: + logger.error( + "No or one option from --offline and --comp_unaware are available, not both. Exiting." + ) + sys.exit(1) + + # if args.log_level: + # logging.basicConfig(format='whisper-%(levelname)s:%(name)s: %(message)s', + # level=getattr(logging, args.log_level)) + + set_logging(args, logger) + + audio_path = args.audio_path + + SAMPLING_RATE = 16000 + duration = len(load_audio(audio_path)) / SAMPLING_RATE + logger.info("Audio duration is: %2.2f seconds" % duration) + + asr, online = asr_factory(args, logfile=logfile) + if args.vac: + min_chunk = args.vac_chunk_size + else: + min_chunk = args.min_chunk_size + + # load the audio into the LRU cache before we start the timer + a = load_audio_chunk(audio_path, 0, 1) + + # warm up the ASR because the very first transcribe takes much more time than the other + asr.transcribe(a) + + beg = args.start_at + start = time.time() - beg + + def output_transcript(o, now=None): + # output format in stdout is like: + # 4186.3606 0 1720 Takhle to je + # - the first three words are: + # - emission time from beginning of processing, in milliseconds + # - beg and end timestamp of the text segment, as estimated by Whisper model. 
The timestamps are not accurate, but they're useful anyway + # - the next words: segment transcript + if now is None: + now = time.time() - start + if o[0] is not None: + print( + "%1.4f %1.0f %1.0f %s" % (now * 1000, o[0] * 1000, o[1] * 1000, o[2]), + file=logfile, + flush=True, + ) + print( + "%1.4f %1.0f %1.0f %s" % (now * 1000, o[0] * 1000, o[1] * 1000, o[2]), + flush=True, + ) + else: + # No text, so no output + pass + + if args.offline: ## offline mode processing (for testing/debugging) + a = load_audio(audio_path) + online.insert_audio_chunk(a) + try: + o = online.process_iter() + except AssertionError as e: + logger.error(f"assertion error: {repr(e)}") + else: + output_transcript(o) + now = None + elif args.comp_unaware: # computational unaware mode + end = beg + min_chunk + while True: + a = load_audio_chunk(audio_path, beg, end) + online.insert_audio_chunk(a) + try: + o = online.process_iter() + except AssertionError as e: + logger.error(f"assertion error: {repr(e)}") + pass + else: + output_transcript(o, now=end) + + logger.debug(f"## last processed {end:.2f}s") + + if end >= duration: + break + + beg = end + + if end + min_chunk > duration: + end = duration + else: + end += min_chunk + now = duration + + else: # online = simultaneous mode + end = 0 + while True: + now = time.time() - start + if now < end + min_chunk: + time.sleep(min_chunk + end - now) + end = time.time() - start + a = load_audio_chunk(audio_path, beg, end) + beg = end + online.insert_audio_chunk(a) + + try: + o = online.process_iter() + except AssertionError as e: + logger.error(f"assertion error: {e}") + pass + else: + output_transcript(o) + now = time.time() - start + logger.debug( + f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now - end:.2f}" + ) + + if end >= duration: + break + now = None + + o = online.finish() + output_transcript(o, now=now) diff --git a/frigate/data_processing/types.py b/frigate/data_processing/types.py index a19a856bf..c77880535 100644 --- a/frigate/data_processing/types.py +++ b/frigate/data_processing/types.py @@ -1,9 +1,13 @@ """Embeddings types.""" -import multiprocessing as mp from enum import Enum +from multiprocessing.managers import SyncManager from multiprocessing.sharedctypes import Synchronized +import sherpa_onnx + +from frigate.data_processing.real_time.whisper_online import FasterWhisperASR + class DataProcessorMetrics: image_embeddings_speed: Synchronized @@ -16,18 +20,31 @@ class DataProcessorMetrics: alpr_pps: Synchronized yolov9_lpr_speed: Synchronized yolov9_lpr_pps: Synchronized + review_desc_speed: Synchronized + review_desc_dps: Synchronized + classification_speeds: dict[str, Synchronized] + classification_cps: dict[str, Synchronized] - def __init__(self): - self.image_embeddings_speed = mp.Value("d", 0.0) - self.image_embeddings_eps = mp.Value("d", 0.0) - self.text_embeddings_speed = mp.Value("d", 0.0) - self.text_embeddings_eps = mp.Value("d", 0.0) - self.face_rec_speed = mp.Value("d", 0.0) - self.face_rec_fps = mp.Value("d", 0.0) - self.alpr_speed = mp.Value("d", 0.0) - self.alpr_pps = mp.Value("d", 0.0) - self.yolov9_lpr_speed = mp.Value("d", 0.0) - self.yolov9_lpr_pps = mp.Value("d", 0.0) + def __init__(self, manager: SyncManager, custom_classification_models: list[str]): + self.image_embeddings_speed = manager.Value("d", 0.0) + self.image_embeddings_eps = manager.Value("d", 0.0) + self.text_embeddings_speed = manager.Value("d", 0.0) + self.text_embeddings_eps = manager.Value("d", 0.0) + self.face_rec_speed = manager.Value("d", 0.0) + 
self.face_rec_fps = manager.Value("d", 0.0) + self.alpr_speed = manager.Value("d", 0.0) + self.alpr_pps = manager.Value("d", 0.0) + self.yolov9_lpr_speed = manager.Value("d", 0.0) + self.yolov9_lpr_pps = manager.Value("d", 0.0) + self.review_desc_speed = manager.Value("d", 0.0) + self.review_desc_dps = manager.Value("d", 0.0) + self.classification_speeds = manager.dict() + self.classification_cps = manager.dict() + + if custom_classification_models: + for key in custom_classification_models: + self.classification_speeds[key] = manager.Value("d", 0.0) + self.classification_cps[key] = manager.Value("d", 0.0) class DataProcessorModelRunner: @@ -41,3 +58,6 @@ class PostProcessDataEnum(str, Enum): recording = "recording" review = "review" tracked_object = "tracked_object" + + +AudioTranscriptionModel = FasterWhisperASR | sherpa_onnx.OnlineRecognizer | None diff --git a/frigate/detectors/detector_utils.py b/frigate/detectors/detector_utils.py new file mode 100644 index 000000000..d732de871 --- /dev/null +++ b/frigate/detectors/detector_utils.py @@ -0,0 +1,74 @@ +import logging +import os + +import numpy as np + +try: + from tflite_runtime.interpreter import Interpreter, load_delegate +except ModuleNotFoundError: + from tensorflow.lite.python.interpreter import Interpreter, load_delegate + + +logger = logging.getLogger(__name__) + + +def tflite_init(self, interpreter): + self.interpreter = interpreter + + self.interpreter.allocate_tensors() + + self.tensor_input_details = self.interpreter.get_input_details() + self.tensor_output_details = self.interpreter.get_output_details() + + +def tflite_detect_raw(self, tensor_input): + self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input) + self.interpreter.invoke() + + boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0] + class_ids = self.interpreter.tensor(self.tensor_output_details[1]["index"])()[0] + scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[0] + count = int(self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0]) + + detections = np.zeros((20, 6), np.float32) + + for i in range(count): + if scores[i] < 0.4 or i == 20: + break + detections[i] = [ + class_ids[i], + float(scores[i]), + boxes[i][0], + boxes[i][1], + boxes[i][2], + boxes[i][3], + ] + + return detections + + +def tflite_load_delegate_interpreter( + delegate_library: str, detector_config, device_config +): + try: + logger.info("Attempting to load NPU") + tf_delegate = load_delegate(delegate_library, device_config) + logger.info("NPU found") + interpreter = Interpreter( + model_path=detector_config.model.path, + experimental_delegates=[tf_delegate], + ) + return interpreter + except ValueError: + _, ext = os.path.splitext(detector_config.model.path) + + if ext and ext != ".tflite": + logger.error( + "Incorrect model used with NPU. Only .tflite models can be used with a TFLite delegate." + ) + else: + logger.error( + "No NPU was detected. If you do not have a TFLite device yet, you must configure CPU detectors." 
+ ) + + raise diff --git a/frigate/detectors/plugins/cpu_tfl.py b/frigate/detectors/plugins/cpu_tfl.py index 8a54363e1..37cc10777 100644 --- a/frigate/detectors/plugins/cpu_tfl.py +++ b/frigate/detectors/plugins/cpu_tfl.py @@ -1,11 +1,13 @@ import logging -import numpy as np from pydantic import Field from typing_extensions import Literal from frigate.detectors.detection_api import DetectionApi from frigate.detectors.detector_config import BaseDetectorConfig +from frigate.log import redirect_output_to_logger + +from ..detector_utils import tflite_detect_raw, tflite_init try: from tflite_runtime.interpreter import Interpreter @@ -26,40 +28,14 @@ class CpuDetectorConfig(BaseDetectorConfig): class CpuTfl(DetectionApi): type_key = DETECTOR_KEY + @redirect_output_to_logger(logger, logging.DEBUG) def __init__(self, detector_config: CpuDetectorConfig): - self.interpreter = Interpreter( + interpreter = Interpreter( model_path=detector_config.model.path, num_threads=detector_config.num_threads or 3, ) - self.interpreter.allocate_tensors() - - self.tensor_input_details = self.interpreter.get_input_details() - self.tensor_output_details = self.interpreter.get_output_details() + tflite_init(self, interpreter) def detect_raw(self, tensor_input): - self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input) - self.interpreter.invoke() - - boxes = self.interpreter.tensor(self.tensor_output_details[0]["index"])()[0] - class_ids = self.interpreter.tensor(self.tensor_output_details[1]["index"])()[0] - scores = self.interpreter.tensor(self.tensor_output_details[2]["index"])()[0] - count = int( - self.interpreter.tensor(self.tensor_output_details[3]["index"])()[0] - ) - - detections = np.zeros((20, 6), np.float32) - - for i in range(count): - if scores[i] < 0.4 or i == 20: - break - detections[i] = [ - class_ids[i], - float(scores[i]), - boxes[i][0], - boxes[i][1], - boxes[i][2], - boxes[i][3], - ] - - return detections + return tflite_detect_raw(self, tensor_input) diff --git a/frigate/detectors/plugins/teflon_tfl.py b/frigate/detectors/plugins/teflon_tfl.py new file mode 100644 index 000000000..7e29d6630 --- /dev/null +++ b/frigate/detectors/plugins/teflon_tfl.py @@ -0,0 +1,38 @@ +import logging + +from typing_extensions import Literal + +from frigate.detectors.detection_api import DetectionApi +from frigate.detectors.detector_config import BaseDetectorConfig + +from ..detector_utils import ( + tflite_detect_raw, + tflite_init, + tflite_load_delegate_interpreter, +) + +logger = logging.getLogger(__name__) + +# Use _tfl suffix to default tflite model +DETECTOR_KEY = "teflon_tfl" + + +class TeflonDetectorConfig(BaseDetectorConfig): + type: Literal[DETECTOR_KEY] + + +class TeflonTfl(DetectionApi): + type_key = DETECTOR_KEY + + def __init__(self, detector_config: TeflonDetectorConfig): + # Location in Debian's mesa-teflon-delegate + delegate_library = "/usr/lib/teflon/libteflon.so" + device_config = {} + + interpreter = tflite_load_delegate_interpreter( + delegate_library, detector_config, device_config + ) + tflite_init(self, interpreter) + + def detect_raw(self, tensor_input): + return tflite_detect_raw(self, tensor_input) diff --git a/frigate/detectors/plugins/zmq_ipc.py b/frigate/detectors/plugins/zmq_ipc.py new file mode 100644 index 000000000..112176c1a --- /dev/null +++ b/frigate/detectors/plugins/zmq_ipc.py @@ -0,0 +1,151 @@ +import json +import logging +from typing import Any, List + +import numpy as np +import zmq +from pydantic import Field +from typing_extensions import Literal 
+ +from frigate.detectors.detection_api import DetectionApi +from frigate.detectors.detector_config import BaseDetectorConfig + +logger = logging.getLogger(__name__) + +DETECTOR_KEY = "zmq" + + +class ZmqDetectorConfig(BaseDetectorConfig): + type: Literal[DETECTOR_KEY] + endpoint: str = Field( + default="ipc:///tmp/cache/zmq_detector", title="ZMQ IPC endpoint" + ) + request_timeout_ms: int = Field( + default=200, title="ZMQ request timeout in milliseconds" + ) + linger_ms: int = Field(default=0, title="ZMQ socket linger in milliseconds") + + +class ZmqIpcDetector(DetectionApi): + """ + ZMQ-based detector plugin using a REQ/REP socket over an IPC endpoint. + + Protocol: + - Request is sent as a multipart message: + [ header_json_bytes, tensor_bytes ] + where header is a JSON object containing: + { + "shape": List[int], + "dtype": str, # numpy dtype string, e.g. "uint8", "float32" + } + tensor_bytes are the raw bytes of the numpy array in C-order. + + - Response is expected to be either: + a) Multipart [ header_json_bytes, tensor_bytes ] with header specifying + shape [20,6] and dtype "float32"; or + b) Single frame tensor_bytes of length 20*6*4 bytes (float32). + + On any error or timeout, this detector returns a zero array of shape (20, 6). + """ + + type_key = DETECTOR_KEY + + def __init__(self, detector_config: ZmqDetectorConfig): + super().__init__(detector_config) + + self._context = zmq.Context() + self._endpoint = detector_config.endpoint + self._request_timeout_ms = detector_config.request_timeout_ms + self._linger_ms = detector_config.linger_ms + self._socket = None + self._create_socket() + + # Preallocate zero result for error paths + self._zero_result = np.zeros((20, 6), np.float32) + + def _create_socket(self) -> None: + if self._socket is not None: + try: + self._socket.close(linger=self._linger_ms) + except Exception: + pass + self._socket = self._context.socket(zmq.REQ) + # Apply timeouts and linger so calls don't block indefinitely + self._socket.setsockopt(zmq.RCVTIMEO, self._request_timeout_ms) + self._socket.setsockopt(zmq.SNDTIMEO, self._request_timeout_ms) + self._socket.setsockopt(zmq.LINGER, self._linger_ms) + + logger.debug(f"ZMQ detector connecting to {self._endpoint}") + self._socket.connect(self._endpoint) + + def _build_header(self, tensor_input: np.ndarray) -> bytes: + header: dict[str, Any] = { + "shape": list(tensor_input.shape), + "dtype": str(tensor_input.dtype.name), + } + return json.dumps(header).encode("utf-8") + + def _decode_response(self, frames: List[bytes]) -> np.ndarray: + try: + if len(frames) == 1: + # Single-frame raw float32 (20x6) + buf = frames[0] + if len(buf) != 20 * 6 * 4: + logger.warning( + f"ZMQ detector received unexpected payload size: {len(buf)}" + ) + return self._zero_result + return np.frombuffer(buf, dtype=np.float32).reshape((20, 6)) + + if len(frames) >= 2: + header = json.loads(frames[0].decode("utf-8")) + shape = tuple(header.get("shape", [])) + dtype = np.dtype(header.get("dtype", "float32")) + return np.frombuffer(frames[1], dtype=dtype).reshape(shape) + + logger.warning("ZMQ detector received empty reply") + return self._zero_result + except Exception as exc: # noqa: BLE001 + logger.error(f"ZMQ detector failed to decode response: {exc}") + return self._zero_result + + def detect_raw(self, tensor_input: np.ndarray) -> np.ndarray: + try: + header_bytes = self._build_header(tensor_input) + payload_bytes = memoryview(tensor_input.tobytes(order="C")) + + # Send request + self._socket.send_multipart([header_bytes, 
payload_bytes]) + + # Receive reply + reply_frames = self._socket.recv_multipart() + detections = self._decode_response(reply_frames) + + # Ensure output shape and dtype are exactly as expected + + return detections + except zmq.Again: + # Timeout + logger.debug("ZMQ detector request timed out; resetting socket") + try: + self._create_socket() + except Exception: + pass + return self._zero_result + except zmq.ZMQError as exc: + logger.error(f"ZMQ detector ZMQError: {exc}; resetting socket") + try: + self._create_socket() + except Exception: + pass + return self._zero_result + except Exception as exc: # noqa: BLE001 + logger.error(f"ZMQ detector unexpected error: {exc}") + return self._zero_result + + def __del__(self) -> None: # pragma: no cover - best-effort cleanup + try: + if self._socket is not None: + self._socket.close(linger=self.detector_config.linger_ms) + except Exception: + pass diff --git a/frigate/embeddings/__init__.py b/frigate/embeddings/__init__.py index fbdc8d940..0a854fcfa 100644 --- a/frigate/embeddings/__init__.py +++ b/frigate/embeddings/__init__.py @@ -3,26 +3,24 @@ import base64 import json import logging -import multiprocessing as mp import os -import signal import threading from json.decoder import JSONDecodeError -from types import FrameType -from typing import Any, Optional, Union +from multiprocessing.synchronize import Event as MpEvent +from typing import Any, Union import regex from pathvalidate import ValidationError, sanitize_filename -from setproctitle import setproctitle from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsRequestor from frigate.config import FrigateConfig -from frigate.const import CONFIG_DIR, FACE_DIR +from frigate.const import CONFIG_DIR, FACE_DIR, PROCESS_PRIORITY_HIGH from frigate.data_processing.types import DataProcessorMetrics from frigate.db.sqlitevecq import SqliteVecQueueDatabase -from frigate.models import Event, Recordings +from frigate.models import Event from frigate.util.builtin import serialize -from frigate.util.services import listen +from frigate.util.classification import kickoff_model_training +from frigate.util.process import FrigateProcess from .maintainer import EmbeddingMaintainer from .util import ZScoreNormalization @@ -30,40 +28,30 @@ from .util import ZScoreNormalization logger = logging.getLogger(__name__) -def manage_embeddings(config: FrigateConfig, metrics: DataProcessorMetrics) -> None: - stop_event = mp.Event() +class EmbeddingProcess(FrigateProcess): + def __init__( + self, + config: FrigateConfig, + metrics: DataProcessorMetrics | None, + stop_event: MpEvent, + ) -> None: + super().__init__( + stop_event, + PROCESS_PRIORITY_HIGH, + name="frigate.embeddings_manager", + daemon=True, + ) + self.config = config + self.metrics = metrics - def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None: - stop_event.set() - - signal.signal(signal.SIGTERM, receiveSignal) - signal.signal(signal.SIGINT, receiveSignal) - - threading.current_thread().name = "process:embeddings_manager" - setproctitle("frigate.embeddings_manager") - listen() - - # Configure Frigate DB - db = SqliteVecQueueDatabase( - config.database.path, - pragmas={ - "auto_vacuum": "FULL", # Does not defragment database - "cache_size": -512 * 1000, # 512MB of cache - "synchronous": "NORMAL", # Safe when using WAL https://www.sqlite.org/pragma.html#pragma_synchronous - }, - timeout=max(60, 10 * len([c for c in config.cameras.values() if c.enabled])), - load_vec_extension=True, - ) - models = [Event, Recordings] 
- db.bind(models) - - maintainer = EmbeddingMaintainer( - db, - config, - metrics, - stop_event, - ) - maintainer.start() + def run(self) -> None: + self.pre_run_setup(self.config.logger) + maintainer = EmbeddingMaintainer( + self.config, + self.metrics, + self.stop_event, + ) + maintainer.start() class EmbeddingsContext: @@ -300,3 +288,34 @@ class EmbeddingsContext: def reindex_embeddings(self) -> dict[str, Any]: return self.requestor.send_data(EmbeddingsRequestEnum.reindex.value, {}) + + def start_classification_training(self, model_name: str) -> dict[str, Any]: + threading.Thread( + target=kickoff_model_training, + args=(self.requestor, model_name), + daemon=True, + ).start() + return {"success": True, "message": f"Began training {model_name} model."} + + def transcribe_audio(self, event: dict[str, any]) -> dict[str, any]: + return self.requestor.send_data( + EmbeddingsRequestEnum.transcribe_audio.value, {"event": event} + ) + + def generate_description_embedding(self, text: str) -> None: + return self.requestor.send_data( + EmbeddingsRequestEnum.embed_description.value, + {"id": None, "description": text, "upsert": False}, + ) + + def generate_image_embedding(self, event_id: str, thumbnail: bytes) -> None: + return self.requestor.send_data( + EmbeddingsRequestEnum.embed_thumbnail.value, + {"id": str(event_id), "thumbnail": str(thumbnail), "upsert": False}, + ) + + def generate_review_summary(self, start_ts: float, end_ts: float) -> str | None: + return self.requestor.send_data( + EmbeddingsRequestEnum.summarize_review.value, + {"start_ts": start_ts, "end_ts": end_ts}, + ) diff --git a/frigate/embeddings/embeddings.py b/frigate/embeddings/embeddings.py index 833ab9ab2..a0981f669 100644 --- a/frigate/embeddings/embeddings.py +++ b/frigate/embeddings/embeddings.py @@ -7,21 +7,26 @@ import os import threading import time -from numpy import ndarray +import numpy as np +from peewee import DoesNotExist, IntegrityError from PIL import Image from playhouse.shortcuts import model_to_dict +from frigate.comms.embeddings_updater import ( + EmbeddingsRequestEnum, +) from frigate.comms.inter_process import InterProcessRequestor from frigate.config import FrigateConfig from frigate.config.classification import SemanticSearchModelEnum from frigate.const import ( CONFIG_DIR, + TRIGGER_DIR, UPDATE_EMBEDDINGS_REINDEX_PROGRESS, UPDATE_MODEL_STATE, ) from frigate.data_processing.types import DataProcessorMetrics from frigate.db.sqlitevecq import SqliteVecQueueDatabase -from frigate.models import Event +from frigate.models import Event, Trigger from frigate.types import ModelStatusTypesEnum from frigate.util.builtin import EventsPerSecond, InferenceSpeed, serialize from frigate.util.path import get_event_thumbnail_bytes @@ -167,7 +172,7 @@ class Embeddings: def embed_thumbnail( self, event_id: str, thumbnail: bytes, upsert: bool = True - ) -> ndarray: + ) -> np.ndarray: """Embed thumbnail and optionally insert into DB. @param: event_id in Events DB @@ -194,7 +199,7 @@ class Embeddings: def batch_embed_thumbnail( self, event_thumbs: dict[str, bytes], upsert: bool = True - ) -> list[ndarray]: + ) -> list[np.ndarray]: """Embed thumbnails and optionally insert into DB. 
@param: event_thumbs Map of Event IDs in DB to thumbnail bytes in jpg format @@ -244,7 +249,7 @@ class Embeddings: def embed_description( self, event_id: str, description: str, upsert: bool = True - ) -> ndarray: + ) -> np.ndarray: start = datetime.datetime.now().timestamp() embedding = self.text_embedding([description])[0] @@ -264,7 +269,7 @@ class Embeddings: def batch_embed_description( self, event_descriptions: dict[str, str], upsert: bool = True - ) -> ndarray: + ) -> np.ndarray: start = datetime.datetime.now().timestamp() # upsert embeddings one by one to avoid token limit embeddings = [] @@ -417,3 +422,224 @@ class Embeddings: with self.reindex_lock: self.reindex_running = False self.reindex_thread = None + + def sync_triggers(self) -> None: + for camera in self.config.cameras.values(): + # Get all existing triggers for this camera + existing_triggers = { + trigger.name: trigger + for trigger in Trigger.select().where(Trigger.camera == camera.name) + } + + # Get all configured trigger names + configured_trigger_names = set(camera.semantic_search.triggers or {}) + + # Create or update triggers from config + for trigger_name, trigger in ( + camera.semantic_search.triggers or {} + ).items(): + if trigger_name in existing_triggers: + existing_trigger = existing_triggers[trigger_name] + needs_embedding_update = False + thumbnail_missing = False + + # Check if data has changed or thumbnail is missing for thumbnail type + if trigger.type == "thumbnail": + thumbnail_path = os.path.join( + TRIGGER_DIR, camera.name, f"{trigger.data}.webp" + ) + try: + event = Event.get(Event.id == trigger.data) + if event.data.get("type") != "object": + logger.warning( + f"Event {trigger.data} is not a tracked object for {trigger.type} trigger" + ) + continue # Skip if not an object + + # Check if thumbnail needs to be updated (data changed or missing) + if ( + existing_trigger.data != trigger.data + or not os.path.exists(thumbnail_path) + ): + thumbnail = get_event_thumbnail_bytes(event) + if not thumbnail: + logger.warning( + f"Unable to retrieve thumbnail for event ID {trigger.data} for {trigger_name}." + ) + continue + self.write_trigger_thumbnail( + camera.name, trigger.data, thumbnail + ) + thumbnail_missing = True + except DoesNotExist: + logger.warning( + f"Event ID {trigger.data} for trigger {trigger_name} does not exist." + ) + continue + + # Update existing trigger if data has changed + if ( + existing_trigger.type != trigger.type + or existing_trigger.data != trigger.data + or existing_trigger.threshold != trigger.threshold + ): + existing_trigger.type = trigger.type + existing_trigger.data = trigger.data + existing_trigger.threshold = trigger.threshold + needs_embedding_update = True + + # Check if embedding is missing or needs update + if ( + not existing_trigger.embedding + or needs_embedding_update + or thumbnail_missing + ): + existing_trigger.embedding = self._calculate_trigger_embedding( + trigger + ) + needs_embedding_update = True + + if needs_embedding_update: + existing_trigger.save() + else: + # Create new trigger + try: + try: + event: Event = Event.get(Event.id == trigger.data) + except DoesNotExist: + logger.warning( + f"Event ID {trigger.data} for trigger {trigger_name} does not exist." + ) + continue + + # Skip the event if not an object + if event.data.get("type") != "object": + logger.warning( + f"Event ID {trigger.data} for trigger {trigger_name} is not a tracked object." 
+ ) + continue + + thumbnail = get_event_thumbnail_bytes(event) + + if not thumbnail: + logger.warning( + f"Unable to retrieve thumbnail for event ID {trigger.data} for {trigger_name}." + ) + continue + + self.write_trigger_thumbnail( + camera.name, trigger.data, thumbnail + ) + + # Calculate embedding for new trigger + embedding = self._calculate_trigger_embedding(trigger) + + Trigger.create( + camera=camera.name, + name=trigger_name, + type=trigger.type, + data=trigger.data, + threshold=trigger.threshold, + model=self.config.semantic_search.model, + embedding=embedding, + triggering_event_id="", + last_triggered=None, + ) + + except IntegrityError: + pass # Handle duplicate creation attempts + + # Remove triggers that are no longer in config + triggers_to_remove = ( + set(existing_triggers.keys()) - configured_trigger_names + ) + if triggers_to_remove: + Trigger.delete().where( + Trigger.camera == camera.name, Trigger.name.in_(triggers_to_remove) + ).execute() + for trigger_name in triggers_to_remove: + self.remove_trigger_thumbnail(camera.name, trigger_name) + + def write_trigger_thumbnail( + self, camera: str, event_id: str, thumbnail: bytes + ) -> None: + """Write the thumbnail to the trigger directory.""" + try: + os.makedirs(os.path.join(TRIGGER_DIR, camera), exist_ok=True) + with open(os.path.join(TRIGGER_DIR, camera, f"{event_id}.webp"), "wb") as f: + f.write(thumbnail) + logger.debug( + f"Writing thumbnail for trigger with data {event_id} in {camera}." + ) + except Exception as e: + logger.error( + f"Failed to write thumbnail for trigger with data {event_id} in {camera}: {e}" + ) + + def remove_trigger_thumbnail(self, camera: str, event_id: str) -> None: + """Write the thumbnail to the trigger directory.""" + try: + os.remove(os.path.join(TRIGGER_DIR, camera, f"{event_id}.webp")) + logger.debug( + f"Deleted thumbnail for trigger with data {event_id} in {camera}." 
+ ) + except Exception as e: + logger.error( + f"Failed to delete thumbnail for trigger with data {event_id} in {camera}: {e}" + ) + + def _calculate_trigger_embedding(self, trigger) -> bytes: + """Calculate embedding for a trigger based on its type and data.""" + if trigger.type == "description": + logger.debug(f"Generating embedding for trigger description {trigger.name}") + embedding = self.requestor.send_data( + EmbeddingsRequestEnum.embed_description.value, + {"id": None, "description": trigger.data, "upsert": False}, + ) + return embedding.astype(np.float32).tobytes() + + elif trigger.type == "thumbnail": + # For image triggers, trigger.data should be an image ID + # Try to get embedding from vec_thumbnails table first + cursor = self.db.execute_sql( + "SELECT thumbnail_embedding FROM vec_thumbnails WHERE id = ?", + [trigger.data], + ) + row = cursor.fetchone() if cursor else None + if row: + return row[0] # Already in bytes format + else: + logger.debug( + f"No thumbnail embedding found for image ID: {trigger.data}, generating from saved trigger thumbnail" + ) + + try: + with open( + os.path.join( + TRIGGER_DIR, trigger.camera, f"{trigger.data}.webp" + ), + "rb", + ) as f: + thumbnail = f.read() + except Exception as e: + logger.error( + f"Failed to read thumbnail for trigger {trigger.name} with ID {trigger.data}: {e}" + ) + return b"" + + logger.debug( + f"Generating embedding for trigger thumbnail {trigger.name} with ID {trigger.data}" + ) + embedding = self.requestor.send_data( + EmbeddingsRequestEnum.embed_thumbnail.value, + { + "id": str(trigger.data), + "thumbnail": str(thumbnail), + "upsert": False, + }, + ) + return embedding.astype(np.float32).tobytes() + + else: + logger.warning(f"Unknown trigger type: {trigger.type}") + return b"" diff --git a/frigate/embeddings/maintainer.py b/frigate/embeddings/maintainer.py index 86bc75737..a129b9677 100644 --- a/frigate/embeddings/maintainer.py +++ b/frigate/embeddings/maintainer.py @@ -12,10 +12,12 @@ from typing import Any, Optional import cv2 import numpy as np from peewee import DoesNotExist -from playhouse.sqliteq import SqliteQueueDatabase from frigate.comms.detections_updater import DetectionSubscriber, DetectionTypeEnum -from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsResponder +from frigate.comms.embeddings_updater import ( + EmbeddingsRequestEnum, + EmbeddingsResponder, +) from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, EventMetadataSubscriber, @@ -27,8 +29,13 @@ from frigate.comms.recordings_updater import ( RecordingsDataSubscriber, RecordingsDataTypeEnum, ) -from frigate.config import FrigateConfig +from frigate.comms.review_updater import ReviewDataSubscriber +from frigate.config import CameraConfig, FrigateConfig from frigate.config.camera.camera import CameraTypeEnum +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) from frigate.const import ( CLIPS_DIR, UPDATE_EVENT_DESCRIPTION, @@ -37,19 +44,29 @@ from frigate.data_processing.common.license_plate.model import ( LicensePlateModelRunner, ) from frigate.data_processing.post.api import PostProcessorApi +from frigate.data_processing.post.audio_transcription import ( + AudioTranscriptionPostProcessor, +) from frigate.data_processing.post.license_plate import ( LicensePlatePostProcessor, ) +from frigate.data_processing.post.review_descriptions import ReviewDescriptionProcessor +from frigate.data_processing.post.semantic_trigger import SemanticTriggerProcessor 
from frigate.data_processing.real_time.api import RealTimeProcessorApi from frigate.data_processing.real_time.bird import BirdRealTimeProcessor +from frigate.data_processing.real_time.custom_classification import ( + CustomObjectClassificationProcessor, + CustomStateClassificationProcessor, +) from frigate.data_processing.real_time.face import FaceRealTimeProcessor from frigate.data_processing.real_time.license_plate import ( LicensePlateRealTimeProcessor, ) from frigate.data_processing.types import DataProcessorMetrics, PostProcessDataEnum +from frigate.db.sqlitevecq import SqliteVecQueueDatabase from frigate.events.types import EventTypeEnum, RegenerateDescriptionEnum from frigate.genai import get_genai_client -from frigate.models import Event +from frigate.models import Event, Recordings, ReviewSegment, Trigger from frigate.types import TrackedObjectUpdateTypesEnum from frigate.util.builtin import serialize from frigate.util.image import ( @@ -71,15 +88,41 @@ class EmbeddingMaintainer(threading.Thread): def __init__( self, - db: SqliteQueueDatabase, config: FrigateConfig, - metrics: DataProcessorMetrics, + metrics: DataProcessorMetrics | None, stop_event: MpEvent, ) -> None: super().__init__(name="embeddings_maintainer") self.config = config self.metrics = metrics self.embeddings = None + self.config_updater = CameraConfigUpdateSubscriber( + self.config, + self.config.cameras, + [ + CameraConfigUpdateEnum.add, + CameraConfigUpdateEnum.remove, + CameraConfigUpdateEnum.object_genai, + CameraConfigUpdateEnum.review_genai, + CameraConfigUpdateEnum.semantic_search, + ], + ) + + # Configure Frigate DB + db = SqliteVecQueueDatabase( + config.database.path, + pragmas={ + "auto_vacuum": "FULL", # Does not defragment database + "cache_size": -512 * 1000, # 512MB of cache + "synchronous": "NORMAL", # Safe when using WAL https://www.sqlite.org/pragma.html#pragma_synchronous + }, + timeout=max( + 60, 10 * len([c for c in config.cameras.values() if c.enabled]) + ), + load_vec_extension=True, + ) + models = [Event, Recordings, ReviewSegment, Trigger] + db.bind(models) if config.semantic_search.enabled: self.embeddings = Embeddings(config, db, metrics) @@ -88,6 +131,9 @@ class EmbeddingMaintainer(threading.Thread): if config.semantic_search.reindex: self.embeddings.reindex() + # Sync semantic search triggers in db with config + self.embeddings.sync_triggers() + # create communication for updating event descriptions self.requestor = InterProcessRequestor() @@ -100,11 +146,13 @@ class EmbeddingMaintainer(threading.Thread): self.recordings_subscriber = RecordingsDataSubscriber( RecordingsDataTypeEnum.recordings_available_through ) - self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.video) + self.review_subscriber = ReviewDataSubscriber("") + self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.video.value) self.embeddings_responder = EmbeddingsResponder() self.frame_manager = SharedMemoryFrameManager() self.detected_license_plates: dict[str, dict[str, Any]] = {} + self.genai_client = get_genai_client(config) # model runners to share between realtime and post processors if self.config.lpr.enabled: @@ -143,9 +191,30 @@ class EmbeddingMaintainer(threading.Thread): ) ) + for model_config in self.config.classification.custom.values(): + self.realtime_processors.append( + CustomStateClassificationProcessor( + self.config, model_config, self.requestor, self.metrics + ) + if model_config.state_config != None + else CustomObjectClassificationProcessor( + self.config, + model_config, + 
self.event_metadata_publisher, + self.metrics, + ) + ) + # post processors self.post_processors: list[PostProcessorApi] = [] + if any(c.review.genai.enabled_in_config for c in self.config.cameras.values()): + self.post_processors.append( + ReviewDescriptionProcessor( + self.config, self.requestor, self.metrics, self.genai_client + ) + ) + if self.config.lpr.enabled: self.post_processors.append( LicensePlatePostProcessor( @@ -158,10 +227,28 @@ class EmbeddingMaintainer(threading.Thread): ) ) + if any( + c.enabled_in_config and c.audio_transcription.enabled + for c in self.config.cameras.values() + ): + self.post_processors.append( + AudioTranscriptionPostProcessor(self.config, self.requestor, metrics) + ) + + if self.config.semantic_search.enabled: + self.post_processors.append( + SemanticTriggerProcessor( + db, + self.config, + self.requestor, + metrics, + self.embeddings, + ) + ) + self.stop_event = stop_event self.tracked_events: dict[str, list[Any]] = {} self.early_request_sent: dict[str, bool] = {} - self.genai_client = get_genai_client(config) # recordings data self.recordings_available_through: dict[str, float] = {} @@ -169,14 +256,17 @@ class EmbeddingMaintainer(threading.Thread): def run(self) -> None: """Maintain a SQLite-vec database for semantic search.""" while not self.stop_event.is_set(): + self.config_updater.check_for_updates() self._process_requests() self._process_updates() self._process_recordings_updates() - self._process_dedicated_lpr() + self._process_review_updates() + self._process_frame_updates() self._expire_dedicated_lpr() self._process_finalized() self._process_event_metadata() + self.config_updater.stop() self.event_subscriber.stop() self.event_end_subscriber.stop() self.recordings_subscriber.stop() @@ -247,7 +337,10 @@ class EmbeddingMaintainer(threading.Thread): camera_config = self.config.cameras[camera] # no need to process updated objects if face recognition, lpr, genai are disabled - if not camera_config.genai.enabled and len(self.realtime_processors) == 0: + if ( + not camera_config.objects.genai.enabled + and len(self.realtime_processors) == 0 + ): return # Create our own thumbnail based on the bounding box and the frame time @@ -285,23 +378,23 @@ class EmbeddingMaintainer(threading.Thread): # check if we're configured to send an early request after a minimum number of updates received if ( self.genai_client is not None - and camera_config.genai.send_triggers.after_significant_updates + and camera_config.objects.genai.send_triggers.after_significant_updates ): if ( len(self.tracked_events.get(data["id"], [])) - >= camera_config.genai.send_triggers.after_significant_updates + >= camera_config.objects.genai.send_triggers.after_significant_updates and data["id"] not in self.early_request_sent ): if data["has_clip"] and data["has_snapshot"]: event: Event = Event.get(Event.id == data["id"]) if ( - not camera_config.genai.objects - or event.label in camera_config.genai.objects + not camera_config.objects.genai.objects + or event.label in camera_config.objects.genai.objects ) and ( - not camera_config.genai.required_zones + not camera_config.objects.genai.required_zones or set(data["entered_zones"]) - & set(camera_config.genai.required_zones) + & set(camera_config.objects.genai.required_zones) ): logger.debug(f"{camera} sending early request to GenAI") @@ -332,31 +425,6 @@ class EmbeddingMaintainer(threading.Thread): event_id, camera, updated_db = ended camera_config = self.config.cameras[camera] - # call any defined post processors - for processor in 
self.post_processors: - if isinstance(processor, LicensePlatePostProcessor): - recordings_available = self.recordings_available_through.get(camera) - if ( - recordings_available is not None - and event_id in self.detected_license_plates - and self.config.cameras[camera].type != "lpr" - ): - processor.process_data( - { - "event_id": event_id, - "camera": camera, - "recordings_available": self.recordings_available_through[ - camera - ], - "obj_data": self.detected_license_plates[event_id][ - "obj_data" - ], - }, - PostProcessDataEnum.recording, - ) - else: - processor.process_data(event_id, PostProcessDataEnum.event_id) - # expire in realtime processors for processor in self.realtime_processors: processor.expire_object(event_id, camera) @@ -379,20 +447,56 @@ class EmbeddingMaintainer(threading.Thread): # Run GenAI if ( - camera_config.genai.enabled - and camera_config.genai.send_triggers.tracked_object_end + camera_config.objects.genai.enabled + and camera_config.objects.genai.send_triggers.tracked_object_end and self.genai_client is not None and ( - not camera_config.genai.objects - or event.label in camera_config.genai.objects + not camera_config.objects.genai.objects + or event.label in camera_config.objects.genai.objects ) and ( - not camera_config.genai.required_zones - or set(event.zones) & set(camera_config.genai.required_zones) + not camera_config.objects.genai.required_zones + or set(event.zones) + & set(camera_config.objects.genai.required_zones) ) ): self._process_genai_description(event, camera_config, thumbnail) + # call any defined post processors + for processor in self.post_processors: + if isinstance(processor, LicensePlatePostProcessor): + recordings_available = self.recordings_available_through.get(camera) + if ( + recordings_available is not None + and event_id in self.detected_license_plates + and self.config.cameras[camera].type != "lpr" + ): + processor.process_data( + { + "event_id": event_id, + "camera": camera, + "recordings_available": self.recordings_available_through[ + camera + ], + "obj_data": self.detected_license_plates[event_id][ + "obj_data" + ], + }, + PostProcessDataEnum.recording, + ) + elif isinstance(processor, AudioTranscriptionPostProcessor): + continue + elif isinstance(processor, SemanticTriggerProcessor): + processor.process_data( + {"event_id": event_id, "camera": camera, "type": "image"}, + PostProcessDataEnum.tracked_object, + ) + else: + processor.process_data( + {"event_id": event_id, "camera": camera}, + PostProcessDataEnum.tracked_object, + ) + # Delete tracked events based on the event_id if event_id in self.tracked_events: del self.tracked_events[event_id] @@ -412,8 +516,8 @@ class EmbeddingMaintainer(threading.Thread): to_remove.append(id) for id in to_remove: self.event_metadata_publisher.publish( - EventMetadataTypeEnum.manual_event_end, (id, now), + EventMetadataTypeEnum.manual_event_end.value, ) self.detected_license_plates.pop(id) @@ -435,6 +539,18 @@ class EmbeddingMaintainer(threading.Thread): f"{camera} now has recordings available through {recordings_available_through_timestamp}" ) + def _process_review_updates(self) -> None: + """Process review updates.""" + while True: + review_updates = self.review_subscriber.check_for_update() + + if review_updates == None: + break + + for processor in self.post_processors: + if isinstance(processor, ReviewDescriptionProcessor): + processor.process_data(review_updates, PostProcessDataEnum.review) + def _process_event_metadata(self): # Check for regenerate description requests (topic, 
payload) = self.event_metadata_subscriber.check_for_update() @@ -442,14 +558,14 @@ class EmbeddingMaintainer(threading.Thread): if topic is None: return - event_id, source = payload + event_id, source, force = payload if event_id: self.handle_regenerate_description( - event_id, RegenerateDescriptionEnum(source) + event_id, RegenerateDescriptionEnum(source), force ) - def _process_dedicated_lpr(self) -> None: + def _process_frame_updates(self) -> None: """Process event updates""" (topic, data) = self.detection_subscriber.check_for_update() @@ -458,16 +574,17 @@ class EmbeddingMaintainer(threading.Thread): camera, frame_name, _, _, motion_boxes, _ = data - if not camera or not self.config.lpr.enabled or len(motion_boxes) == 0: + if not camera or len(motion_boxes) == 0: return camera_config = self.config.cameras[camera] + dedicated_lpr_enabled = ( + camera_config.type == CameraTypeEnum.lpr + and "license_plate" not in camera_config.objects.track + ) - if ( - camera_config.type != CameraTypeEnum.lpr - or "license_plate" in camera_config.objects.track - ): - # we're not a dedicated lpr camera or we are one but we're using frigate+ + if not dedicated_lpr_enabled and len(self.config.classification.custom) == 0: + # no active features that use this data return try: @@ -484,9 +601,16 @@ class EmbeddingMaintainer(threading.Thread): return for processor in self.realtime_processors: - if isinstance(processor, LicensePlateRealTimeProcessor): + if dedicated_lpr_enabled and isinstance( + processor, LicensePlateRealTimeProcessor + ): processor.process_frame(camera, yuv_frame, True) + if isinstance(processor, CustomStateClassificationProcessor): + processor.process_frame( + {"camera": camera, "motion": motion_boxes}, yuv_frame + ) + self.frame_manager.close(frame_name) def _create_thumbnail(self, yuv_frame, box, height=500) -> Optional[bytes]: @@ -512,8 +636,10 @@ class EmbeddingMaintainer(threading.Thread): self.embeddings.embed_thumbnail(event_id, thumbnail) - def _process_genai_description(self, event, camera_config, thumbnail) -> None: - if event.has_snapshot and camera_config.genai.use_snapshot: + def _process_genai_description( + self, event: Event, camera_config: CameraConfig, thumbnail + ) -> None: + if event.has_snapshot and camera_config.objects.genai.use_snapshot: snapshot_image = self._read_and_crop_snapshot(event, camera_config) if not snapshot_image: return @@ -525,7 +651,7 @@ class EmbeddingMaintainer(threading.Thread): embed_image = ( [snapshot_image] - if event.has_snapshot and camera_config.genai.use_snapshot + if event.has_snapshot and camera_config.objects.genai.use_snapshot else ( [data["thumbnail"] for data in self.tracked_events[event.id]] if num_thumbnails > 0 @@ -533,7 +659,7 @@ class EmbeddingMaintainer(threading.Thread): ) ) - if camera_config.genai.debug_save_thumbnails and num_thumbnails > 0: + if camera_config.objects.genai.debug_save_thumbnails and num_thumbnails > 0: logger.debug(f"Saving {num_thumbnails} thumbnails for event {event.id}") Path(os.path.join(CLIPS_DIR, f"genai-requests/{event.id}")).mkdir( @@ -570,7 +696,7 @@ class EmbeddingMaintainer(threading.Thread): """Embed the description for an event.""" camera_config = self.config.cameras[event.camera] - description = self.genai_client.generate_description( + description = self.genai_client.generate_object_description( camera_config, thumbnails, event ) @@ -593,6 +719,16 @@ class EmbeddingMaintainer(threading.Thread): if self.config.semantic_search.enabled: self.embeddings.embed_description(event.id, description) + # 
Check semantic trigger for this description + for processor in self.post_processors: + if isinstance(processor, SemanticTriggerProcessor): + processor.process_data( + {"event_id": event.id, "camera": event.camera, "type": "text"}, + PostProcessDataEnum.tracked_object, + ) + else: + continue + logger.debug( "Generated description for %s (%d images): %s", event.id, @@ -639,15 +775,21 @@ class EmbeddingMaintainer(threading.Thread): except Exception: return None - def handle_regenerate_description(self, event_id: str, source: str) -> None: + def handle_regenerate_description( + self, event_id: str, source: str, force: bool + ) -> None: try: event: Event = Event.get(Event.id == event_id) except DoesNotExist: logger.error(f"Event {event_id} not found for description regeneration") return + if self.genai_client is None: + logger.error("GenAI not enabled") + return + camera_config = self.config.cameras[event.camera] - if not camera_config.genai.enabled or self.genai_client is None: + if not camera_config.objects.genai.enabled and not force: logger.error(f"GenAI not enabled for camera {event.camera}") return diff --git a/frigate/embeddings/onnx/face_embedding.py b/frigate/embeddings/onnx/face_embedding.py index c0f35a581..acb4507a2 100644 --- a/frigate/embeddings/onnx/face_embedding.py +++ b/frigate/embeddings/onnx/face_embedding.py @@ -6,6 +6,7 @@ import os import numpy as np from frigate.const import MODEL_CACHE_DIR +from frigate.log import redirect_output_to_logger from frigate.util.downloader import ModelDownloader from .base_embedding import BaseEmbedding @@ -53,6 +54,7 @@ class FaceNetEmbedding(BaseEmbedding): self._load_model_and_utils() logger.debug(f"models are already downloaded for {self.model_name}") + @redirect_output_to_logger(logger, logging.DEBUG) def _load_model_and_utils(self): if self.runner is None: if self.downloader: diff --git a/frigate/embeddings/onnx/lpr_embedding.py b/frigate/embeddings/onnx/lpr_embedding.py index ac981da8d..1b5b9acd0 100644 --- a/frigate/embeddings/onnx/lpr_embedding.py +++ b/frigate/embeddings/onnx/lpr_embedding.py @@ -32,13 +32,15 @@ class PaddleOCRDetection(BaseEmbedding): device: str = "AUTO", ): model_file = ( - "detection-large.onnx" if model_size == "large" else "detection-small.onnx" + "detection_v5-large.onnx" + if model_size == "large" + else "detection_v5-small.onnx" ) super().__init__( model_name="paddleocr-onnx", model_file=model_file, download_urls={ - model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/{model_file}" + model_file: f"https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v5/{model_file}" }, ) self.requestor = requestor @@ -156,9 +158,10 @@ class PaddleOCRRecognition(BaseEmbedding): ): super().__init__( model_name="paddleocr-onnx", - model_file="recognition.onnx", + model_file="recognition_v4.onnx", download_urls={ - "recognition.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/recognition.onnx" + "recognition_v4.onnx": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/recognition_v4.onnx", + "ppocr_keys_v1.txt": "https://github.com/hawkeye217/paddleocr-onnx/raw/refs/heads/master/models/v4/ppocr_keys_v1.txt", }, ) self.requestor = requestor diff --git a/frigate/events/audio.py b/frigate/events/audio.py index f2a217fd3..cb1fe392b 100644 --- a/frigate/events/audio.py +++ b/frigate/events/audio.py @@ -6,31 +6,42 @@ import random import string import threading import time +from multiprocessing.managers import 
DictProxy +from multiprocessing.synchronize import Event as MpEvent from typing import Any, Tuple import numpy as np -import frigate.util as util -from frigate.camera import CameraMetrics -from frigate.comms.config_updater import ConfigSubscriber from frigate.comms.detections_updater import DetectionPublisher, DetectionTypeEnum from frigate.comms.event_metadata_updater import ( EventMetadataPublisher, EventMetadataTypeEnum, ) from frigate.comms.inter_process import InterProcessRequestor -from frigate.config import CameraConfig, CameraInput, FfmpegConfig +from frigate.config import CameraConfig, CameraInput, FfmpegConfig, FrigateConfig +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) from frigate.const import ( AUDIO_DURATION, AUDIO_FORMAT, AUDIO_MAX_BIT_RANGE, AUDIO_MIN_CONFIDENCE, AUDIO_SAMPLE_RATE, + PROCESS_PRIORITY_HIGH, +) +from frigate.data_processing.common.audio_transcription.model import ( + AudioTranscriptionModelRunner, +) +from frigate.data_processing.real_time.audio_transcription import ( + AudioTranscriptionRealTimeProcessor, ) from frigate.ffmpeg_presets import parse_preset_input -from frigate.log import LogPipe +from frigate.log import LogPipe, redirect_output_to_logger from frigate.object_detection.base import load_labels from frigate.util.builtin import get_ffmpeg_arg_list +from frigate.util.process import FrigateProcess from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg try: @@ -39,6 +50,9 @@ except ModuleNotFoundError: from tensorflow.lite.python.interpreter import Interpreter +logger = logging.getLogger(__name__) + + def get_ffmpeg_command(ffmpeg: FfmpegConfig) -> list[str]: ffmpeg_input: CameraInput = [i for i in ffmpeg.inputs if "audio" in i.roles][0] input_args = get_ffmpeg_arg_list(ffmpeg.global_args) + ( @@ -67,20 +81,34 @@ def get_ffmpeg_command(ffmpeg: FfmpegConfig) -> list[str]: ) -class AudioProcessor(util.Process): +class AudioProcessor(FrigateProcess): name = "frigate.audio_manager" def __init__( self, + config: FrigateConfig, cameras: list[CameraConfig], - camera_metrics: dict[str, CameraMetrics], + camera_metrics: DictProxy, + stop_event: MpEvent, ): - super().__init__(name="frigate.audio_manager", daemon=True) + super().__init__( + stop_event, PROCESS_PRIORITY_HIGH, name="frigate.audio_manager", daemon=True + ) self.camera_metrics = camera_metrics self.cameras = cameras + self.config = config + + if self.config.audio_transcription.enabled: + self.transcription_model_runner = AudioTranscriptionModelRunner( + self.config.audio_transcription.device, + self.config.audio_transcription.model_size, + ) + else: + self.transcription_model_runner = None def run(self) -> None: + self.pre_run_setup(self.config.logger) audio_threads: list[AudioEventMaintainer] = [] threading.current_thread().name = "process:audio_manager" @@ -91,7 +119,9 @@ class AudioProcessor(util.Process): for camera in self.cameras: audio_thread = AudioEventMaintainer( camera, + self.config, self.camera_metrics, + self.transcription_model_runner, self.stop_event, ) audio_threads.append(audio_thread) @@ -119,46 +149,75 @@ class AudioEventMaintainer(threading.Thread): def __init__( self, camera: CameraConfig, - camera_metrics: dict[str, CameraMetrics], + config: FrigateConfig, + camera_metrics: DictProxy, + audio_transcription_model_runner: AudioTranscriptionModelRunner | None, stop_event: threading.Event, ) -> None: super().__init__(name=f"{camera.name}_audio_event_processor") - self.config = camera + self.config = config + 
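+        # note: self.config is now the global FrigateConfig (needed for the
+        # audio transcription settings), while the per-camera settings move
+        # to self.camera_config below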
self.camera_config = camera self.camera_metrics = camera_metrics self.detections: dict[dict[str, Any]] = {} self.stop_event = stop_event - self.detector = AudioTfl(stop_event, self.config.audio.num_threads) + self.detector = AudioTfl(stop_event, self.camera_config.audio.num_threads) self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),) self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2)) - self.logger = logging.getLogger(f"audio.{self.config.name}") - self.ffmpeg_cmd = get_ffmpeg_command(self.config.ffmpeg) - self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio") + self.logger = logging.getLogger(f"audio.{self.camera_config.name}") + self.ffmpeg_cmd = get_ffmpeg_command(self.camera_config.ffmpeg) + self.logpipe = LogPipe(f"ffmpeg.{self.camera_config.name}.audio") self.audio_listener = None + self.audio_transcription_model_runner = audio_transcription_model_runner + self.transcription_processor = None + self.transcription_thread = None # create communication for audio detections self.requestor = InterProcessRequestor() - self.config_subscriber = ConfigSubscriber(f"config/audio/{camera.name}") - self.enabled_subscriber = ConfigSubscriber( - f"config/enabled/{camera.name}", True + self.config_subscriber = CameraConfigUpdateSubscriber( + None, + {self.camera_config.name: self.camera_config}, + [ + CameraConfigUpdateEnum.audio, + CameraConfigUpdateEnum.enabled, + CameraConfigUpdateEnum.audio_transcription, + ], ) - self.detection_publisher = DetectionPublisher(DetectionTypeEnum.audio) + self.detection_publisher = DetectionPublisher(DetectionTypeEnum.audio.value) self.event_metadata_publisher = EventMetadataPublisher() + if self.camera_config.audio_transcription.enabled_in_config: + # init the transcription processor for this camera + self.transcription_processor = AudioTranscriptionRealTimeProcessor( + config=self.config, + camera_config=self.camera_config, + requestor=self.requestor, + model_runner=self.audio_transcription_model_runner, + metrics=self.camera_metrics[self.camera_config.name], + stop_event=self.stop_event, + ) + + self.transcription_thread = threading.Thread( + target=self.transcription_processor.run, + name=f"{self.camera_config.name}_transcription_processor", + daemon=True, + ) + self.transcription_thread.start() + self.was_enabled = camera.enabled def detect_audio(self, audio) -> None: - if not self.config.audio.enabled or self.stop_event.is_set(): + if not self.camera_config.audio.enabled or self.stop_event.is_set(): return audio_as_float = audio.astype(np.float32) rms, dBFS = self.calculate_audio_levels(audio_as_float) - self.camera_metrics[self.config.name].audio_rms.value = rms - self.camera_metrics[self.config.name].audio_dBFS.value = dBFS + self.camera_metrics[self.camera_config.name].audio_rms.value = rms + self.camera_metrics[self.camera_config.name].audio_dBFS.value = dBFS # only run audio detection when volume is above min_volume - if rms >= self.config.audio.min_volume: + if rms >= self.camera_config.audio.min_volume: # create waveform relative to max range and look for detections waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32) model_detections = self.detector.detect(waveform) @@ -166,28 +225,42 @@ class AudioEventMaintainer(threading.Thread): for label, score, _ in model_detections: self.logger.debug( - f"{self.config.name} heard {label} with a score of {score}" + f"{self.camera_config.name} heard {label} with a score of {score}" ) - if label not in self.config.audio.listen: + if label not in 
self.camera_config.audio.listen: continue - if score > dict((self.config.audio.filters or {}).get(label, {})).get( - "threshold", 0.8 - ): + if score > dict( + (self.camera_config.audio.filters or {}).get(label, {}) + ).get("threshold", 0.8): self.handle_detection(label, score) audio_detections.append(label) # send audio detection data self.detection_publisher.publish( ( - self.config.name, + self.camera_config.name, datetime.datetime.now().timestamp(), dBFS, audio_detections, ) ) + # run audio transcription + if self.transcription_processor is not None: + if self.camera_config.audio_transcription.live_enabled: + # process audio until we've reached the endpoint + self.transcription_processor.process_audio( + { + "id": f"{self.camera_config.name}_audio", + "camera": self.camera_config.name, + }, + audio, + ) + else: + self.transcription_processor.check_unload_model() + self.expire_detections() def calculate_audio_levels(self, audio_as_float: np.float32) -> Tuple[float, float]: @@ -201,8 +274,8 @@ class AudioEventMaintainer(threading.Thread): else: dBFS = 0 - self.requestor.send_data(f"{self.config.name}/audio/dBFS", float(dBFS)) - self.requestor.send_data(f"{self.config.name}/audio/rms", float(rms)) + self.requestor.send_data(f"{self.camera_config.name}/audio/dBFS", float(dBFS)) + self.requestor.send_data(f"{self.camera_config.name}/audio/rms", float(rms)) return float(rms), float(dBFS) @@ -217,13 +290,12 @@ class AudioEventMaintainer(threading.Thread): random.choices(string.ascii_lowercase + string.digits, k=6) ) event_id = f"{now}-{rand_id}" - self.requestor.send_data(f"{self.config.name}/audio/{label}", "ON") + self.requestor.send_data(f"{self.camera_config.name}/audio/{label}", "ON") self.event_metadata_publisher.publish( - EventMetadataTypeEnum.manual_event_create, ( now, - self.config.name, + self.camera_config.name, label, event_id, True, @@ -233,6 +305,7 @@ class AudioEventMaintainer(threading.Thread): "audio", {}, ), + EventMetadataTypeEnum.manual_event_create.value, ) self.detections[label] = { "id": event_id, @@ -249,15 +322,15 @@ class AudioEventMaintainer(threading.Thread): if ( now - detection.get("last_detection", now) - > self.config.audio.max_not_heard + > self.camera_config.audio.max_not_heard ): self.requestor.send_data( - f"{self.config.name}/audio/{detection['label']}", "OFF" + f"{self.camera_config.name}/audio/{detection['label']}", "OFF" ) self.event_metadata_publisher.publish( - EventMetadataTypeEnum.manual_event_end, (detection["id"], detection["last_detection"]), + EventMetadataTypeEnum.manual_event_end.value, ) self.detections[detection["label"]] = None @@ -266,10 +339,12 @@ class AudioEventMaintainer(threading.Thread): now = datetime.datetime.now().timestamp() for label, detection in list(self.detections.items()): if detection: - self.requestor.send_data(f"{self.config.name}/audio/{label}", "OFF") + self.requestor.send_data( + f"{self.camera_config.name}/audio/{label}", "OFF" + ) self.event_metadata_publisher.publish( - EventMetadataTypeEnum.manual_event_end, (detection["id"], now), + EventMetadataTypeEnum.manual_event_end.value, ) self.detections[label] = None @@ -287,7 +362,7 @@ class AudioEventMaintainer(threading.Thread): if self.stop_event.is_set(): return - time.sleep(self.config.ffmpeg.retry_interval) + time.sleep(self.camera_config.ffmpeg.retry_interval) self.logpipe.dump() self.start_or_restart_ffmpeg() @@ -308,30 +383,21 @@ class AudioEventMaintainer(threading.Thread): self.logger.error(f"Error reading audio data from ffmpeg process: {e}") 
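            # log_and_restart (defined above) waits ffmpeg.retry_interval,
            # dumps buffered ffmpeg output, and restarts the listener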
log_and_restart() - def _update_enabled_state(self) -> bool: - """Fetch the latest config and update enabled state.""" - _, config_data = self.enabled_subscriber.check_for_update() - if config_data: - self.config.enabled = config_data.enabled - return config_data.enabled - - return self.config.enabled - def run(self) -> None: - if self._update_enabled_state(): + if self.camera_config.enabled: self.start_or_restart_ffmpeg() while not self.stop_event.is_set(): - enabled = self._update_enabled_state() + enabled = self.camera_config.enabled if enabled != self.was_enabled: if enabled: self.logger.debug( - f"Enabling audio detections for {self.config.name}" + f"Enabling audio detections for {self.camera_config.name}" ) self.start_or_restart_ffmpeg() else: self.logger.debug( - f"Disabling audio detections for {self.config.name}, ending events" + f"Disabling audio detections for {self.camera_config.name}, ending events" ) self.expire_all_detections() stop_ffmpeg(self.audio_listener, self.logger) @@ -344,26 +410,26 @@ class AudioEventMaintainer(threading.Thread): continue # check if there is an updated config - ( - updated_topic, - updated_audio_config, - ) = self.config_subscriber.check_for_update() - - if updated_topic: - self.config.audio = updated_audio_config + self.config_subscriber.check_for_updates() self.read_audio() if self.audio_listener: stop_ffmpeg(self.audio_listener, self.logger) + if self.transcription_thread: + self.transcription_thread.join(timeout=2) + if self.transcription_thread.is_alive(): + self.logger.warning( + f"Audio transcription thread {self.transcription_thread.name} is still alive" + ) self.logpipe.close() self.requestor.stop() self.config_subscriber.stop() - self.enabled_subscriber.stop() self.detection_publisher.stop() class AudioTfl: + @redirect_output_to_logger(logger, logging.DEBUG) def __init__(self, stop_event: threading.Event, num_threads=2): self.stop_event = stop_event self.num_threads = num_threads diff --git a/frigate/genai/__init__.py b/frigate/genai/__init__.py index 2c0aadbd9..4a9789097 100644 --- a/frigate/genai/__init__.py +++ b/frigate/genai/__init__.py @@ -1,13 +1,17 @@ """Generative AI module for Frigate.""" +import datetime import importlib import logging import os -from typing import Optional +import re +from typing import Any, Optional from playhouse.shortcuts import model_to_dict from frigate.config import CameraConfig, FrigateConfig, GenAIConfig, GenAIProviderEnum +from frigate.const import CLIPS_DIR +from frigate.data_processing.post.types import ReviewMetadata from frigate.models import Event logger = logging.getLogger(__name__) @@ -33,16 +37,154 @@ class GenAIClient: self.timeout = timeout self.provider = self._init_provider() - def generate_description( + def generate_review_description( + self, + review_data: dict[str, Any], + thumbnails: list[bytes], + concerns: list[str], + preferred_language: str | None, + debug_save: bool, + ) -> ReviewMetadata | None: + """Generate a description for the review item activity.""" + + def get_concern_prompt() -> str: + if concerns: + concern_list = "\n - ".join(concerns) + return f""" +- `other_concerns` (list of strings): Include a list of any of the following concerns that are occurring: + - {concern_list}""" + else: + return "" + + def get_language_prompt() -> str: + if preferred_language: + return f"Provide your answer in {preferred_language}" + else: + return "" + + context_prompt = f""" +Please analyze the sequence of images ({len(thumbnails)} total) taken in chronological order from the 
perspective of the {review_data["camera"].replace("_", " ")} security camera. + +Your task is to provide a clear, security-focused description of the scene that: +1. States exactly what is happening based on observable actions and movements. +2. Identifies and emphasizes behaviors that match patterns of suspicious activity. +3. Assigns a potential_threat_level based on the definitions below, applying them consistently. + +Facts come first, but identifying security risks is the primary goal. + +When forming your description: +- Describe the time, people, and objects exactly as seen. Include any observable environmental changes (e.g., lighting changes triggered by activity). +- Time of day should **increase suspicion only when paired with unusual or security-relevant behaviors**. Do not raise the threat level for common residential activities (e.g., residents walking pets, retrieving mail, gardening, playing with pets, supervising children) even at unusual hours, unless other suspicious indicators are present. +- Focus on behaviors that are uncharacteristic of innocent activity: loitering without clear purpose, avoiding cameras, inspecting vehicles/doors, changing behavior when lights activate, scanning surroundings without an apparent benign reason. +- **Benign context override**: If scanning or looking around is clearly part of an innocent activity (such as playing with a dog, gardening, supervising children, or watching for a pet), do not treat it as suspicious. + +Your response MUST be a flat JSON object with: +- `scene` (string): A full description including setting, entities, actions, and any plausible supported inferences. +- `confidence` (float): 0-1 confidence in the analysis. +- `potential_threat_level` (integer): 0, 1, or 2 as defined below. +{get_concern_prompt()} + +Threat-level definitions: +- 0 — Typical or expected activity for this location/time (includes residents, guests, or known animals engaged in normal activities, even if they glance around or scan surroundings). +- 1 — Unusual or suspicious activity: At least one security-relevant behavior is present **and not explainable by a normal residential activity**. +- 2 — Active or immediate threat: Breaking in, vandalism, aggression, weapon display. + +Sequence details: +- Frame 1 = earliest, Frame {len(thumbnails)} = latest +- Activity started at {review_data["start"]} and lasted {review_data["duration"]} seconds +- Detected objects: {", ".join(review_data["objects"])} +- Verified recognized objects: {", ".join(review_data["recognized_objects"]) or "None"} +- Zones involved: {", ".join(z.replace("_", " ").title() for z in review_data["zones"]) or "None"} + +**IMPORTANT:** +- Values must be plain strings, floats, or integers — no nested objects, no extra commentary. 
+{get_language_prompt()} + """ + logger.debug( + f"Sending {len(thumbnails)} images to create review description on {review_data['camera']}" + ) + + if debug_save: + with open( + os.path.join( + CLIPS_DIR, "genai-requests", review_data["id"], "prompt.txt" + ), + "w", + ) as f: + f.write(context_prompt) + + response = self._send(context_prompt, thumbnails) + + if debug_save: + with open( + os.path.join( + CLIPS_DIR, "genai-requests", review_data["id"], "response.txt" + ), + "w", + ) as f: + f.write(response) + + if response: + clean_json = re.sub( + r"\n?```$", "", re.sub(r"^```[a-zA-Z0-9]*\n?", "", response) + ) + + try: + metadata = ReviewMetadata.model_validate_json(clean_json) + + if review_data["recognized_objects"]: + metadata.potential_threat_level = 0 + + return metadata + except Exception as e: + # rarely LLMs can fail to follow directions on output format + logger.warning( + f"Failed to parse review description as the response did not match expected format. {e}" + ) + return None + else: + return None + + def generate_review_summary( + self, start_ts: float, end_ts: float, segments: list[dict[str, Any]] + ) -> str | None: + """Generate a summary of review item descriptions over a period of time.""" + time_range = f"{datetime.datetime.fromtimestamp(start_ts).strftime('%I:%M %p')} to {datetime.datetime.fromtimestamp(end_ts).strftime('%I:%M %p')}" + timeline_summary_prompt = f""" +You are a security officer. Time range: {time_range}. +Input: JSON list with "scene", "confidence", "potential_threat_level" (1-2), "other_concerns". +Write a report: + +Security Summary - {time_range} +[One-sentence overview of activity] +[Chronological bullet list of events with timestamps if in scene] +[Final threat assessment] + +Rules: +- List events in order. +- Highlight potential_threat_level ≥ 1 with exact times. +- Note any of the additional concerns which are present. +- Note unusual activity even if not threats. +- If no threats: "Final assessment: Only normal activity observed during this period." +- No commentary, questions, or recommendations. +- Output only the report. 
+ """ + + for item in segments: + timeline_summary_prompt += f"\n{item}" + + return self._send(timeline_summary_prompt, []) + + def generate_object_description( self, camera_config: CameraConfig, thumbnails: list[bytes], event: Event, ) -> Optional[str]: """Generate a description for the frame.""" - prompt = camera_config.genai.object_prompts.get( + prompt = camera_config.objects.genai.object_prompts.get( event.label, - camera_config.genai.prompt, + camera_config.objects.genai.prompt, ).format(**model_to_dict(event)) logger.debug(f"Sending images to genai provider with prompt: {prompt}") return self._send(prompt, thumbnails) @@ -58,16 +200,13 @@ class GenAIClient: def get_genai_client(config: FrigateConfig) -> Optional[GenAIClient]: """Get the GenAI client.""" - genai_config = config.genai - genai_cameras = [ - c for c in config.cameras.values() if c.enabled and c.genai.enabled - ] + if not config.genai.provider: + return None - if genai_cameras: - load_providers() - provider = PROVIDERS.get(genai_config.provider) - if provider: - return provider(genai_config) + load_providers() + provider = PROVIDERS.get(config.genai.provider) + if provider: + return provider(config.genai) return None diff --git a/frigate/genai/gemini.py b/frigate/genai/gemini.py index 750454e25..8c355b37a 100644 --- a/frigate/genai/gemini.py +++ b/frigate/genai/gemini.py @@ -21,7 +21,9 @@ class GeminiClient(GenAIClient): def _init_provider(self): """Initialize the client.""" genai.configure(api_key=self.genai_config.api_key) - return genai.GenerativeModel(self.genai_config.model) + return genai.GenerativeModel( + self.genai_config.model, **self.genai_config.provider_options + ) def _send(self, prompt: str, images: list[bytes]) -> Optional[str]: """Submit a request to Gemini.""" diff --git a/frigate/genai/ollama.py b/frigate/genai/ollama.py index e67d532f0..0fb44d785 100644 --- a/frigate/genai/ollama.py +++ b/frigate/genai/ollama.py @@ -47,7 +47,8 @@ class OllamaClient(GenAIClient): result = self.provider.generate( self.genai_config.model, prompt, - images=images, + images=images if images else None, + **self.genai_config.provider_options, ) return result["response"].strip() except (TimeoutException, ResponseError) as e: diff --git a/frigate/genai/openai.py b/frigate/genai/openai.py index 76ba8cb44..eb3016fad 100644 --- a/frigate/genai/openai.py +++ b/frigate/genai/openai.py @@ -21,7 +21,9 @@ class OpenAIClient(GenAIClient): def _init_provider(self): """Initialize the client.""" - return OpenAI(api_key=self.genai_config.api_key) + return OpenAI( + api_key=self.genai_config.api_key, **self.genai_config.provider_options + ) def _send(self, prompt: str, images: list[bytes]) -> Optional[str]: """Submit a request to OpenAI.""" diff --git a/frigate/log.py b/frigate/log.py index 096b52215..f2171ffe0 100644 --- a/frigate/log.py +++ b/frigate/log.py @@ -1,14 +1,18 @@ # In log.py import atexit +import io import logging -import multiprocessing as mp import os import sys import threading from collections import deque +from contextlib import contextmanager +from enum import Enum +from functools import wraps from logging.handlers import QueueHandler, QueueListener -from queue import Queue -from typing import Deque, Optional +from multiprocessing.managers import SyncManager +from queue import Empty, Queue +from typing import Any, Callable, Deque, Generator, Optional from frigate.util.builtin import clean_camera_user_pass @@ -33,14 +37,21 @@ LOG_HANDLER.addFilter( not in record.getMessage() ) + +class LogLevel(str, Enum): + debug = 
"debug" + info = "info" + warning = "warning" + error = "error" + critical = "critical" + + log_listener: Optional[QueueListener] = None log_queue: Optional[Queue] = None -manager = None -def setup_logging() -> None: - global log_listener, log_queue, manager - manager = mp.Manager() +def setup_logging(manager: SyncManager) -> None: + global log_listener, log_queue log_queue = manager.Queue() log_listener = QueueListener(log_queue, LOG_HANDLER, respect_handler_level=True) @@ -57,13 +68,27 @@ def setup_logging() -> None: def _stop_logging() -> None: - global log_listener, manager + global log_listener if log_listener is not None: log_listener.stop() log_listener = None - if manager is not None: - manager.shutdown() - manager = None + + +def apply_log_levels(default: str, log_levels: dict[str, LogLevel]) -> None: + logging.getLogger().setLevel(default) + + log_levels = { + "absl": LogLevel.error, + "httpx": LogLevel.error, + "matplotlib": LogLevel.error, + "tensorflow": LogLevel.error, + "werkzeug": LogLevel.error, + "ws4py": LogLevel.error, + **log_levels, + } + + for log, level in log_levels.items(): + logging.getLogger(log).setLevel(level.value.upper()) # When a multiprocessing.Process exits, python tries to flush stdout and stderr. However, if the @@ -81,11 +106,11 @@ os.register_at_fork(after_in_child=reopen_std_streams) # based on https://codereview.stackexchange.com/a/17959 class LogPipe(threading.Thread): - def __init__(self, log_name: str): + def __init__(self, log_name: str, level: int = logging.ERROR): """Setup the object with a logger and start the thread""" super().__init__(daemon=False) self.logger = logging.getLogger(log_name) - self.level = logging.ERROR + self.level = level self.deque: Deque[str] = deque(maxlen=100) self.fdRead, self.fdWrite = os.pipe() self.pipeReader = os.fdopen(self.fdRead) @@ -114,3 +139,182 @@ class LogPipe(threading.Thread): def close(self) -> None: """Close the write end of the pipe.""" os.close(self.fdWrite) + + +class LogRedirect(io.StringIO): + """ + A custom file-like object to capture stdout and process it. + It extends io.StringIO to capture output and then processes it + line by line. + """ + + def __init__(self, logger_instance: logging.Logger, level: int): + super().__init__() + self.logger = logger_instance + self.log_level = level + self._line_buffer: list[str] = [] + + def write(self, s: Any) -> int: + if not isinstance(s, str): + s = str(s) + + self._line_buffer.append(s) + + # Process output line by line if a newline is present + if "\n" in s: + full_output = "".join(self._line_buffer) + lines = full_output.splitlines(keepends=True) + self._line_buffer = [] + + for line in lines: + if line.endswith("\n"): + self._process_line(line.rstrip("\n")) + else: + self._line_buffer.append(line) + + return len(s) + + def _process_line(self, line: str) -> None: + self.logger.log(self.log_level, line) + + def flush(self) -> None: + if self._line_buffer: + full_output = "".join(self._line_buffer) + self._line_buffer = [] + if full_output: # Only process if there's content + self._process_line(full_output) + + def __enter__(self) -> "LogRedirect": + """Context manager entry point.""" + return self + + def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: + """Context manager exit point. 
Ensures buffered content is flushed.""" + self.flush() + + +@contextmanager +def __redirect_fd_to_queue(queue: Queue[str]) -> Generator[None, None, None]: + """Redirect file descriptor 1 (stdout) to a pipe and capture output in a queue.""" + stdout_fd = os.dup(1) + read_fd, write_fd = os.pipe() + os.dup2(write_fd, 1) + os.close(write_fd) + + stop_event = threading.Event() + + def reader() -> None: + """Read from pipe and put lines in queue until stop_event is set.""" + try: + with os.fdopen(read_fd, "r") as pipe: + while not stop_event.is_set(): + line = pipe.readline() + if not line: # EOF + break + queue.put(line.strip()) + except OSError as e: + queue.put(f"Reader error: {e}") + finally: + if not stop_event.is_set(): + stop_event.set() + + reader_thread = threading.Thread(target=reader, daemon=False) + reader_thread.start() + + try: + yield + finally: + os.dup2(stdout_fd, 1) + os.close(stdout_fd) + stop_event.set() + reader_thread.join(timeout=1.0) + try: + os.close(read_fd) + except OSError: + pass + + +def redirect_output_to_logger(logger: logging.Logger, level: int) -> Any: + """Decorator to redirect both Python sys.stdout/stderr and C-level stdout to logger.""" + + def decorator(func: Callable) -> Callable: + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + queue: Queue[str] = Queue() + + log_redirect = LogRedirect(logger, level) + old_stdout = sys.stdout + old_stderr = sys.stderr + sys.stdout = log_redirect + sys.stderr = log_redirect + + try: + # Redirect C-level stdout + with __redirect_fd_to_queue(queue): + result = func(*args, **kwargs) + finally: + # Restore Python stdout/stderr + sys.stdout = old_stdout + sys.stderr = old_stderr + log_redirect.flush() + + # Log C-level output from queue + while True: + try: + logger.log(level, queue.get_nowait()) + except Empty: + break + + return result + + return wrapper + + return decorator + + +def suppress_os_output(func: Callable) -> Callable: + """ + A decorator that suppresses all output (stdout and stderr) + at the operating system file descriptor level for the decorated function. + This is useful for silencing noisy C/C++ libraries. + Note: This is a Unix-specific solution using os.dup2 and os.pipe. + It temporarily redirects file descriptors 1 (stdout) and 2 (stderr) + to a non-read pipe, effectively discarding their output. + """ + + @wraps(func) + def wrapper(*args: tuple, **kwargs: dict[str, Any]) -> Any: + # Save the original file descriptors for stdout (1) and stderr (2) + original_stdout_fd = os.dup(1) + original_stderr_fd = os.dup(2) + + # Create dummy pipes. We only need the write ends to redirect to. + # The data written to these pipes will be discarded as nothing + # will read from the read ends. + devnull_read_fd, devnull_write_fd = os.pipe() + + try: + # Redirect stdout (FD 1) and stderr (FD 2) to the write end of our dummy pipe + os.dup2(devnull_write_fd, 1) # Redirect stdout to devnull pipe + os.dup2(devnull_write_fd, 2) # Redirect stderr to devnull pipe + + # Execute the original function + result = func(*args, **kwargs) + + finally: + # Restore original stdout and stderr file descriptors (1 and 2) + # This is crucial to ensure normal printing resumes after the decorated function. + os.dup2(original_stdout_fd, 1) + os.dup2(original_stderr_fd, 2) + + # Close all duplicated and pipe file descriptors to prevent resource leaks. + # It's important to close the read end of the dummy pipe too, + # as nothing is explicitly reading from it. 
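+        # (an assumed caveat: nothing drains the dummy pipe, so output larger
+        # than the kernel pipe buffer (~64 KiB on Linux) would block the
+        # writer; fine for the short bursts of library noise silenced here)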
+ os.close(original_stdout_fd) + os.close(original_stderr_fd) + os.close(devnull_read_fd) + os.close(devnull_write_fd) + + return result + + return wrapper diff --git a/frigate/models.py b/frigate/models.py index 5aa0dc5b2..61889fd1e 100644 --- a/frigate/models.py +++ b/frigate/models.py @@ -1,6 +1,8 @@ from peewee import ( + BlobField, BooleanField, CharField, + CompositeKey, DateTimeField, FloatField, ForeignKeyField, @@ -11,7 +13,7 @@ from peewee import ( from playhouse.sqlite_ext import JSONField -class Event(Model): # type: ignore[misc] +class Event(Model): id = CharField(null=False, primary_key=True, max_length=30) label = CharField(index=True, max_length=20) sub_label = CharField(max_length=100, null=True) @@ -49,7 +51,7 @@ class Event(Model): # type: ignore[misc] data = JSONField() # ex: tracked object box, region, etc. -class Timeline(Model): # type: ignore[misc] +class Timeline(Model): timestamp = DateTimeField() camera = CharField(index=True, max_length=20) source = CharField(index=True, max_length=20) # ex: tracked object, audio, external @@ -58,13 +60,13 @@ class Timeline(Model): # type: ignore[misc] data = JSONField() # ex: tracked object id, region, box, etc. -class Regions(Model): # type: ignore[misc] +class Regions(Model): camera = CharField(null=False, primary_key=True, max_length=20) grid = JSONField() # json blob of grid last_update = DateTimeField() -class Recordings(Model): # type: ignore[misc] +class Recordings(Model): id = CharField(null=False, primary_key=True, max_length=30) camera = CharField(index=True, max_length=20) path = CharField(unique=True) @@ -78,7 +80,7 @@ class Recordings(Model): # type: ignore[misc] regions = IntegerField(null=True) -class Export(Model): # type: ignore[misc] +class Export(Model): id = CharField(null=False, primary_key=True, max_length=30) camera = CharField(index=True, max_length=20) name = CharField(index=True, max_length=100) @@ -88,7 +90,7 @@ class Export(Model): # type: ignore[misc] in_progress = BooleanField() -class ReviewSegment(Model): # type: ignore[misc] +class ReviewSegment(Model): id = CharField(null=False, primary_key=True, max_length=30) camera = CharField(index=True, max_length=20) start_time = DateTimeField() @@ -98,7 +100,7 @@ class ReviewSegment(Model): # type: ignore[misc] data = JSONField() # additional data about detection like list of labels, zone, areas of significant motion -class UserReviewStatus(Model): # type: ignore[misc] +class UserReviewStatus(Model): user_id = CharField(max_length=30) review_segment = ForeignKeyField(ReviewSegment, backref="user_reviews") has_been_reviewed = BooleanField(default=False) @@ -107,7 +109,7 @@ class UserReviewStatus(Model): # type: ignore[misc] indexes = ((("user_id", "review_segment"), True),) -class Previews(Model): # type: ignore[misc] +class Previews(Model): id = CharField(null=False, primary_key=True, max_length=30) camera = CharField(index=True, max_length=20) path = CharField(unique=True) @@ -117,14 +119,14 @@ class Previews(Model): # type: ignore[misc] # Used for temporary table in record/cleanup.py -class RecordingsToDelete(Model): # type: ignore[misc] +class RecordingsToDelete(Model): id = CharField(null=False, primary_key=False, max_length=30) class Meta: temporary = True -class User(Model): # type: ignore[misc] +class User(Model): username = CharField(null=False, primary_key=True, max_length=30) role = CharField( max_length=20, @@ -132,3 +134,18 @@ class User(Model): # type: ignore[misc] ) password_hash = CharField(null=False, max_length=120) notification_tokens = 
JSONField() + + +class Trigger(Model): + camera = CharField(max_length=20) + name = CharField() + type = CharField(max_length=10) + data = TextField() + threshold = FloatField() + model = CharField(max_length=30) + embedding = BlobField() + triggering_event_id = CharField(max_length=30) + last_triggered = DateTimeField() + + class Meta: + primary_key = CompositeKey("camera", "name") diff --git a/frigate/motion/__init__.py b/frigate/motion/__init__.py index db5f25879..1f6785d5d 100644 --- a/frigate/motion/__init__.py +++ b/frigate/motion/__init__.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod from typing import Tuple +from numpy import ndarray + from frigate.config import MotionConfig @@ -18,13 +20,21 @@ class MotionDetector(ABC): pass @abstractmethod - def detect(self, frame): + def detect(self, frame: ndarray) -> list: + """Detect motion and return motion boxes.""" pass @abstractmethod def is_calibrating(self): + """Return if motion is recalibrating.""" + pass + + @abstractmethod + def update_mask(self) -> None: + """Update the motion mask after a config change.""" pass @abstractmethod def stop(self): + """Stop any ongoing work and processes.""" pass diff --git a/frigate/motion/improved_motion.py b/frigate/motion/improved_motion.py index 69de6d015..77eae26a9 100644 --- a/frigate/motion/improved_motion.py +++ b/frigate/motion/improved_motion.py @@ -5,7 +5,6 @@ import numpy as np from scipy.ndimage import gaussian_filter from frigate.camera import PTZMetrics -from frigate.comms.config_updater import ConfigSubscriber from frigate.config import MotionConfig from frigate.motion import MotionDetector from frigate.util.image import grab_cv2_contours @@ -36,12 +35,7 @@ class ImprovedMotionDetector(MotionDetector): self.avg_frame = np.zeros(self.motion_frame_size, np.float32) self.motion_frame_count = 0 self.frame_counter = 0 - resized_mask = cv2.resize( - config.mask, - dsize=(self.motion_frame_size[1], self.motion_frame_size[0]), - interpolation=cv2.INTER_AREA, - ) - self.mask = np.where(resized_mask == [0]) + self.update_mask() self.save_images = False self.calibrating = True self.blur_radius = blur_radius @@ -49,7 +43,6 @@ class ImprovedMotionDetector(MotionDetector): self.contrast_values = np.zeros((contrast_frame_history, 2), np.uint8) self.contrast_values[:, 1:2] = 255 self.contrast_values_index = 0 - self.config_subscriber = ConfigSubscriber(f"config/motion/{name}", True) self.ptz_metrics = ptz_metrics self.last_stop_time = None @@ -59,12 +52,6 @@ class ImprovedMotionDetector(MotionDetector): def detect(self, frame): motion_boxes = [] - # check for updated motion config - _, updated_motion_config = self.config_subscriber.check_for_update() - - if updated_motion_config: - self.config = updated_motion_config - if not self.config.enabled: return motion_boxes @@ -244,6 +231,14 @@ class ImprovedMotionDetector(MotionDetector): return motion_boxes + def update_mask(self) -> None: + resized_mask = cv2.resize( + self.config.mask, + dsize=(self.motion_frame_size[1], self.motion_frame_size[0]), + interpolation=cv2.INTER_AREA, + ) + self.mask = np.where(resized_mask == [0]) + def stop(self) -> None: """stop the motion detector.""" - self.config_subscriber.stop() + pass diff --git a/frigate/mypy.ini b/frigate/mypy.ini index c687a254d..5bad10f49 100644 --- a/frigate/mypy.ini +++ b/frigate/mypy.ini @@ -35,6 +35,9 @@ disallow_untyped_calls = false [mypy-frigate.const] ignore_errors = false +[mypy-frigate.comms.*] +ignore_errors = false + [mypy-frigate.events] ignore_errors = false @@ -50,6 +53,9 
@@
 ignore_errors = false

 [mypy-frigate.stats]
 ignore_errors = false

+[mypy-frigate.track.*]
+ignore_errors = false
+
 [mypy-frigate.types]
 ignore_errors = false
diff --git a/frigate/object_detection/base.py b/frigate/object_detection/base.py
index c77a720a0..921f88b46 100644
--- a/frigate/object_detection/base.py
+++ b/frigate/object_detection/base.py
@@ -1,18 +1,18 @@
 import datetime
 import logging
-import multiprocessing as mp
-import os
 import queue
-import signal
-import threading
 from abc import ABC, abstractmethod
 from multiprocessing import Queue, Value
 from multiprocessing.synchronize import Event as MpEvent

 import numpy as np
-from setproctitle import setproctitle

-import frigate.util as util
+from frigate.comms.object_detector_signaler import (
+    ObjectDetectorPublisher,
+    ObjectDetectorSubscriber,
+)
+from frigate.config import FrigateConfig
+from frigate.const import PROCESS_PRIORITY_HIGH
 from frigate.detectors import create_detector
 from frigate.detectors.detector_config import (
     BaseDetectorConfig,
@@ -21,7 +21,7 @@ from frigate.detectors.detector_config import (
 )
 from frigate.util.builtin import EventsPerSecond, load_labels
 from frigate.util.image import SharedMemoryFrameManager, UntrackedSharedMemory
-from frigate.util.services import listen
+from frigate.util.process import FrigateProcess

 from .util import tensor_transform

@@ -86,63 +86,78 @@ class LocalObjectDetector(ObjectDetector):
         return self.detect_api.detect_raw(tensor_input=tensor_input)

-def run_detector(
-    name: str,
-    detection_queue: Queue,
-    out_events: dict[str, MpEvent],
-    avg_speed: Value,
-    start: Value,
-    detector_config: BaseDetectorConfig,
-):
-    threading.current_thread().name = f"detector:{name}"
-    logger = logging.getLogger(f"detector.{name}")
-    logger.info(f"Starting detection process: {os.getpid()}")
-    setproctitle(f"frigate.detector.{name}")
-    listen()
+class DetectorRunner(FrigateProcess):
+    def __init__(
+        self,
+        name,
+        detection_queue: Queue,
+        cameras: list[str],
+        avg_speed: Value,
+        start_time: Value,
+        config: FrigateConfig,
+        detector_config: BaseDetectorConfig,
+        stop_event: MpEvent,
+    ) -> None:
+        super().__init__(stop_event, PROCESS_PRIORITY_HIGH, name=name, daemon=True)
+        self.detection_queue = detection_queue
+        self.cameras = cameras
+        self.avg_speed = avg_speed
+        self.start_time = start_time
+        self.config = config
+        self.detector_config = detector_config
+        self.outputs: dict = {}

-    stop_event: MpEvent = mp.Event()
-
-    def receiveSignal(signalNumber, frame):
-        stop_event.set()
-
-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
-
-    frame_manager = SharedMemoryFrameManager()
-    object_detector = LocalObjectDetector(detector_config=detector_config)
-
-    outputs = {}
-    for name in out_events.keys():
+    def create_output_shm(self, name: str):
         out_shm = UntrackedSharedMemory(name=f"out-{name}", create=False)
         out_np = np.ndarray((20, 6), dtype=np.float32, buffer=out_shm.buf)
-        outputs[name] = {"shm": out_shm, "np": out_np}
+        self.outputs[name] = {"shm": out_shm, "np": out_np}

-    while not stop_event.is_set():
-        try:
-            connection_id = detection_queue.get(timeout=1)
-        except queue.Empty:
-            continue
-        input_frame = frame_manager.get(
-            connection_id,
-            (1, detector_config.model.height, detector_config.model.width, 3),
-        )
+    def run(self) -> None:
+        self.pre_run_setup(self.config.logger)

-        if input_frame is None:
-            logger.warning(f"Failed to get frame {connection_id} from SHM")
-            continue
+        frame_manager = SharedMemoryFrameManager()
+        object_detector = LocalObjectDetector(detector_config=self.detector_config)
+        detector_publisher = ObjectDetectorPublisher()

-        # detect and send the output
-        start.value = datetime.datetime.now().timestamp()
-        detections = object_detector.detect_raw(input_frame)
-        duration = datetime.datetime.now().timestamp() - start.value
-        frame_manager.close(connection_id)
-        outputs[connection_id]["np"][:] = detections[:]
-        out_events[connection_id].set()
-        start.value = 0.0
+        for name in self.cameras:
+            self.create_output_shm(name)

-        avg_speed.value = (avg_speed.value * 9 + duration) / 10
+        while not self.stop_event.is_set():
+            try:
+                connection_id = self.detection_queue.get(timeout=1)
+            except queue.Empty:
+                continue
+            input_frame = frame_manager.get(
+                connection_id,
+                (
+                    1,
+                    self.detector_config.model.height,
+                    self.detector_config.model.width,
+                    3,
+                ),
+            )

-    logger.info("Exited detection process...")
+            if input_frame is None:
+                logger.warning(f"Failed to get frame {connection_id} from SHM")
+                continue
+
+            # detect and send the output
+            self.start_time.value = datetime.datetime.now().timestamp()
+            detections = object_detector.detect_raw(input_frame)
+            duration = datetime.datetime.now().timestamp() - self.start_time.value
+            frame_manager.close(connection_id)
+
+            if connection_id not in self.outputs:
+                self.create_output_shm(connection_id)
+
+            self.outputs[connection_id]["np"][:] = detections[:]
+            detector_publisher.publish(connection_id)
+            self.start_time.value = 0.0
+
+            self.avg_speed.value = (self.avg_speed.value * 9 + duration) / 10
+
+        detector_publisher.stop()
+        logger.info("Exited detection process...")

 class ObjectDetectProcess:
@@ -150,16 +165,20 @@ class ObjectDetectProcess:
     def __init__(
         self,
         name: str,
         detection_queue: Queue,
-        out_events: dict[str, MpEvent],
+        cameras: list[str],
+        config: FrigateConfig,
         detector_config: BaseDetectorConfig,
+        stop_event: MpEvent,
     ):
         self.name = name
-        self.out_events = out_events
+        self.cameras = cameras
         self.detection_queue = detection_queue
         self.avg_inference_speed = Value("d", 0.01)
         self.detection_start = Value("d", 0.0)
-        self.detect_process: util.Process | None = None
+        self.detect_process: FrigateProcess | None = None
+        self.config = config
         self.detector_config = detector_config
+        self.stop_event = stop_event
         self.start_or_restart()

     def stop(self):
@@ -179,19 +198,16 @@ class ObjectDetectProcess:
         self.detection_start.value = 0.0
         if (self.detect_process is not None) and self.detect_process.is_alive():
             self.stop()
-        self.detect_process = util.Process(
-            target=run_detector,
-            name=f"detector:{self.name}",
-            args=(
-                self.name,
-                self.detection_queue,
-                self.out_events,
-                self.avg_inference_speed,
-                self.detection_start,
-                self.detector_config,
-            ),
+        self.detect_process = DetectorRunner(
+            f"frigate.detector:{self.name}",
+            self.detection_queue,
+            self.cameras,
+            self.avg_inference_speed,
+            self.detection_start,
+            self.config,
+            self.detector_config,
+            self.stop_event,
         )
-        self.detect_process.daemon = True
         self.detect_process.start()

@@ -201,7 +217,6 @@ class RemoteObjectDetector:
         name: str,
         labels: dict[int, str],
         detection_queue: Queue,
-        event: MpEvent,
         model_config: ModelConfig,
         stop_event: MpEvent,
     ):
@@ -209,7 +224,6 @@ class RemoteObjectDetector:
         self.name = name
         self.fps = EventsPerSecond()
         self.detection_queue = detection_queue
-        self.event = event
         self.stop_event = stop_event
         self.shm = UntrackedSharedMemory(name=self.name, create=False)
         self.np_shm = np.ndarray(
@@ -219,6 +233,7 @@ class RemoteObjectDetector:
         )
         self.out_shm = UntrackedSharedMemory(name=f"out-{self.name}", create=False)
         self.out_np_shm = np.ndarray((20, 6), dtype=np.float32, buffer=self.out_shm.buf)
+        self.detector_subscriber = ObjectDetectorSubscriber(name)

     def detect(self, tensor_input, threshold=0.4):
         detections = []
@@ -228,9 +243,8 @@ class RemoteObjectDetector:
         # copy input to shared memory
         self.np_shm[:] = tensor_input[:]
-        self.event.clear()
         self.detection_queue.put(self.name)
-        result = self.event.wait(timeout=5.0)
+        result = self.detector_subscriber.check_for_update()

         # if it timed out
         if result is None:
@@ -246,5 +260,6 @@ class RemoteObjectDetector:
         return detections

     def cleanup(self):
+        self.detector_subscriber.stop()
         self.shm.unlink()
         self.out_shm.unlink()
diff --git a/frigate/output/birdseye.py b/frigate/output/birdseye.py
index b295af82e..0939b5ce4 100644
--- a/frigate/output/birdseye.py
+++ b/frigate/output/birdseye.py
@@ -15,9 +15,9 @@ from typing import Any, Optional
 import cv2
 import numpy as np

-from frigate.comms.config_updater import ConfigSubscriber
+from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import BirdseyeModeEnum, FfmpegConfig, FrigateConfig
-from frigate.const import BASE_DIR, BIRDSEYE_PIPE, INSTALL_DIR
+from frigate.const import BASE_DIR, BIRDSEYE_PIPE, INSTALL_DIR, UPDATE_BIRDSEYE_LAYOUT
 from frigate.util.image import (
     SharedMemoryFrameManager,
     copy_yuv_to_position,
@@ -319,35 +319,48 @@ class BirdsEyeFrameManager:
         self.frame[:] = self.blank_frame

         self.cameras = {}
-        for camera, settings in self.config.cameras.items():
-            # precalculate the coordinates for all the channels
-            y, u1, u2, v1, v2 = get_yuv_crop(
-                settings.frame_shape_yuv,
-                (
-                    0,
-                    0,
-                    settings.frame_shape[1],
-                    settings.frame_shape[0],
-                ),
-            )
-            self.cameras[camera] = {
-                "dimensions": [settings.detect.width, settings.detect.height],
-                "last_active_frame": 0.0,
-                "current_frame": 0.0,
-                "layout_frame": 0.0,
-                "channel_dims": {
-                    "y": y,
-                    "u1": u1,
-                    "u2": u2,
-                    "v1": v1,
-                    "v2": v2,
-                },
-            }
+        for camera in self.config.cameras.keys():
+            self.add_camera(camera)

         self.camera_layout = []
         self.active_cameras = set()
         self.last_output_time = 0.0

+    def add_camera(self, cam: str):
+        """Add a camera to self.cameras with the correct structure."""
+        settings = self.config.cameras[cam]
+        # precalculate the coordinates for all the channels
+        y, u1, u2, v1, v2 = get_yuv_crop(
+            settings.frame_shape_yuv,
+            (
+                0,
+                0,
+                settings.frame_shape[1],
+                settings.frame_shape[0],
+            ),
+        )
+        self.cameras[cam] = {
+            "dimensions": [
+                settings.detect.width,
+                settings.detect.height,
+            ],
+            "last_active_frame": 0.0,
+            "current_frame": 0.0,
+            "layout_frame": 0.0,
+            "channel_dims": {
+                "y": y,
+                "u1": u1,
+                "u2": u2,
+                "v1": v1,
+                "v2": v2,
+            },
+        }
+
+    def remove_camera(self, cam: str):
+        """Remove a camera from self.cameras."""
+        if cam in self.cameras:
+            del self.cameras[cam]
+
     def clear_frame(self):
         logger.debug("Clearing the birdseye frame")
         self.frame[:] = self.blank_frame
@@ -381,10 +394,24 @@
         if mode == BirdseyeModeEnum.objects and object_box_count > 0:
             return True

-    def update_frame(self, frame: Optional[np.ndarray] = None) -> bool:
+    def get_camera_coordinates(self) -> dict[str, dict[str, int]]:
+        """Return the coordinates of each camera in the current layout."""
+        coordinates = {}
+        for row in self.camera_layout:
+            for position in row:
+                camera_name, (x, y, width, height) = position
+                coordinates[camera_name] = {
+                    "x": x,
+                    "y": y,
+                    "width": width,
+                    "height": height,
+                }
+        return coordinates
+
+    def update_frame(self, frame: Optional[np.ndarray] = None) -> tuple[bool, bool]:
         """
         Update birdseye, optionally with a new frame.
-        When no frame is passed, check the layout and update for any disabled cameras.
+        Returns (frame_changed, layout_changed) to indicate if the frame or layout changed.
         """

         # determine how many cameras are tracking objects within the last inactivity_threshold seconds
@@ -422,19 +449,21 @@
                 max_camera_refresh = True
                 self.last_refresh_time = now

-        # Track if the frame changes
+        # Track if the frame or layout changes
         frame_changed = False
+        layout_changed = False

         # If no active cameras and layout is already empty, no update needed
         if len(active_cameras) == 0:
             # if the layout is already cleared
             if len(self.camera_layout) == 0:
-                return False
+                return False, False
             # if the layout needs to be cleared
             self.camera_layout = []
             self.active_cameras = set()
             self.clear_frame()
             frame_changed = True
+            layout_changed = True
         else:
             # Determine if layout needs resetting
             if len(self.active_cameras) - len(active_cameras) == 0:
@@ -454,7 +483,7 @@
                 logger.debug("Resetting Birdseye layout...")
                 self.clear_frame()
                 self.active_cameras = active_cameras
-
+                layout_changed = True  # Layout is changing due to reset
                 # this also converts added_cameras from a set to a list since we need
                 # to pop elements in order
                 active_cameras_to_add = sorted(
@@ -504,7 +533,7 @@
                     # decrease scaling coefficient until height of all cameras can fit into the birdseye canvas
                     while calculating:
                         if self.stop_event.is_set():
                            return frame_changed, layout_changed

                         layout_candidate = self.calculate_layout(
                             active_cameras_to_add, coefficient
@@ -518,7 +547,7 @@
                                 logger.error(
                                     "Error finding appropriate birdseye layout"
                                 )
-                                return
+                                return frame_changed, layout_changed

                         calculating = False
                         self.canvas.set_coefficient(len(active_cameras), coefficient)
@@ -536,7 +565,7 @@
         if frame is not None:
             # Frame presence indicates a potential change
             frame_changed = True
-        return frame_changed
+        return frame_changed, layout_changed

     def calculate_layout(
         self,
@@ -688,7 +717,11 @@
         motion_count: int,
         frame_time: float,
         frame: np.ndarray,
-    ) -> bool:
+    ) -> tuple[bool, bool]:
+        """
+        Update birdseye for a specific camera with new frame data.
+        Returns (frame_changed, layout_changed) to indicate if the frame or layout changed.
+        """
         # don't process if birdseye is disabled for this camera
         camera_config = self.config.cameras[camera]
         force_update = False
@@ -701,7 +734,7 @@
                 self.cameras[camera]["last_active_frame"] = 0
                 force_update = True
             else:
-                return False
+                return False, False

         # update the last active frame for the camera
         self.cameras[camera]["current_frame"] = frame.copy()
@@ -713,21 +746,22 @@

         # limit output to 10 fps
         if not force_update and (now - self.last_output_time) < 1 / 10:
-            return False
+            return False, False

         try:
-            updated_frame = self.update_frame(frame)
+            frame_changed, layout_changed = self.update_frame(frame)
         except Exception:
-            updated_frame = False
+            frame_changed, layout_changed = False, False
             self.active_cameras = []
             self.camera_layout = []
             print(traceback.format_exc())

         # if the frame was updated or the fps is too low, send frame
-        if force_update or updated_frame or (now - self.last_output_time) > 1:
+        if force_update or frame_changed or (now - self.last_output_time) > 1:
             self.last_output_time = now
-            return True
-        return False
+            return True, layout_changed
+
+        return False, layout_changed

 class Birdseye:
@@ -753,10 +787,10 @@
         self.broadcaster = BroadcastThread(
             "birdseye", self.converter, websocket_server, stop_event
         )
-        self.birdseye_manager = BirdsEyeFrameManager(config, stop_event)
-        self.birdseye_subscriber = ConfigSubscriber("config/birdseye/")
+        self.birdseye_manager = BirdsEyeFrameManager(self.config, stop_event)
         self.frame_manager = SharedMemoryFrameManager()
         self.stop_event = stop_event
+        self.requestor = InterProcessRequestor()

         if config.birdseye.restream:
             self.birdseye_buffer = self.frame_manager.create(
@@ -783,6 +817,16 @@
             self.birdseye_manager.clear_frame()
             self.__send_new_frame()

+    def add_camera(self, camera: str) -> None:
+        """Add a camera to the birdseye manager."""
+        self.birdseye_manager.add_camera(camera)
+        logger.debug(f"Added camera {camera} to birdseye")
+
+    def remove_camera(self, camera: str) -> None:
+        """Remove a camera from the birdseye manager."""
+        self.birdseye_manager.remove_camera(camera)
+        logger.debug(f"Removed camera {camera} from birdseye")
+
     def write_data(
         self,
         camera: str,
@@ -791,30 +835,20 @@
         frame_time: float,
         frame: np.ndarray,
     ) -> None:
-        # check if there is an updated config
-        while True:
-            (
-                updated_birdseye_topic,
-                updated_birdseye_config,
-            ) = self.birdseye_subscriber.check_for_update()
-
-            if not updated_birdseye_topic:
-                break
-
-            if updated_birdseye_config:
-                camera_name = updated_birdseye_topic.rpartition("/")[-1]
-                self.config.cameras[camera_name].birdseye = updated_birdseye_config
-
-        if self.birdseye_manager.update(
+        frame_changed, frame_layout_changed = self.birdseye_manager.update(
             camera,
             len([o for o in current_tracked_objects if not o["stationary"]]),
             len(motion_boxes),
             frame_time,
             frame,
-        ):
+        )
+        if frame_changed:
             self.__send_new_frame()

+        if frame_layout_changed:
+            coordinates = self.birdseye_manager.get_camera_coordinates()
+            self.requestor.send_data(UPDATE_BIRDSEYE_LAYOUT, coordinates)
+
     def stop(self) -> None:
-        self.birdseye_subscriber.stop()
         self.converter.join()
         self.broadcaster.join()
diff --git a/frigate/output/output.py b/frigate/output/output.py
index 1723ac73c..674c02b78 100644
--- a/frigate/output/output.py
+++ b/frigate/output/output.py
@@ -2,14 +2,12 @@

 import datetime
 import logging
-import multiprocessing as mp
 import os
 import shutil
-import signal
 import threading
+from multiprocessing.synchronize import Event as MpEvent
 from wsgiref.simple_server import make_server

-from setproctitle import setproctitle
 from ws4py.server.wsgirefserver import (
     WebSocketWSGIHandler,
     WebSocketWSGIRequestHandler,
@@ -17,15 +15,19 @@ from ws4py.server.wsgirefserver import (
 )
 from ws4py.server.wsgiutils import WebSocketWSGIApplication

-from frigate.comms.config_updater import ConfigSubscriber
 from frigate.comms.detections_updater import DetectionSubscriber, DetectionTypeEnum
 from frigate.comms.ws import WebSocket
 from frigate.config import FrigateConfig
-from frigate.const import CACHE_DIR, CLIPS_DIR
+from frigate.config.camera.updater import (
+    CameraConfigUpdateEnum,
+    CameraConfigUpdateSubscriber,
+)
+from frigate.const import CACHE_DIR, CLIPS_DIR, PROCESS_PRIORITY_MED
 from frigate.output.birdseye import Birdseye
 from frigate.output.camera import JsmpegCamera
 from frigate.output.preview import PreviewRecorder
 from frigate.util.image import SharedMemoryFrameManager, get_blank_yuv_frame
+from frigate.util.process import FrigateProcess

 logger = logging.getLogger(__name__)

@@ -70,183 +72,201 @@ def check_disabled_camera_update(
             birdseye.all_cameras_disabled()

-def output_frames(
-    config: FrigateConfig,
-):
-    threading.current_thread().name = "output"
-    setproctitle("frigate.output")
+class OutputProcess(FrigateProcess):
+    def __init__(self, config: FrigateConfig, stop_event: MpEvent) -> None:
+        super().__init__(
+            stop_event, PROCESS_PRIORITY_MED, name="frigate.output", daemon=True
+        )
+        self.config = config

-    stop_event = mp.Event()
+    def run(self) -> None:
+        self.pre_run_setup(self.config.logger)

-    def receiveSignal(signalNumber, frame):
-        stop_event.set()
+        frame_manager = SharedMemoryFrameManager()

-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
+        # start a websocket server on 8082
+        WebSocketWSGIHandler.http_version = "1.1"
+        websocket_server = make_server(
+            "127.0.0.1",
+            8082,
+            server_class=WSGIServer,
+            handler_class=WebSocketWSGIRequestHandler,
+            app=WebSocketWSGIApplication(handler_cls=WebSocket),
+        )
+        websocket_server.initialize_websockets_manager()
+        websocket_thread = threading.Thread(target=websocket_server.serve_forever)

-    frame_manager = SharedMemoryFrameManager()
+        detection_subscriber = DetectionSubscriber(DetectionTypeEnum.video.value)
+        config_subscriber = CameraConfigUpdateSubscriber(
+            self.config,
+            self.config.cameras,
+            [
+                CameraConfigUpdateEnum.add,
+                CameraConfigUpdateEnum.birdseye,
+                CameraConfigUpdateEnum.enabled,
+                CameraConfigUpdateEnum.record,
+            ],
+        )

-    # start a websocket server on 8082
-    WebSocketWSGIHandler.http_version = "1.1"
-    websocket_server = make_server(
-        "127.0.0.1",
-        8082,
-        server_class=WSGIServer,
-        handler_class=WebSocketWSGIRequestHandler,
-        app=WebSocketWSGIApplication(handler_cls=WebSocket),
-    )
-    websocket_server.initialize_websockets_manager()
-    websocket_thread = threading.Thread(target=websocket_server.serve_forever)
+        jsmpeg_cameras: dict[str, JsmpegCamera] = {}
+        birdseye: Birdseye | None = None
+        preview_recorders: dict[str, PreviewRecorder] = {}
+        preview_write_times: dict[str, float] = {}
+        failed_frame_requests: dict[str, int] = {}
+        last_disabled_cam_check = datetime.datetime.now().timestamp()

-    detection_subscriber = DetectionSubscriber(DetectionTypeEnum.video)
-    config_enabled_subscriber = ConfigSubscriber("config/enabled/")
+        move_preview_frames("cache")

-    jsmpeg_cameras: dict[str, JsmpegCamera] = {}
-    birdseye: Birdseye | None = None
-    preview_recorders: dict[str, PreviewRecorder] = {}
-    preview_write_times: dict[str, float] = {}
-    failed_frame_requests: dict[str, int] = {}
-    last_disabled_cam_check = datetime.datetime.now().timestamp()
+        for camera, cam_config in self.config.cameras.items():
+            if not cam_config.enabled_in_config:
+                continue

-    move_preview_frames("cache")
-
-    for camera, cam_config in config.cameras.items():
-        if not cam_config.enabled_in_config:
-            continue
-
-        jsmpeg_cameras[camera] = JsmpegCamera(cam_config, stop_event, websocket_server)
-        preview_recorders[camera] = PreviewRecorder(cam_config)
-        preview_write_times[camera] = 0
-
-    if config.birdseye.enabled:
-        birdseye = Birdseye(config, stop_event, websocket_server)
-
-    websocket_thread.start()
-
-    while not stop_event.is_set():
-        # check if there is an updated config
-        while True:
-            (
-                updated_enabled_topic,
-                updated_enabled_config,
-            ) = config_enabled_subscriber.check_for_update()
-
-            if not updated_enabled_topic:
-                break
-
-            if updated_enabled_config:
-                camera_name = updated_enabled_topic.rpartition("/")[-1]
-                config.cameras[camera_name].enabled = updated_enabled_config.enabled
-
-        (topic, data) = detection_subscriber.check_for_update(timeout=1)
-        now = datetime.datetime.now().timestamp()
-
-        if now - last_disabled_cam_check > 5:
-            # check disabled cameras every 5 seconds
-            last_disabled_cam_check = now
-            check_disabled_camera_update(
-                config, birdseye, preview_recorders, preview_write_times
+            jsmpeg_cameras[camera] = JsmpegCamera(
+                cam_config, self.stop_event, websocket_server
             )
+            preview_recorders[camera] = PreviewRecorder(cam_config)
+            preview_write_times[camera] = 0

-        if not topic:
-            continue
+        if self.config.birdseye.enabled:
+            birdseye = Birdseye(self.config, self.stop_event, websocket_server)

-        (
-            camera,
-            frame_name,
-            frame_time,
-            current_tracked_objects,
-            motion_boxes,
-            _,
-        ) = data
+        websocket_thread.start()

-        if not config.cameras[camera].enabled:
-            continue
+        while not self.stop_event.is_set():
+            # check if there is an updated config
+            updates = config_subscriber.check_for_updates()

-        frame = frame_manager.get(frame_name, config.cameras[camera].frame_shape_yuv)
+            if "add" in updates:
+                for camera in updates["add"]:
+                    jsmpeg_cameras[camera] = JsmpegCamera(
+                        self.config.cameras[camera], self.stop_event, websocket_server
+                    )
+                    preview_recorders[camera] = PreviewRecorder(
+                        self.config.cameras[camera]
+                    )
+                    preview_write_times[camera] = 0

-        if frame is None:
-            logger.debug(f"Failed to get frame {frame_name} from SHM")
-            failed_frame_requests[camera] = failed_frame_requests.get(camera, 0) + 1
+                    if (
+                        self.config.birdseye.enabled
+                        and self.config.cameras[camera].birdseye.enabled
+                    ):
+                        birdseye.add_camera(camera)

-            if failed_frame_requests[camera] > config.cameras[camera].detect.fps:
-                logger.warning(
-                    f"Failed to retrieve many frames for {camera} from SHM, consider increasing SHM size if this continues."
+            (topic, data) = detection_subscriber.check_for_update(timeout=1)
+            now = datetime.datetime.now().timestamp()
+
+            if now - last_disabled_cam_check > 5:
+                # check disabled cameras every 5 seconds
+                last_disabled_cam_check = now
+                check_disabled_camera_update(
+                    self.config, birdseye, preview_recorders, preview_write_times
                 )
-                continue
-        else:
-            failed_frame_requests[camera] = 0

-        # send frames for low fps recording
-        preview_recorders[camera].write_data(
-            current_tracked_objects, motion_boxes, frame_time, frame
-        )
-        preview_write_times[camera] = frame_time
-
-        # send camera frame to ffmpeg process if websockets are connected
-        if any(
-            ws.environ["PATH_INFO"].endswith(camera) for ws in websocket_server.manager
-        ):
-            # write to the converter for the camera if clients are listening to the specific camera
-            jsmpeg_cameras[camera].write_frame(frame.tobytes())
-
-        # send output data to birdseye if websocket is connected or restreaming
-        if config.birdseye.enabled and (
-            config.birdseye.restream
-            or any(
-                ws.environ["PATH_INFO"].endswith("birdseye")
-                for ws in websocket_server.manager
-            )
-        ):
-            birdseye.write_data(
+            if not topic:
+                continue
+
+            (
                 camera,
+                frame_name,
+                frame_time,
                 current_tracked_objects,
                 motion_boxes,
-                frame_time,
-                frame,
+                _,
+            ) = data
+
+            if not self.config.cameras[camera].enabled:
+                continue
+
+            frame = frame_manager.get(
+                frame_name, self.config.cameras[camera].frame_shape_yuv
             )
-        frame_manager.close(frame_name)

-    move_preview_frames("clips")
+            if frame is None:
+                logger.debug(f"Failed to get frame {frame_name} from SHM")
+                failed_frame_requests[camera] = failed_frame_requests.get(camera, 0) + 1

-    while True:
-        (topic, data) = detection_subscriber.check_for_update(timeout=0)
+                if (
+                    failed_frame_requests[camera]
+                    > self.config.cameras[camera].detect.fps
+                ):
+                    logger.warning(
+                        f"Failed to retrieve many frames for {camera} from SHM, consider increasing SHM size if this continues."
+                    )

-        if not topic:
-            break
+                continue
+            else:
+                failed_frame_requests[camera] = 0

-        (
-            camera,
-            frame_name,
-            frame_time,
-            current_tracked_objects,
-            motion_boxes,
-            regions,
-        ) = data
+            # send frames for low fps recording
+            preview_recorders[camera].write_data(
+                current_tracked_objects, motion_boxes, frame_time, frame
+            )
+            preview_write_times[camera] = frame_time

-        frame = frame_manager.get(frame_name, config.cameras[camera].frame_shape_yuv)
-        frame_manager.close(frame_name)
+            # send camera frame to ffmpeg process if websockets are connected
+            if any(
+                ws.environ["PATH_INFO"].endswith(camera)
+                for ws in websocket_server.manager
+            ):
+                # write to the converter for the camera if clients are listening to the specific camera
+                jsmpeg_cameras[camera].write_frame(frame.tobytes())

-    detection_subscriber.stop()
+            # send output data to birdseye if websocket is connected or restreaming
+            if self.config.birdseye.enabled and (
+                self.config.birdseye.restream
+                or any(
+                    ws.environ["PATH_INFO"].endswith("birdseye")
+                    for ws in websocket_server.manager
+                )
+            ):
+                birdseye.write_data(
+                    camera,
+                    current_tracked_objects,
+                    motion_boxes,
+                    frame_time,
+                    frame,
+                )

-    for jsmpeg in jsmpeg_cameras.values():
-        jsmpeg.stop()
+            frame_manager.close(frame_name)

-    for preview in preview_recorders.values():
-        preview.stop()
+        move_preview_frames("clips")

-    if birdseye is not None:
-        birdseye.stop()
+        while True:
+            (topic, data) = detection_subscriber.check_for_update(timeout=0)

-    config_enabled_subscriber.stop()
-    websocket_server.manager.close_all()
-    websocket_server.manager.stop()
-    websocket_server.manager.join()
-    websocket_server.shutdown()
-    websocket_thread.join()
-    logger.info("exiting output process...")
+            if not topic:
+                break
+
+            (
+                camera,
+                frame_name,
+                frame_time,
+                current_tracked_objects,
+                motion_boxes,
+                regions,
+            ) = data
+
+            frame = frame_manager.get(
+                frame_name, self.config.cameras[camera].frame_shape_yuv
+            )
+            frame_manager.close(frame_name)
+
+        detection_subscriber.stop()
+
+        for jsmpeg in jsmpeg_cameras.values():
+            jsmpeg.stop()
+
+        for preview in preview_recorders.values():
+            preview.stop()
+
+        if birdseye is not None:
+            birdseye.stop()
+
+        config_subscriber.stop()
+        websocket_server.manager.close_all()
+        websocket_server.manager.stop()
+        websocket_server.manager.join()
+        websocket_server.shutdown()
+        websocket_thread.join()
+        logger.info("exiting output process...")

 def move_preview_frames(loc: str):
diff --git a/frigate/output/preview.py b/frigate/output/preview.py
index 08caa6738..6dfd90904 100644
--- a/frigate/output/preview.py
+++ b/frigate/output/preview.py
@@ -13,7 +13,6 @@ from typing import Any
 import cv2
 import numpy as np

-from frigate.comms.config_updater import ConfigSubscriber
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.config import CameraConfig, RecordQualityEnum
 from frigate.const import CACHE_DIR, CLIPS_DIR, INSERT_PREVIEW, PREVIEW_FRAME_TYPE
@@ -174,9 +173,6 @@ class PreviewRecorder:

         # create communication for finished previews
         self.requestor = InterProcessRequestor()
-        self.config_subscriber = ConfigSubscriber(
-            f"config/record/{self.config.name}", True
-        )

         y, u1, u2, v1, v2 = get_yuv_crop(
             self.config.frame_shape_yuv,
@@ -323,12 +319,6 @@ class PreviewRecorder:
     ) -> None:
         self.offline = False

-        # check for updated record config
-        _, updated_record_config = self.config_subscriber.check_for_update()
-
-        if updated_record_config:
-            self.config.record = updated_record_config
-
         # always write the first frame
         if self.start_time == 0:
             self.start_time = frame_time
diff --git a/frigate/ptz/autotrack.py b/frigate/ptz/autotrack.py
index f38bf1f5f..beecc62ab 100644
--- a/frigate/ptz/autotrack.py
+++ b/frigate/ptz/autotrack.py
@@ -31,7 +31,7 @@ from frigate.const import (
 )
 from frigate.ptz.onvif import OnvifController
 from frigate.track.tracked_object import TrackedObject
-from frigate.util.builtin import update_yaml_file
+from frigate.util.builtin import update_yaml_file_bulk
 from frigate.util.config import find_config_file
 from frigate.util.image import SharedMemoryFrameManager, intersection_over_union

@@ -60,10 +60,10 @@ class PtzMotionEstimator:

     def motion_estimator(
         self,
-        detections: list[dict[str, Any]],
+        detections: list[tuple[Any, Any, Any, Any, Any, Any]],
         frame_name: str,
         frame_time: float,
-        camera: str,
+        camera: str | None,
     ):
         # If we've just started up or returned to our preset, reset motion estimator for new tracking session
         if self.ptz_metrics.reset.is_set():
@@ -348,10 +348,13 @@ class PtzAutoTracker:
                 f"{camera}: Writing new config with autotracker motion coefficients: {self.config.cameras[camera].onvif.autotracking.movement_weights}"
             )

-            update_yaml_file(
+            update_yaml_file_bulk(
                 config_file,
-                ["cameras", camera, "onvif", "autotracking", "movement_weights"],
-                self.config.cameras[camera].onvif.autotracking.movement_weights,
+                {
+                    f"cameras.{camera}.onvif.autotracking.movement_weights": self.config.cameras[
+                        camera
+                    ].onvif.autotracking.movement_weights
+                },
             )

     async def _calibrate_camera(self, camera):
diff --git a/frigate/ptz/onvif.py b/frigate/ptz/onvif.py
index 81c8b9852..bd5bef0b0 100644
--- a/frigate/ptz/onvif.py
+++ b/frigate/ptz/onvif.py
@@ -33,6 +33,8 @@ class OnvifCommandEnum(str, Enum):
     stop = "stop"
     zoom_in = "zoom_in"
     zoom_out = "zoom_out"
+    focus_in = "focus_in"
+    focus_out = "focus_out"

 class OnvifController:
@@ -185,6 +187,16 @@ class OnvifController:
             ptz: ONVIFService = await onvif.create_ptz_service()
             self.cams[camera_name]["ptz"] = ptz

+            imaging: ONVIFService = await onvif.create_imaging_service()
+            self.cams[camera_name]["imaging"] = imaging
+            try:
+                video_sources = await media.GetVideoSources()
+                if video_sources and len(video_sources) > 0:
+                    self.cams[camera_name]["video_source_token"] = video_sources[0].token
+            except (Fault, ONVIFError, TransportError, Exception) as e:
+                logger.debug(f"Unable to get video sources for {camera_name}: {e}")
+                self.cams[camera_name]["video_source_token"] = None
+
             # setup continuous moving request
             move_request = ptz.create_type("ContinuousMove")
             move_request.ProfileToken = profile.token
@@ -366,7 +378,19 @@ class OnvifController:
                     f"Disabling autotracking zooming for {camera_name}: Absolute zoom not supported. Exception: {e}"
                 )

-        # set relative pan/tilt space for autotracker
+        if self.cams[camera_name]["video_source_token"] is not None:
+            try:
+                imaging_capabilities = await imaging.GetImagingSettings(
+                    {"VideoSourceToken": self.cams[camera_name]["video_source_token"]}
+                )
+                if (
+                    hasattr(imaging_capabilities, "Focus")
+                    and imaging_capabilities.Focus
+                ):
+                    supported_features.append("focus")
+            except (Fault, ONVIFError, TransportError, Exception) as e:
+                logger.debug(f"Focus not supported for {camera_name}: {e}")
+
         if (
             self.config.cameras[camera_name].onvif.autotracking.enabled_in_config
             and self.config.cameras[camera_name].onvif.autotracking.enabled
@@ -391,6 +415,18 @@ class OnvifController:
                 "Zoom": True,
             }
         )
+        if (
+            "focus" in self.cams[camera_name]["features"]
+            and self.cams[camera_name]["video_source_token"]
+        ):
+            try:
+                stop_request = self.cams[camera_name]["imaging"].create_type("Stop")
+                stop_request.VideoSourceToken = self.cams[camera_name][
+                    "video_source_token"
+                ]
+                await self.cams[camera_name]["imaging"].Stop(stop_request)
+            except (Fault, ONVIFError, TransportError, Exception) as e:
+                logger.warning(f"Failed to stop focus for {camera_name}: {e}")
         self.cams[camera_name]["active"] = False

     async def _move(self, camera_name: str, command: OnvifCommandEnum) -> None:
@@ -599,6 +635,35 @@ class OnvifController:

         self.cams[camera_name]["active"] = False

+    async def _focus(self, camera_name: str, command: OnvifCommandEnum) -> None:
+        if self.cams[camera_name]["active"]:
+            logger.warning(
+                f"{camera_name} is already performing an action, not moving..."
+            )
+            await self._stop(camera_name)
+
+        if (
+            "focus" not in self.cams[camera_name]["features"]
+            or not self.cams[camera_name]["video_source_token"]
+        ):
+            logger.error(f"{camera_name} does not support ONVIF continuous focus.")
+            return
+
+        self.cams[camera_name]["active"] = True
+        move_request = self.cams[camera_name]["imaging"].create_type("Move")
+        move_request.VideoSourceToken = self.cams[camera_name]["video_source_token"]
+        move_request.Focus = {
+            "Continuous": {
+                "Speed": 0.5 if command == OnvifCommandEnum.focus_in else -0.5
+            }
+        }
+
+        try:
+            await self.cams[camera_name]["imaging"].Move(move_request)
+        except (Fault, ONVIFError, TransportError, Exception) as e:
+            logger.warning(f"Onvif sending focus request to {camera_name} failed: {e}")
+        self.cams[camera_name]["active"] = False
+
     async def handle_command_async(
         self, camera_name: str, command: OnvifCommandEnum, param: str = ""
     ) -> None:
@@ -622,11 +687,10 @@ class OnvifController:
             elif command == OnvifCommandEnum.move_relative:
                 _, pan, tilt = param.split("_")
                 await self._move_relative(camera_name, float(pan), float(tilt), 0, 1)
-            elif (
-                command == OnvifCommandEnum.zoom_in
-                or command == OnvifCommandEnum.zoom_out
-            ):
+            elif command in (OnvifCommandEnum.zoom_in, OnvifCommandEnum.zoom_out):
                 await self._zoom(camera_name, command)
+            elif command in (OnvifCommandEnum.focus_in, OnvifCommandEnum.focus_out):
+                await self._focus(camera_name, command)
             else:
                 await self._move(camera_name, command)
         except (Fault, ONVIFError, TransportError, Exception) as e:
@@ -637,7 +701,6 @@ class OnvifController:
     ) -> None:
         """
         Handle ONVIF commands by scheduling them in the event loop.
-        This is the synchronous interface that schedules async work.
         """
         future = asyncio.run_coroutine_threadsafe(
             self.handle_command_async(camera_name, command, param), self.loop
diff --git a/frigate/record/cleanup.py b/frigate/record/cleanup.py
index 1de08a899..9d1e28306 100644
--- a/frigate/record/cleanup.py
+++ b/frigate/record/cleanup.py
@@ -100,7 +100,11 @@ class RecordingCleanup(threading.Thread):
         ).execute()

     def expire_existing_camera_recordings(
-        self, expire_date: float, config: CameraConfig, reviews: ReviewSegment
+        self,
+        continuous_expire_date: float,
+        motion_expire_date: float,
+        config: CameraConfig,
+        reviews: ReviewSegment,
     ) -> None:
         """Delete recordings for existing camera based on retention config."""
         # Get the timestamp for cutoff of retained days
@@ -116,8 +120,14 @@
                 Recordings.motion,
             )
             .where(
-                Recordings.camera == config.name,
-                Recordings.end_time < expire_date,
+                (Recordings.camera == config.name)
+                & (
+                    (
+                        (Recordings.end_time < continuous_expire_date)
+                        & (Recordings.motion == 0)
+                    )
+                    | (Recordings.end_time < motion_expire_date)
+                )
             )
             .order_by(Recordings.start_time)
             .namedtuples()
@@ -188,7 +198,7 @@
                 Recordings.id << deleted_recordings_list[i : i + max_deletes]
             ).execute()

-        previews: Previews = (
+        previews: list[Previews] = (
             Previews.select(
                 Previews.id,
                 Previews.start_time,
@@ -196,8 +206,9 @@
                 Previews.path,
             )
             .where(
-                Previews.camera == config.name,
-                Previews.end_time < expire_date,
+                (Previews.camera == config.name)
+                & (Previews.end_time < continuous_expire_date)
+                & (Previews.end_time < motion_expire_date)
             )
             .order_by(Previews.start_time)
             .namedtuples()
@@ -253,7 +264,9 @@
         logger.debug("Start deleted cameras.")

         # Handle deleted cameras
-        expire_days = self.config.record.retain.days
+        expire_days = max(
+            self.config.record.continuous.days, self.config.record.motion.days
+        )
         expire_before = (
             datetime.datetime.now() - datetime.timedelta(days=expire_days)
         ).timestamp()
@@ -291,9 +304,12 @@
             now = datetime.datetime.now()

             self.expire_review_segments(config, now)
-
-            expire_days = config.record.retain.days
-            expire_date = (now - datetime.timedelta(days=expire_days)).timestamp()
+            continuous_expire_date = (
+                now - datetime.timedelta(days=config.record.continuous.days)
+            ).timestamp()
+            motion_expire_date = (
+                now - datetime.timedelta(days=config.record.motion.days)
+            ).timestamp()

             # Get all the reviews to check against
             reviews: ReviewSegment = (
@@ -306,13 +322,15 @@
                     ReviewSegment.camera == camera,
                     # need to ensure segments for all reviews starting
                     # before the expire date are included
-                    ReviewSegment.start_time < expire_date,
+                    ReviewSegment.start_time < motion_expire_date,
                 )
                 .order_by(ReviewSegment.start_time)
                 .namedtuples()
             )

-            self.expire_existing_camera_recordings(expire_date, config, reviews)
+            self.expire_existing_camera_recordings(
+                continuous_expire_date, motion_expire_date, config, reviews
+            )
             logger.debug(f"End camera: {camera}.")

         logger.debug("End all cameras.")
diff --git a/frigate/record/export.py b/frigate/record/export.py
index 0d3f96da0..1d56baf15 100644
--- a/frigate/record/export.py
+++ b/frigate/record/export.py
@@ -21,6 +21,7 @@ from frigate.const import (
     EXPORT_DIR,
     MAX_PLAYLIST_SECONDS,
     PREVIEW_FRAME_TYPE,
+    PROCESS_PRIORITY_LOW,
 )
 from frigate.ffmpeg_presets import (
     EncodeTypeEnum,
@@ -36,7 +37,7 @@
 TIMELAPSE_DATA_INPUT_ARGS = "-an -skip_frame nokey"
 def lower_priority():
-    os.nice(10)
+    os.nice(PROCESS_PRIORITY_LOW)

 class PlaybackFactorEnum(str, Enum):
diff --git a/frigate/record/maintainer.py b/frigate/record/maintainer.py
index f1b9a600e..20f1eb289 100644
--- a/frigate/record/maintainer.py
+++ b/frigate/record/maintainer.py
@@ -16,7 +16,6 @@ from typing import Any, Optional, Tuple
 import numpy as np
 import psutil

-from frigate.comms.config_updater import ConfigSubscriber
 from frigate.comms.detections_updater import DetectionSubscriber, DetectionTypeEnum
 from frigate.comms.inter_process import InterProcessRequestor
 from frigate.comms.recordings_updater import (
@@ -24,6 +23,10 @@
     RecordingsDataTypeEnum,
 )
 from frigate.config import FrigateConfig, RetainModeEnum
+from frigate.config.camera.updater import (
+    CameraConfigUpdateEnum,
+    CameraConfigUpdateSubscriber,
+)
 from frigate.const import (
     CACHE_DIR,
     CACHE_SEGMENT_FORMAT,
@@ -71,8 +74,12 @@ class RecordingMaintainer(threading.Thread):

         # create communication for retained recordings
         self.requestor = InterProcessRequestor()
-        self.config_subscriber = ConfigSubscriber("config/record/")
-        self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.all)
+        self.config_subscriber = CameraConfigUpdateSubscriber(
+            self.config,
+            self.config.cameras,
+            [CameraConfigUpdateEnum.add, CameraConfigUpdateEnum.record],
+        )
+        self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.all.value)
         self.recordings_publisher = RecordingsDataPublisher(
             RecordingsDataTypeEnum.recordings_available_through
         )
@@ -280,12 +287,16 @@
                 Path(cache_path).unlink(missing_ok=True)
                 return

-        # if cached file's start_time is earlier than the retain days for the camera
-        # meaning continuous recording is not enabled
-        if start_time <= (
-            datetime.datetime.now().astimezone(datetime.timezone.utc)
-            - datetime.timedelta(days=self.config.cameras[camera].record.retain.days)
-        ):
+        record_config = self.config.cameras[camera].record
+        highest = None
+
+        if record_config.continuous.days > 0:
+            highest = "continuous"
+        elif record_config.motion.days > 0:
+            highest = "motion"
+
+        # continuous / motion recording is not enabled
+        if highest is None:
             # if the cached segment overlaps with the review items:
             overlaps = False
             for review in reviews:
@@ -339,8 +350,7 @@
                 ).astimezone(datetime.timezone.utc)
                 if end_time < retain_cutoff:
                     self.drop_segment(cache_path)
-        # else retain days includes this segment
-        # meaning continuous recording is enabled
+        # continuous / motion is enabled
         else:
             # assume that empty means the relevant recording info has not been received yet
             camera_info = self.object_recordings_info[camera]
@@ -355,7 +365,11 @@
                 ).astimezone(datetime.timezone.utc)
                 >= end_time
             ):
-                record_mode = self.config.cameras[camera].record.retain.mode
+                record_mode = (
+                    RetainModeEnum.all
+                    if highest == "continuous"
+                    else RetainModeEnum.motion
+                )
                 return await self.move_segment(
                     camera, start_time, end_time, duration, cache_path, record_mode
                 )
@@ -518,17 +532,7 @@
             run_start = datetime.datetime.now().timestamp()

             # check if there is an updated config
-            while True:
-                (
-                    updated_topic,
-                    updated_record_config,
-                ) = self.config_subscriber.check_for_update()
-
-                if not updated_topic:
-                    break
-
-                camera_name = updated_topic.rpartition("/")[-1]
-                self.config.cameras[camera_name].record = updated_record_config
+            self.config_subscriber.check_for_updates()

             stale_frame_count = 0
             stale_frame_count_threshold = 10
@@ -541,7 +545,7 @@
                 if not topic:
                     break

-                if topic == DetectionTypeEnum.video:
+                if topic == DetectionTypeEnum.video.value:
                     (
                         camera,
                         _,
@@ -560,7 +564,7 @@
                             regions,
                         )
                     )
-                elif topic == DetectionTypeEnum.audio:
+                elif topic == DetectionTypeEnum.audio.value:
                     (
                         camera,
                         frame_time,
@@ -576,7 +580,9 @@
                             audio_detections,
                         )
                     )
-                elif topic == DetectionTypeEnum.api or DetectionTypeEnum.lpr:
+                elif topic in (
+                    DetectionTypeEnum.api.value,
+                    DetectionTypeEnum.lpr.value,
+                ):
                     continue

                 if frame_time < run_start - stale_frame_count_threshold:
diff --git a/frigate/record/record.py b/frigate/record/record.py
index 252b80545..624ed6e9a 100644
--- a/frigate/record/record.py
+++ b/frigate/record/record.py
@@ -1,50 +1,47 @@
 """Run recording maintainer and cleanup."""

 import logging
-import multiprocessing as mp
-import signal
-import threading
-from types import FrameType
-from typing import Optional
+from multiprocessing.synchronize import Event as MpEvent

 from playhouse.sqliteq import SqliteQueueDatabase
-from setproctitle import setproctitle

 from frigate.config import FrigateConfig
+from frigate.const import PROCESS_PRIORITY_HIGH
 from frigate.models import Recordings, ReviewSegment
 from frigate.record.maintainer import RecordingMaintainer
-from frigate.util.services import listen
+from frigate.util.process import FrigateProcess

 logger = logging.getLogger(__name__)

-def manage_recordings(config: FrigateConfig) -> None:
-    stop_event = mp.Event()
+class RecordProcess(FrigateProcess):
+    def __init__(self, config: FrigateConfig, stop_event: MpEvent) -> None:
+        super().__init__(
+            stop_event,
+            PROCESS_PRIORITY_HIGH,
+            name="frigate.recording_manager",
+            daemon=True,
+        )
+        self.config = config

-    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
-        stop_event.set()
+    def run(self) -> None:
+        self.pre_run_setup(self.config.logger)
+        db = SqliteQueueDatabase(
+            self.config.database.path,
+            pragmas={
+                "auto_vacuum": "FULL",  # Does not defragment database
+                "cache_size": -512 * 1000,  # 512MB of cache
+                "synchronous": "NORMAL",  # Safe when using WAL https://www.sqlite.org/pragma.html#pragma_synchronous
+            },
+            timeout=max(
+                60, 10 * len([c for c in self.config.cameras.values() if c.enabled])
+            ),
+        )
+        models = [ReviewSegment, Recordings]
+        db.bind(models)

-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
-
-    threading.current_thread().name = "process:recording_manager"
-    setproctitle("frigate.recording_manager")
-    listen()
-
-    db = SqliteQueueDatabase(
-        config.database.path,
-        pragmas={
-            "auto_vacuum": "FULL",  # Does not defragment database
-            "cache_size": -512 * 1000,  # 512MB of cache
-            "synchronous": "NORMAL",  # Safe when using WAL https://www.sqlite.org/pragma.html#pragma_synchronous
-        },
-        timeout=max(60, 10 * len([c for c in config.cameras.values() if c.enabled])),
-    )
-    models = [ReviewSegment, Recordings]
-    db.bind(models)
-
-    maintainer = RecordingMaintainer(
-        config,
-        stop_event,
-    )
-    maintainer.start()
+        maintainer = RecordingMaintainer(
+            self.config,
+            self.stop_event,
+        )
+        maintainer.start()
diff --git a/frigate/record/util.py b/frigate/record/util.py
index 37a2b4645..6a91c1aaf 100644
--- a/frigate/record/util.py
+++ b/frigate/record/util.py
@@ -66,7 +66,7 @@ def sync_recordings(limited: bool) -> None:

         if float(len(recordings_to_delete)) / max(1, recordings.count()) > 0.5:
             logger.warning(
-                f"Deleting {(float(len(recordings_to_delete)) / recordings.count()):2f}% of recordings DB entries, could be due to configuration error. Aborting..."
+                f"Deleting {(len(recordings_to_delete) / max(1, recordings.count()) * 100):.2f}% of recordings DB entries, could be due to configuration error. Aborting..."
             )
             return False

@@ -106,7 +106,7 @@ def sync_recordings(limited: bool) -> None:

         if float(len(files_to_delete)) / max(1, len(files_on_disk)) > 0.5:
             logger.debug(
-                f"Deleting {(float(len(files_to_delete)) / len(files_on_disk)):2f}% of recordings DB entries, could be due to configuration error. Aborting..."
+                f"Deleting {(len(files_to_delete) / max(1, len(files_on_disk)) * 100):.2f}% of recordings DB entries, could be due to configuration error. Aborting..."
            )
            return False

diff --git a/frigate/review/maintainer.py b/frigate/review/maintainer.py
index b144b6e52..b279c9c44 100644
--- a/frigate/review/maintainer.py
+++ b/frigate/review/maintainer.py
@@ -1,6 +1,7 @@
 """Maintain review segments in db."""

 import copy
+import datetime
 import json
 import logging
 import os
@@ -15,10 +16,14 @@ from typing import Any, Optional
 import cv2
 import numpy as np

-from frigate.comms.config_updater import ConfigSubscriber
 from frigate.comms.detections_updater import DetectionSubscriber, DetectionTypeEnum
 from frigate.comms.inter_process import InterProcessRequestor
+from frigate.comms.review_updater import ReviewDataPublisher
 from frigate.config import CameraConfig, FrigateConfig
+from frigate.config.camera.updater import (
+    CameraConfigUpdateEnum,
+    CameraConfigUpdateSubscriber,
+)
 from frigate.const import (
     CLEAR_ONGOING_REVIEW_SEGMENTS,
     CLIPS_DIR,
@@ -60,6 +65,7 @@ class PendingReviewSegment:
         self.zones = zones
         self.audio = audio
         self.last_update = frame_time
+        self.thumb_time: float | None = None

         # thumbnail
         self._frame = np.zeros((THUMB_HEIGHT * 3 // 2, THUMB_WIDTH), np.uint8)
@@ -101,6 +107,7 @@
             )

         if self._frame is not None:
+            self.thumb_time = datetime.datetime.now().timestamp()
             self.has_frame = True
             cv2.imwrite(
                 self.frame_path, self._frame, [int(cv2.IMWRITE_WEBP_QUALITY), 60]
@@ -134,6 +141,8 @@
                     "sub_labels": list(self.sub_labels.values()),
                     "zones": self.zones,
                     "audio": list(self.audio),
+                    "thumb_time": self.thumb_time,
+                    "metadata": None,
                 },
             }
         )
@@ -150,10 +159,19 @@ class ReviewSegmentMaintainer(threading.Thread):

         # create communication for review segments
         self.requestor = InterProcessRequestor()
-        self.record_config_subscriber = ConfigSubscriber("config/record/")
-        self.review_config_subscriber = ConfigSubscriber("config/review/")
-        self.enabled_config_subscriber = ConfigSubscriber("config/enabled/")
-        self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.all)
+        self.config_subscriber = CameraConfigUpdateSubscriber(
+            config,
+            config.cameras,
+            [
+                CameraConfigUpdateEnum.add,
+                CameraConfigUpdateEnum.enabled,
+                CameraConfigUpdateEnum.record,
+                CameraConfigUpdateEnum.remove,
+                CameraConfigUpdateEnum.review,
+            ],
+        )
+        self.detection_subscriber = DetectionSubscriber(DetectionTypeEnum.all.value)
+        self.review_publisher = ReviewDataPublisher("")

         # manual events
         self.indefinite_events: dict[str, dict[str, Any]] = {}
@@ -174,16 +192,16 @@
         new_data = segment.get_data(ended=False)
         self.requestor.send_data(UPSERT_REVIEW_SEGMENT, new_data)
         start_data = {k: v for k, v in new_data.items()}
+        review_update = {
+            "type": "new",
+            "before": start_data,
+            "after": start_data,
+        }
         self.requestor.send_data(
             "reviews",
-            json.dumps(
-                {
-                    "type": "new",
-                    "before": start_data,
-                    "after": start_data,
-                }
-            ),
+            json.dumps(review_update),
         )
+        self.review_publisher.publish(review_update, segment.camera)
         self.requestor.send_data(
             f"{segment.camera}/review_status", segment.severity.value.upper()
         )
@@ -202,16 +220,16 @@
         new_data = segment.get_data(ended=False)
         self.requestor.send_data(UPSERT_REVIEW_SEGMENT, new_data)
+        review_update = {
+            "type": "update",
+            "before": {k: v for k, v in prev_data.items()},
+            "after": {k: v for k, v in new_data.items()},
+        }
         self.requestor.send_data(
             "reviews",
-            json.dumps(
-                {
-                    "type": "update",
-                    "before": {k: v for k, v in prev_data.items()},
-                    "after": {k: v for k, v in new_data.items()},
-                }
-            ),
+            json.dumps(review_update),
         )
+        self.review_publisher.publish(review_update, segment.camera)
         self.requestor.send_data(
             f"{segment.camera}/review_status", segment.severity.value.upper()
         )
@@ -224,16 +242,16 @@
         """End segment."""
         final_data = segment.get_data(ended=True)
         self.requestor.send_data(UPSERT_REVIEW_SEGMENT, final_data)
+        review_update = {
+            "type": "end",
+            "before": {k: v for k, v in prev_data.items()},
+            "after": {k: v for k, v in final_data.items()},
+        }
         self.requestor.send_data(
             "reviews",
-            json.dumps(
-                {
-                    "type": "end",
-                    "before": {k: v for k, v in prev_data.items()},
-                    "after": {k: v for k, v in final_data.items()},
-                }
-            ),
+            json.dumps(review_update),
         )
+        self.review_publisher.publish(review_update, segment.camera)
         self.requestor.send_data(f"{segment.camera}/review_status", "NONE")
         self.active_review_segments[segment.camera] = None

@@ -458,57 +476,22 @@
     def run(self) -> None:
         while not self.stop_event.is_set():
             # check if there is an updated config
-            while True:
-                (
-                    updated_record_topic,
-                    updated_record_config,
-                ) = self.record_config_subscriber.check_for_update()
+            updated_topics = self.config_subscriber.check_for_updates()

-                (
-                    updated_review_topic,
-                    updated_review_config,
-                ) = self.review_config_subscriber.check_for_update()
+            if "record" in updated_topics:
+                for camera in updated_topics["record"]:
+                    self.end_segment(camera)

-                (
-                    updated_enabled_topic,
-                    updated_enabled_config,
-                ) = self.enabled_config_subscriber.check_for_update()
-
-                if (
-                    not updated_record_topic
-                    and not updated_review_topic
-                    and not updated_enabled_topic
-                ):
-                    break
-
-                if updated_record_topic:
-                    camera_name = updated_record_topic.rpartition("/")[-1]
-                    self.config.cameras[camera_name].record = updated_record_config
-
-                    # immediately end segment
-                    if not updated_record_config.enabled:
-                        self.end_segment(camera_name)
-
-                if updated_review_topic:
-                    camera_name = updated_review_topic.rpartition("/")[-1]
-                    self.config.cameras[camera_name].review = updated_review_config
-
-                if updated_enabled_config:
-                    camera_name = updated_enabled_topic.rpartition("/")[-1]
-                    self.config.cameras[
-                        camera_name
-                    ].enabled = updated_enabled_config.enabled
-
-                    # immediately end segment as we may not get another update
-                    if not updated_enabled_config.enabled:
-                        self.end_segment(camera_name)
+            if "enabled" in updated_topics:
+                for camera in updated_topics["enabled"]:
+                    self.end_segment(camera)

             (topic, data) = self.detection_subscriber.check_for_update(timeout=1)

             if not topic:
                 continue

-            if topic == DetectionTypeEnum.video:
+            if topic == DetectionTypeEnum.video.value:
                 (
                     camera,
                     frame_name,
@@ -517,14 +500,14 @@
                     _,
                     _,
                 ) = data
-            elif topic == DetectionTypeEnum.audio:
+            elif topic == DetectionTypeEnum.audio.value:
                 (
                     camera,
                     frame_time,
                     _,
                     audio_detections,
                 ) = data
-            elif topic == DetectionTypeEnum.api or DetectionTypeEnum.lpr:
+            elif topic in (DetectionTypeEnum.api.value, DetectionTypeEnum.lpr.value):
                 (
                     camera,
                     frame_time,
@@ -730,8 +713,7 @@ class ReviewSegmentMaintainer(threading.Thread):
                         f"Dedicated LPR camera API has been called for {camera}, but detections are disabled. LPR events will not appear as a detection."
                     )

-        self.record_config_subscriber.stop()
-        self.review_config_subscriber.stop()
+        self.config_subscriber.stop()
         self.requestor.stop()
         self.detection_subscriber.stop()
         logger.info("Exiting review maintainer...")
diff --git a/frigate/review/review.py b/frigate/review/review.py
index dafa6c802..c00c302a2 100644
--- a/frigate/review/review.py
+++ b/frigate/review/review.py
@@ -1,36 +1,30 @@
 """Run recording maintainer and cleanup."""

 import logging
-import multiprocessing as mp
-import signal
-import threading
-from types import FrameType
-from typing import Optional
-
-from setproctitle import setproctitle
+from multiprocessing.synchronize import Event as MpEvent

 from frigate.config import FrigateConfig
+from frigate.const import PROCESS_PRIORITY_MED
 from frigate.review.maintainer import ReviewSegmentMaintainer
-from frigate.util.services import listen
+from frigate.util.process import FrigateProcess

 logger = logging.getLogger(__name__)

-def manage_review_segments(config: FrigateConfig) -> None:
-    stop_event = mp.Event()
+class ReviewProcess(FrigateProcess):
+    def __init__(self, config: FrigateConfig, stop_event: MpEvent) -> None:
+        super().__init__(
+            stop_event,
+            PROCESS_PRIORITY_MED,
+            name="frigate.review_segment_manager",
+            daemon=True,
+        )
+        self.config = config

-    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
-        stop_event.set()
-
-    signal.signal(signal.SIGTERM, receiveSignal)
-    signal.signal(signal.SIGINT, receiveSignal)
-
-    threading.current_thread().name = "process:review_segment_manager"
-    setproctitle("frigate.review_segment_manager")
-    listen()
-
-    maintainer = ReviewSegmentMaintainer(
-        config,
-        stop_event,
-    )
-    maintainer.start()
+    def run(self) -> None:
+        self.pre_run_setup(self.config.logger)
+        maintainer = ReviewSegmentMaintainer(
+            self.config,
+            self.stop_event,
+        )
+        maintainer.start()
diff --git a/frigate/stats/util.py b/frigate/stats/util.py
index e098bc541..ee93bb6e6 100644
--- a/frigate/stats/util.py
+++ b/frigate/stats/util.py
@@ -5,13 +5,13 @@ import os
 import shutil
 import time
 from json import JSONDecodeError
+from multiprocessing.managers import DictProxy
 from typing import Any, Optional

 import psutil
 import requests
 from requests.exceptions import RequestException

-from frigate.camera import CameraMetrics
 from frigate.config import FrigateConfig
 from frigate.const import CACHE_DIR, CLIPS_DIR, RECORD_DIR
 from frigate.data_processing.types import DataProcessorMetrics
@@ -53,7 +53,7 @@ def get_latest_version(config: FrigateConfig) -> str:

 def stats_init(
     config: FrigateConfig,
-    camera_metrics: dict[str, CameraMetrics],
+    camera_metrics: DictProxy,
     embeddings_metrics: DataProcessorMetrics | None,
     detectors: dict[str, ObjectDetectProcess],
     processes: dict[str, int],
@@ -268,15 +268,20 @@ def stats_snapshot(
     camera_metrics = stats_tracking["camera_metrics"]
     stats: dict[str, Any] = {}
-    total_detection_fps = 0
+    total_camera_fps = total_process_fps = total_skipped_fps = total_detection_fps = 0

     stats["cameras"] = {}
     for name, camera_stats in camera_metrics.items():
+        total_camera_fps += camera_stats.camera_fps.value
+        total_process_fps += camera_stats.process_fps.value
+        total_skipped_fps += camera_stats.skipped_fps.value
         total_detection_fps += camera_stats.detection_fps.value
-        pid = camera_stats.process.pid if camera_stats.process else None
+        pid = camera_stats.process_pid.value if camera_stats.process_pid.value else None
         ffmpeg_pid = camera_stats.ffmpeg_pid.value if camera_stats.ffmpeg_pid else None
         capture_pid = (
-            camera_stats.capture_process.pid if camera_stats.capture_process else None
+            camera_stats.capture_process_pid.value
+            if camera_stats.capture_process_pid.value
+            else None
         )
         stats["cameras"][name] = {
             "camera_fps": round(camera_stats.camera_fps.value, 2),
@@ -303,6 +308,9 @@
             # from mypy 0.981 onwards
             "pid": pid,
         }
+    stats["camera_fps"] = round(total_camera_fps, 2)
+    stats["process_fps"] = round(total_process_fps, 2)
+    stats["skipped_fps"] = round(total_skipped_fps, 2)
     stats["detection_fps"] = round(total_detection_fps, 2)

     stats["embeddings"] = {}
@@ -354,6 +362,22 @@
             embeddings_metrics.yolov9_lpr_pps.value, 2
         )

+        if embeddings_metrics.review_desc_speed.value > 0.0:
+            stats["embeddings"]["review_description_speed"] = round(
+                embeddings_metrics.review_desc_speed.value * 1000, 2
+            )
+            stats["embeddings"]["review_descriptions"] = round(
+                embeddings_metrics.review_desc_dps.value, 2
+            )
+
+        for key in embeddings_metrics.classification_speeds.keys():
+            stats["embeddings"][f"{key}_classification_speed"] = round(
+                embeddings_metrics.classification_speeds[key].value * 1000, 2
+            )
+            stats["embeddings"][f"{key}_classification"] = round(
+                embeddings_metrics.classification_cps[key].value, 2
+            )
+
     get_processing_stats(config, stats, hwaccel_errors)

     stats["service"] = {
diff --git a/frigate/test/http_api/base_http_test.py b/frigate/test/http_api/base_http_test.py
index 3c4a7ccdc..e0e5fbf03 100644
--- a/frigate/test/http_api/base_http_test.py
+++ b/frigate/test/http_api/base_http_test.py
@@ -45,6 +45,9 @@ class BaseTestHttp(unittest.TestCase):
             },
         }
         self.test_stats = {
+            "camera_fps": 5.0,
+            "process_fps": 5.0,
+            "skipped_fps": 0.0,
             "detection_fps": 13.7,
             "detectors": {
                 "cpu1": {
@@ -119,6 +122,7 @@ class BaseTestHttp(unittest.TestCase):
             None,
             stats,
             None,
+            None,
         )

     def insert_mock_event(
diff --git a/frigate/test/test_http.py b/frigate/test/test_http.py
index 4d949c543..6d60932a5 100644
--- a/frigate/test/test_http.py
+++ b/frigate/test/test_http.py
@@ -2,6 +2,7 @@ import datetime
 import logging
 import os
 import unittest
+from typing import Any
 from unittest.mock import Mock

 from fastapi.testclient import TestClient
@@ -48,6 +49,9 @@ class TestHttp(unittest.TestCase):
             },
         }
         self.test_stats = {
+            "camera_fps": 5.0,
+            "process_fps": 5.0,
+            "skipped_fps": 0.0,
             "detection_fps": 13.7,
             "detectors": {
                 "cpu1": {
@@ -112,8 +116,8 @@ class TestHttp(unittest.TestCase):
         except OSError:
             pass

-    def test_get_good_event(self):
-        app = create_fastapi_app(
+    def __init_app(self, updater: Any | None = None) -> Any:
+        return create_fastapi_app(
             FrigateConfig(**self.minimal_config),
             self.db,
             None,
             None,
             None,
             None,
             None,
+            updater,
             None,
         )
+
+    def test_get_good_event(self):
+        app = self.__init_app()
         id = "123456.random"

         with TestClient(app) as client:
@@ -134,16 +142,7 @@ class TestHttp(unittest.TestCase):
             assert event["id"] == model_to_dict(Event.get(Event.id == id))["id"]

     def test_get_bad_event(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()
         id = "123456.random"
         bad_id = "654321.other"

@@ -154,16 +153,7 @@
             assert event_response.json() == "Event not found"

     def test_delete_event(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()
         id = "123456.random"

         with TestClient(app) as client:
@@ -176,16 +166,7 @@
             assert event == "Event not found"

     def test_event_retention(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()
         id = "123456.random"

         with TestClient(app) as client:
@@ -202,16 +183,7 @@
             assert event["retain_indefinitely"] is False

     def test_event_time_filtering(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()
         morning_id = "123456.random"
         evening_id = "654321.random"
         morning = 1656590400  # 06/30/2022 6 am (GMT)
@@ -241,20 +213,11 @@
     def test_set_delete_sub_label(self):
         mock_event_updater = Mock(spec=EventMetadataPublisher)
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            mock_event_updater,
-        )
+        app = self.__init_app(updater=mock_event_updater)
         id = "123456.random"
         sub_label = "sub"

-        def update_event(topic, payload):
+        def update_event(payload: Any, topic: str):
             event = Event.get(id=id)
             event.sub_label = payload[1]
             event.save()
@@ -286,20 +249,11 @@
     def test_sub_label_list(self):
         mock_event_updater = Mock(spec=EventMetadataPublisher)
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            mock_event_updater,
-        )
+        app = self.__init_app(updater=mock_event_updater)
         id = "123456.random"
         sub_label = "sub"

-        def update_event(topic, payload):
+        def update_event(payload: Any, _: str):
             event = Event.get(id=id)
             event.sub_label = payload[1]
             event.save()
@@ -318,16 +272,7 @@
             assert sub_labels == [sub_label]

     def test_config(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()

         with TestClient(app) as client:
             config = client.get("/config").json()
@@ -335,16 +280,7 @@
         assert config["cameras"]["front_door"]

     def test_recordings(self):
-        app = create_fastapi_app(
-            FrigateConfig(**self.minimal_config),
-            self.db,
-            None,
-            None,
-            None,
-            None,
-            None,
-            None,
-        )
+        app = self.__init_app()
         id = "123456.random"

         with TestClient(app) as client:
diff --git a/frigate/track/__init__.py b/frigate/track/__init__.py
index dc72be4f0..b5453aaeb 100644
--- a/frigate/track/__init__.py
+++ b/frigate/track/__init__.py
@@ -11,6 +11,9 @@ class ObjectTracker(ABC):

     @abstractmethod
     def match_and_update(
-        self, frame_name: str, frame_time: float, detections: list[dict[str, Any]]
+        self,
+        frame_name: str,
+        frame_time: float,
+        detections: list[tuple[Any, Any, Any, Any, Any, Any]],
     ) -> None:
         pass
diff --git a/frigate/track/centroid_tracker.py b/frigate/track/centroid_tracker.py
index 25d4cb860..56f20629c 100644
--- a/frigate/track/centroid_tracker.py
+++ b/frigate/track/centroid_tracker.py
@@ -1,25 +1,26 @@
 import random
 import string
 from collections import defaultdict
+from typing import Any

 import numpy as np
 from scipy.spatial import distance as dist

 from frigate.config import DetectConfig
 from frigate.track import ObjectTracker
-from frigate.util import intersection_over_union
+from frigate.util.image import intersection_over_union

 class CentroidTracker(ObjectTracker):
     def __init__(self, config: DetectConfig):
-        self.tracked_objects = {}
-        self.untracked_object_boxes = []
-        self.disappeared = {}
-        self.positions = {}
+        self.tracked_objects: dict[str, dict[str, Any]] = {}
+        self.untracked_object_boxes: list[tuple[int, int, int, int]] = []
+        self.disappeared: dict[str, Any] = {}
+        self.positions: dict[str, Any] = {}
         self.max_disappeared = config.max_disappeared
         self.detect_config = config

-    def register(self, index, obj):
+    def register(self, obj: dict[str, Any]) -> None:
         rand_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
         id = f"{obj['frame_time']}-{rand_id}"
         obj["id"] = id
@@ -39,13 +40,13 @@
             "ymax": self.detect_config.height,
         }

-    def deregister(self, id):
+    def deregister(self, id: str) -> None:
         del self.tracked_objects[id]
         del self.disappeared[id]

     # tracks the current position of the object based on the last N bounding boxes
     # returns False if the object has moved outside its previous position
-    def update_position(self, id, box):
+    def update_position(self, id: str, box: tuple[int, int, int, int]) -> bool:
         position = self.positions[id]
         position_box = (
             position["xmin"],
@@ -88,7 +89,7 @@

         return True

-    def is_expired(self, id):
+    def is_expired(self, id: str) -> bool:
         obj = self.tracked_objects[id]
         # get the max frames for this label type or the default
         max_frames = self.detect_config.stationary.max_frames.objects.get(
@@ -108,7 +109,7 @@

         return False

-    def update(self, id, new_obj):
+    def update(self, id: str, new_obj: dict[str, Any]) -> None:
         self.disappeared[id] = 0
         # update the motionless count if the object has not moved to a new position
         if self.update_position(id, new_obj["box"]):
@@ -129,25 +130,30 @@

         self.tracked_objects[id].update(new_obj)

-    def update_frame_times(self, frame_name, frame_time):
+    def update_frame_times(self, frame_name: str, frame_time: float) -> None:
         for id in list(self.tracked_objects.keys()):
             self.tracked_objects[id]["frame_time"] = frame_time
             self.tracked_objects[id]["motionless_count"] += 1
             if self.is_expired(id):
                 self.deregister(id)

-    def match_and_update(self, frame_time, detections):
+    def match_and_update(
+        self,
+        frame_name: str,
+        frame_time: float,
+        detections: list[tuple[Any, Any, Any, Any, Any, Any]],
+    ) -> None:
         # group by name
         detection_groups = defaultdict(lambda: [])
-        for obj in detections:
-            detection_groups[obj[0]].append(
+        for det in detections:
+            detection_groups[det[0]].append(
                 {
-                    "label": obj[0],
-                    "score": obj[1],
-                    "box": obj[2],
-                    "area": obj[3],
-                    "ratio": obj[4],
-                    "region": obj[5],
+                    "label": det[0],
+                    "score": det[1],
+                    "box": det[2],
+                    "area": det[3],
+                    "ratio": det[4],
+                    "region": det[5],
                     "frame_time": frame_time,
                 }
             )
@@ -180,7 +186,7 @@
             if len(current_objects) == 0:
                 for index, obj in enumerate(group):
-                    self.register(index, obj)
+                    self.register(obj)
                 continue

             new_centroids = np.array([o["centroid"] for o in group])
@@ -238,4 +244,4 @@
             # register each new input centroid as a trackable object
             else:
                 for col in unusedCols:
-                    self.register(col, group[col])
+                    self.register(group[col])
diff --git a/frigate/track/norfair_tracker.py b/frigate/track/norfair_tracker.py
index 900971e0d..5bb15f94e 100644
--- a/frigate/track/norfair_tracker.py
+++ b/frigate/track/norfair_tracker.py
@@ -13,6 +13,7 @@ from norfair import (
     draw_boxes,
 )
 from norfair.drawing.drawer import Drawer
+from norfair.tracker import TrackedObject
 from rich import print
 from rich.console import Console
 from rich.table import Table
@@ -43,7 +44,7 @@
 MAX_STATIONARY_HISTORY = 10

 #   - could be variable based on time since last_detection
 #   - include estimated velocity in the distance (car driving by of a parked car)
 #   - include some visual similarity factor in the distance for occlusions
-def distance(detection: np.array, estimate: np.array) -> float:
+def distance(detection: np.ndarray, estimate: np.ndarray) -> float:
     # ultimately, this should try and estimate distance in 3-dimensional space
     # consider change in location, width, and height

@@ -73,14 +74,16 @@
     change = np.append(distance, np.array([width_ratio, height_ratio]))

     # calculate euclidean distance of the change vector
-    return np.linalg.norm(change)
+    return float(np.linalg.norm(change))

-def frigate_distance(detection: Detection, tracked_object) -> float:
+def frigate_distance(detection: Detection, tracked_object: TrackedObject) -> float:
     return distance(detection.points, tracked_object.estimate)

-def histogram_distance(matched_not_init_trackers, unmatched_trackers):
+def histogram_distance(
+    matched_not_init_trackers: TrackedObject, unmatched_trackers: TrackedObject
+) -> float:
     snd_embedding = unmatched_trackers.last_detection.embedding

     if snd_embedding is None:
@@ -110,17 +113,17 @@ class NorfairTracker(ObjectTracker):
         ptz_metrics: PTZMetrics,
     ):
         self.frame_manager = SharedMemoryFrameManager()
-        self.tracked_objects = {}
+        self.tracked_objects: dict[str, dict[str, Any]] = {}
         self.untracked_object_boxes: list[list[int]] = []
-        self.disappeared = {}
-        self.positions = {}
-        self.stationary_box_history: dict[str, list[list[int, int, int, int]]] = {}
+        self.disappeared: dict[str, int] = {}
+        self.positions: dict[str, dict[str, Any]] = {}
+        self.stationary_box_history: dict[str, list[list[int]]] = {}
         self.camera_config = config
         self.detect_config = config.detect
         self.ptz_metrics = ptz_metrics
-        self.ptz_motion_estimator = {}
+        self.ptz_motion_estimator: PtzMotionEstimator | None = None
         self.camera_name = config.name
-        self.track_id_map = {}
+        self.track_id_map: dict[str, str] = {}

         # Define tracker configurations for static camera
         self.object_type_configs = {
@@ -169,7 +172,7 @@
             "distance_threshold": 3,
         }

-        self.trackers = {}
+        self.trackers: dict[str, dict[str, Tracker]] = {}
         # Handle static trackers
         for obj_type, tracker_config in self.object_type_configs.items():
             if obj_type in self.camera_config.objects.track:
@@ -216,7 +219,7 @@
                 self.camera_config, self.ptz_metrics
             )

-    def _create_tracker(self, obj_type, tracker_config):
+    def _create_tracker(self, obj_type: str, tracker_config: dict[str, Any]) -> Tracker:
         """Helper function to create a tracker with given configuration."""
         tracker_params = {
             "distance_function": tracker_config["distance_function"],
@@ -258,7 +261,7 @@
                 return self.trackers[object_type][mode]
         return self.default_tracker[mode]

-    def register(self, track_id, obj):
+    def register(self, track_id: str, obj: dict[str, Any]) -> None:
         rand_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
         id = f"{obj['frame_time']}-{rand_id}"
         self.track_id_map[track_id] = id
@@ -297,7 +300,7 @@
         }
         self.stationary_box_history[id] = boxes

-    def deregister(self, id, track_id):
+    def deregister(self, id: str, track_id: str) -> None:
         obj = self.tracked_objects[id]
         del self.tracked_objects[id]

@@ -321,7 +324,7 @@

     # tracks the current position of the object based on the last N bounding boxes
     # returns False if the object has moved outside its previous position
-    def update_position(self, id: str, box: list[int, int, int, int], stationary: bool):
+    def update_position(self, id: str, box: list[int], stationary: bool) -> bool:
         xmin, ymin, xmax, ymax = box
         position = self.positions[id]
         self.stationary_box_history[id].append(box)
@@ -396,7 +399,7 @@

         return True

-    def is_expired(self, id):
+    def is_expired(self, id: str) -> bool:
         obj = self.tracked_objects[id]
         # get the max frames for this label type or the default
         max_frames = self.detect_config.stationary.max_frames.objects.get(
@@ -416,7 +419,7 @@

         return False

-    def update(self, track_id, obj):
+    def update(self, track_id: str, obj: dict[str, Any]) -> None:
         id = self.track_id_map[track_id]
         self.disappeared[id] = 0
         stationary = (
@@ -443,7 +446,7 @@

         self.tracked_objects[id].update(obj)

-    def update_frame_times(self, frame_name: str, frame_time: float):
+    def update_frame_times(self, frame_name: str, frame_time: float) -> None:
         # if the object was there in the last frame, assume it's still there
         detections = [
             (
@@ -460,10 +463,13 @@
         self.match_and_update(frame_name, frame_time, detections=detections)

     def match_and_update(
-        self, frame_name: str, frame_time: float, detections: list[dict[str, Any]]
-    ):
+        self,
+        frame_name: str,
+        frame_time: float,
+        detections: list[tuple[Any, Any, Any, Any, Any, Any]],
+    ) -> None:
         # Group detections by object type
-        detections_by_type = {}
+        detections_by_type: dict[str, list[Detection]] = {}
         for obj in detections:
             label = obj[0]
             if label not in detections_by_type:
@@ -551,17 +557,17 @@
             estimate = (
                 max(0, estimate[0]),
                 max(0, estimate[1]),
-                min(self.detect_config.width - 1, estimate[2]),
-                min(self.detect_config.height - 1, estimate[3]),
+                min(self.detect_config.width - 1, estimate[2]),  # type: ignore[operator]
+                min(self.detect_config.height - 1, estimate[3]),  # type: ignore[operator]
             )
-            obj = {
+            new_obj = {
                 **t.last_detection.data,
                 "estimate": estimate,
                 "estimate_velocity": t.estimate_velocity,
             }
             active_ids.append(t.global_id)
             if t.global_id not in self.track_id_map:
-                self.register(t.global_id, obj)
+                self.register(t.global_id, new_obj)
             # if there wasn't a detection in this frame, increment disappeared
             elif t.last_detection.data["frame_time"] != frame_time:
                 id = self.track_id_map[t.global_id]
@@ -569,10 +575,10 @@
             # sometimes the estimate gets way off
             # only update if the upper left corner is actually upper left
             if
estimate[0] < estimate[2] and estimate[1] < estimate[3]: - self.tracked_objects[id]["estimate"] = obj["estimate"] + self.tracked_objects[id]["estimate"] = new_obj["estimate"] # else update it else: - self.update(t.global_id, obj) + self.update(t.global_id, new_obj) # clear expired tracks expired_ids = [k for k in self.track_id_map.keys() if k not in active_ids] @@ -585,7 +591,7 @@ class NorfairTracker(ObjectTracker): o[2] for o in detections if o[2] not in tracked_object_boxes ] - def print_objects_as_table(self, tracked_objects: Sequence): + def print_objects_as_table(self, tracked_objects: Sequence) -> None: """Used for helping in debugging""" print() console = Console() @@ -605,13 +611,13 @@ class NorfairTracker(ObjectTracker): ) console.print(table) - def debug_draw(self, frame, frame_time): + def debug_draw(self, frame: np.ndarray, frame_time: float) -> None: # Collect all tracked objects from each tracker all_tracked_objects = [] # print a table to the console with norfair tracked object info if False: - if len(self.trackers["license_plate"]["static"].tracked_objects) > 0: + if len(self.trackers["license_plate"]["static"].tracked_objects) > 0: # type: ignore[unreachable] self.print_objects_as_table( self.trackers["license_plate"]["static"].tracked_objects ) @@ -662,7 +668,7 @@ class NorfairTracker(ObjectTracker): if False: # draw the current formatted time on the frame - from datetime import datetime + from datetime import datetime # type: ignore[unreachable] formatted_time = datetime.fromtimestamp(frame_time).strftime( "%m/%d/%Y %I:%M:%S %p" diff --git a/frigate/track/object_processing.py b/frigate/track/object_processing.py index 773c6da30..25128d6df 100644 --- a/frigate/track/object_processing.py +++ b/frigate/track/object_processing.py @@ -6,6 +6,7 @@ import queue import threading from collections import defaultdict from enum import Enum +from multiprocessing import Queue as MpQueue from multiprocessing.synchronize import Event as MpEvent from typing import Any @@ -14,7 +15,6 @@ import numpy as np from peewee import SQL, DoesNotExist from frigate.camera.state import CameraState -from frigate.comms.config_updater import ConfigSubscriber from frigate.comms.detections_updater import DetectionPublisher, DetectionTypeEnum from frigate.comms.dispatcher import Dispatcher from frigate.comms.event_metadata_updater import ( @@ -29,6 +29,10 @@ from frigate.config import ( RecordConfig, SnapshotsConfig, ) +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) from frigate.const import ( FAST_QUEUE_TIMEOUT, UPDATE_CAMERA_ACTIVITY, @@ -36,6 +40,7 @@ from frigate.const import ( ) from frigate.events.types import EventStateEnum, EventTypeEnum from frigate.models import Event, ReviewSegment, Timeline +from frigate.ptz.autotrack import PtzAutoTrackerThread from frigate.track.tracked_object import TrackedObject from frigate.util.image import SharedMemoryFrameManager @@ -53,10 +58,10 @@ class TrackedObjectProcessor(threading.Thread): self, config: FrigateConfig, dispatcher: Dispatcher, - tracked_objects_queue, - ptz_autotracker_thread, - stop_event, - ): + tracked_objects_queue: MpQueue, + ptz_autotracker_thread: PtzAutoTrackerThread, + stop_event: MpEvent, + ) -> None: super().__init__(name="detected_frames_processor") self.config = config self.dispatcher = dispatcher @@ -67,10 +72,19 @@ class TrackedObjectProcessor(threading.Thread): self.last_motion_detected: dict[str, float] = {} self.ptz_autotracker_thread = ptz_autotracker_thread - 
self.config_enabled_subscriber = ConfigSubscriber("config/enabled/") + self.camera_config_subscriber = CameraConfigUpdateSubscriber( + self.config, + self.config.cameras, + [ + CameraConfigUpdateEnum.add, + CameraConfigUpdateEnum.enabled, + CameraConfigUpdateEnum.remove, + CameraConfigUpdateEnum.zones, + ], + ) self.requestor = InterProcessRequestor() - self.detection_publisher = DetectionPublisher(DetectionTypeEnum.all) + self.detection_publisher = DetectionPublisher(DetectionTypeEnum.all.value) self.event_sender = EventUpdatePublisher() self.event_end_subscriber = EventEndSubscriber() self.sub_label_subscriber = EventMetadataSubscriber(EventMetadataTypeEnum.all) @@ -86,10 +100,20 @@ class TrackedObjectProcessor(threading.Thread): # } # } # } - self.zone_data = defaultdict(lambda: defaultdict(dict)) - self.active_zone_data = defaultdict(lambda: defaultdict(dict)) + self.zone_data: dict[str, dict[str, Any]] = defaultdict( + lambda: defaultdict(dict) + ) + self.active_zone_data: dict[str, dict[str, Any]] = defaultdict( + lambda: defaultdict(dict) + ) - def start(camera: str, obj: TrackedObject, frame_name: str): + for camera in self.config.cameras.keys(): + self.create_camera_state(camera) + + def create_camera_state(self, camera: str) -> None: + """Creates a new camera state.""" + + def start(camera: str, obj: TrackedObject, frame_name: str) -> None: self.event_sender.publish( ( EventTypeEnum.tracked_object, @@ -100,7 +124,7 @@ class TrackedObjectProcessor(threading.Thread): ) ) - def update(camera: str, obj: TrackedObject, frame_name: str): + def update(camera: str, obj: TrackedObject, frame_name: str) -> None: obj.has_snapshot = self.should_save_snapshot(camera, obj) obj.has_clip = self.should_retain_recording(camera, obj) after = obj.to_dict() @@ -121,10 +145,10 @@ class TrackedObjectProcessor(threading.Thread): ) ) - def autotrack(camera: str, obj: TrackedObject, frame_name: str): + def autotrack(camera: str, obj: TrackedObject, frame_name: str) -> None: self.ptz_autotracker_thread.ptz_autotracker.autotrack_object(camera, obj) - def end(camera: str, obj: TrackedObject, frame_name: str): + def end(camera: str, obj: TrackedObject, frame_name: str) -> None: # populate has_snapshot obj.has_snapshot = self.should_save_snapshot(camera, obj) obj.has_clip = self.should_retain_recording(camera, obj) @@ -193,26 +217,25 @@ class TrackedObjectProcessor(threading.Thread): return False - def camera_activity(camera, activity): + def camera_activity(camera: str, activity: dict[str, Any]) -> None: last_activity = self.camera_activity.get(camera) if not last_activity or activity != last_activity: self.camera_activity[camera] = activity self.requestor.send_data(UPDATE_CAMERA_ACTIVITY, self.camera_activity) - for camera in self.config.cameras.keys(): - camera_state = CameraState( - camera, self.config, self.frame_manager, self.ptz_autotracker_thread - ) - camera_state.on("start", start) - camera_state.on("autotrack", autotrack) - camera_state.on("update", update) - camera_state.on("end", end) - camera_state.on("snapshot", snapshot) - camera_state.on("camera_activity", camera_activity) - self.camera_states[camera] = camera_state + camera_state = CameraState( + camera, self.config, self.frame_manager, self.ptz_autotracker_thread + ) + camera_state.on("start", start) + camera_state.on("autotrack", autotrack) + camera_state.on("update", update) + camera_state.on("end", end) + camera_state.on("snapshot", snapshot) + camera_state.on("camera_activity", camera_activity) + self.camera_states[camera] = camera_state 
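With the per-camera wiring factored into `create_camera_state()`, cameras added at runtime are registered through the same callback path as cameras present at startup. A hedged sketch of the resulting flow, not code from this PR (`processor` is an existing `TrackedObjectProcessor`; the `"add"` handling appears in `run()` further down):

```python
# Hedged usage sketch: wiring up a camera added at runtime now that the
# per-camera setup lives in create_camera_state().
updated_topics = processor.camera_config_subscriber.check_for_updates()

for camera in updated_topics.get("add", []):
    # adopt the new config, then register the same start/update/end/
    # snapshot callbacks that startup cameras get
    processor.config.cameras[camera] = (
        processor.camera_config_subscriber.camera_configs[camera]
    )
    processor.create_camera_state(camera)
```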
- def should_save_snapshot(self, camera, obj: TrackedObject): + def should_save_snapshot(self, camera: str, obj: TrackedObject) -> bool: if obj.false_positive: return False @@ -235,7 +258,7 @@ class TrackedObjectProcessor(threading.Thread): return True - def should_retain_recording(self, camera: str, obj: TrackedObject): + def should_retain_recording(self, camera: str, obj: TrackedObject) -> bool: if obj.false_positive: return False @@ -255,7 +278,7 @@ class TrackedObjectProcessor(threading.Thread): return True - def should_mqtt_snapshot(self, camera, obj: TrackedObject): + def should_mqtt_snapshot(self, camera: str, obj: TrackedObject) -> bool: # object never changed position if obj.is_stationary(): return False @@ -270,7 +293,9 @@ class TrackedObjectProcessor(threading.Thread): return True - def update_mqtt_motion(self, camera, frame_time, motion_boxes): + def update_mqtt_motion( + self, camera: str, frame_time: float, motion_boxes: list + ) -> None: # publish if motion is currently being detected if motion_boxes: # only send ON if motion isn't already active @@ -296,11 +321,15 @@ class TrackedObjectProcessor(threading.Thread): # reset the last_motion so redundant `off` commands aren't sent self.last_motion_detected[camera] = 0 - def get_best(self, camera, label): + def get_best(self, camera: str, label: str) -> dict[str, Any]: # TODO: need a lock here camera_state = self.camera_states[camera] if label in camera_state.best_objects: best_obj = camera_state.best_objects[label] + + if not best_obj.thumbnail_data: + return {} + best = best_obj.thumbnail_data.copy() best["frame"] = camera_state.frame_cache.get( best_obj.thumbnail_data["frame_time"] @@ -323,7 +352,7 @@ class TrackedObjectProcessor(threading.Thread): return self.camera_states[camera].get_current_frame(draw_options) - def get_current_frame_time(self, camera) -> int: + def get_current_frame_time(self, camera: str) -> float: """Returns the latest frame time for a given camera.""" return self.camera_states[camera].current_frame_time @@ -331,7 +360,7 @@ class TrackedObjectProcessor(threading.Thread): self, event_id: str, sub_label: str | None, score: float | None ) -> None: """Update sub label for given event id.""" - tracked_obj: TrackedObject = None + tracked_obj: TrackedObject | None = None for state in self.camera_states.values(): tracked_obj = state.tracked_objects.get(event_id) @@ -340,7 +369,7 @@ class TrackedObjectProcessor(threading.Thread): break try: - event: Event = Event.get(Event.id == event_id) + event: Event | None = Event.get(Event.id == event_id) except DoesNotExist: event = None @@ -351,12 +380,12 @@ class TrackedObjectProcessor(threading.Thread): tracked_obj.obj_data["sub_label"] = (sub_label, score) if event: - event.sub_label = sub_label + event.sub_label = sub_label # type: ignore[assignment] data = event.data if sub_label is None: - data["sub_label_score"] = None + data["sub_label_score"] = None # type: ignore[index] elif score is not None: - data["sub_label_score"] = score + data["sub_label_score"] = score # type: ignore[index] event.data = data event.save() @@ -385,7 +414,7 @@ class TrackedObjectProcessor(threading.Thread): objects_list = [] sub_labels = set() events = Event.select(Event.id, Event.label, Event.sub_label).where( - Event.id.in_(detection_ids) + Event.id.in_(detection_ids) # type: ignore[call-arg, misc] ) for det_event in events: if det_event.sub_label: @@ -414,18 +443,20 @@ class TrackedObjectProcessor(threading.Thread): f"Updated sub_label for event {event_id} in review segment 
{review_segment.id}" ) - except ReviewSegment.DoesNotExist: + except DoesNotExist: logger.debug( f"No review segment found with event ID {event_id} when updating sub_label" ) - return True - - def set_recognized_license_plate( - self, event_id: str, recognized_license_plate: str | None, score: float | None + def set_object_attribute( + self, + event_id: str, + field_name: str, + field_value: str | None, + score: float | None, ) -> None: - """Update recognized license plate for given event id.""" - tracked_obj: TrackedObject = None + """Update attribute for given event id.""" + tracked_obj: TrackedObject | None = None for state in self.camera_states.values(): tracked_obj = state.tracked_objects.get(event_id) @@ -434,7 +465,7 @@ class TrackedObjectProcessor(threading.Thread): break try: - event: Event = Event.get(Event.id == event_id) + event: Event | None = Event.get(Event.id == event_id) except DoesNotExist: event = None @@ -442,23 +473,21 @@ class TrackedObjectProcessor(threading.Thread): return if tracked_obj: - tracked_obj.obj_data["recognized_license_plate"] = ( - recognized_license_plate, + tracked_obj.obj_data[field_name] = ( + field_value, score, ) if event: data = event.data - data["recognized_license_plate"] = recognized_license_plate - if recognized_license_plate is None: - data["recognized_license_plate_score"] = None + data[field_name] = field_value # type: ignore[index] + if field_value is None: + data[f"{field_name}_score"] = None # type: ignore[index] elif score is not None: - data["recognized_license_plate_score"] = score + data[f"{field_name}_score"] = score # type: ignore[index] event.data = data event.save() - return True - def save_lpr_snapshot(self, payload: tuple) -> None: # save the snapshot image (frame, event_id, camera) = payload @@ -617,7 +646,7 @@ class TrackedObjectProcessor(threading.Thread): ) self.ongoing_manual_events.pop(event_id) - def force_end_all_events(self, camera: str, camera_state: CameraState): + def force_end_all_events(self, camera: str, camera_state: CameraState) -> None: """Ends all active events on camera when disabling.""" last_frame_name = camera_state.previous_frame_id for obj_id, obj in list(camera_state.tracked_objects.items()): @@ -635,27 +664,28 @@ class TrackedObjectProcessor(threading.Thread): {"enabled": False, "motion": 0, "objects": []}, ) - def run(self): + def run(self) -> None: while not self.stop_event.is_set(): # check for config updates - while True: - ( - updated_enabled_topic, - updated_enabled_config, - ) = self.config_enabled_subscriber.check_for_update() + updated_topics = self.camera_config_subscriber.check_for_updates() - if not updated_enabled_topic: - break - - camera_name = updated_enabled_topic.rpartition("/")[-1] - self.config.cameras[ - camera_name - ].enabled = updated_enabled_config.enabled - - if self.camera_states[camera_name].prev_enabled is None: - self.camera_states[ - camera_name - ].prev_enabled = updated_enabled_config.enabled + if "enabled" in updated_topics: + for camera in updated_topics["enabled"]: + if self.camera_states[camera].prev_enabled is None: + self.camera_states[camera].prev_enabled = self.config.cameras[ + camera + ].enabled + elif "add" in updated_topics: + for camera in updated_topics["add"]: + self.config.cameras[camera] = ( + self.camera_config_subscriber.camera_configs[camera] + ) + self.create_camera_state(camera) + elif "remove" in updated_topics: + for camera in updated_topics["remove"]: + camera_state = self.camera_states[camera] + camera_state.shutdown() + 
self.camera_states.pop(camera) # manage camera disabled state for camera, config in self.config.cameras.items(): @@ -676,11 +706,14 @@ class TrackedObjectProcessor(threading.Thread): # check for sub label updates while True: - (raw_topic, payload) = self.sub_label_subscriber.check_for_update( - timeout=0 - ) + update = self.sub_label_subscriber.check_for_update(timeout=0) - if not raw_topic: + if not update: + break + + (raw_topic, payload) = update + + if not raw_topic or not payload: break topic = str(raw_topic) @@ -688,11 +721,9 @@ class TrackedObjectProcessor(threading.Thread): if topic.endswith(EventMetadataTypeEnum.sub_label.value): (event_id, sub_label, score) = payload self.set_sub_label(event_id, sub_label, score) - if topic.endswith(EventMetadataTypeEnum.recognized_license_plate.value): - (event_id, recognized_license_plate, score) = payload - self.set_recognized_license_plate( - event_id, recognized_license_plate, score - ) + if topic.endswith(EventMetadataTypeEnum.attribute.value): + (event_id, field_name, field_value, score) = payload + self.set_object_attribute(event_id, field_name, field_value, score) elif topic.endswith(EventMetadataTypeEnum.lpr_event_create.value): self.create_lpr_event(payload) elif topic.endswith(EventMetadataTypeEnum.save_lpr_snapshot.value): @@ -764,6 +795,6 @@ class TrackedObjectProcessor(threading.Thread): self.event_sender.stop() self.event_end_subscriber.stop() self.sub_label_subscriber.stop() - self.config_enabled_subscriber.stop() + self.camera_config_subscriber.stop() logger.info("Exiting object processor...") diff --git a/frigate/track/tracked_object.py b/frigate/track/tracked_object.py index 2cb028a9a..111dd2c40 100644 --- a/frigate/track/tracked_object.py +++ b/frigate/track/tracked_object.py @@ -5,18 +5,19 @@ import math import os from collections import defaultdict from statistics import median -from typing import Any, Optional +from typing import Any, Optional, cast import cv2 import numpy as np from frigate.config import ( CameraConfig, - ModelConfig, + FilterConfig, SnapshotsConfig, UIConfig, ) from frigate.const import CLIPS_DIR, THUMB_DIR +from frigate.detectors.detector_config import ModelConfig from frigate.review.types import SeverityEnum from frigate.util.builtin import sanitize_float from frigate.util.image import ( @@ -32,17 +33,25 @@ from frigate.util.velocity import calculate_real_world_speed logger = logging.getLogger(__name__) +# In most cases objects that loiter in a loitering zone should alert, +# but can still be expected to stay stationary for extended periods of time +# (ex: car loitering on the street vs when a known person parks on the street) +# person is the main object that should keep alerts going as long as they loiter +# even if they are stationary. 
+EXTENDED_LOITERING_OBJECTS = ["person"] + + class TrackedObject: def __init__( self, model_config: ModelConfig, camera_config: CameraConfig, ui_config: UIConfig, - frame_cache, + frame_cache: dict[float, dict[str, Any]], obj_data: dict[str, Any], - ): + ) -> None: # set the score history then remove as it is not part of object state - self.score_history = obj_data["score_history"] + self.score_history: list[float] = obj_data["score_history"] del obj_data["score_history"] self.obj_data = obj_data @@ -53,24 +62,24 @@ class TrackedObject: self.frame_cache = frame_cache self.zone_presence: dict[str, int] = {} self.zone_loitering: dict[str, int] = {} - self.current_zones = [] - self.entered_zones = [] - self.attributes = defaultdict(float) + self.current_zones: list[str] = [] + self.entered_zones: list[str] = [] + self.attributes: dict[str, float] = defaultdict(float) self.false_positive = True self.has_clip = False self.has_snapshot = False self.top_score = self.computed_score = 0.0 - self.thumbnail_data = None + self.thumbnail_data: dict[str, Any] | None = None self.last_updated = 0 self.last_published = 0 self.frame = None self.active = True self.pending_loitering = False - self.speed_history = [] - self.current_estimated_speed = 0 - self.average_estimated_speed = 0 + self.speed_history: list[float] = [] + self.current_estimated_speed: float = 0 + self.average_estimated_speed: float = 0 self.velocity_angle = 0 - self.path_data = [] + self.path_data: list[tuple[Any, float]] = [] self.previous = self.to_dict() @property @@ -103,7 +112,7 @@ class TrackedObject: return None - def _is_false_positive(self): + def _is_false_positive(self) -> bool: # once a true positive, always a true positive if not self.false_positive: return False @@ -111,11 +120,13 @@ class TrackedObject: threshold = self.camera_config.objects.filters[self.obj_data["label"]].threshold return self.computed_score < threshold - def compute_score(self): + def compute_score(self) -> float: """get median of scores for object.""" return median(self.score_history) - def update(self, current_frame_time: float, obj_data, has_valid_frame: bool): + def update( + self, current_frame_time: float, obj_data: dict[str, Any], has_valid_frame: bool + ) -> tuple[bool, bool, bool, bool]: thumb_update = False significant_change = False path_update = False @@ -247,8 +258,12 @@ class TrackedObject: if zone.distances and not in_speed_zone: continue # Skip zone entry for speed zones until speed threshold met - # if the zone has loitering time, update loitering status - if zone.loitering_time > 0: + # if the zone has loitering time, and the object is an extended loiter object + # always mark it as loitering actively + if ( + self.obj_data["label"] in EXTENDED_LOITERING_OBJECTS + and zone.loitering_time > 0 + ): in_loitering_zone = True loitering_score = self.zone_loitering.get(name, 0) + 1 @@ -264,6 +279,10 @@ class TrackedObject: self.entered_zones.append(name) else: self.zone_loitering[name] = loitering_score + + # this object is pending loitering but has not entered the zone yet + if zone.loitering_time > 0: + in_loitering_zone = True else: self.zone_presence[name] = zone_score else: @@ -289,7 +308,7 @@ class TrackedObject: k: self.attributes[k] for k in self.logos if k in self.attributes } if len(recognized_logos) > 0: - max_logo = max(recognized_logos, key=recognized_logos.get) + max_logo = max(recognized_logos, key=recognized_logos.get) # type: ignore[arg-type] # don't overwrite sub label if it is already set if ( @@ -326,28 +345,30 @@ class 
TrackedObject: # update path width = self.camera_config.detect.width height = self.camera_config.detect.height - bottom_center = ( - round(obj_data["centroid"][0] / width, 4), - round(obj_data["box"][3] / height, 4), - ) - # calculate a reasonable movement threshold (e.g., 5% of the frame diagonal) - threshold = 0.05 * math.sqrt(width**2 + height**2) / max(width, height) - - if not self.path_data: - self.path_data.append((bottom_center, obj_data["frame_time"])) - path_update = True - elif ( - math.dist(self.path_data[-1][0], bottom_center) >= threshold - or len(self.path_data) == 1 - ): - # check Euclidean distance before appending - self.path_data.append((bottom_center, obj_data["frame_time"])) - path_update = True - logger.debug( - f"Point tracking: {obj_data['id']}, {bottom_center}, {obj_data['frame_time']}" + if width is not None and height is not None: + bottom_center = ( + round(obj_data["centroid"][0] / width, 4), + round(obj_data["box"][3] / height, 4), ) + # calculate a reasonable movement threshold (e.g., 5% of the frame diagonal) + threshold = 0.05 * math.sqrt(width**2 + height**2) / max(width, height) + + if not self.path_data: + self.path_data.append((bottom_center, obj_data["frame_time"])) + path_update = True + elif ( + math.dist(self.path_data[-1][0], bottom_center) >= threshold + or len(self.path_data) == 1 + ): + # check Euclidean distance before appending + self.path_data.append((bottom_center, obj_data["frame_time"])) + path_update = True + logger.debug( + f"Point tracking: {obj_data['id']}, {bottom_center}, {obj_data['frame_time']}" + ) + self.obj_data.update(obj_data) self.current_zones = current_zones logger.debug( @@ -355,7 +376,7 @@ class TrackedObject: ) return (thumb_update, significant_change, path_update, autotracker_update) - def to_dict(self): + def to_dict(self) -> dict[str, Any]: event = { "id": self.obj_data["id"], "camera": self.camera_config.name, @@ -397,10 +418,8 @@ class TrackedObject: return not self.is_stationary() def is_stationary(self) -> bool: - return ( - self.obj_data["motionless_count"] - > self.camera_config.detect.stationary.threshold - ) + count = cast(int | float, self.obj_data["motionless_count"]) + return count > (self.camera_config.detect.stationary.threshold or 50) def get_thumbnail(self, ext: str) -> bytes | None: img_bytes = self.get_img_bytes( @@ -437,9 +456,9 @@ class TrackedObject: def get_img_bytes( self, ext: str, - timestamp=False, - bounding_box=False, - crop=False, + timestamp: bool = False, + bounding_box: bool = False, + crop: bool = False, height: int | None = None, quality: int | None = None, ) -> bytes | None: @@ -516,18 +535,18 @@ class TrackedObject: best_frame, dsize=(width, height), interpolation=cv2.INTER_AREA ) if timestamp: - color = self.camera_config.timestamp_style.color + colors = self.camera_config.timestamp_style.color draw_timestamp( best_frame, self.thumbnail_data["frame_time"], self.camera_config.timestamp_style.format, font_effect=self.camera_config.timestamp_style.effect, font_thickness=self.camera_config.timestamp_style.thickness, - font_color=(color.blue, color.green, color.red), + font_color=(colors.blue, colors.green, colors.red), position=self.camera_config.timestamp_style.position, ) - quality_params = None + quality_params = [] if ext == "jpg": quality_params = [int(cv2.IMWRITE_JPEG_QUALITY), quality or 70] @@ -580,6 +599,9 @@ class TrackedObject: p.write(png_bytes) def write_thumbnail_to_disk(self) -> None: + if not self.camera_config.name: + return + directory = os.path.join(THUMB_DIR, 
self.camera_config.name) if not os.path.exists(directory): @@ -587,11 +609,14 @@ class TrackedObject: thumb_bytes = self.get_thumbnail("webp") - with open(os.path.join(directory, f"{self.obj_data['id']}.webp"), "wb") as f: - f.write(thumb_bytes) + if thumb_bytes: + with open( + os.path.join(directory, f"{self.obj_data['id']}.webp"), "wb" + ) as f: + f.write(thumb_bytes) -def zone_filtered(obj: TrackedObject, object_config): +def zone_filtered(obj: TrackedObject, object_config: dict[str, FilterConfig]) -> bool: object_name = obj.obj_data["label"] if object_name in object_config: @@ -641,9 +666,9 @@ class TrackedObjectAttribute: def find_best_object(self, objects: list[dict[str, Any]]) -> Optional[str]: """Find the best attribute for each object and return its ID.""" - best_object_area = None - best_object_id = None - best_object_label = None + best_object_area: float | None = None + best_object_id: str | None = None + best_object_label: str | None = None for obj in objects: if not box_inside(obj["box"], self.box): diff --git a/frigate/types.py b/frigate/types.py index ee48cc02b..a9e27ba90 100644 --- a/frigate/types.py +++ b/frigate/types.py @@ -21,6 +21,8 @@ class ModelStatusTypesEnum(str, Enum): downloading = "downloading" downloaded = "downloaded" error = "error" + training = "training" + complete = "complete" class TrackedObjectUpdateTypesEnum(str, Enum): diff --git a/frigate/util/__init__.py b/frigate/util/__init__.py index 307bf4f8b..e69de29bb 100644 --- a/frigate/util/__init__.py +++ b/frigate/util/__init__.py @@ -1,3 +0,0 @@ -from .process import Process - -__all__ = ["Process"] diff --git a/frigate/util/audio.py b/frigate/util/audio.py new file mode 100644 index 000000000..eede9c0ea --- /dev/null +++ b/frigate/util/audio.py @@ -0,0 +1,116 @@ +"""Utilities for creating and manipulating audio.""" + +import logging +import os +import subprocess as sp +from typing import Optional + +from pathvalidate import sanitize_filename + +from frigate.const import CACHE_DIR +from frigate.models import Recordings + +logger = logging.getLogger(__name__) + + +def get_audio_from_recording( + ffmpeg, + camera_name: str, + start_ts: float, + end_ts: float, + sample_rate: int = 16000, +) -> Optional[bytes]: + """Extract audio from recording files between start_ts and end_ts in WAV format suitable for sherpa-onnx. 
+ + Args: + ffmpeg: FFmpeg configuration object + camera_name: Name of the camera + start_ts: Start timestamp in seconds + end_ts: End timestamp in seconds + sample_rate: Sample rate for output audio (default 16kHz for sherpa-onnx) + + Returns: + Bytes of WAV audio data or None if extraction failed + """ + # Fetch all relevant recording segments + recordings = ( + Recordings.select( + Recordings.path, + Recordings.start_time, + Recordings.end_time, + ) + .where( + (Recordings.start_time.between(start_ts, end_ts)) + | (Recordings.end_time.between(start_ts, end_ts)) + | ((start_ts > Recordings.start_time) & (end_ts < Recordings.end_time)) + ) + .where(Recordings.camera == camera_name) + .order_by(Recordings.start_time.asc()) + ) + + if not recordings: + logger.debug( + f"No recordings found for {camera_name} between {start_ts} and {end_ts}" + ) + return None + + # Generate concat playlist file + file_name = sanitize_filename( + f"audio_playlist_{camera_name}_{start_ts}-{end_ts}.txt" + ) + file_path = os.path.join(CACHE_DIR, file_name) + try: + with open(file_path, "w") as file: + for clip in recordings: + file.write(f"file '{clip.path}'\n") + if clip.start_time < start_ts: + file.write(f"inpoint {int(start_ts - clip.start_time)}\n") + if clip.end_time > end_ts: + file.write(f"outpoint {int(end_ts - clip.start_time)}\n") + + ffmpeg_cmd = [ + ffmpeg.ffmpeg_path, + "-hide_banner", + "-loglevel", + "warning", + "-protocol_whitelist", + "pipe,file", + "-f", + "concat", + "-safe", + "0", + "-i", + file_path, + "-vn", # No video + "-acodec", + "pcm_s16le", # 16-bit PCM encoding + "-ar", + str(sample_rate), + "-ac", + "1", # Mono audio + "-f", + "wav", + "-", + ] + + process = sp.run( + ffmpeg_cmd, + capture_output=True, + ) + + if process.returncode == 0: + logger.debug( + f"Successfully extracted audio for {camera_name} from {start_ts} to {end_ts}" + ) + return process.stdout + else: + logger.error(f"Failed to extract audio: {process.stderr.decode()}") + return None + except Exception as e: + logger.error(f"Error extracting audio from recordings: {e}") + return None + finally: + try: + os.unlink(file_path) + except OSError: + pass diff --git a/frigate/util/builtin.py b/frigate/util/builtin.py index 52280ecd8..5ab29a6ea 100644 --- a/frigate/util/builtin.py +++ b/frigate/util/builtin.py @@ -5,7 +5,7 @@ import copy import datetime import logging import math -import multiprocessing as mp +import multiprocessing.queues import queue import re import shlex @@ -14,7 +14,7 @@ import urllib.parse from collections.abc import Mapping from multiprocessing.sharedctypes import Synchronized from pathlib import Path -from typing import Any, Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union from zoneinfo import ZoneInfoNotFoundError import numpy as np @@ -184,25 +184,12 @@ def create_mask(frame_shape, mask): mask_img[:] = 255 -def update_yaml_from_url(file_path, url): - parsed_url = urllib.parse.urlparse(url) - query_string = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True) - - # Filter out empty keys but keep blank values for non-empty keys - query_string = {k: v for k, v in query_string.items() if k} - +def process_config_query_string(query_string: Dict[str, list]) -> Dict[str, Any]: + updates = {} for key_path_str, new_value_list in query_string.items(): - key_path = key_path_str.split(".") - for i in range(len(key_path)): - try: - index = int(key_path[i]) - key_path[i] = (key_path[i - 1], index) - key_path.pop(i - 1) - except ValueError: - pass - + # use the string key 
as-is for updates dictionary if len(new_value_list) > 1: - update_yaml_file(file_path, key_path, new_value_list) + updates[key_path_str] = new_value_list else: value = new_value_list[0] try: @@ -210,10 +197,24 @@ def update_yaml_from_url(file_path, url): value = ast.literal_eval(value) if "," not in value else value except (ValueError, SyntaxError): pass - update_yaml_file(file_path, key_path, value) + updates[key_path_str] = value + return updates -def update_yaml_file(file_path, key_path, new_value): +def flatten_config_data( + config_data: Dict[str, Any], parent_key: str = "" +) -> Dict[str, Any]: + items = [] + for key, value in config_data.items(): + new_key = f"{parent_key}.{key}" if parent_key else key + if isinstance(value, dict): + items.extend(flatten_config_data(value, new_key).items()) + else: + items.append((new_key, value)) + return dict(items) + + +def update_yaml_file_bulk(file_path: str, updates: Dict[str, Any]): yaml = YAML() yaml.indent(mapping=2, sequence=4, offset=2) @@ -226,7 +227,17 @@ def update_yaml_file(file_path, key_path, new_value): ) return - data = update_yaml(data, key_path, new_value) + # Apply all updates + for key_path_str, new_value in updates.items(): + key_path = key_path_str.split(".") + for i in range(len(key_path)): + try: + index = int(key_path[i]) + key_path[i] = (key_path[i - 1], index) + key_path.pop(i - 1) + except ValueError: + pass + data = update_yaml(data, key_path, new_value) try: with open(file_path, "w") as f: @@ -327,14 +338,24 @@ def clear_and_unlink(file: Path, missing_ok: bool = True) -> None: file.unlink(missing_ok=missing_ok) -def empty_and_close_queue(q: mp.Queue): +def empty_and_close_queue(q): while True: try: q.get(block=True, timeout=0.5) - except queue.Empty: + except (queue.Empty, EOFError): + break + except Exception as e: + logger.debug(f"Error while emptying queue: {e}") + break + + # close the queue if it is a multiprocessing queue + # manager proxy queues do not have close or join_thread method + if isinstance(q, multiprocessing.queues.Queue): + try: q.close() q.join_thread() - return + except Exception: + pass def generate_color_palette(n): @@ -407,3 +428,19 @@ def sanitize_float(value): if isinstance(value, (int, float)) and not math.isfinite(value): return 0.0 return value + + +def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: + return 1 - cosine_distance(a, b) + + +def cosine_distance(a: np.ndarray, b: np.ndarray) -> float: + """Returns cosine distance to match sqlite-vec's calculation.""" + dot = np.dot(a, b) + a_mag = np.dot(a, a) # ||a||^2 + b_mag = np.dot(b, b) # ||b||^2 + + if a_mag == 0 or b_mag == 0: + return 1.0 + + return 1.0 - (dot / (np.sqrt(a_mag) * np.sqrt(b_mag))) diff --git a/frigate/util/classification.py b/frigate/util/classification.py new file mode 100644 index 000000000..e4133ded4 --- /dev/null +++ b/frigate/util/classification.py @@ -0,0 +1,174 @@ +"""Util for classification models.""" + +import logging +import os + +import cv2 +import numpy as np + +from frigate.comms.embeddings_updater import EmbeddingsRequestEnum, EmbeddingsRequestor +from frigate.comms.inter_process import InterProcessRequestor +from frigate.const import ( + CLIPS_DIR, + MODEL_CACHE_DIR, + PROCESS_PRIORITY_LOW, + UPDATE_MODEL_STATE, +) +from frigate.log import redirect_output_to_logger +from frigate.types import ModelStatusTypesEnum +from frigate.util.process import FrigateProcess + +BATCH_SIZE = 16 +EPOCHS = 50 +LEARNING_RATE = 0.001 + +logger = logging.getLogger(__name__) + + +class 
ClassificationTrainingProcess(FrigateProcess):
+    def __init__(self, model_name: str) -> None:
+        super().__init__(
+            stop_event=None,
+            priority=PROCESS_PRIORITY_LOW,
+            name=f"model_training:{model_name}",
+        )
+        self.model_name = model_name
+
+    def run(self) -> None:
+        self.pre_run_setup()
+        self.__train_classification_model()
+
+    def __generate_representative_dataset_factory(self, dataset_dir: str):
+        def generate_representative_dataset():
+            image_paths = []
+            for root, dirs, files in os.walk(dataset_dir):
+                for file in files:
+                    if file.lower().endswith((".jpg", ".jpeg", ".png")):
+                        image_paths.append(os.path.join(root, file))
+
+            for path in image_paths[:300]:
+                img = cv2.imread(path)
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                img = cv2.resize(img, (224, 224))
+                img_array = np.array(img, dtype=np.float32) / 255.0
+                img_array = img_array[None, ...]
+                yield [img_array]
+
+        return generate_representative_dataset
+
+    @redirect_output_to_logger(logger, logging.DEBUG)
+    def __train_classification_model(self) -> None:
+        """Train a classification model."""
+
+        # import in the function so that tensorflow is not initialized multiple times
+        import tensorflow as tf
+        from tensorflow.keras import layers, models, optimizers
+        from tensorflow.keras.applications import MobileNetV2
+        from tensorflow.keras.preprocessing.image import ImageDataGenerator
+
+        logger.info(f"Kicking off classification training for {self.model_name}.")
+        dataset_dir = os.path.join(CLIPS_DIR, self.model_name, "dataset")
+        model_dir = os.path.join(MODEL_CACHE_DIR, self.model_name)
+        num_classes = len(
+            [
+                d
+                for d in os.listdir(dataset_dir)
+                if os.path.isdir(os.path.join(dataset_dir, d))
+            ]
+        )
+
+        # Start with imagenet base model with 35% of channels in each layer
+        base_model = MobileNetV2(
+            input_shape=(224, 224, 3),
+            include_top=False,
+            weights="imagenet",
+            alpha=0.35,
+        )
+        base_model.trainable = False  # Freeze pre-trained layers
+
+        model = models.Sequential(
+            [
+                base_model,
+                layers.GlobalAveragePooling2D(),
+                layers.Dense(128, activation="relu"),
+                layers.Dropout(0.3),
+                layers.Dense(num_classes, activation="softmax"),
+            ]
+        )
+
+        model.compile(
+            optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
+            loss="categorical_crossentropy",
+            metrics=["accuracy"],
+        )
+
+        # create training set
+        datagen = ImageDataGenerator(rescale=1.0 / 255, validation_split=0.2)
+        train_gen = datagen.flow_from_directory(
+            dataset_dir,
+            target_size=(224, 224),
+            batch_size=BATCH_SIZE,
+            class_mode="categorical",
+            subset="training",
+        )
+
+        # write labelmap
+        class_indices = train_gen.class_indices
+        index_to_class = {v: k for k, v in class_indices.items()}
+        sorted_classes = [index_to_class[i] for i in range(len(index_to_class))]
+        with open(os.path.join(model_dir, "labelmap.txt"), "w") as f:
+            for class_name in sorted_classes:
+                f.write(f"{class_name}\n")
+
+        # train the model
+        model.fit(train_gen, epochs=EPOCHS, verbose=0)
+
+        # convert model to tflite
+        converter = tf.lite.TFLiteConverter.from_keras_model(model)
+        converter.optimizations = [tf.lite.Optimize.DEFAULT]
+        converter.representative_dataset = (
+            self.__generate_representative_dataset_factory(dataset_dir)
+        )
+        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+        converter.inference_input_type = tf.uint8
+        converter.inference_output_type = tf.uint8
+        tflite_model = converter.convert()
+
+        # write model
+        with open(os.path.join(model_dir, "model.tflite"), "wb") as f:
+            f.write(tflite_model)
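The trainer above writes two artifacts per model: `labelmap.txt` and a fully INT8-quantized `model.tflite`. A minimal consumption sketch, assuming the stock TFLite interpreter API and the 224x224 uint8 input contract fixed by the converter settings (illustrative only; Frigate's actual inference path is not shown in this diff, and the file paths here are placeholders):

```python
# Hedged sketch: load the trained artifacts with the TFLite interpreter.
import numpy as np
import tensorflow as tf

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
inp = interpreter.get_input_details()[0]
out = interpreter.get_output_details()[0]

with open("labelmap.txt") as f:
    labels = [line.strip() for line in f]

# inference_input_type is uint8, so no 1/255 normalization here
frame = np.zeros((1, 224, 224, 3), dtype=np.uint8)  # stand-in image
interpreter.set_tensor(inp["index"], frame)
interpreter.invoke()
scores = interpreter.get_tensor(out["index"])[0]
print(labels[int(np.argmax(scores))])
```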
+
+
+def kickoff_model_training(
+    embeddingRequestor: EmbeddingsRequestor, model_name: str
+) -> None:
+    requestor = InterProcessRequestor()
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": model_name,
+            "state": ModelStatusTypesEnum.training,
+        },
+    )
+
+    # run training in sub process so that
+    # tensorflow will free CPU / GPU memory
+    # upon training completion
+    training_process = ClassificationTrainingProcess(model_name)
+    training_process.start()
+    training_process.join()
+
+    # reload model and mark training as complete
+    embeddingRequestor.send_data(
+        EmbeddingsRequestEnum.reload_classification_model.value,
+        {"model_name": model_name},
+    )
+    requestor.send_data(
+        UPDATE_MODEL_STATE,
+        {
+            "model": model_name,
+            "state": ModelStatusTypesEnum.complete,
+        },
+    )
+    requestor.stop()
diff --git a/frigate/util/config.py b/frigate/util/config.py
index 70492adbc..5b4671b75 100644
--- a/frigate/util/config.py
+++ b/frigate/util/config.py
@@ -13,7 +13,7 @@ from frigate.util.services import get_video_properties

 logger = logging.getLogger(__name__)

-CURRENT_CONFIG_VERSION = "0.16-0"
+CURRENT_CONFIG_VERSION = "0.17-0"

 DEFAULT_CONFIG_FILE = os.path.join(CONFIG_DIR, "config.yml")

@@ -91,6 +91,13 @@ def migrate_frigate_config(config_file: str):
             yaml.dump(new_config, f)
         previous_version = "0.16-0"

+    if previous_version < "0.17-0":
+        logger.info(f"Migrating frigate config from {previous_version} to 0.17-0...")
+        new_config = migrate_017_0(config)
+        with open(config_file, "w") as f:
+            yaml.dump(new_config, f)
+        previous_version = "0.17-0"
+
     logger.info("Finished frigate config migration...")

@@ -340,6 +347,80 @@ def migrate_016_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]
     return new_config

+def migrate_017_0(config: dict[str, dict[str, Any]]) -> dict[str, dict[str, Any]]:
+    """Handle migrating frigate config to 0.17-0"""
+    new_config = config.copy()
+
+    # migrate global to new recording configuration
+    global_record_retain = config.get("record", {}).get("retain")
+
+    if global_record_retain:
+        continuous = {"days": 0}
+        motion = {"days": 0}
+        days = global_record_retain.get("days")
+        mode = global_record_retain.get("mode", "all")
+
+        if days:
+            if mode == "all":
+                continuous["days"] = days
+            else:
+                motion["days"] = days
+
+        new_config["record"]["continuous"] = continuous
+        new_config["record"]["motion"] = motion
+
+        del new_config["record"]["retain"]
+
+    # migrate global genai to new objects config
+    global_genai = config.get("genai", {})
+
+    if global_genai:
+        new_genai_config = {}
+        new_object_config = config.get("objects", {})
+        new_object_config["genai"] = {}
+
+        for key in global_genai.keys():
+            if key not in ["provider", "base_url", "api_key"]:
+                new_object_config["genai"][key] = global_genai[key]
+            else:
+                new_genai_config[key] = global_genai[key]
+
+        config["genai"] = new_genai_config
+
+    for name, camera in config.get("cameras", {}).items():
+        camera_config: dict[str, dict[str, Any]] = camera.copy()
+        camera_record_retain = camera_config.get("record", {}).get("retain")
+
+        if camera_record_retain:
+            continuous = {"days": 0}
+            motion = {"days": 0}
+            days = camera_record_retain.get("days")
+            mode = camera_record_retain.get("mode", "all")
+
+            if days:
+                if mode == "all":
+                    continuous["days"] = days
+                else:
+                    motion["days"] = days
+
+            camera_config["record"]["continuous"] = continuous
+            camera_config["record"]["motion"] = motion
+
+            del camera_config["record"]["retain"]
+
+        camera_genai = camera_config.get("genai", {})
+
+        if camera_genai:
+            new_object_config = config.get("objects", {})
+
new_object_config["genai"] = camera_genai + del camera_config["genai"] + + new_config["cameras"][name] = camera_config + + new_config["version"] = "0.17-0" + return new_config + + def get_relative_coordinates( mask: Optional[Union[str, list]], frame_shape: tuple[int, int] ) -> Union[str, list]: diff --git a/frigate/util/process.py b/frigate/util/process.py index ac15539fe..b9fede44e 100644 --- a/frigate/util/process.py +++ b/frigate/util/process.py @@ -1,19 +1,23 @@ import faulthandler import logging import multiprocessing as mp -import signal -import sys +import os import threading -from functools import wraps from logging.handlers import QueueHandler -from typing import Any, Callable, Optional +from multiprocessing.synchronize import Event as MpEvent +from typing import Callable, Optional + +from setproctitle import setproctitle import frigate.log +from frigate.config.logger import LoggerConfig class BaseProcess(mp.Process): def __init__( self, + stop_event: MpEvent, + priority: int, *, name: Optional[str] = None, target: Optional[Callable] = None, @@ -21,6 +25,8 @@ class BaseProcess(mp.Process): kwargs: dict = {}, daemon: Optional[bool] = None, ): + self.priority = priority + self.stop_event = stop_event super().__init__( name=name, target=target, args=args, kwargs=kwargs, daemon=daemon ) @@ -30,66 +36,31 @@ class BaseProcess(mp.Process): super().start(*args, **kwargs) self.after_start() - def __getattribute__(self, name: str) -> Any: - if name == "run": - run = super().__getattribute__("run") - - @wraps(run) - def run_wrapper(*args, **kwargs): - try: - self.before_run() - return run(*args, **kwargs) - finally: - self.after_run() - - return run_wrapper - - return super().__getattribute__(name) - def before_start(self) -> None: pass def after_start(self) -> None: pass - def before_run(self) -> None: - pass - def after_run(self) -> None: - pass - - -class Process(BaseProcess): +class FrigateProcess(BaseProcess): logger: logging.Logger - @property - def stop_event(self) -> threading.Event: - # Lazily create the stop_event. This allows the signal handler to tell if anyone is - # monitoring the stop event, and to raise a SystemExit if not. - if "stop_event" not in self.__dict__: - self.__dict__["stop_event"] = threading.Event() - return self.__dict__["stop_event"] - def before_start(self) -> None: self.__log_queue = frigate.log.log_listener.queue - def before_run(self) -> None: + def pre_run_setup(self, logConfig: LoggerConfig | None = None) -> None: + os.nice(self.priority) + setproctitle(self.name) + threading.current_thread().name = f"process:{self.name}" faulthandler.enable() - def receiveSignal(signalNumber, frame): - # Get the stop_event through the dict to bypass lazy initialization. - stop_event = self.__dict__.get("stop_event") - if stop_event is not None: - # Someone is monitoring stop_event. We should set it. - stop_event.set() - else: - # Nobody is monitoring stop_event. We should raise SystemExit. 
- sys.exit() - - signal.signal(signal.SIGTERM, receiveSignal) - signal.signal(signal.SIGINT, receiveSignal) - + # setup logging self.logger = logging.getLogger(self.name) - logging.basicConfig(handlers=[], force=True) logging.getLogger().addHandler(QueueHandler(self.__log_queue)) + + if logConfig: + frigate.log.apply_log_levels( + logConfig.default.value.upper(), logConfig.logs + ) diff --git a/frigate/video.py b/frigate/video.py index f2197ed66..dc3f1c430 100755 --- a/frigate/video.py +++ b/frigate/video.py @@ -1,9 +1,7 @@ import datetime import logging -import multiprocessing as mp import os import queue -import signal import subprocess as sp import threading import time @@ -12,16 +10,19 @@ from multiprocessing.synchronize import Event as MpEvent from typing import Any import cv2 -from setproctitle import setproctitle from frigate.camera import CameraMetrics, PTZMetrics -from frigate.comms.config_updater import ConfigSubscriber from frigate.comms.inter_process import InterProcessRequestor from frigate.config import CameraConfig, DetectConfig, ModelConfig from frigate.config.camera.camera import CameraTypeEnum +from frigate.config.camera.updater import ( + CameraConfigUpdateEnum, + CameraConfigUpdateSubscriber, +) from frigate.const import ( CACHE_DIR, CACHE_SEGMENT_FORMAT, + PROCESS_PRIORITY_HIGH, REQUEST_REGION_GRID, ) from frigate.log import LogPipe @@ -50,12 +51,12 @@ from frigate.util.object import ( is_object_filtered, reduce_detections, ) -from frigate.util.services import listen +from frigate.util.process import FrigateProcess logger = logging.getLogger(__name__) -def stop_ffmpeg(ffmpeg_process, logger): +def stop_ffmpeg(ffmpeg_process: sp.Popen[Any], logger: logging.Logger): logger.info("Terminating the existing ffmpeg process...") ffmpeg_process.terminate() try: @@ -112,15 +113,13 @@ def capture_frames( frame_rate.start() skipped_eps = EventsPerSecond() skipped_eps.start() - config_subscriber = ConfigSubscriber(f"config/enabled/{config.name}", True) + config_subscriber = CameraConfigUpdateSubscriber( + None, {config.name: config}, [CameraConfigUpdateEnum.enabled] + ) def get_enabled_state(): """Fetch the latest enabled state from ZMQ.""" - _, config_data = config_subscriber.check_for_update() - - if config_data: - config.enabled = config_data.enabled - + config_subscriber.check_for_updates() return config.enabled while not stop_event.is_set(): @@ -167,7 +166,6 @@ def capture_frames( class CameraWatchdog(threading.Thread): def __init__( self, - camera_name, config: CameraConfig, shm_frame_count: int, frame_queue: Queue, @@ -177,13 +175,12 @@ class CameraWatchdog(threading.Thread): stop_event, ): threading.Thread.__init__(self) - self.logger = logging.getLogger(f"watchdog.{camera_name}") - self.camera_name = camera_name + self.logger = logging.getLogger(f"watchdog.{config.name}") self.config = config self.shm_frame_count = shm_frame_count self.capture_thread = None self.ffmpeg_detect_process = None - self.logpipe = LogPipe(f"ffmpeg.{self.camera_name}.detect") + self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.detect") self.ffmpeg_other_processes: list[dict[str, Any]] = [] self.camera_fps = camera_fps self.skipped_fps = skipped_fps @@ -196,16 +193,14 @@ class CameraWatchdog(threading.Thread): self.stop_event = stop_event self.sleeptime = self.config.ffmpeg.retry_interval - self.config_subscriber = ConfigSubscriber(f"config/enabled/{camera_name}", True) + self.config_subscriber = CameraConfigUpdateSubscriber( + None, {config.name: config}, [CameraConfigUpdateEnum.enabled] + ) 
self.was_enabled = self.config.enabled def _update_enabled_state(self) -> bool: """Fetch the latest config and update enabled state.""" - _, config_data = self.config_subscriber.check_for_update() - if config_data: - self.config.enabled = config_data.enabled - return config_data.enabled - + self.config_subscriber.check_for_updates() return self.config.enabled def reset_capture_thread( @@ -245,10 +240,10 @@ class CameraWatchdog(threading.Thread): enabled = self._update_enabled_state() if enabled != self.was_enabled: if enabled: - self.logger.debug(f"Enabling camera {self.camera_name}") + self.logger.debug(f"Enabling camera {self.config.name}") self.start_all_ffmpeg() else: - self.logger.debug(f"Disabling camera {self.camera_name}") + self.logger.debug(f"Disabling camera {self.config.name}") self.stop_all_ffmpeg() self.was_enabled = enabled continue @@ -261,7 +256,7 @@ class CameraWatchdog(threading.Thread): if not self.capture_thread.is_alive(): self.camera_fps.value = 0 self.logger.error( - f"Ffmpeg process crashed unexpectedly for {self.camera_name}." + f"Ffmpeg process crashed unexpectedly for {self.config.name}." ) self.reset_capture_thread(terminate=False) elif self.camera_fps.value >= (self.config.detect.fps + 10): @@ -271,13 +266,13 @@ class CameraWatchdog(threading.Thread): self.fps_overflow_count = 0 self.camera_fps.value = 0 self.logger.info( - f"{self.camera_name} exceeded fps limit. Exiting ffmpeg..." + f"{self.config.name} exceeded fps limit. Exiting ffmpeg..." ) self.reset_capture_thread(drain_output=False) elif now - self.capture_thread.current_frame.value > 20: self.camera_fps.value = 0 self.logger.info( - f"No frames received from {self.camera_name} in 20 seconds. Exiting ffmpeg..." + f"No frames received from {self.config.name} in 20 seconds. Exiting ffmpeg..." ) self.reset_capture_thread() else: @@ -299,7 +294,7 @@ class CameraWatchdog(threading.Thread): latest_segment_time + datetime.timedelta(seconds=120) ): self.logger.error( - f"No new recording segments were created for {self.camera_name} in the last 120s. restarting the ffmpeg record process..." + f"No new recording segments were created for {self.config.name} in the last 120s. restarting the ffmpeg record process..." 
) p["process"] = start_or_restart_ffmpeg( p["cmd"], @@ -331,7 +326,7 @@ class CameraWatchdog(threading.Thread): ffmpeg_cmd, self.logger, self.logpipe, self.frame_size ) self.ffmpeg_pid.value = self.ffmpeg_detect_process.pid - self.capture_thread = CameraCapture( + self.capture_thread = CameraCaptureRunner( self.config, self.shm_frame_count, self.frame_index, @@ -346,13 +341,13 @@ class CameraWatchdog(threading.Thread): def start_all_ffmpeg(self): """Start all ffmpeg processes (detection and others).""" - logger.debug(f"Starting all ffmpeg processes for {self.camera_name}") + logger.debug(f"Starting all ffmpeg processes for {self.config.name}") self.start_ffmpeg_detect() for c in self.config.ffmpeg_cmds: if "detect" in c["roles"]: continue logpipe = LogPipe( - f"ffmpeg.{self.camera_name}.{'_'.join(sorted(c['roles']))}" + f"ffmpeg.{self.config.name}.{'_'.join(sorted(c['roles']))}" ) self.ffmpeg_other_processes.append( { @@ -365,12 +360,12 @@ class CameraWatchdog(threading.Thread): def stop_all_ffmpeg(self): """Stop all ffmpeg processes (detection and others).""" - logger.debug(f"Stopping all ffmpeg processes for {self.camera_name}") + logger.debug(f"Stopping all ffmpeg processes for {self.config.name}") if self.capture_thread is not None and self.capture_thread.is_alive(): self.capture_thread.join(timeout=5) if self.capture_thread.is_alive(): self.logger.warning( - f"Capture thread for {self.camera_name} did not stop gracefully." + f"Capture thread for {self.config.name} did not stop gracefully." ) if self.ffmpeg_detect_process is not None: stop_ffmpeg(self.ffmpeg_detect_process, self.logger) @@ -397,7 +392,7 @@ class CameraWatchdog(threading.Thread): newest_segment_time = latest_segment for file in cache_files: - if self.camera_name in file: + if self.config.name in file: basename = os.path.splitext(file)[0] _, date = basename.rsplit("@", maxsplit=1) segment_time = datetime.datetime.strptime( @@ -409,7 +404,7 @@ class CameraWatchdog(threading.Thread): return newest_segment_time -class CameraCapture(threading.Thread): +class CameraCaptureRunner(threading.Thread): def __init__( self, config: CameraConfig, @@ -453,110 +448,118 @@ class CameraCapture(threading.Thread): ) -def capture_camera( - name, config: CameraConfig, shm_frame_count: int, camera_metrics: CameraMetrics -): - stop_event = mp.Event() +class CameraCapture(FrigateProcess): + def __init__( + self, + config: CameraConfig, + shm_frame_count: int, + camera_metrics: CameraMetrics, + stop_event: MpEvent, + ) -> None: + super().__init__( + stop_event, + PROCESS_PRIORITY_HIGH, + name=f"frigate.capture:{config.name}", + daemon=True, + ) + self.config = config + self.shm_frame_count = shm_frame_count + self.camera_metrics = camera_metrics - def receiveSignal(signalNumber, frame): - stop_event.set() - - signal.signal(signal.SIGTERM, receiveSignal) - signal.signal(signal.SIGINT, receiveSignal) - - threading.current_thread().name = f"capture:{name}" - setproctitle(f"frigate.capture:{name}") - - camera_watchdog = CameraWatchdog( - name, - config, - shm_frame_count, - camera_metrics.frame_queue, - camera_metrics.camera_fps, - camera_metrics.skipped_fps, - camera_metrics.ffmpeg_pid, - stop_event, - ) - camera_watchdog.start() - camera_watchdog.join() + def run(self) -> None: + self.pre_run_setup() + camera_watchdog = CameraWatchdog( + self.config, + self.shm_frame_count, + self.camera_metrics.frame_queue, + self.camera_metrics.camera_fps, + self.camera_metrics.skipped_fps, + self.camera_metrics.ffmpeg_pid, + self.stop_event, + ) + 
camera_watchdog.start() + camera_watchdog.join() -def track_camera( - name, - config: CameraConfig, - model_config: ModelConfig, - labelmap: dict[int, str], - detection_queue: Queue, - result_connection: MpEvent, - detected_objects_queue, - camera_metrics: CameraMetrics, - ptz_metrics: PTZMetrics, - region_grid: list[list[dict[str, Any]]], -): - stop_event = mp.Event() - - def receiveSignal(signalNumber, frame): - stop_event.set() - - signal.signal(signal.SIGTERM, receiveSignal) - signal.signal(signal.SIGINT, receiveSignal) - - threading.current_thread().name = f"process:{name}" - setproctitle(f"frigate.process:{name}") - listen() - - frame_queue = camera_metrics.frame_queue - - frame_shape = config.frame_shape - objects_to_track = config.objects.track - object_filters = config.objects.filters - - motion_detector = ImprovedMotionDetector( - frame_shape, - config.motion, - config.detect.fps, - name=config.name, - ptz_metrics=ptz_metrics, - ) - object_detector = RemoteObjectDetector( - name, labelmap, detection_queue, result_connection, model_config, stop_event - ) - - object_tracker = NorfairTracker(config, ptz_metrics) - - frame_manager = SharedMemoryFrameManager() - - # create communication for region grid updates - requestor = InterProcessRequestor() - - process_frames( - name, - requestor, - frame_queue, - frame_shape, - model_config, - config, - config.detect, - frame_manager, - motion_detector, - object_detector, - object_tracker, +class CameraTracker(FrigateProcess): + def __init__( + self, + config: CameraConfig, + model_config: ModelConfig, + labelmap: dict[int, str], + detection_queue: Queue, detected_objects_queue, - camera_metrics, - objects_to_track, - object_filters, - stop_event, - ptz_metrics, - region_grid, - ) + camera_metrics: CameraMetrics, + ptz_metrics: PTZMetrics, + region_grid: list[list[dict[str, Any]]], + stop_event: MpEvent, + ) -> None: + super().__init__( + stop_event, + PROCESS_PRIORITY_HIGH, + name=f"frigate.process:{config.name}", + daemon=True, + ) + self.config = config + self.model_config = model_config + self.labelmap = labelmap + self.detection_queue = detection_queue + self.detected_objects_queue = detected_objects_queue + self.camera_metrics = camera_metrics + self.ptz_metrics = ptz_metrics + self.region_grid = region_grid - # empty the frame queue - logger.info(f"{name}: emptying frame queue") - while not frame_queue.empty(): - (frame_name, _) = frame_queue.get(False) - frame_manager.delete(frame_name) + def run(self) -> None: + self.pre_run_setup() + frame_queue = self.camera_metrics.frame_queue + frame_shape = self.config.frame_shape - logger.info(f"{name}: exiting subprocess") + motion_detector = ImprovedMotionDetector( + frame_shape, + self.config.motion, + self.config.detect.fps, + name=self.config.name, + ptz_metrics=self.ptz_metrics, + ) + object_detector = RemoteObjectDetector( + self.config.name, + self.labelmap, + self.detection_queue, + self.model_config, + self.stop_event, + ) + + object_tracker = NorfairTracker(self.config, self.ptz_metrics) + + frame_manager = SharedMemoryFrameManager() + + # create communication for region grid updates + requestor = InterProcessRequestor() + + process_frames( + requestor, + frame_queue, + frame_shape, + self.model_config, + self.config, + frame_manager, + motion_detector, + object_detector, + object_tracker, + self.detected_objects_queue, + self.camera_metrics, + self.stop_event, + self.ptz_metrics, + self.region_grid, + ) + + # empty the frame queue + logger.info(f"{self.config.name}: emptying frame 
queue") + while not frame_queue.empty(): + (frame_name, _) = frame_queue.get(False) + frame_manager.delete(frame_name) + + logger.info(f"{self.config.name}: exiting subprocess") def detect( @@ -597,29 +600,33 @@ def detect( def process_frames( - camera_name: str, requestor: InterProcessRequestor, frame_queue: Queue, frame_shape: tuple[int, int], model_config: ModelConfig, camera_config: CameraConfig, - detect_config: DetectConfig, frame_manager: FrameManager, motion_detector: MotionDetector, object_detector: RemoteObjectDetector, object_tracker: ObjectTracker, detected_objects_queue: Queue, camera_metrics: CameraMetrics, - objects_to_track: list[str], - object_filters, stop_event: MpEvent, ptz_metrics: PTZMetrics, region_grid: list[list[dict[str, Any]]], exit_on_empty: bool = False, ): next_region_update = get_tomorrow_at_time(2) - detect_config_subscriber = ConfigSubscriber(f"config/detect/{camera_name}", True) - enabled_config_subscriber = ConfigSubscriber(f"config/enabled/{camera_name}", True) + config_subscriber = CameraConfigUpdateSubscriber( + None, + {camera_config.name: camera_config}, + [ + CameraConfigUpdateEnum.detect, + CameraConfigUpdateEnum.enabled, + CameraConfigUpdateEnum.motion, + CameraConfigUpdateEnum.objects, + ], + ) fps_tracker = EventsPerSecond() fps_tracker.start() @@ -654,18 +661,23 @@ def process_frames( ] while not stop_event.is_set(): - _, updated_enabled_config = enabled_config_subscriber.check_for_update() + updated_configs = config_subscriber.check_for_updates() - if updated_enabled_config: + if "enabled" in updated_configs: prev_enabled = camera_enabled - camera_enabled = updated_enabled_config.enabled + camera_enabled = camera_config.enabled + + if "motion" in updated_configs: + motion_detector.update_mask() if ( not camera_enabled and prev_enabled != camera_enabled and camera_metrics.frame_queue.empty() ): - logger.debug(f"Camera {camera_name} disabled, clearing tracked objects") + logger.debug( + f"Camera {camera_config.name} disabled, clearing tracked objects" + ) prev_enabled = camera_enabled # Clear norfair's dictionaries @@ -686,17 +698,11 @@ def process_frames( time.sleep(0.1) continue - # check for updated detect config - _, updated_detect_config = detect_config_subscriber.check_for_update() - - if updated_detect_config: - detect_config = updated_detect_config - if ( datetime.datetime.now().astimezone(datetime.timezone.utc) > next_region_update ): - region_grid = requestor.send_data(REQUEST_REGION_GRID, camera_name) + region_grid = requestor.send_data(REQUEST_REGION_GRID, camera_config.name) next_region_update = get_tomorrow_at_time(2) try: @@ -716,7 +722,9 @@ def process_frames( frame = frame_manager.get(frame_name, (frame_shape[0] * 3 // 2, frame_shape[1])) if frame is None: - logger.debug(f"{camera_name}: frame {frame_time} is not in memory store.") + logger.debug( + f"{camera_config.name}: frame {frame_time} is not in memory store." 
+ ) continue # look for motion if enabled @@ -726,14 +734,14 @@ def process_frames( consolidated_detections = [] # if detection is disabled - if not detect_config.enabled: + if not camera_config.detect.enabled: object_tracker.match_and_update(frame_name, frame_time, []) else: # get stationary object ids # check every Nth frame for stationary objects # disappeared objects are not stationary # also check for overlapping motion boxes - if stationary_frame_counter == detect_config.stationary.interval: + if stationary_frame_counter == camera_config.detect.stationary.interval: stationary_frame_counter = 0 stationary_object_ids = [] else: @@ -742,7 +750,8 @@ def process_frames( obj["id"] for obj in object_tracker.tracked_objects.values() # if it has exceeded the stationary threshold - if obj["motionless_count"] >= detect_config.stationary.threshold + if obj["motionless_count"] + >= camera_config.detect.stationary.threshold # and it hasn't disappeared and object_tracker.disappeared[obj["id"]] == 0 # and it doesn't overlap with any current motion boxes when not calibrating @@ -757,7 +766,8 @@ def process_frames( ( # use existing object box for stationary objects obj["estimate"] - if obj["motionless_count"] < detect_config.stationary.threshold + if obj["motionless_count"] + < camera_config.detect.stationary.threshold else obj["box"] ) for obj in object_tracker.tracked_objects.values() @@ -831,13 +841,13 @@ def process_frames( for region in regions: detections.extend( detect( - detect_config, + camera_config.detect, object_detector, frame, model_config, region, - objects_to_track, - object_filters, + camera_config.objects.track, + camera_config.objects.filters, ) ) @@ -953,7 +963,7 @@ def process_frames( ) cv2.imwrite( - f"debug/frames/{camera_name}-{'{:.6f}'.format(frame_time)}.jpg", + f"debug/frames/{camera_config.name}-{'{:.6f}'.format(frame_time)}.jpg", bgr_frame, ) # add to the queue if not full @@ -965,7 +975,7 @@ def process_frames( camera_metrics.process_fps.value = fps_tracker.eps() detected_objects_queue.put( ( - camera_name, + camera_config.name, frame_name, frame_time, detections, @@ -978,5 +988,4 @@ def process_frames( motion_detector.stop() requestor.stop() - detect_config_subscriber.stop() - enabled_config_subscriber.stop() + config_subscriber.stop() diff --git a/migrations/031_create_trigger_table.py b/migrations/031_create_trigger_table.py new file mode 100644 index 000000000..7c8c289cc --- /dev/null +++ b/migrations/031_create_trigger_table.py @@ -0,0 +1,50 @@ +"""Peewee migrations -- 031_create_trigger_table.py. + +This migration creates the Trigger table to track semantic search triggers for cameras. 
+ +Some examples (model - class or model_name):: + + > Model = migrator.orm['model_name'] # Return model in current state by name + > migrator.sql(sql) # Run custom SQL + > migrator.python(func, *args, **kwargs) # Run python code + > migrator.create_model(Model) # Create a model (could be used as decorator) + > migrator.remove_model(model, cascade=True) # Remove a model + > migrator.add_fields(model, **fields) # Add fields to a model + > migrator.change_fields(model, **fields) # Change fields + > migrator.remove_fields(model, *field_names, cascade=True) + > migrator.rename_field(model, old_field_name, new_field_name) + > migrator.rename_table(model, new_table_name) + > migrator.add_index(model, *col_names, unique=False) + > migrator.drop_index(model, *col_names) + > migrator.add_not_null(model, *field_names) + > migrator.drop_not_null(model, *field_names) + > migrator.add_default(model, field_name, default) + +""" + +import peewee as pw + +SQL = pw.SQL + + +def migrate(migrator, database, fake=False, **kwargs): + migrator.sql( + """ + CREATE TABLE IF NOT EXISTS trigger ( + camera VARCHAR(20) NOT NULL, + name VARCHAR NOT NULL, + type VARCHAR(10) NOT NULL, + model VARCHAR(30) NOT NULL, + data TEXT NOT NULL, + threshold REAL, + embedding BLOB, + triggering_event_id VARCHAR(30), + last_triggered DATETIME, + PRIMARY KEY (camera, name) + ) + """ + ) + + +def rollback(migrator, database, fake=False, **kwargs): + migrator.sql("DROP TABLE IF EXISTS trigger") diff --git a/web/public/locales/en/components/dialog.json b/web/public/locales/en/components/dialog.json index 8b2dc0b88..02ab43c4c 100644 --- a/web/public/locales/en/components/dialog.json +++ b/web/public/locales/en/components/dialog.json @@ -109,5 +109,12 @@ "markAsReviewed": "Mark as reviewed", "deleteNow": "Delete Now" } + }, + "imagePicker": { + "selectImage": "Select a tracked object's thumbnail", + "search": { + "placeholder": "Search by label or sub label..." + }, + "noImages": "No thumbnails found for this camera" } } diff --git a/web/public/locales/en/components/filter.json b/web/public/locales/en/components/filter.json index 08a0ee2b2..1eaccbb69 100644 --- a/web/public/locales/en/components/filter.json +++ b/web/public/locales/en/components/filter.json @@ -1,5 +1,11 @@ { "filter": "Filter", + "classes": { + "label": "Classes", + "all": { "title": "All Classes" }, + "count_one": "{{count}} Class", + "count_other": "{{count}} Classes" + }, "labels": { "label": "Labels", "all": { diff --git a/web/public/locales/en/views/classificationModel.json b/web/public/locales/en/views/classificationModel.json new file mode 100644 index 000000000..47b2b13bf --- /dev/null +++ b/web/public/locales/en/views/classificationModel.json @@ -0,0 +1,53 @@ +{ + "button": { + "deleteClassificationAttempts": "Delete Classification Images", + "renameCategory": "Rename Class", + "deleteCategory": "Delete Class", + "deleteImages": "Delete Images", + "trainModel": "Train Model" + }, + "toast": { + "success": { + "deletedCategory": "Deleted Class", + "deletedImage": "Deleted Images", + "categorizedImage": "Successfully Classified Image", + "trainedModel": "Successfully trained model.", + "trainingModel": "Successfully started model training." 
+  },
+  "error": {
+    "deleteImageFailed": "Failed to delete: {{errorMessage}}",
+    "deleteCategoryFailed": "Failed to delete class: {{errorMessage}}",
+    "categorizeFailed": "Failed to categorize image: {{errorMessage}}",
+    "trainingFailed": "Failed to start model training: {{errorMessage}}"
+  }
+ },
+ "deleteCategory": {
+  "title": "Delete Class",
+  "desc": "Are you sure you want to delete the class {{name}}? This will permanently delete all associated images and require re-training the model."
+ },
+ "deleteDatasetImages": {
+  "title": "Delete Dataset Images",
+  "desc": "Are you sure you want to delete {{count}} images from {{dataset}}? This action cannot be undone and will require re-training the model."
+ },
+ "deleteTrainImages": {
+  "title": "Delete Train Images",
+  "desc": "Are you sure you want to delete {{count}} images? This action cannot be undone."
+ },
+ "renameCategory": {
+  "title": "Rename Class",
+  "desc": "Enter a new name for {{name}}. You will be required to retrain the model for the name change to take effect."
+ },
+ "description": {
+  "invalidName": "Invalid name. Names can only include letters, numbers, spaces, apostrophes, underscores, and hyphens."
+ },
+ "train": {
+  "title": "Train",
+  "aria": "Select Train"
+ },
+ "categories": "Classes",
+ "createCategory": {
+  "new": "Create New Class"
+ },
+ "categorizeImageAs": "Classify Image As:",
+ "categorizeImage": "Classify Image"
+}
diff --git a/web/public/locales/en/views/configEditor.json b/web/public/locales/en/views/configEditor.json
index ef3035f38..614143c16 100644
--- a/web/public/locales/en/views/configEditor.json
+++ b/web/public/locales/en/views/configEditor.json
@@ -1,6 +1,8 @@
 {
 "documentTitle": "Config Editor - Frigate",
 "configEditor": "Config Editor",
+ "safeConfigEditor": "Config Editor (Safe Mode)",
+ "safeModeDescription": "Frigate is in safe mode due to a config validation error.",
 "copyConfig": "Copy Config",
 "saveAndRestart": "Save & Restart",
 "saveOnly": "Save Only",
diff --git a/web/public/locales/en/views/explore.json b/web/public/locales/en/views/explore.json
index 7e2381445..d754fee77 100644
--- a/web/public/locales/en/views/explore.json
+++ b/web/public/locales/en/views/explore.json
@@ -103,12 +103,14 @@
 "success": {
 "regenerate": "A new description has been requested from {{provider}}. Depending on the speed of your provider, the new description may take some time to regenerate.",
 "updatedSublabel": "Successfully updated sub label.",
- "updatedLPR": "Successfully updated license plate."
+ "updatedLPR": "Successfully updated license plate.",
+ "audioTranscription": "Successfully requested audio transcription."
}, "error": { "regenerate": "Failed to call {{provider}} for a new description: {{errorMessage}}", "updatedSublabelFailed": "Failed to update sub label: {{errorMessage}}", - "updatedLPRFailed": "Failed to update license plate: {{errorMessage}}" + "updatedLPRFailed": "Failed to update license plate: {{errorMessage}}", + "audioTranscription": "Failed to request audio transcription: {{errorMessage}}" } } }, @@ -173,6 +175,14 @@ "label": "Find similar", "aria": "Find similar tracked objects" }, + "addTrigger": { + "label": "Add trigger", + "aria": "Add a trigger for this tracked object" + }, + "audioTranscription": { + "label": "Transcribe", + "aria": "Request audio transcription" + }, "submitToPlus": { "label": "Submit to Frigate+", "aria": "Submit to Frigate Plus" diff --git a/web/public/locales/en/views/live.json b/web/public/locales/en/views/live.json index 1790467d2..2af399296 100644 --- a/web/public/locales/en/views/live.json +++ b/web/public/locales/en/views/live.json @@ -38,6 +38,14 @@ "label": "Zoom PTZ camera out" } }, + "focus": { + "in": { + "label": "Focus PTZ camera in" + }, + "out": { + "label": "Focus PTZ camera out" + } + }, "frame": { "center": { "label": "Click in the frame to center the PTZ camera" @@ -69,6 +77,10 @@ "enable": "Enable Audio Detect", "disable": "Disable Audio Detect" }, + "transcription": { + "enable": "Enable Live Audio Transcription", + "disable": "Disable Live Audio Transcription" + }, "autotracking": { "enable": "Enable Autotracking", "disable": "Disable Autotracking" @@ -135,6 +147,7 @@ "recording": "Recording", "snapshots": "Snapshots", "audioDetection": "Audio Detection", + "transcription": "Audio Transcription", "autotracking": "Autotracking" }, "history": { diff --git a/web/public/locales/en/views/settings.json b/web/public/locales/en/views/settings.json index 2b92e81cd..516ddf9f2 100644 --- a/web/public/locales/en/views/settings.json +++ b/web/public/locales/en/views/settings.json @@ -150,6 +150,14 @@ "title": "Streams", "desc": "Temporarily disable a camera until Frigate restarts. Disabling a camera completely stops Frigate's processing of this camera's streams. Detection, recording, and debugging will be unavailable.
Note: This does not disable go2rtc restreams." }, + "object_descriptions": { + "title": "Generative AI Object Descriptions", + "desc": "Temporarily enable/disable Generative AI object descriptions for this camera. When disabled, AI generated descriptions will not be requested for tracked objects on this camera." + }, + "review_descriptions": { + "title": "Generative AI Review Descriptions", + "desc": "Temporarily enable/disable Generative AI review descriptions for this camera. When disabled, AI generated descriptions will not be requested for review items on this camera." + }, "review": { "title": "Review", "desc": "Temporarily enable/disable alerts and detections for this camera until Frigate restarts. When disabled, no new review items will be generated. ", @@ -176,6 +184,35 @@ "toast": { "success": "Review Classification configuration has been saved. Restart Frigate to apply changes." } + }, + "addCamera": "Add New Camera", + "editCamera": "Edit Camera:", + "selectCamera": "Select a Camera", + "backToSettings": "Back to Camera Settings", + "cameraConfig": { + "add": "Add Camera", + "edit": "Edit Camera", + "description": "Configure camera settings including stream inputs and roles.", + "name": "Camera Name", + "nameRequired": "Camera name is required", + "nameInvalid": "Camera name must contain only letters, numbers, underscores, or hyphens", + "namePlaceholder": "e.g., front_door", + "enabled": "Enabled", + "ffmpeg": { + "inputs": "Input Streams", + "path": "Stream Path", + "pathRequired": "Stream path is required", + "pathPlaceholder": "rtsp://...", + "roles": "Roles", + "rolesRequired": "At least one role is required", + "rolesUnique": "Each role (audio, detect, record) can only be assigned to one stream", + "addInput": "Add Input Stream", + "removeInput": "Remove Input Stream", + "inputsRequired": "At least one input stream is required" + }, + "toast": { + "success": "Camera {{cameraName}} saved successfully" + } } }, "masksAndZones": { @@ -410,6 +447,11 @@ "desc": "Show a box of the region of interest sent to the object detector", "tips": "
Region Boxes: Bright green boxes will be overlaid on areas of interest in the frame that are being sent to the object detector.
" }, + "paths": { + "title": "Paths", + "desc": "Show significant points of the tracked object's path", + "tips": "
Paths: Lines and circles indicate the significant points the tracked object moved through during its lifecycle.
" + }, "objectShapeFilterDrawing": { "title": "Object Shape Filter Drawing", "desc": "Draw a rectangle on the image to view area and ratio details", @@ -615,5 +657,100 @@ "success": "Frigate+ settings have been saved. Restart Frigate to apply changes.", "error": "Failed to save config changes: {{errorMessage}}" } + }, + "triggers": { + "documentTitle": "Triggers", + "management": { + "title": "Trigger Management", + "desc": "Manage triggers for {{camera}}. Use the thumbnail type to trigger on similar thumbnails to your selected tracked object, and the description type to trigger on similar descriptions to text you specify." + }, + "addTrigger": "Add Trigger", + "table": { + "name": "Name", + "type": "Type", + "content": "Content", + "threshold": "Threshold", + "actions": "Actions", + "noTriggers": "No triggers configured for this camera.", + "edit": "Edit", + "deleteTrigger": "Delete Trigger", + "lastTriggered": "Last triggered" + }, + "type": { + "thumbnail": "Thumbnail", + "description": "Description" + }, + "actions": { + "alert": "Mark as Alert", + "notification": "Send Notification" + }, + "dialog": { + "createTrigger": { + "title": "Create Trigger", + "desc": "Create a trigger for camera {{camera}}" + }, + "editTrigger": { + "title": "Edit Trigger", + "desc": "Edit the settings for trigger on camera {{camera}}" + }, + "deleteTrigger": { + "title": "Delete Trigger", + "desc": "Are you sure you want to delete the trigger {{triggerName}}? This action cannot be undone." + }, + "form": { + "name": { + "title": "Name", + "placeholder": "Enter trigger name", + "error": { + "minLength": "Name must be at least 2 characters long.", + "invalidCharacters": "Name can only contain letters, numbers, underscores, and hyphens.", + "alreadyExists": "A trigger with this name already exists for this camera." + } + }, + "enabled": { + "description": "Enable or disable this trigger" + }, + "type": { + "title": "Type", + "placeholder": "Select trigger type" + }, + "content": { + "title": "Content", + "imagePlaceholder": "Select an image", + "textPlaceholder": "Enter text content", + "imageDesc": "Select an image to trigger this action when a similar image is detected.", + "textDesc": "Enter text to trigger this action when a similar tracked object description is detected.", + "error": { + "required": "Content is required." + } + }, + "threshold": { + "title": "Threshold", + "error": { + "min": "Threshold must be at least 0", + "max": "Threshold must be at most 1" + } + }, + "actions": { + "title": "Actions", + "desc": "By default, Frigate fires an MQTT message for all triggers. Choose an additional action to perform when this trigger fires.", + "error": { + "min": "At least one action must be selected." + } + } + } + }, + "toast": { + "success": { + "createTrigger": "Trigger {{name}} created successfully.", + "updateTrigger": "Trigger {{name}} updated successfully.", + "deleteTrigger": "Trigger {{name}} deleted successfully." 
+ }, + "error": { + "createTriggerFailed": "Failed to create trigger: {{errorMessage}}", + "updateTriggerFailed": "Failed to update trigger: {{errorMessage}}", + "deleteTriggerFailed": "Failed to delete trigger: {{errorMessage}}" + } + } } } diff --git a/web/src/App.tsx b/web/src/App.tsx index a0062549f..cd7906e97 100644 --- a/web/src/App.tsx +++ b/web/src/App.tsx @@ -12,6 +12,8 @@ import { cn } from "./lib/utils"; import { isPWA } from "./utils/isPWA"; import ProtectedRoute from "@/components/auth/ProtectedRoute"; import { AuthProvider } from "@/context/auth-context"; +import useSWR from "swr"; +import { FrigateConfig } from "./types/frigateConfig"; const Live = lazy(() => import("@/pages/Live")); const Events = lazy(() => import("@/pages/Events")); @@ -22,56 +24,21 @@ const System = lazy(() => import("@/pages/System")); const Settings = lazy(() => import("@/pages/Settings")); const UIPlayground = lazy(() => import("@/pages/UIPlayground")); const FaceLibrary = lazy(() => import("@/pages/FaceLibrary")); +const Classification = lazy(() => import("@/pages/ClassificationModel")); const Logs = lazy(() => import("@/pages/Logs")); const AccessDenied = lazy(() => import("@/pages/AccessDenied")); function App() { + const { data: config } = useSWR("config", { + revalidateOnFocus: false, + }); + return ( -
-          {isDesktop && <Sidebar />}
-          {isDesktop && <Statusbar />}
-          {isMobile && <Bottombar />}
-          [deleted JSX: two Suspense-wrapped route groups, including a ProtectedRoute block, covering Live, Events, Exports, SubmissionReview, System, Settings, UIPlayground, FaceLibrary, Logs, and AccessDenied]
+          {config?.safe_mode ? <SafeAppView /> : <DefaultAppView />}
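The safe_mode gate above pairs with the "Config Editor (Safe Mode)" strings added to configEditor.json earlier in this diff: when config validation fails, the backend evidently starts with a flag set so the frontend can fall back to a config-editor-only view. A minimal, self-contained sketch of that pattern in Python (illustrative only; AppConfig, mqtt_host, and load_config are hypothetical names, not Frigate's actual loader):

import logging

from pydantic import BaseModel, ValidationError

logger = logging.getLogger(__name__)


class AppConfig(BaseModel):
    """Stand-in for the real config model; mqtt_host is a hypothetical required field."""

    mqtt_host: str
    safe_mode: bool = False


def load_config(raw: dict) -> AppConfig:
    try:
        return AppConfig(**raw)
    except ValidationError as err:
        logger.error("Config validation failed, starting in safe mode: %s", err)
        # Fall back to known-good defaults with safe_mode set, so the
        # frontend renders SafeAppView (config editor only) instead of failing.
        return AppConfig(mqtt_host="127.0.0.1", safe_mode=True)


config = load_config({})  # missing required field -> safe mode
assert config.safe_mode

The UI side then needs only the single boolean branch shown above; none of the lazy-loaded pages are imported on the safe-mode path.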
@@ -79,4 +46,62 @@ function App() { ); } +function DefaultAppView() { + return ( +
+      {isDesktop && <Sidebar />}
+      {isDesktop && <Statusbar />}
+      {isMobile && <Bottombar />}
+      [added JSX: the same route groups as before, plus a route for the new lazy-loaded Classification page]
+ ); +} + +function SafeAppView() { + return ( +
+      [added JSX: a minimal safe-mode layout that renders only the config editor]
+ ); +} + export default App; diff --git a/web/src/api/ws.tsx b/web/src/api/ws.tsx index 3e9c8c14f..0cef235a0 100644 --- a/web/src/api/ws.tsx +++ b/web/src/api/ws.tsx @@ -8,6 +8,8 @@ import { FrigateReview, ModelState, ToggleableSetting, + TrackedObjectUpdateReturnType, + TriggerStatus, } from "@/types/ws"; import { FrigateStats } from "@/types/stats"; import { createContainer } from "react-tracked"; @@ -60,17 +62,23 @@ function useValue(): useValueReturn { enabled, snapshots, audio, + audio_transcription, notifications, notifications_suspended, autotracking, alerts, detections, + object_descriptions, + review_descriptions, } = state["config"]; cameraStates[`${name}/recordings/state`] = record ? "ON" : "OFF"; cameraStates[`${name}/enabled/state`] = enabled ? "ON" : "OFF"; cameraStates[`${name}/detect/state`] = detect ? "ON" : "OFF"; cameraStates[`${name}/snapshots/state`] = snapshots ? "ON" : "OFF"; cameraStates[`${name}/audio/state`] = audio ? "ON" : "OFF"; + cameraStates[`${name}/audio_transcription/state`] = audio_transcription + ? "ON" + : "OFF"; cameraStates[`${name}/notifications/state`] = notifications ? "ON" : "OFF"; @@ -83,6 +91,12 @@ function useValue(): useValueReturn { cameraStates[`${name}/review_detections/state`] = detections ? "ON" : "OFF"; + cameraStates[`${name}/object_descriptions/state`] = object_descriptions + ? "ON" + : "OFF"; + cameraStates[`${name}/review_descriptions/state`] = review_descriptions + ? "ON" + : "OFF"; }); setWsState((prevState) => ({ @@ -220,6 +234,20 @@ export function useAudioState(camera: string): { return { payload: payload as ToggleableSetting, send }; } +export function useAudioTranscriptionState(camera: string): { + payload: ToggleableSetting; + send: (payload: ToggleableSetting, retain?: boolean) => void; +} { + const { + value: { payload }, + send, + } = useWs( + `${camera}/audio_transcription/state`, + `${camera}/audio_transcription/set`, + ); + return { payload: payload as ToggleableSetting, send }; +} + export function useAutotrackingState(camera: string): { payload: ToggleableSetting; send: (payload: ToggleableSetting, retain?: boolean) => void; @@ -256,6 +284,34 @@ export function useDetectionsState(camera: string): { return { payload: payload as ToggleableSetting, send }; } +export function useObjectDescriptionState(camera: string): { + payload: ToggleableSetting; + send: (payload: ToggleableSetting, retain?: boolean) => void; +} { + const { + value: { payload }, + send, + } = useWs( + `${camera}/object_descriptions/state`, + `${camera}/object_descriptions/set`, + ); + return { payload: payload as ToggleableSetting, send }; +} + +export function useReviewDescriptionState(camera: string): { + payload: ToggleableSetting; + send: (payload: ToggleableSetting, retain?: boolean) => void; +} { + const { + value: { payload }, + send, + } = useWs( + `${camera}/review_descriptions/state`, + `${camera}/review_descriptions/set`, + ); + return { payload: payload as ToggleableSetting, send }; +} + export function usePtzCommand(camera: string): { payload: string; send: (payload: string, retain?: boolean) => void; @@ -407,6 +463,40 @@ export function useEmbeddingsReindexProgress( return { payload: data }; } +export function useBirdseyeLayout(revalidateOnFocus: boolean = true): { + payload: string; +} { + const { + value: { payload }, + send: sendCommand, + } = useWs("birdseye_layout", "birdseyeLayout"); + + const data = useDeepMemo(JSON.parse(payload as string)); + + useEffect(() => { + let listener = undefined; + if (revalidateOnFocus) { + 
sendCommand("birdseyeLayout"); + listener = () => { + if (document.visibilityState == "visible") { + sendCommand("birdseyeLayout"); + } + }; + addEventListener("visibilitychange", listener); + } + + return () => { + if (listener) { + removeEventListener("visibilitychange", listener); + } + }; + // we know that these deps are correct + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [revalidateOnFocus]); + + return { payload: data }; +} + export function useMotionActivity(camera: string): { payload: string } { const { value: { payload }, @@ -421,6 +511,15 @@ export function useAudioActivity(camera: string): { payload: number } { return { payload: payload as number }; } +export function useAudioLiveTranscription(camera: string): { + payload: string; +} { + const { + value: { payload }, + } = useWs(`${camera}/audio/transcription`, ""); + return { payload: payload as string }; +} + export function useMotionThreshold(camera: string): { payload: string; send: (payload: number, retain?: boolean) => void; @@ -463,11 +562,16 @@ export function useImproveContrast(camera: string): { return { payload: payload as ToggleableSetting, send }; } -export function useTrackedObjectUpdate(): { payload: string } { +export function useTrackedObjectUpdate(): { + payload: TrackedObjectUpdateReturnType; +} { const { value: { payload }, } = useWs("tracked_object_update", ""); - return useDeepMemo(JSON.parse(payload as string)); + const parsed = payload + ? JSON.parse(payload as string) + : { type: "", id: "", camera: "" }; + return { payload: useDeepMemo(parsed) }; } export function useNotifications(camera: string): { @@ -505,3 +609,13 @@ export function useNotificationTest(): { } = useWs("notification_test", "notification_test"); return { payload: payload as string, send }; } + +export function useTriggers(): { payload: TriggerStatus } { + const { + value: { payload }, + } = useWs("triggers", ""); + const parsed = payload + ? JSON.parse(payload as string) + : { name: "", camera: "", event_id: "", type: "", score: 0 }; + return { payload: useDeepMemo(parsed) }; +} diff --git a/web/src/components/camera/DebugCameraImage.tsx b/web/src/components/camera/DebugCameraImage.tsx index 3d840d0d3..bc3b6a8c3 100644 --- a/web/src/components/camera/DebugCameraImage.tsx +++ b/web/src/components/camera/DebugCameraImage.tsx @@ -158,6 +158,16 @@ function DebugSettings({ handleSetOption, options }: DebugSettingsProps) { /> +
+ { + handleSetOption("paths", isChecked); + }} + /> + +
); } diff --git a/web/src/components/card/AnimatedEventCard.tsx b/web/src/components/card/AnimatedEventCard.tsx index d46509eb6..a04804641 100644 --- a/web/src/components/card/AnimatedEventCard.tsx +++ b/web/src/components/card/AnimatedEventCard.tsx @@ -91,7 +91,10 @@ export function AnimatedEventCard({ // image behavior - const [alertVideos] = usePersistence("alertVideos", true); + const [alertVideos, _, alertVideosLoaded] = usePersistence( + "alertVideos", + true, + ); const aspectRatio = useMemo(() => { if ( @@ -135,7 +138,7 @@ export function AnimatedEventCard({ {t("markAsReviewed")} )} - {previews != undefined && ( + {previews != undefined && alertVideosLoaded && (
void; showObjectLifecycle: () => void; showSnapshot: () => void; + addTrigger: () => void; }; export default function SearchThumbnailFooter({ @@ -24,6 +25,7 @@ export default function SearchThumbnailFooter({ refreshResults, showObjectLifecycle, showSnapshot, + addTrigger, }: SearchThumbnailProps) { const { t } = useTranslation(["views/search"]); const { data: config } = useSWR("config"); @@ -61,6 +63,7 @@ export default function SearchThumbnailFooter({ refreshResults={refreshResults} showObjectLifecycle={showObjectLifecycle} showSnapshot={showSnapshot} + addTrigger={addTrigger} />
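Taken together, the addTrigger plumbing above, the trigger table created by migration 031, and the {name, camera, event_id, type, score} payload parsed by useTriggers outline the trigger lifecycle. A rough sketch of the implied server-side matching, assuming cosine similarity between a tracked object's embedding and each trigger's stored embedding (hypothetical code, not Frigate's actual implementation):

from dataclasses import dataclass

import numpy as np


@dataclass
class Trigger:
    camera: str
    name: str
    type: str         # "thumbnail" or "description"
    threshold: float  # 0..1, matching the dialog's validation range
    embedding: np.ndarray


def evaluate_triggers(
    camera: str, event_id: str, embedding: np.ndarray, triggers: list[Trigger]
) -> list[dict]:
    # Compare the tracked object's embedding against each trigger for this
    # camera and fire when the cosine similarity clears the threshold.
    fired = []
    for trigger in triggers:
        if trigger.camera != camera:
            continue
        score = float(
            np.dot(embedding, trigger.embedding)
            / (np.linalg.norm(embedding) * np.linalg.norm(trigger.embedding))
        )
        if score >= trigger.threshold:
            # Shaped like the "triggers" websocket messages in ws.tsx.
            fired.append(
                {
                    "name": trigger.name,
                    "camera": camera,
                    "event_id": event_id,
                    "type": trigger.type,
                    "score": score,
                }
            )
    return fired


embedding = np.random.rand(768)
triggers = [Trigger("front_door", "red_truck", "thumbnail", 0.5, embedding)]
print(evaluate_triggers("front_door", "1678900000.12345-abc123", embedding, triggers))

The threshold comparison mirrors the 0-1 bound enforced by the CreateTriggerDialog zod schema below, and a score of 1.0 (identical embeddings) always fires.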
diff --git a/web/src/components/filter/SearchFilterGroup.tsx b/web/src/components/filter/SearchFilterGroup.tsx index 198f27b63..1702fcc2a 100644 --- a/web/src/components/filter/SearchFilterGroup.tsx +++ b/web/src/components/filter/SearchFilterGroup.tsx @@ -131,10 +131,7 @@ export default function SearchFilterGroup({ ); const availableSortTypes = useMemo(() => { - const sortTypes = ["date_asc", "date_desc"]; - if (filter?.min_score || filter?.max_score) { - sortTypes.push("score_desc", "score_asc"); - } + const sortTypes = ["date_asc", "date_desc", "score_desc", "score_asc"]; if (filter?.min_speed || filter?.max_speed) { sortTypes.push("speed_desc", "speed_asc"); } diff --git a/web/src/components/menu/SearchResultActions.tsx b/web/src/components/menu/SearchResultActions.tsx index 1779430f0..2c928becf 100644 --- a/web/src/components/menu/SearchResultActions.tsx +++ b/web/src/components/menu/SearchResultActions.tsx @@ -41,6 +41,7 @@ import { import useSWR from "swr"; import { Trans, useTranslation } from "react-i18next"; +import { BsFillLightningFill } from "react-icons/bs"; type SearchResultActionsProps = { searchResult: SearchResult; @@ -48,6 +49,7 @@ type SearchResultActionsProps = { refreshResults: () => void; showObjectLifecycle: () => void; showSnapshot: () => void; + addTrigger: () => void; isContextMenu?: boolean; children?: ReactNode; }; @@ -58,6 +60,7 @@ export default function SearchResultActions({ refreshResults, showObjectLifecycle, showSnapshot, + addTrigger, isContextMenu = false, children, }: SearchResultActionsProps) { @@ -138,6 +141,16 @@ export default function SearchResultActions({ {t("itemMenu.findSimilar.label")} )} + {config?.semantic_search?.enabled && + searchResult.data.type == "object" && ( + + + {t("itemMenu.addTrigger.label")} + + )} {isMobileOnly && config?.plus?.enabled && searchResult.has_snapshot && diff --git a/web/src/components/overlay/ClassificationSelectionDialog.tsx b/web/src/components/overlay/ClassificationSelectionDialog.tsx new file mode 100644 index 000000000..f86ced19a --- /dev/null +++ b/web/src/components/overlay/ClassificationSelectionDialog.tsx @@ -0,0 +1,155 @@ +import { + Drawer, + DrawerClose, + DrawerContent, + DrawerDescription, + DrawerHeader, + DrawerTitle, + DrawerTrigger, +} from "@/components/ui/drawer"; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu"; +import { + Tooltip, + TooltipContent, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { isDesktop, isMobile } from "react-device-detect"; +import { LuPlus } from "react-icons/lu"; +import { useTranslation } from "react-i18next"; +import { cn } from "@/lib/utils"; +import React, { ReactNode, useCallback, useMemo, useState } from "react"; +import TextEntryDialog from "./dialog/TextEntryDialog"; +import { Button } from "../ui/button"; +import { MdCategory } from "react-icons/md"; +import axios from "axios"; +import { toast } from "sonner"; + +type ClassificationSelectionDialogProps = { + className?: string; + classes: string[]; + modelName: string; + image: string; + onRefresh: () => void; + children: ReactNode; +}; +export default function ClassificationSelectionDialog({ + className, + classes, + modelName, + image, + onRefresh, + children, +}: ClassificationSelectionDialogProps) { + const { t } = useTranslation(["views/classificationModel"]); + + const onCategorizeImage = useCallback( + (category: string) => { + axios + 
.post(`/classification/${modelName}/dataset/categorize`, { + category, + training_file: image, + }) + .then((resp) => { + if (resp.status == 200) { + toast.success(t("toast.success.categorizedImage"), { + position: "top-center", + }); + onRefresh(); + } + }) + .catch((error) => { + const errorMessage = + error.response?.data?.message || + error.response?.data?.detail || + "Unknown error"; + toast.error(t("toast.error.categorizeFailed", { errorMessage }), { + position: "top-center", + }); + }); + }, + [modelName, image, onRefresh, t], + ); + + const isChildButton = useMemo( + () => React.isValidElement(children) && children.type === Button, + [children], + ); + + // control + const [newClass, setNewClass] = useState(false); + + // components + const Selector = isDesktop ? DropdownMenu : Drawer; + const SelectorTrigger = isDesktop ? DropdownMenuTrigger : DrawerTrigger; + const SelectorContent = isDesktop ? DropdownMenuContent : DrawerContent; + const SelectorItem = isDesktop + ? DropdownMenuItem + : (props: React.HTMLAttributes) => ( + +
+ + ); + + return ( +
+ {newClass && ( + onCategorizeImage(newCat)} + /> + )} + + + + + {children} + + + {isMobile && ( + + Details + Details + + )} + {t("categorizeImageAs")} +
+ setNewClass(true)} + > + + {t("createCategory.new")} + + {classes.sort().map((category) => ( + onCategorizeImage(category)} + > + + {category.replaceAll("_", " ")} + + ))} +
+
+
+ {t("categorizeImage")} +
+
+ ); +} diff --git a/web/src/components/overlay/CreateTriggerDialog.tsx b/web/src/components/overlay/CreateTriggerDialog.tsx new file mode 100644 index 000000000..5672c4802 --- /dev/null +++ b/web/src/components/overlay/CreateTriggerDialog.tsx @@ -0,0 +1,416 @@ +import { useEffect, useMemo } from "react"; +import { useTranslation } from "react-i18next"; +import { useForm } from "react-hook-form"; +import { zodResolver } from "@hookform/resolvers/zod"; +import { z } from "zod"; +import useSWR from "swr"; +import { + Dialog, + DialogContent, + DialogDescription, + DialogFooter, + DialogHeader, + DialogTitle, +} from "@/components/ui/dialog"; +import { + Form, + FormControl, + FormDescription, + FormField, + FormItem, + FormLabel, + FormMessage, +} from "@/components/ui/form"; +import { Input } from "@/components/ui/input"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Button } from "@/components/ui/button"; +import ActivityIndicator from "@/components/indicators/activity-indicator"; +import { FrigateConfig } from "@/types/frigateConfig"; +import ImagePicker from "@/components/overlay/ImagePicker"; +import { Trigger, TriggerAction, TriggerType } from "@/types/trigger"; +import { Switch } from "@/components/ui/switch"; +import { Textarea } from "../ui/textarea"; + +type CreateTriggerDialogProps = { + show: boolean; + trigger: Trigger | null; + selectedCamera: string; + isLoading: boolean; + onCreate: ( + enabled: boolean, + name: string, + type: TriggerType, + data: string, + threshold: number, + actions: TriggerAction[], + ) => void; + onEdit: (trigger: Trigger) => void; + onCancel: () => void; +}; + +export default function CreateTriggerDialog({ + show, + trigger, + selectedCamera, + isLoading, + onCreate, + onEdit, + onCancel, +}: CreateTriggerDialogProps) { + const { t } = useTranslation("views/settings"); + const { data: config } = useSWR("config"); + + const existingTriggerNames = useMemo(() => { + if ( + !config || + !selectedCamera || + !config.cameras[selectedCamera]?.semantic_search?.triggers + ) { + return []; + } + return Object.keys(config.cameras[selectedCamera].semantic_search.triggers); + }, [config, selectedCamera]); + + const formSchema = z.object({ + enabled: z.boolean(), + name: z + .string() + .min(2, t("triggers.dialog.form.name.error.minLength")) + .regex( + /^[a-zA-Z0-9_-]+$/, + t("triggers.dialog.form.name.error.invalidCharacters"), + ) + .refine( + (value) => + !existingTriggerNames.includes(value) || value === trigger?.name, + t("triggers.dialog.form.name.error.alreadyExists"), + ), + type: z.enum(["thumbnail", "description"]), + data: z.string().min(1, t("triggers.dialog.form.content.error.required")), + threshold: z + .number() + .min(0, t("triggers.dialog.form.threshold.error.min")) + .max(1, t("triggers.dialog.form.threshold.error.max")), + actions: z.array(z.enum(["notification"])), + }); + + const form = useForm>({ + resolver: zodResolver(formSchema), + mode: "onChange", + defaultValues: { + enabled: trigger?.enabled ?? true, + name: trigger?.name ?? "", + type: trigger?.type ?? "description", + data: trigger?.data ?? "", + threshold: trigger?.threshold ?? 0.5, + actions: trigger?.actions ?? 
[], + }, + }); + + const onSubmit = async (values: z.infer) => { + if (trigger) { + onEdit({ ...values }); + } else { + onCreate( + values.enabled, + values.name, + values.type, + values.data, + values.threshold, + values.actions, + ); + } + }; + + useEffect(() => { + if (!show) { + form.reset({ + enabled: true, + name: "", + type: "description", + data: "", + threshold: 0.5, + actions: [], + }); + } else if (trigger) { + form.reset( + { + enabled: trigger.enabled, + name: trigger.name, + type: trigger.type, + data: trigger.data, + threshold: trigger.threshold, + actions: trigger.actions, + }, + { keepDirty: false, keepTouched: false }, // Reset validation state + ); + // Trigger validation to ensure isValid updates + // form.trigger(); + } + }, [show, trigger, form]); + + const handleCancel = () => { + form.reset(); + onCancel(); + }; + + return ( + + + + + {t( + trigger + ? "triggers.dialog.editTrigger.title" + : "triggers.dialog.createTrigger.title", + )} + + + {t( + trigger + ? "triggers.dialog.editTrigger.desc" + : "triggers.dialog.createTrigger.desc", + { camera: selectedCamera }, + )} + + + +
+ + ( + + {t("triggers.dialog.form.name.title")} + + + + + + )} + /> + + ( + +
+ + {t("enabled", { ns: "common" })} + +
+ {t("triggers.dialog.form.enabled.description")} +
+
+ + + +
+ )} + /> + + ( + + {t("triggers.dialog.form.type.title")} + + + + )} + /> + + ( + + + {t("triggers.dialog.form.content.title")} + + {form.watch("type") === "thumbnail" ? ( + <> + + + + + {t("triggers.dialog.form.content.imageDesc")} + + + ) : ( + <> + +