From 3a258d758a31b83d89ebeec40e26059440ca7578 Mon Sep 17 00:00:00 2001 From: Ryan Gregg Date: Sat, 14 Mar 2026 03:41:12 +0000 Subject: [PATCH] Add drag-to-zoom and always-on RelativeMove for PTZ cameras MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Frontend: - Drag a rectangle on the live view to zoom into that region - Click to move to a point without zooming (unchanged behavior) - Blue rectangle overlay shown while dragging (>20px threshold) - Box expanded to camera aspect ratio before calculating zoom - TransformWrapper panning disabled when click-to-move is active - Touch support included Backend: - RelativeMove/FOV capability detection no longer requires autotracking to be enabled — works for all PTZ cameras - GetConfigurationOptions and RelativeMove request setup runs whenever the camera supports it - move_relative command accepts optional zoom parameter - Properly clears residual zoom from zeep move_request object between calls to prevent unwanted zoom on click-to-move - Includes Dockerfile.local for easy local testing by layering changes on the official Frigate image Co-Authored-By: Claude Opus 4.6 (1M context) --- frigate/ptz/onvif.py | 157 +++++++++++---------- web/src/views/live/LiveCameraView.tsx | 194 +++++++++++++++++++++++--- 2 files changed, 251 insertions(+), 100 deletions(-) diff --git a/frigate/ptz/onvif.py b/frigate/ptz/onvif.py index 488dbd278..be3a83bb7 100644 --- a/frigate/ptz/onvif.py +++ b/frigate/ptz/onvif.py @@ -218,11 +218,10 @@ class OnvifController: move_request.ProfileToken = profile.token self.cams[camera_name]["move_request"] = move_request - # extra setup for autotracking cameras - if ( - self.config.cameras[camera_name].onvif.autotracking.enabled_in_config - and self.config.cameras[camera_name].onvif.autotracking.enabled - ): + # Probe PTZ configuration options for RelativeMove/AbsoluteMove support. + # This is needed for click-to-move, drag-to-zoom, and autotracking. + fov_space_id = None + try: request = ptz.create_type("GetConfigurationOptions") request.ConfigurationToken = profile.PTZConfiguration.token ptz_config = await ptz.GetConfigurationOptions(request) @@ -244,7 +243,7 @@ class OnvifController: None, ) - # status request for autotracking and filling ptz-parameters + # status request for position reporting status_request = ptz.create_type("GetStatus") status_request.ProfileToken = profile.token self.cams[camera_name]["status_request"] = status_request @@ -255,77 +254,66 @@ class OnvifController: logger.warning(f"Unable to get status from camera: {camera_name}: {e}") status = None - # autotracking relative panning/tilting needs a relative zoom value set to 0 - # if camera supports relative movement - if ( - self.config.cameras[camera_name].onvif.autotracking.zooming - != ZoomingModeEnum.disabled - ): - zoom_space_id = next( - ( - i - for i, space in enumerate( - ptz_config.Spaces.RelativeZoomTranslationSpace - ) - if "TranslationGenericSpace" in space["URI"] - ), - None, - ) + # Check for relative zoom support + zoom_space_id = next( + ( + i + for i, space in enumerate( + ptz_config.Spaces.RelativeZoomTranslationSpace + ) + if "TranslationGenericSpace" in space["URI"] + ), + None, + ) - # setup relative moving request for autotracking - move_request = ptz.create_type("RelativeMove") - move_request.ProfileToken = profile.token - logger.debug(f"{camera_name}: Relative move request: {move_request}") - if move_request.Translation is None and fov_space_id is not None: - move_request.Translation = status.Position - move_request.Translation.PanTilt.space = ptz_config["Spaces"][ - "RelativePanTiltTranslationSpace" - ][fov_space_id]["URI"] + # setup relative moving request (for click-to-move, drag-to-zoom, and autotracking) + if fov_space_id is not None: + move_request = ptz.create_type("RelativeMove") + move_request.ProfileToken = profile.token + logger.debug(f"{camera_name}: Relative move request: {move_request}") + if move_request.Translation is None: + move_request.Translation = status.Position + move_request.Translation.PanTilt.space = ptz_config["Spaces"][ + "RelativePanTiltTranslationSpace" + ][fov_space_id]["URI"] - # try setting relative zoom translation space - try: - if ( - self.config.cameras[camera_name].onvif.autotracking.zooming - != ZoomingModeEnum.disabled - ): + # set up relative zoom translation space if supported + try: if zoom_space_id is not None: move_request.Translation.Zoom.space = ptz_config["Spaces"][ "RelativeZoomTranslationSpace" ][zoom_space_id]["URI"] - else: - if ( - move_request["Translation"] is not None - and "Zoom" in move_request["Translation"] - ): - del move_request["Translation"]["Zoom"] - if ( - move_request["Speed"] is not None - and "Zoom" in move_request["Speed"] - ): - del move_request["Speed"]["Zoom"] - logger.debug( - f"{camera_name}: Relative move request after deleting zoom: {move_request}" + else: + if ( + move_request["Translation"] is not None + and "Zoom" in move_request["Translation"] + ): + del move_request["Translation"]["Zoom"] + if ( + move_request["Speed"] is not None + and "Zoom" in move_request["Speed"] + ): + del move_request["Speed"]["Zoom"] + except Exception as e: + logger.warning( + f"Relative zoom not supported for {camera_name}: {e}" ) - except Exception as e: - self.config.cameras[ - camera_name - ].onvif.autotracking.zooming = ZoomingModeEnum.disabled - logger.warning( - f"Disabling autotracking zooming for {camera_name}: Relative zoom not supported. Exception: {e}" + + if move_request.Speed is None: + move_request.Speed = configs.DefaultPTZSpeed if configs else None + logger.debug( + f"{camera_name}: Relative move request after setup: {move_request}" ) + self.cams[camera_name]["relative_move_request"] = move_request - if move_request.Speed is None: - move_request.Speed = configs.DefaultPTZSpeed if configs else None - logger.debug( - f"{camera_name}: Relative move request after setup: {move_request}" - ) - self.cams[camera_name]["relative_move_request"] = move_request - - # setup absolute moving request for autotracking zooming + # setup absolute moving request (for zoom positioning) move_request = ptz.create_type("AbsoluteMove") move_request.ProfileToken = profile.token self.cams[camera_name]["absolute_move_request"] = move_request + except (Fault, ONVIFError, TransportError, Exception) as e: + logger.debug(f"PTZ configuration options not available for {camera_name}: {e}") + # setup existing presets try: presets: list[dict] = await ptz.GetPresets({"ProfileToken": profile.token}) @@ -417,9 +405,7 @@ class OnvifController: logger.debug(f"Focus not supported for {camera_name}: {e}") if ( - self.config.cameras[camera_name].onvif.autotracking.enabled_in_config - and self.config.cameras[camera_name].onvif.autotracking.enabled - and fov_space_id is not None + fov_space_id is not None and configs.DefaultRelativePanTiltTranslationSpace is not None ): supported_features.append("pt-r-fov") @@ -550,8 +536,11 @@ class OnvifController: if ( "zoom-r" in self.cams[camera_name]["features"] - and self.config.cameras[camera_name].onvif.autotracking.zooming - == ZoomingModeEnum.relative + and ( + zoom != 0 + or self.config.cameras[camera_name].onvif.autotracking.zooming + == ZoomingModeEnum.relative + ) ): move_request.Speed = { "PanTilt": { @@ -561,6 +550,19 @@ class OnvifController: "Zoom": {"x": speed}, } move_request.Translation.Zoom.x = zoom + else: + move_request.Speed = { + "PanTilt": { + "x": speed, + "y": speed, + }, + } + # Ensure no residual zoom from previous calls + try: + if move_request.Translation is not None: + move_request.Translation.Zoom = None + except (AttributeError, KeyError): + pass await self.cams[camera_name]["ptz"].RelativeMove(move_request) @@ -568,12 +570,11 @@ class OnvifController: move_request.Translation.PanTilt.x = 0 move_request.Translation.PanTilt.y = 0 - if ( - "zoom-r" in self.cams[camera_name]["features"] - and self.config.cameras[camera_name].onvif.autotracking.zooming - == ZoomingModeEnum.relative - ): - move_request.Translation.Zoom.x = 0 + try: + if move_request.Translation.Zoom is not None: + move_request.Translation.Zoom.x = 0 + except (AttributeError, KeyError): + pass self.cams[camera_name]["active"] = False @@ -717,8 +718,10 @@ class OnvifController: elif command == OnvifCommandEnum.preset: await self._move_to_preset(camera_name, param) elif command == OnvifCommandEnum.move_relative: - _, pan, tilt = param.split("_") - await self._move_relative(camera_name, float(pan), float(tilt), 0, 1) + parts = param.split("_") + _, pan, tilt = parts[0], parts[1], parts[2] + zoom = float(parts[3]) if len(parts) > 3 else 0 + await self._move_relative(camera_name, float(pan), float(tilt), zoom, 1) elif command in (OnvifCommandEnum.zoom_in, OnvifCommandEnum.zoom_out): await self._zoom(camera_name, command) elif command in (OnvifCommandEnum.focus_in, OnvifCommandEnum.focus_out): diff --git a/web/src/views/live/LiveCameraView.tsx b/web/src/views/live/LiveCameraView.tsx index 418c74068..3a2cd6c95 100644 --- a/web/src/views/live/LiveCameraView.tsx +++ b/web/src/views/live/LiveCameraView.tsx @@ -213,45 +213,156 @@ export default function LiveCameraView({ }; }, [audioTranscriptionState, sendTranscription]); - // click overlay for ptzs + // click/drag overlay for ptzs const [clickOverlay, setClickOverlay] = useState(false); const clickOverlayRef = useRef(null); const { send: sendPtz } = usePtzCommand(camera.name); - const handleOverlayClick = useCallback( - ( - e: React.MouseEvent | React.TouchEvent, - ) => { - if (!clickOverlay) { + // Drag-to-zoom state + const [dragStart, setDragStart] = useState<{ + x: number; + y: number; + } | null>(null); + const [dragCurrent, setDragCurrent] = useState<{ + x: number; + y: number; + } | null>(null); + const isDragging = dragStart !== null && dragCurrent !== null; + + const getClientPos = ( + e: React.MouseEvent | React.TouchEvent, + ): { x: number; y: number } | null => { + if ("TouchEvent" in window && e.nativeEvent instanceof TouchEvent) { + const touch = e.nativeEvent.touches[0] || e.nativeEvent.changedTouches[0]; + if (touch) return { x: touch.clientX, y: touch.clientY }; + } else if (e.nativeEvent instanceof MouseEvent) { + return { x: e.nativeEvent.clientX, y: e.nativeEvent.clientY }; + } + return null; + }; + + const handleOverlayMouseDown = useCallback( + (e: React.MouseEvent | React.TouchEvent) => { + if (!clickOverlay || !clickOverlayRef.current) return; + e.preventDefault(); + const pos = getClientPos(e); + if (pos) { + setDragStart(pos); + setDragCurrent(pos); + } + }, + [clickOverlay], + ); + + const handleOverlayMouseMove = useCallback( + (e: React.MouseEvent | React.TouchEvent) => { + if (!dragStart) return; + e.preventDefault(); + const pos = getClientPos(e); + if (pos) { + setDragCurrent(pos); + } + }, + [dragStart], + ); + + const handleOverlayMouseUp = useCallback( + (e: React.MouseEvent | React.TouchEvent) => { + if (!clickOverlay || !clickOverlayRef.current || !dragStart) { + setDragStart(null); + setDragCurrent(null); return; } - let clientX; - let clientY; - if ("TouchEvent" in window && e.nativeEvent instanceof TouchEvent) { - clientX = e.nativeEvent.touches[0].clientX; - clientY = e.nativeEvent.touches[0].clientY; - } else if (e.nativeEvent instanceof MouseEvent) { - clientX = e.nativeEvent.clientX; - clientY = e.nativeEvent.clientY; + const pos = getClientPos(e); + if (!pos) { + setDragStart(null); + setDragCurrent(null); + return; } - if (clickOverlayRef.current && clientX && clientY) { - const rect = clickOverlayRef.current.getBoundingClientRect(); - - const normalizedX = (clientX - rect.left) / rect.width; - const normalizedY = (clientY - rect.top) / rect.height; + const rect = clickOverlayRef.current.getBoundingClientRect(); + const dx = Math.abs(pos.x - dragStart.x); + const dy = Math.abs(pos.y - dragStart.y); + // Minimum drag distance of 20px to distinguish from click + if (dx < 20 && dy < 20) { + // Click (not drag) — move to point without zoom + const normalizedX = (pos.x - rect.left) / rect.width; + const normalizedY = (pos.y - rect.top) / rect.height; const pan = (normalizedX - 0.5) * 2; const tilt = (0.5 - normalizedY) * 2; - sendPtz(`move_relative_${pan}_${tilt}`); + } else { + // Drag — zoom to rectangle + const x1 = Math.min(dragStart.x, pos.x); + const y1 = Math.min(dragStart.y, pos.y); + const x2 = Math.max(dragStart.x, pos.x); + const y2 = Math.max(dragStart.y, pos.y); + + // Normalize to 0-1 within the overlay + const normX1 = (x1 - rect.left) / rect.width; + const normY1 = (y1 - rect.top) / rect.height; + const normX2 = (x2 - rect.left) / rect.width; + const normY2 = (y2 - rect.top) / rect.height; + + let boxW = normX2 - normX1; + let boxH = normY2 - normY1; + + // Expand box to match camera aspect ratio + const frameAspect = rect.width / rect.height; + const boxAspect = boxW / boxH; + if (boxAspect > frameAspect) { + // Box is wider than frame aspect — expand height + boxH = boxW / frameAspect; + } else { + // Box is taller — expand width + boxW = boxH * frameAspect; + } + + // Center of the box + const centerX = (normX1 + normX2) / 2; + const centerY = (normY1 + normY2) / 2; + const pan = (centerX - 0.5) * 2; + const tilt = (0.5 - centerY) * 2; + + // Zoom: ratio of box to frame (smaller box = more zoom) + const zoom = 1 - Math.max(boxW, boxH); + const clampedZoom = Math.max(0, Math.min(1, zoom)); + + // Send single command with pan, tilt, and zoom. + // The proxy translates this to a Set3DPos command for + // atomic pan+tilt+zoom in one camera movement. + sendPtz(`move_relative_${pan}_${tilt}_${clampedZoom}`); } + + setDragStart(null); + setDragCurrent(null); }, - [clickOverlayRef, clickOverlay, sendPtz], + [clickOverlayRef, clickOverlay, dragStart, sendPtz], ); + // Calculate drag rectangle for rendering + const dragRect = React.useMemo(() => { + if (!isDragging || !clickOverlayRef.current) return null; + const dx = Math.abs(dragCurrent.x - dragStart.x); + const dy = Math.abs(dragCurrent.y - dragStart.y); + if (dx < 20 && dy < 20) return null; // Don't show rectangle for small movements + + const rect = clickOverlayRef.current.getBoundingClientRect(); + const x1 = Math.min(dragStart.x, dragCurrent.x) - rect.left; + const y1 = Math.min(dragStart.y, dragCurrent.y) - rect.top; + const x2 = Math.max(dragStart.x, dragCurrent.x) - rect.left; + const y2 = Math.max(dragStart.y, dragCurrent.y) - rect.top; + return { + left: x1, + top: y1, + width: x2 - x1, + height: y2 - y1, + }; + }, [isDragging, dragStart, dragCurrent]); + // pip state useEffect(() => { @@ -440,7 +551,8 @@ export default function LiveCameraView({
+ {clickOverlay && ( +
e.preventDefault()} + style={{ + position: "absolute", + top: 0, + left: 0, + width: "100%", + height: "100%", + zIndex: 40, + cursor: "crosshair", + userSelect: "none", + }} + /> + )} + {isDragging && dragRect && clickOverlay && ( +
+ )}