Add drag-to-zoom and always-on RelativeMove for PTZ cameras

Frontend:
- Drag a rectangle on the live view to zoom into that region
- Click to move to a point without zooming (unchanged behavior)
- Blue rectangle overlay shown while dragging, once the drag reaches 20px in either axis
- Box expanded to camera aspect ratio before calculating zoom
- TransformWrapper disabled (including panning) while click-to-move is active
- Touch support included

Backend:
- RelativeMove/FOV capability detection no longer requires
  autotracking to be enabled — works for all PTZ cameras
- GetConfigurationOptions and RelativeMove request setup runs
  whenever the camera supports it
- move_relative command accepts an optional zoom parameter
  (move_relative_<pan>_<tilt>[_<zoom>]; zoom defaults to 0 when omitted)
- Properly clears residual zoom from zeep move_request object
  between calls to prevent unwanted zoom on click-to-move
- Includes Dockerfile.local for easy local testing by layering
  changes on the official Frigate image

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Ryan Gregg 2026-03-14 03:41:12 +00:00
parent dc27d4ad16
commit 3a258d758a
2 changed files with 251 additions and 100 deletions

View File

@ -218,11 +218,10 @@ class OnvifController:
move_request.ProfileToken = profile.token
self.cams[camera_name]["move_request"] = move_request
# extra setup for autotracking cameras
if (
self.config.cameras[camera_name].onvif.autotracking.enabled_in_config
and self.config.cameras[camera_name].onvif.autotracking.enabled
):
# Probe PTZ configuration options for RelativeMove/AbsoluteMove support.
# This is needed for click-to-move, drag-to-zoom, and autotracking.
fov_space_id = None
try:
request = ptz.create_type("GetConfigurationOptions")
request.ConfigurationToken = profile.PTZConfiguration.token
ptz_config = await ptz.GetConfigurationOptions(request)
@ -244,7 +243,7 @@ class OnvifController:
None,
)
# status request for autotracking and filling ptz-parameters
# status request for position reporting
status_request = ptz.create_type("GetStatus")
status_request.ProfileToken = profile.token
self.cams[camera_name]["status_request"] = status_request
@ -255,77 +254,66 @@ class OnvifController:
logger.warning(f"Unable to get status from camera: {camera_name}: {e}")
status = None
# autotracking relative panning/tilting needs a relative zoom value set to 0
# if camera supports relative movement
if (
self.config.cameras[camera_name].onvif.autotracking.zooming
!= ZoomingModeEnum.disabled
):
zoom_space_id = next(
(
i
for i, space in enumerate(
ptz_config.Spaces.RelativeZoomTranslationSpace
)
if "TranslationGenericSpace" in space["URI"]
),
None,
)
# Check for relative zoom support
zoom_space_id = next(
(
i
for i, space in enumerate(
ptz_config.Spaces.RelativeZoomTranslationSpace
)
if "TranslationGenericSpace" in space["URI"]
),
None,
)
# setup relative moving request for autotracking
move_request = ptz.create_type("RelativeMove")
move_request.ProfileToken = profile.token
logger.debug(f"{camera_name}: Relative move request: {move_request}")
if move_request.Translation is None and fov_space_id is not None:
move_request.Translation = status.Position
move_request.Translation.PanTilt.space = ptz_config["Spaces"][
"RelativePanTiltTranslationSpace"
][fov_space_id]["URI"]
# setup relative moving request (for click-to-move, drag-to-zoom, and autotracking)
if fov_space_id is not None:
move_request = ptz.create_type("RelativeMove")
move_request.ProfileToken = profile.token
logger.debug(f"{camera_name}: Relative move request: {move_request}")
if move_request.Translation is None:
move_request.Translation = status.Position
move_request.Translation.PanTilt.space = ptz_config["Spaces"][
"RelativePanTiltTranslationSpace"
][fov_space_id]["URI"]
# try setting relative zoom translation space
try:
if (
self.config.cameras[camera_name].onvif.autotracking.zooming
!= ZoomingModeEnum.disabled
):
# set up relative zoom translation space if supported
try:
if zoom_space_id is not None:
move_request.Translation.Zoom.space = ptz_config["Spaces"][
"RelativeZoomTranslationSpace"
][zoom_space_id]["URI"]
else:
if (
move_request["Translation"] is not None
and "Zoom" in move_request["Translation"]
):
del move_request["Translation"]["Zoom"]
if (
move_request["Speed"] is not None
and "Zoom" in move_request["Speed"]
):
del move_request["Speed"]["Zoom"]
logger.debug(
f"{camera_name}: Relative move request after deleting zoom: {move_request}"
else:
if (
move_request["Translation"] is not None
and "Zoom" in move_request["Translation"]
):
del move_request["Translation"]["Zoom"]
if (
move_request["Speed"] is not None
and "Zoom" in move_request["Speed"]
):
del move_request["Speed"]["Zoom"]
except Exception as e:
logger.warning(
f"Relative zoom not supported for {camera_name}: {e}"
)
except Exception as e:
self.config.cameras[
camera_name
].onvif.autotracking.zooming = ZoomingModeEnum.disabled
logger.warning(
f"Disabling autotracking zooming for {camera_name}: Relative zoom not supported. Exception: {e}"
if move_request.Speed is None:
move_request.Speed = configs.DefaultPTZSpeed if configs else None
logger.debug(
f"{camera_name}: Relative move request after setup: {move_request}"
)
self.cams[camera_name]["relative_move_request"] = move_request
if move_request.Speed is None:
move_request.Speed = configs.DefaultPTZSpeed if configs else None
logger.debug(
f"{camera_name}: Relative move request after setup: {move_request}"
)
self.cams[camera_name]["relative_move_request"] = move_request
# setup absolute moving request for autotracking zooming
# setup absolute moving request (for zoom positioning)
move_request = ptz.create_type("AbsoluteMove")
move_request.ProfileToken = profile.token
self.cams[camera_name]["absolute_move_request"] = move_request
except (Fault, ONVIFError, TransportError, Exception) as e:
logger.debug(f"PTZ configuration options not available for {camera_name}: {e}")
# setup existing presets
try:
presets: list[dict] = await ptz.GetPresets({"ProfileToken": profile.token})
@ -417,9 +405,7 @@ class OnvifController:
logger.debug(f"Focus not supported for {camera_name}: {e}")
if (
self.config.cameras[camera_name].onvif.autotracking.enabled_in_config
and self.config.cameras[camera_name].onvif.autotracking.enabled
and fov_space_id is not None
fov_space_id is not None
and configs.DefaultRelativePanTiltTranslationSpace is not None
):
supported_features.append("pt-r-fov")
@ -550,8 +536,11 @@ class OnvifController:
if (
"zoom-r" in self.cams[camera_name]["features"]
and self.config.cameras[camera_name].onvif.autotracking.zooming
== ZoomingModeEnum.relative
and (
zoom != 0
or self.config.cameras[camera_name].onvif.autotracking.zooming
== ZoomingModeEnum.relative
)
):
move_request.Speed = {
"PanTilt": {
@ -561,6 +550,19 @@ class OnvifController:
"Zoom": {"x": speed},
}
move_request.Translation.Zoom.x = zoom
else:
move_request.Speed = {
"PanTilt": {
"x": speed,
"y": speed,
},
}
# Ensure no residual zoom from previous calls
try:
if move_request.Translation is not None:
move_request.Translation.Zoom = None
except (AttributeError, KeyError):
pass
await self.cams[camera_name]["ptz"].RelativeMove(move_request)
@ -568,12 +570,11 @@ class OnvifController:
move_request.Translation.PanTilt.x = 0
move_request.Translation.PanTilt.y = 0
if (
"zoom-r" in self.cams[camera_name]["features"]
and self.config.cameras[camera_name].onvif.autotracking.zooming
== ZoomingModeEnum.relative
):
move_request.Translation.Zoom.x = 0
try:
if move_request.Translation.Zoom is not None:
move_request.Translation.Zoom.x = 0
except (AttributeError, KeyError):
pass
self.cams[camera_name]["active"] = False
@ -717,8 +718,10 @@ class OnvifController:
elif command == OnvifCommandEnum.preset:
await self._move_to_preset(camera_name, param)
elif command == OnvifCommandEnum.move_relative:
_, pan, tilt = param.split("_")
await self._move_relative(camera_name, float(pan), float(tilt), 0, 1)
parts = param.split("_")
_, pan, tilt = parts[0], parts[1], parts[2]
zoom = float(parts[3]) if len(parts) > 3 else 0
await self._move_relative(camera_name, float(pan), float(tilt), zoom, 1)
elif command in (OnvifCommandEnum.zoom_in, OnvifCommandEnum.zoom_out):
await self._zoom(camera_name, command)
elif command in (OnvifCommandEnum.focus_in, OnvifCommandEnum.focus_out):

View File

@ -213,45 +213,156 @@ export default function LiveCameraView({
};
}, [audioTranscriptionState, sendTranscription]);
// click overlay for ptzs
// click/drag overlay for ptzs
const [clickOverlay, setClickOverlay] = useState(false);
const clickOverlayRef = useRef<HTMLDivElement>(null);
const { send: sendPtz } = usePtzCommand(camera.name);
const handleOverlayClick = useCallback(
(
e: React.MouseEvent<HTMLDivElement> | React.TouchEvent<HTMLDivElement>,
) => {
if (!clickOverlay) {
// Drag-to-zoom state
// dragStart: client (viewport) coordinates where the current press began;
// null when no press is in progress.
const [dragStart, setDragStart] = useState<{
x: number;
y: number;
} | null>(null);
// dragCurrent: latest client coordinates of the pointer during the press;
// null when no press is in progress.
const [dragCurrent, setDragCurrent] = useState<{
x: number;
y: number;
} | null>(null);
// True whenever both points are set. Both are set to the same position on
// press, so this is true from press until release, including before the
// pointer has moved far enough to count as a drag.
const isDragging = dragStart !== null && dragCurrent !== null;
// Extract client (viewport) coordinates from a React mouse or touch event.
// Returns null when the native event is a TouchEvent with no touch point
// available, or is neither a TouchEvent nor a MouseEvent.
const getClientPos = (
e: React.MouseEvent<HTMLDivElement> | React.TouchEvent<HTMLDivElement>,
): { x: number; y: number } | null => {
// Check that the TouchEvent constructor exists before using instanceof,
// since the `in window` test short-circuits when it is not defined.
if ("TouchEvent" in window && e.nativeEvent instanceof TouchEvent) {
// Prefer an active touch; fall back to changedTouches when `touches` has
// no entry (this handler is also used for touchend, where the lifted
// finger is reported in changedTouches).
const touch = e.nativeEvent.touches[0] || e.nativeEvent.changedTouches[0];
if (touch) return { x: touch.clientX, y: touch.clientY };
} else if (e.nativeEvent instanceof MouseEvent) {
return { x: e.nativeEvent.clientX, y: e.nativeEvent.clientY };
}
return null;
};
// Press handler (mouse down / touch start) for the PTZ overlay.
// Does nothing unless the click overlay is enabled and the overlay element
// is mounted. Otherwise it records the press position as both the drag start
// and the current drag point.
const handleOverlayMouseDown = useCallback(
(e: React.MouseEvent<HTMLDivElement> | React.TouchEvent<HTMLDivElement>) => {
if (!clickOverlay || !clickOverlayRef.current) return;
// Suppress the browser's default handling of the press.
e.preventDefault();
const pos = getClientPos(e);
if (pos) {
// Start and current begin at the same point; the move handler then
// updates only the current point.
setDragStart(pos);
setDragCurrent(pos);
}
},
[clickOverlay],
);
// Move handler (mouse move / touch move) for the PTZ overlay.
// Ignored when no press is in progress (dragStart is null); otherwise
// updates the current drag point to the pointer's client position.
const handleOverlayMouseMove = useCallback(
(e: React.MouseEvent<HTMLDivElement> | React.TouchEvent<HTMLDivElement>) => {
if (!dragStart) return;
e.preventDefault();
const pos = getClientPos(e);
if (pos) {
setDragCurrent(pos);
}
},
[dragStart],
);
const handleOverlayMouseUp = useCallback(
(e: React.MouseEvent<HTMLDivElement> | React.TouchEvent<HTMLDivElement>) => {
if (!clickOverlay || !clickOverlayRef.current || !dragStart) {
setDragStart(null);
setDragCurrent(null);
return;
}
let clientX;
let clientY;
if ("TouchEvent" in window && e.nativeEvent instanceof TouchEvent) {
clientX = e.nativeEvent.touches[0].clientX;
clientY = e.nativeEvent.touches[0].clientY;
} else if (e.nativeEvent instanceof MouseEvent) {
clientX = e.nativeEvent.clientX;
clientY = e.nativeEvent.clientY;
const pos = getClientPos(e);
if (!pos) {
setDragStart(null);
setDragCurrent(null);
return;
}
if (clickOverlayRef.current && clientX && clientY) {
const rect = clickOverlayRef.current.getBoundingClientRect();
const normalizedX = (clientX - rect.left) / rect.width;
const normalizedY = (clientY - rect.top) / rect.height;
const rect = clickOverlayRef.current.getBoundingClientRect();
const dx = Math.abs(pos.x - dragStart.x);
const dy = Math.abs(pos.y - dragStart.y);
// Minimum drag distance of 20px to distinguish from click
if (dx < 20 && dy < 20) {
// Click (not drag) — move to point without zoom
const normalizedX = (pos.x - rect.left) / rect.width;
const normalizedY = (pos.y - rect.top) / rect.height;
const pan = (normalizedX - 0.5) * 2;
const tilt = (0.5 - normalizedY) * 2;
sendPtz(`move_relative_${pan}_${tilt}`);
} else {
// Drag — zoom to rectangle
const x1 = Math.min(dragStart.x, pos.x);
const y1 = Math.min(dragStart.y, pos.y);
const x2 = Math.max(dragStart.x, pos.x);
const y2 = Math.max(dragStart.y, pos.y);
// Normalize to 0-1 within the overlay
const normX1 = (x1 - rect.left) / rect.width;
const normY1 = (y1 - rect.top) / rect.height;
const normX2 = (x2 - rect.left) / rect.width;
const normY2 = (y2 - rect.top) / rect.height;
let boxW = normX2 - normX1;
let boxH = normY2 - normY1;
// Expand box to match camera aspect ratio
const frameAspect = rect.width / rect.height;
const boxAspect = boxW / boxH;
if (boxAspect > frameAspect) {
// Box is wider than frame aspect — expand height
boxH = boxW / frameAspect;
} else {
// Box is taller — expand width
boxW = boxH * frameAspect;
}
// Center of the box
const centerX = (normX1 + normX2) / 2;
const centerY = (normY1 + normY2) / 2;
const pan = (centerX - 0.5) * 2;
const tilt = (0.5 - centerY) * 2;
// Zoom: ratio of box to frame (smaller box = more zoom)
const zoom = 1 - Math.max(boxW, boxH);
const clampedZoom = Math.max(0, Math.min(1, zoom));
// Send single command with pan, tilt, and zoom.
// The proxy translates this to a Set3DPos command for
// atomic pan+tilt+zoom in one camera movement.
sendPtz(`move_relative_${pan}_${tilt}_${clampedZoom}`);
}
setDragStart(null);
setDragCurrent(null);
},
[clickOverlayRef, clickOverlay, sendPtz],
[clickOverlayRef, clickOverlay, dragStart, sendPtz],
);
// Calculate the drag rectangle for rendering.
// Yields { left, top, width, height } in pixels relative to the overlay
// element's bounding rect (client coordinates minus rect.left / rect.top),
// or null when no drag is in progress, the overlay element is not mounted,
// or the pointer has moved less than 20px on both axes.
const dragRect = React.useMemo(() => {
if (!isDragging || !clickOverlayRef.current) return null;
const dx = Math.abs(dragCurrent.x - dragStart.x);
const dy = Math.abs(dragCurrent.y - dragStart.y);
if (dx < 20 && dy < 20) return null; // Don't show rectangle for small movements
const rect = clickOverlayRef.current.getBoundingClientRect();
// Use min/max so the rectangle is the same whichever direction the
// user drags in.
const x1 = Math.min(dragStart.x, dragCurrent.x) - rect.left;
const y1 = Math.min(dragStart.y, dragCurrent.y) - rect.top;
const x2 = Math.max(dragStart.x, dragCurrent.x) - rect.left;
const y2 = Math.max(dragStart.y, dragCurrent.y) - rect.top;
return {
left: x1,
top: y1,
width: x2 - x1,
height: y2 - y1,
};
}, [isDragging, dragStart, dragCurrent]);
// pip state
useEffect(() => {
@ -440,7 +551,8 @@ export default function LiveCameraView({
<TransformWrapper
minScale={1.0}
wheel={{ smoothStep: 0.005 }}
disabled={debug}
disabled={debug || clickOverlay}
panning={{ disabled: clickOverlay }}
>
<Toaster position="top-center" closeButton={true} />
<div
@ -636,11 +748,47 @@ export default function LiveCameraView({
<div
className={`flex flex-col items-center justify-center ${growClassName}`}
ref={clickOverlayRef}
onClick={handleOverlayClick}
style={{
aspectRatio: constrainedAspectRatio,
position: "relative",
}}
>
{clickOverlay && (
<div
onMouseDown={handleOverlayMouseDown}
onMouseMove={handleOverlayMouseMove}
onMouseUp={handleOverlayMouseUp}
onTouchStart={handleOverlayMouseDown}
onTouchMove={handleOverlayMouseMove}
onTouchEnd={handleOverlayMouseUp}
onDragStart={(e) => e.preventDefault()}
style={{
position: "absolute",
top: 0,
left: 0,
width: "100%",
height: "100%",
zIndex: 40,
cursor: "crosshair",
userSelect: "none",
}}
/>
)}
{isDragging && dragRect && clickOverlay && (
<div
style={{
position: "absolute",
left: dragRect.left,
top: dragRect.top,
width: dragRect.width,
height: dragRect.height,
border: "2px solid rgba(59, 130, 246, 0.8)",
backgroundColor: "rgba(59, 130, 246, 0.15)",
pointerEvents: "none",
zIndex: 50,
}}
/>
)}
<LivePlayer
key={camera.name}
className={`${fullscreen ? "*:rounded-none" : ""}`}