mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-05 10:45:21 +03:00
clean up
This commit is contained in:
parent
c690bb8500
commit
bef54c537f
284
motion_estimator.py
Normal file
284
motion_estimator.py
Normal file
@ -0,0 +1,284 @@
|
||||
import argparse
|
||||
from functools import partial
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from norfair import (
|
||||
AbsolutePaths,
|
||||
Detection,
|
||||
FixedCamera,
|
||||
Tracker,
|
||||
Video,
|
||||
draw_absolute_grid,
|
||||
)
|
||||
from norfair.camera_motion import (
|
||||
HomographyTransformationGetter,
|
||||
MotionEstimator,
|
||||
TranslationTransformationGetter,
|
||||
)
|
||||
from norfair.drawing import draw_tracked_objects
|
||||
|
||||
|
||||
def yolo_detections_to_norfair_detections(yolo_detections, track_boxes):
    """Convert raw YOLOv5 output into norfair ``Detection`` objects.

    Reads ``yolo_detections.xyxy[0]`` (one row per detection:
    x1, y1, x2, y2, confidence, class) and returns a tuple
    ``(detections, boxes)`` where ``boxes`` is the list of raw 2x2 corner
    arrays for every detection and ``detections`` is the list of norfair
    ``Detection`` instances — tracking both box corners when *track_boxes*
    is true, otherwise just the box centroid.
    """
    converted = []
    corner_arrays = []
    for row in yolo_detections.xyxy[0]:
        row = row.cpu().numpy()
        corners = np.array(
            [
                [row[0].item(), row[1].item()],
                [row[2].item(), row[3].item()],
            ]
        )
        corner_arrays.append(corners)

        if track_boxes:
            # Track both corners; the confidence is duplicated per point.
            tracked_points = corners
            point_scores = np.array([row[4], row[4]])
        else:
            # Track only the centroid, with its single confidence score.
            tracked_points = corners.mean(axis=0, keepdims=True)
            point_scores = row[[4]]

        converted.append(
            Detection(points=tracked_points, scores=point_scores, label=row[-1].item())
        )

    return converted, corner_arrays
|
||||
|
||||
|
||||
def run():
    """Run camera-motion-compensated multi-object tracking on video files.

    Parses command-line options, loads a YOLOv5 model through torch hub,
    and for every input video: estimates camera motion (homography,
    translation, or none), converts YOLO detections to norfair detections,
    updates the tracker with motion-compensated coordinates, optionally
    draws tracked objects, paths and an absolute reference grid, and shows
    or saves each frame.

    Raises:
        ValueError: if ``--transformation`` is not one of
            ``homography``, ``translation``, ``none``.
    """
    parser = argparse.ArgumentParser(description="Track objects in a video.")
    parser.add_argument("files", type=str, nargs="+", help="Video files to process")
    parser.add_argument(
        "--model",
        type=str,
        default="yolov5n",
        help="YOLO model to use, possible values are yolov5n, yolov5s, yolov5m, yolov5l, yolov5x",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        help="Confidence threshold of detections",
        default=0.15,
    )
    parser.add_argument(
        "--distance-threshold",
        type=float,
        default=0.8,
        help="Max distance to consider when matching detections and tracked objects",
    )
    parser.add_argument(
        "--initialization-delay",
        type=float,
        default=3,
        help="Min detections needed to start the tracked object",
    )
    parser.add_argument(
        "--track-boxes",
        dest="track_boxes",
        action="store_true",
        help="Pass it to track bounding boxes instead of just the centroids",
    )
    parser.add_argument(
        "--hit-counter-max",
        type=int,
        default=30,
        help="Max iteration the tracked object is kept after when there are no detections",
    )
    parser.add_argument(
        "--iou-threshold", type=float, help="Iou threshold for detector", default=0.15
    )
    parser.add_argument(
        "--image-size", type=int, help="Size of the images for detector", default=480
    )
    parser.add_argument(
        "--classes", type=int, nargs="+", default=[0], help="Classes to track"
    )
    parser.add_argument(
        "--transformation",
        default="homography",
        help="Type of transformation, possible values are homography, translation, none",
    )
    parser.add_argument(
        "--max-points",
        type=int,
        default=500,
        help="Max points sampled to calculate camera motion",
    )
    parser.add_argument(
        "--min-distance",
        type=float,
        default=7,
        help="Min distance between points sampled to calculate camera motion",
    )
    parser.add_argument(
        "--no-mask-detections",
        dest="mask_detections",
        action="store_false",
        default=True,
        help="By default we don't sample regions where objects were detected when estimating camera motion. Pass this flag to disable this behavior",
    )
    parser.add_argument(
        "--save",
        dest="save",
        action="store_true",
        help="Pass this flag to save the video instead of showing the frames",
    )
    # NOTE(review): --output-name is parsed but never used below — verify
    # whether it should be forwarded to Video(output_path=...).
    parser.add_argument(
        "--output-name",
        default=None,
        help="Name of the output file",
    )
    parser.add_argument(
        "--downsample-ratio",
        type=int,
        default=1,
        help="Downsample ratio when showing frames",
    )
    parser.add_argument(
        "--fixed-camera-scale",
        type=float,
        default=0,
        help="Scale of the fixed camera, set to 0 to disable. Note that this only works for translation",
    )
    parser.add_argument(
        "--draw-absolute-grid",
        dest="absolute_grid",
        action="store_true",
        help="Pass this flag to draw absolute grid for reference",
    )
    parser.add_argument(
        "--draw-objects",
        dest="draw_objects",
        action="store_true",
        help="Pass this flag to draw tracked object as points or as boxes if --track-boxes is used.",
    )
    parser.add_argument(
        "--draw-paths",
        dest="draw_paths",
        action="store_true",
        help="Pass this flag to draw the paths of the objects (SLOW)",
    )
    parser.add_argument(
        "--path-history",
        type=int,
        default=20,
        help="Length of the paths",
    )
    # Fixed help-string typos: "Thikness will addapt" -> "Thickness will adapt".
    parser.add_argument(
        "--id-size",
        type=float,
        default=None,
        help="Size multiplier of the ids when drawing. Thickness will adapt to size",
    )
    parser.add_argument(
        "--draw-flow",
        dest="draw_flow",
        action="store_true",
        help="Pass this flag to draw the optical flow of the selected points",
    )

    args = parser.parse_args()

    model = torch.hub.load("ultralytics/yolov5", args.model)
    # Detector-side confidence filtering is disabled here; the tracker
    # applies args.confidence_threshold via its detection_threshold instead.
    model.conf_threshold = 0
    model.iou_threshold = args.iou_threshold
    model.image_size = args.image_size
    model.classes = args.classes

    use_fixed_camera = args.fixed_camera_scale > 0
    tracked_objects = []
    # Process Videos
    for input_path in args.files:
        # Choose the coordinate-transformation estimator for camera motion.
        if args.transformation == "homography":
            transformations_getter = HomographyTransformationGetter()
        elif args.transformation == "translation":
            transformations_getter = TranslationTransformationGetter()
        elif args.transformation == "none":
            transformations_getter = None
        else:
            raise ValueError(f"invalid transformation {args.transformation}")
        if transformations_getter is not None:
            motion_estimator = MotionEstimator(
                max_points=args.max_points,
                min_distance=args.min_distance,
                transformations_getter=transformations_getter,
                draw_flow=args.draw_flow,
            )
        else:
            motion_estimator = None

        if use_fixed_camera:
            fixed_camera = FixedCamera(scale=args.fixed_camera_scale)

        if args.draw_paths:
            path_drawer = AbsolutePaths(max_history=args.path_history, thickness=2)

        video = Video(input_path=input_path)
        # Either write frames to disk or display them (possibly downsampled).
        show_or_write = (
            video.write
            if args.save
            else partial(video.show, downsample_ratio=args.downsample_ratio)
        )

        tracker = Tracker(
            distance_function="euclidean",
            detection_threshold=args.confidence_threshold,
            distance_threshold=args.distance_threshold,
            initialization_delay=args.initialization_delay,
            hit_counter_max=args.hit_counter_max,
        )
        for frame in video:
            detections = model(frame)
            detections, boxes = yolo_detections_to_norfair_detections(
                detections, args.track_boxes
            )

            mask = None
            if args.mask_detections:
                # create a mask of ones
                mask = np.ones(frame.shape[:2], frame.dtype)
                # set to 0 all detections, so the motion estimator does not
                # sample points on moving objects
                for b in boxes:
                    i = b.astype(int)
                    mask[i[0, 1] : i[1, 1], i[0, 0] : i[1, 0]] = 0
                if args.track_boxes:
                    # Also mask out previously tracked boxes (their estimates
                    # are 2x2 corner arrays when tracking boxes).
                    for obj in tracked_objects:
                        i = obj.estimate.astype(int)
                        mask[i[0, 1] : i[1, 1], i[0, 0] : i[1, 0]] = 0

            if motion_estimator is None:
                coord_transformations = None
            else:
                coord_transformations = motion_estimator.update(frame, mask)

            tracked_objects = tracker.update(
                detections=detections, coord_transformations=coord_transformations
            )

            if args.draw_objects:
                draw_tracked_objects(
                    frame,
                    tracked_objects,
                    id_size=args.id_size,
                    id_thickness=None
                    if args.id_size is None
                    else int(args.id_size * 2),
                )

            if args.absolute_grid:
                draw_absolute_grid(frame, coord_transformations)

            if args.draw_paths:
                frame = path_drawer.draw(
                    frame, tracked_objects, coord_transform=coord_transformations
                )

            if use_fixed_camera:
                frame = fixed_camera.adjust_frame(frame, coord_transformations)

            show_or_write(frame)
|
||||
|
||||
|
||||
# Script entry point: run the tracker only when executed directly.
if __name__ == "__main__":
    run()
|
||||
Loading…
Reference in New Issue
Block a user