"""Track objects in videos with YOLOv5 detections and norfair, compensating camera motion.

Detections come from a YOLOv5 model (via torch.hub); norfair's MotionEstimator
turns frame-to-frame optical flow into a coordinate transformation so the
tracker can work in (approximately) absolute coordinates.
"""
import argparse
from functools import partial

import numpy as np
import torch
from norfair import (
    AbsolutePaths,
    Detection,
    FixedCamera,
    Tracker,
    Video,
    draw_absolute_grid,
)
from norfair.camera_motion import (
    HomographyTransformationGetter,
    MotionEstimator,
    TranslationTransformationGetter,
)
from norfair.drawing import draw_tracked_objects


def yolo_detections_to_norfair_detections(yolo_detections, track_boxes):
    """Convert raw YOLOv5 output into norfair Detections.

    Args:
        yolo_detections: YOLOv5 results object; ``.xyxy[0]`` holds rows of
            ``[x1, y1, x2, y2, confidence, class]`` for one frame.
        track_boxes: if True, each Detection carries the two box corners
            (so the tracker follows boxes); otherwise only the box centroid.

    Returns:
        A pair ``(detections, boxes)`` where ``detections`` is a list of
        norfair ``Detection`` objects and ``boxes`` is a list of 2x2 numpy
        corner arrays (always the full boxes, used later for masking).
    """
    norfair_detections = []
    boxes = []
    for row in yolo_detections.xyxy[0]:
        row = row.cpu().numpy()
        corners = np.array(
            [
                [row[0].item(), row[1].item()],
                [row[2].item(), row[3].item()],
            ]
        )
        boxes.append(corners)
        if track_boxes:
            points = corners
            # One score per tracked point (both corners share the box confidence).
            scores = np.array([row[4], row[4]])
        else:
            points = corners.mean(axis=0, keepdims=True)
            scores = row[[4]]

        norfair_detections.append(
            Detection(points=points, scores=scores, label=row[-1].item())
        )

    return norfair_detections, boxes


def _build_parser():
    """Build the CLI argument parser (separated so the options are easy to scan)."""
    parser = argparse.ArgumentParser(description="Track objects in a video.")
    parser.add_argument("files", type=str, nargs="+", help="Video files to process")
    parser.add_argument(
        "--model",
        type=str,
        default="yolov5n",
        help="YOLO model to use, possible values are yolov5n, yolov5s, yolov5m, yolov5l, yolov5x",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        help="Confidence threshold of detections",
        default=0.15,
    )
    parser.add_argument(
        "--distance-threshold",
        type=float,
        default=0.8,
        help="Max distance to consider when matching detections and tracked objects",
    )
    parser.add_argument(
        "--initialization-delay",
        type=float,
        default=3,
        help="Min detections needed to start the tracked object",
    )
    parser.add_argument(
        "--track-boxes",
        dest="track_boxes",
        action="store_true",
        help="Pass it to track bounding boxes instead of just the centroids",
    )
    parser.add_argument(
        "--hit-counter-max",
        type=int,
        default=30,
        help="Max iteration the tracked object is kept after when there are no detections",
    )
    parser.add_argument(
        "--iou-threshold", type=float, help="IoU threshold for detector", default=0.15
    )
    parser.add_argument(
        "--image-size", type=int, help="Size of the images for detector", default=480
    )
    parser.add_argument(
        "--classes", type=int, nargs="+", default=[0], help="Classes to track"
    )
    parser.add_argument(
        "--transformation",
        default="homography",
        help="Type of transformation, possible values are homography, translation, none",
    )
    parser.add_argument(
        "--max-points",
        type=int,
        default=500,
        help="Max points sampled to calculate camera motion",
    )
    parser.add_argument(
        "--min-distance",
        type=float,
        default=7,
        help="Min distance between points sampled to calculate camera motion",
    )
    parser.add_argument(
        "--no-mask-detections",
        dest="mask_detections",
        action="store_false",
        default=True,
        help="By default we don't sample regions where objects were detected when estimating camera motion. Pass this flag to disable this behavior",
    )
    parser.add_argument(
        "--save",
        dest="save",
        action="store_true",
        help="Pass this flag to save the video instead of showing the frames",
    )
    parser.add_argument(
        "--output-name",
        default=None,
        help="Name of the output file",
    )
    parser.add_argument(
        "--downsample-ratio",
        type=int,
        default=1,
        help="Downsample ratio when showing frames",
    )
    parser.add_argument(
        "--fixed-camera-scale",
        type=float,
        default=0,
        help="Scale of the fixed camera, set to 0 to disable. Note that this only works for translation",
    )
    parser.add_argument(
        "--draw-absolute-grid",
        dest="absolute_grid",
        action="store_true",
        help="Pass this flag to draw absolute grid for reference",
    )
    parser.add_argument(
        "--draw-objects",
        dest="draw_objects",
        action="store_true",
        help="Pass this flag to draw tracked object as points or as boxes if --track-boxes is used.",
    )
    parser.add_argument(
        "--draw-paths",
        dest="draw_paths",
        action="store_true",
        help="Pass this flag to draw the paths of the objects (SLOW)",
    )
    parser.add_argument(
        "--path-history",
        type=int,
        default=20,
        help="Length of the paths",
    )
    parser.add_argument(
        "--id-size",
        type=float,
        default=None,
        help="Size multiplier of the ids when drawing. Thickness will adapt to size",
    )
    parser.add_argument(
        "--draw-flow",
        dest="draw_flow",
        action="store_true",
        help="Pass this flag to draw the optical flow of the selected points",
    )
    return parser


def _make_motion_estimator(args):
    """Create the camera-motion estimator for ``args.transformation``.

    Returns None when the transformation is "none"; raises ValueError for an
    unknown transformation name.
    """
    if args.transformation == "homography":
        transformations_getter = HomographyTransformationGetter()
    elif args.transformation == "translation":
        transformations_getter = TranslationTransformationGetter()
    elif args.transformation == "none":
        return None
    else:
        raise ValueError(f"invalid transformation {args.transformation}")
    return MotionEstimator(
        max_points=args.max_points,
        min_distance=args.min_distance,
        transformations_getter=transformations_getter,
        draw_flow=args.draw_flow,
    )


def _detection_mask(frame, boxes, tracked_objects, track_boxes):
    """Return a uint mask of the frame: 0 over detected/tracked boxes, 1 elsewhere.

    The zeroed regions are excluded when sampling points for camera-motion
    estimation, so moving objects don't bias the estimate.
    """
    mask = np.ones(frame.shape[:2], frame.dtype)
    for box in boxes:
        i = box.astype(int)
        mask[i[0, 1] : i[1, 1], i[0, 0] : i[1, 0]] = 0
    if track_boxes:
        # Also mask last-known tracked boxes (positions from the previous
        # frame approximate the current ones well enough for masking).
        for obj in tracked_objects:
            i = obj.estimate.astype(int)
            mask[i[0, 1] : i[1, 1], i[0, 0] : i[1, 0]] = 0
    return mask


def run():
    """Entry point: run detection + motion-compensated tracking on each input video."""
    args = _build_parser().parse_args()

    model = torch.hub.load("ultralytics/yolov5", args.model)
    # Let the detector return everything: confidence filtering is delegated to
    # the tracker (detection_threshold) so low-score detections can still
    # update already-initialized tracks.
    model.conf_threshold = 0
    model.iou_threshold = args.iou_threshold
    model.image_size = args.image_size
    model.classes = args.classes

    use_fixed_camera = args.fixed_camera_scale > 0

    # Process each video independently.
    for input_path in args.files:
        motion_estimator = _make_motion_estimator(args)

        if use_fixed_camera:
            fixed_camera = FixedCamera(scale=args.fixed_camera_scale)

        if args.draw_paths:
            path_drawer = AbsolutePaths(max_history=args.path_history, thickness=2)

        video = Video(input_path=input_path)
        show_or_write = (
            video.write
            if args.save
            else partial(video.show, downsample_ratio=args.downsample_ratio)
        )

        tracker = Tracker(
            distance_function="euclidean",
            detection_threshold=args.confidence_threshold,
            distance_threshold=args.distance_threshold,
            initialization_delay=args.initialization_delay,
            hit_counter_max=args.hit_counter_max,
        )
        # Reset per video: stale tracks from a previous file must not be used
        # to mask (or draw on) this one.
        tracked_objects = []
        for frame in video:
            detections = model(frame)
            detections, boxes = yolo_detections_to_norfair_detections(
                detections, args.track_boxes
            )

            mask = None
            if args.mask_detections:
                mask = _detection_mask(frame, boxes, tracked_objects, args.track_boxes)

            coord_transformations = (
                None
                if motion_estimator is None
                else motion_estimator.update(frame, mask)
            )

            tracked_objects = tracker.update(
                detections=detections, coord_transformations=coord_transformations
            )

            if args.draw_objects:
                draw_tracked_objects(
                    frame,
                    tracked_objects,
                    id_size=args.id_size,
                    id_thickness=None
                    if args.id_size is None
                    else int(args.id_size * 2),
                )

            if args.absolute_grid:
                draw_absolute_grid(frame, coord_transformations)

            if args.draw_paths:
                frame = path_drawer.draw(
                    frame, tracked_objects, coord_transform=coord_transformations
                )

            if use_fixed_camera:
                frame = fixed_camera.adjust_frame(frame, coord_transformations)

            show_or_write(frame)


if __name__ == "__main__":
    run()