From 91e17e12b72202d236fa1d0676fc57e91ee383d1 Mon Sep 17 00:00:00 2001
From: shizhicheng <shizhicheng@axera-tech.com>
Date: Sun, 9 Nov 2025 13:21:17 +0000
Subject: [PATCH] Change the default detection model to YOLOv9

---
 docker/axcl/Dockerfile                      |   2 +-
 docs/docs/configuration/object_detectors.md |   6 +-
 docs/docs/frigate/hardware.md               |   4 +-
 frigate/detectors/plugins/axengine.py       | 236 +++++++-------------
 4 files changed, 90 insertions(+), 158 deletions(-)

diff --git a/docker/axcl/Dockerfile b/docker/axcl/Dockerfile
index 86e868b61..4a16bffaf 100644
--- a/docker/axcl/Dockerfile
+++ b/docker/axcl/Dockerfile
@@ -13,7 +13,7 @@ ARG PIP_BREAK_SYSTEM_PACKAGES
 
 # Install axmodels
 RUN mkdir -p /axmodels \
-    && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov5s_320.axmodel -O /axmodels/yolov5s_320.axmodel
+    && wget https://github.com/ivanshi1108/assets/releases/download/v0.16.2/yolov9_tiny_u16_npu3_bgr_320x320_nhwc.axmodel -O /axmodels/yolov9_320.axmodel
 
 # Install axpyengine
 RUN wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc1/axengine-0.1.3-py3-none-any.whl -O /axengine-0.1.3-py3-none-any.whl
diff --git a/docs/docs/configuration/object_detectors.md b/docs/docs/configuration/object_detectors.md
index 139f318d3..983e3e5e7 100644
--- a/docs/docs/configuration/object_detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -1119,9 +1119,9 @@ See the [installation docs](../frigate/installation.md#axera) for information on
 
 When configuring the AXEngine detector, you have to specify the model name.
 
-#### yolov5s
+#### yolov9
 
-A yolov5s model is provided in the container at /axmodels and is used by this detector type by default. 
+A YOLOv9-tiny model (`yolov9_320`, 320x320 input) is provided in the container at `/axmodels` and is used by this detector type by default.
 
 Use the model configuration shown below when using the axengine detector with the default axmodel:
 
@@ -1131,7 +1131,7 @@ detectors:  # required
     type: axengine  # required
 
 model:  # required
-  path: yolov5s_320  # required
+  path: yolov9_320  # required
   width: 320  # required
   height: 320  # required
   tensor_format: bgr  # required
diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md
index d70018b4a..1b6e425d8 100644
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@@ -112,11 +112,11 @@ Frigate supports multiple different detectors that work on different types of ha
 
 ### AXERA
 
-- **AXEngine** Default model is **yolov5s_320**
+- **AXEngine** Default model is **yolov9**
 
 | Name             | AXERA AX650N/AX8850N Inference Time |
 | ---------------- | ----------------------------------- |
-| yolov5s_320      | ~ 1.676 ms                          |
+| yolov9           | ~ 1.012 ms                          |
 
 ### Hailo-8
 
diff --git a/frigate/detectors/plugins/axengine.py b/frigate/detectors/plugins/axengine.py
index 206923093..333c61756 100644
--- a/frigate/detectors/plugins/axengine.py
+++ b/frigate/detectors/plugins/axengine.py
@@ -20,14 +20,9 @@ logger = logging.getLogger(__name__)
 
 DETECTOR_KEY = "axengine"
 
+NUM_CLASSES = 80  # class-score columns that follow the 4 box values in each prediction
 CONF_THRESH = 0.65
 IOU_THRESH = 0.45
-STRIDES = [8, 16, 32]
-ANCHORS = [
-    [10, 13, 16, 30, 33, 23],
-    [30, 61, 62, 45, 59, 119],
-    [116, 90, 156, 198, 373, 326],
-]
 
 class AxengineDetectorConfig(BaseDetectorConfig):
     type: Literal[DETECTOR_KEY]
@@ -39,161 +34,98 @@ class Axengine(DetectionApi):
         super().__init__(config)
         self.height = config.model.height
         self.width = config.model.width
-        model_path = config.model.path or "yolov5s_320"
+        model_path = config.model.path or "yolov9_320"
         self.session = axe.InferenceSession(f"/axmodels/{model_path}.axmodel")
 
     def __del__(self):
         pass
 
-    def xywh2xyxy(self, x):
-        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
-        y = np.copy(x)
-        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
-        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
-        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
-        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
-        return y
-
-    def bboxes_iou(self, boxes1, boxes2):
-        """calculate the Intersection Over Union value"""
-        boxes1 = np.array(boxes1)
-        boxes2 = np.array(boxes2)
-
-        boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (
-            boxes1[..., 3] - boxes1[..., 1]
-        )
-        boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (
-            boxes2[..., 3] - boxes2[..., 1]
-        )
-
-        left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
-        right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
-
-        inter_section = np.maximum(right_down - left_up, 0.0)
-        inter_area = inter_section[..., 0] * inter_section[..., 1]
-        union_area = boxes1_area + boxes2_area - inter_area
-        ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
-
-        return ious
-
-    def nms(self, proposals, iou_threshold, conf_threshold, multi_label=False):
+    def post_processing(self, raw_output, input_shape):
         """
-        :param bboxes: (xmin, ymin, xmax, ymax, score, class)
-
-        Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
-            https://github.com/bharatsingh430/soft-nms
+        raw_output: (1, 1, 4 + NUM_CLASSES, N) or (1, 4 + NUM_CLASSES, N), where N is the number of candidate boxes
+        Returns: numpy array of shape (20, 6) [class_id, score, y_min, x_min, y_max, x_max] in normalized coordinates
         """
-        xc = proposals[..., 4] > conf_threshold
-        proposals = proposals[xc]
-        proposals[:, 5:] *= proposals[:, 4:5]
-        bboxes = self.xywh2xyxy(proposals[:, :4])
-        if multi_label:
-            mask = proposals[:, 5:] > conf_threshold
-            nonzero_indices = np.argwhere(mask)
-            if nonzero_indices.size < 0:
-                return
-            i, j = nonzero_indices.T
-            bboxes = np.hstack(
-                (bboxes[i], proposals[i, j + 5][:, None], j[:, None].astype(float))
-            )
-        else:
-            confidences = proposals[:, 5:]
-            conf = confidences.max(axis=1, keepdims=True)
-            j = confidences.argmax(axis=1)[:, None]
-
-            new_x_parts = [bboxes, conf, j.astype(float)]
-            bboxes = np.hstack(new_x_parts)
-
-            mask = conf.reshape(-1) > conf_threshold
-            bboxes = bboxes[mask]
-
-        classes_in_img = list(set(bboxes[:, 5]))
-        bboxes = bboxes[bboxes[:, 4].argsort()[::-1][:300]]
-        best_bboxes = []
-
-        for cls in classes_in_img:
-            cls_mask = bboxes[:, 5] == cls
-            cls_bboxes = bboxes[cls_mask]
-
-            while len(cls_bboxes) > 0:
-                max_ind = np.argmax(cls_bboxes[:, 4])
-                best_bbox = cls_bboxes[max_ind]
-                best_bboxes.append(best_bbox)
-                cls_bboxes = np.concatenate(
-                    [cls_bboxes[:max_ind], cls_bboxes[max_ind + 1 :]]
-                )
-                iou = self.bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
-                weight = np.ones((len(iou),), dtype=np.float32)
-
-                iou_mask = iou > iou_threshold
-                weight[iou_mask] = 0.0
-
-                cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
-                score_mask = cls_bboxes[:, 4] > 0.0
-                cls_bboxes = cls_bboxes[score_mask]
-
-        if len(best_bboxes) == 0:
-            return np.empty((0, 6))
-
-        best_bboxes = np.vstack(best_bboxes)
-        best_bboxes = best_bboxes[best_bboxes[:, 4].argsort()[::-1]]
-        return best_bboxes
-
-    def sigmoid(self, x):
-        return np.clip(0.2 * x + 0.5, 0, 1)
-
-    def gen_proposals(self, outputs):
-        new_pred = []
-        anchor_grid = np.array(ANCHORS).reshape(-1, 1, 1, 3, 2)
-        for i, pred in enumerate(outputs):
-            pred = self.sigmoid(pred)
-            n, h, w, c = pred.shape
-
-            pred = pred.reshape(n, h, w, 3, 85)
-            conv_shape = pred.shape
-            output_size = conv_shape[1]
-            conv_raw_dxdy = pred[..., 0:2]
-            conv_raw_dwdh = pred[..., 2:4]
-            xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
-            xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)
-
-            xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
-            xy_grid = xy_grid.astype(np.float32)
-            pred_xy = (conv_raw_dxdy * 2.0 - 0.5 + xy_grid) * STRIDES[i]
-            pred_wh = (conv_raw_dwdh * 2) ** 2 * anchor_grid[i]
-            pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)
-
-            new_pred.append(np.reshape(pred, (-1, np.shape(pred)[-1])))
-
-        return np.concatenate(new_pred, axis=0)
-
-    def post_processing(self, outputs, input_shape, threshold=0.3):
-        proposals = self.gen_proposals(outputs)
-        bboxes = self.nms(proposals, IOU_THRESH, CONF_THRESH, multi_label=True)
-
-        """
-        bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
-        """
-
         results = np.zeros((20, 6), np.float32)
 
-        for i, bbox in enumerate(bboxes):
-            if i >= 20:
-                break
-            coor = np.array(bbox[:4], dtype=np.int32)
-            score = bbox[4]
-            if score < threshold:
-                continue
-            class_ind = int(bbox[5])
-            results[i] = [
-                class_ind,
-                score,
-                max(0, bbox[1]) / input_shape[1],
-                max(0, bbox[0]) / input_shape[0],
-                min(1, bbox[3] / input_shape[1]),
-                min(1, bbox[2] / input_shape[0]),
-            ]
-        return results
+        try:
+            # The inference session may hand back a list of arrays rather than
+            # a single ndarray; normalise so the shape checks below work.
+            raw_output = np.asarray(raw_output)
+
+            # Collapse (1, 1, C, N) to (1, C, N).
+            if raw_output.ndim == 4 and raw_output.shape[:2] == (1, 1):
+                raw_output = raw_output.squeeze(1)
+
+            # (C, N) -> (N, C): one row per candidate box.
+            pred = raw_output[0].transpose(1, 0)
+
+            # Columns 0-3 hold the box as centre x/y and width/height; the
+            # next NUM_CLASSES columns hold the per-class scores.
+            cx, cy = pred[:, 0], pred[:, 1]
+            half_w, half_h = pred[:, 2] / 2, pred[:, 3] / 2
+            cls = pred[:, 4 : 4 + NUM_CLASSES]
+
+            # Best class per candidate and its score.
+            scores = np.max(cls, axis=1)
+            class_ids = np.argmax(cls, axis=1)
+
+            # Drop low-confidence candidates before running NMS.
+            mask = scores >= CONF_THRESH
+            if not mask.any():
+                return results
+
+            scores = scores[mask]
+            class_ids = class_ids[mask]
+            cx, cy = cx[mask], cy[mask]
+            half_w, half_h = half_w[mask], half_h[mask]
+
+            # Corner form: (x_min, y_min, x_max, y_max).
+            boxes = np.stack(
+                [cx - half_w, cy - half_h, cx + half_w, cy + half_h], axis=1
+            )
+
+            # cv2.dnn.NMSBoxes takes boxes as (x, y, width, height).
+            boxes_nms = boxes.copy()
+            boxes_nms[:, 2:] -= boxes_nms[:, :2]
+
+            indices = cv2.dnn.NMSBoxes(
+                boxes_nms.tolist(),
+                scores.tolist(),
+                score_threshold=CONF_THRESH,
+                nms_threshold=IOU_THRESH,
+            )
+
+            if len(indices) == 0:
+                return results
+
+            # NOTE(review): some OpenCV versions return an (N, 1) array here,
+            # so flatten to a 1-D index vector regardless of the container.
+            indices = np.asarray(indices).reshape(-1)
+
+            # Highest score first (stable for ties), capped at the number of
+            # rows available in the fixed-size result array.
+            order = np.argsort(-scores[indices], kind="stable")
+            keep = indices[order][: len(results)]
+            kept = boxes[keep]
+            count = len(keep)
+
+            # y is divided by input_shape[0] and x by input_shape[1]; every
+            # coordinate is clamped to [0, 1] after normalisation.
+            height, width = input_shape[0], input_shape[1]
+            results[:count, 0] = class_ids[keep]
+            results[:count, 1] = scores[keep]
+            results[:count, 2] = np.clip(kept[:, 1] / height, 0, 1)  # y_min
+            results[:count, 3] = np.clip(kept[:, 0] / width, 0, 1)  # x_min
+            results[:count, 4] = np.clip(kept[:, 3] / height, 0, 1)  # y_max
+            results[:count, 5] = np.clip(kept[:, 2] / width, 0, 1)  # x_max
+
+            return results
+
+        except Exception:
+            # Best effort: a bad frame must not take the detector down, but a
+            # silent failure is indistinguishable from "no objects", so log it.
+            logger.exception("AXEngine post-processing failed")
+            return results
 
     def detect_raw(self, tensor_input):
         results = None