Implement yolox rknn inference and post processing

2026-05-05 04:57:42 +03:00 · 2025-04-18 10:41:47 -06:00 · 2025-04-18 10:41:47 -06:00 · 7d448eb986
commit 7d448eb986
parent 68382d89b4
3 changed files with 107 additions and 9 deletions
--- a/frigate/detectors/detection_api.py
+++ b/frigate/detectors/detection_api.py
@ -24,7 +24,7 @@ class DetectionApi(ABC):
    def detect_raw(self, tensor_input):
        pass

-    def calculate_grids_strides(self) -> None:
+    def calculate_grids_strides(self, expanded=True) -> None:
        grids = []
        expanded_strides = []

@ -35,10 +35,24 @@ class DetectionApi(ABC):

        for hsize, wsize, stride in zip(hsizes, wsizes, strides):
            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
-            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
-            grids.append(grid)
-            shape = grid.shape[:2]
-            expanded_strides.append(np.full((*shape, 1), stride))

-        self.grids = np.concatenate(grids, 1)
-        self.expanded_strides = np.concatenate(expanded_strides, 1)
+            if expanded:
+                grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+                grids.append(grid)
+                shape = grid.shape[:2]
+                expanded_strides.append(np.full((*shape, 1), stride))
+            else:
+                xv = xv.reshape(1, 1, hsize, wsize)
+                yv = yv.reshape(1, 1, hsize, wsize)
+                grids.extend(np.concatenate((xv, yv), axis=1).tolist())
+                expanded_strides.extend(np.array([stride, stride]).reshape(1, 2, 1, 1).tolist())
+
+        if expanded:
+            self.grids = np.concatenate(grids, 1)
+            self.expanded_strides = np.concatenate(expanded_strides, 1)
+        else:
+            self.grids = grids
+            self.expanded_strides = expanded_strides
+
+        print(f"grids {self.grids}")
+        print(f"expanded strides {self.expanded_strides}")
--- a/frigate/detectors/plugins/rknn.py
+++ b/frigate/detectors/plugins/rknn.py
@ -4,6 +4,7 @@ import re
 import urllib.request
 from typing import Literal

+import cv2
 import numpy as np
 from pydantic import Field

@ -17,7 +18,10 @@ DETECTOR_KEY = "rknn"

 supported_socs = ["rk3562", "rk3566", "rk3568", "rk3576", "rk3588"]

-supported_models = {ModelTypeEnum.yolonas: "^deci-fp16-yolonas_[sml]$"}
+supported_models = {
+    ModelTypeEnum.yolonas: "^deci-fp16-yolonas_[sml]$",
+    ModelTypeEnum.yolox: None,
+}

 model_cache_dir = os.path.join(MODEL_CACHE_DIR, "rknn_cache/")

@ -41,6 +45,9 @@ class Rknn(DetectionApi):

        model_props = self.parse_model_input(model_path, soc)

+        if self.detector_config.model.model_type == ModelTypeEnum.yolox:
+            self.calculate_grids_strides(expanded=False)
+
        if model_props["preset"]:
            config.model.model_type = model_props["model_type"]

@ -199,9 +206,86 @@ class Rknn(DetectionApi):

        return np.resize(results, (20, 6))

+    def post_process_yolox(  # Decode raw YOLOX output tensors into a fixed (20, 6) float32 detection array.
+        self,
+        predictions: list[np.ndarray],  # one tensor per model output; each is reshaped to (1, C, H, W) below
+        grids: np.ndarray,  # indexed per tensor; NOTE(review): the caller in this diff passes nested lists built with expanded=False
+        expanded_strides: np.ndarray,  # indexed per tensor; multiplies both the xy and the wh values
+    ) -> np.ndarray:  # rows are [class, score, y1, x1, y2, x2]; rows without a detection stay zero
+        def sp_flatten(_in: np.ndarray):  # (N, C, H, W) -> (N*H*W, C)
+            ch = _in.shape[1]  # channel count, kept as the trailing axis of the result
+            _in = _in.transpose(0, 2, 3, 1)  # move channels last so each row is one grid cell
+            return _in.reshape(-1, ch)
+
+        boxes, scores, classes_conf = [], [], []  # filled with one entry per prediction tensor
+
+        input_data = [  # collapse all leading axes so every tensor is (1, C, H, W)
+            _in.reshape([1, -1] + list(_in.shape[-2:])) for _in in predictions
+        ]
+
+        for i in range(len(input_data)):
+            unprocessed_box = input_data[i][:, :4, :, :]  # channels 0-3: raw box values
+            box_xy = unprocessed_box[:, :2, :, :]  # basic slice, i.e. a view: the in-place ops below write into input_data[i]
+            box_wh = np.exp(unprocessed_box[:, 2:4, :, :]) * expanded_strides[i]  # exp of raw w/h, scaled by this tensor's stride
+
+            box_xy += grids[i]  # add the per-cell grid offsets (in place)
+            box_xy *= expanded_strides[i]  # scale the centres by this tensor's stride (in place)
+            box = np.concatenate((box_xy, box_wh), axis=1)  # (1, 4, H, W) laid out as [c_x, c_y, w, h]
+
+            # Convert [c_x, c_y, w, h] to [x1, y1, x2, y2]
+            xyxy = np.copy(box)  # copy so the corner writes below do not alias `box`
+            xyxy[:, 0, :, :] = box[:, 0, :, :] - box[:, 2, :, :] / 2  # top left x
+            xyxy[:, 1, :, :] = box[:, 1, :, :] - box[:, 3, :, :] / 2  # top left y
+            xyxy[:, 2, :, :] = box[:, 0, :, :] + box[:, 2, :, :] / 2  # bottom right x
+            xyxy[:, 3, :, :] = box[:, 1, :, :] + box[:, 3, :, :] / 2  # bottom right y
+
+            boxes.append(xyxy)
+            scores.append(input_data[i][:, 4:5, :, :])  # channel 4: box confidence
+            classes_conf.append(input_data[i][:, 5:, :, :])  # channels 5 and up: per-class confidences
+
+        # flatten data
+        boxes = np.concatenate([sp_flatten(_v) for _v in boxes])  # (total cells, 4)
+        classes_conf = np.concatenate([sp_flatten(_v) for _v in classes_conf])  # (total cells, number of classes)
+        scores = np.concatenate([sp_flatten(_v) for _v in scores])  # (total cells, 1)
+
+        # reshape and filter boxes
+        box_confidences = scores.reshape(-1)  # one box confidence per cell
+        class_max_score = np.max(classes_conf, axis=-1)  # best class confidence per cell
+        classes = np.argmax(classes_conf, axis=-1)  # index of that best class
+        _class_pos = np.where(class_max_score * box_confidences >= 0.4)  # keep cells whose combined score is at least 0.4
+        scores = (class_max_score * box_confidences)[_class_pos]  # combined score = class confidence * box confidence
+        boxes = boxes[_class_pos]
+        classes = classes[_class_pos]
+
+        # run nms
+        indices = cv2.dnn.NMSBoxes(
+            bboxes=boxes,  # NOTE(review): these are [x1, y1, x2, y2]; OpenCV describes NMSBoxes rects as (x, y, w, h) - confirm
+            scores=scores,
+            score_threshold=0.4,  # same cut-off as the filter above
+            nms_threshold=0.4,
+        )
+
+        results = np.zeros((20, 6), np.float32)  # fixed-size output buffer
+
+        if len(indices) > 0:
+            for i, idx in enumerate(indices.flatten()[:20]):  # only the first 20 indices returned by NMS are written
+                box = boxes[idx]
+                results[i] = [  # corner coordinates are divided by self.height / self.width
+                    classes[idx],
+                    scores[idx],
+                    box[1] / self.height,  # y1
+                    box[0] / self.width,  # x1
+                    box[3] / self.height,  # y2
+                    box[2] / self.width,  # x2
+                ]
+
+        return results
+
    def post_process(self, output):
        if self.detector_config.model.model_type == ModelTypeEnum.yolonas:
            return self.post_process_yolonas(output)
+        elif self.detector_config.model.model_type == ModelTypeEnum.yolox:
+            return self.post_process_yolox(output, self.grids, self.expanded_strides)
        else:
            raise ValueError(
                f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
--- a/frigate/util/model.py
+++ b/frigate/util/model.py
@ -180,7 +180,7 @@ def __post_process_multipart_yolo(
                x2 / width,
            ]

-    return np.array(results, dtype=np.float32)
+    return results


 def __post_process_nms_yolo(predictions: np.ndarray, width, height) -> np.ndarray: