Consolidate grids and strides calculation

This commit is contained in:
Nicolas Mowen 2025-04-18 07:33:39 -06:00
parent 321eed5b27
commit 6979b3419e
3 changed files with 18 additions and 89 deletions

View File

@@ -16,7 +16,7 @@ class DetectionApi(ABC):
@abstractmethod @abstractmethod
def __init__(self, detector_config: BaseDetectorConfig): def __init__(self, detector_config: BaseDetectorConfig):
self.detector_config = detector_config self.detector_config = detector_config
self.thresh = 0.5 self.thresh = 0.4
self.height = detector_config.model.height self.height = detector_config.model.height
self.width = detector_config.model.width self.width = detector_config.model.width
@@ -24,58 +24,21 @@ class DetectionApi(ABC):
def detect_raw(self, tensor_input): def detect_raw(self, tensor_input):
pass pass
def post_process_yolonas(self, output): def calculate_grids_strides(self) -> None:
""" grids = []
@param output: output of inference expanded_strides = []
expected shape: [np.array(1, N, 4), np.array(1, N, 80)]
where N depends on the input size e.g. N=2100 for 320x320 images
@return: best results: np.array(20, 6) where each row is # decode and orient predictions
in this order (class_id, score, y1/height, x1/width, y2/height, x2/width) strides = [8, 16, 32]
""" hsizes = [self.height // stride for stride in strides]
wsizes = [self.width // stride for stride in strides]
N = output[0].shape[1] for hsize, wsize, stride in zip(hsizes, wsizes, strides):
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
grids.append(grid)
shape = grid.shape[:2]
expanded_strides.append(np.full((*shape, 1), stride))
boxes = output[0].reshape(N, 4) self.grids = np.concatenate(grids, 1)
scores = output[1].reshape(N, 80) self.expanded_strides = np.concatenate(expanded_strides, 1)
class_ids = np.argmax(scores, axis=1)
scores = scores[np.arange(N), class_ids]
args_best = np.argwhere(scores > self.thresh)[:, 0]
num_matches = len(args_best)
if num_matches == 0:
return np.zeros((20, 6), np.float32)
elif num_matches > 20:
args_best20 = np.argpartition(scores[args_best], -20)[-20:]
args_best = args_best[args_best20]
boxes = boxes[args_best]
class_ids = class_ids[args_best]
scores = scores[args_best]
boxes = np.transpose(
np.vstack(
(
boxes[:, 1] / self.height,
boxes[:, 0] / self.width,
boxes[:, 3] / self.height,
boxes[:, 2] / self.width,
)
)
)
results = np.hstack(
(class_ids[..., np.newaxis], scores[..., np.newaxis], boxes)
)
return np.resize(results, (20, 6))
def post_process(self, output):
if self.detector_config.model.model_type == ModelTypeEnum.yolonas:
return self.post_process_yolonas(output)
else:
raise ValueError(
f'Model type "{self.detector_config.model.model_type}" is currently not supported.'
)

View File

@@ -60,23 +60,7 @@ class ONNXDetector(DetectionApi):
path = detector_config.model.path path = detector_config.model.path
if self.onnx_model_type == ModelTypeEnum.yolox: if self.onnx_model_type == ModelTypeEnum.yolox:
grids = [] self.calculate_grids_strides()
expanded_strides = []
# decode and orient predictions
strides = [8, 16, 32]
hsizes = [self.h // stride for stride in strides]
wsizes = [self.w // stride for stride in strides]
for hsize, wsize, stride in zip(hsizes, wsizes, strides):
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
grids.append(grid)
shape = grid.shape[:2]
expanded_strides.append(np.full((*shape, 1), stride))
self.grids = np.concatenate(grids, 1)
self.expanded_strides = np.concatenate(expanded_strides, 1)
logger.info(f"ONNX: {path} loaded") logger.info(f"ONNX: {path} loaded")

View File

@@ -133,25 +133,7 @@ class OvDetector(DetectionApi):
break break
self.num_classes = tensor_shape[2] - 5 self.num_classes = tensor_shape[2] - 5
logger.info(f"YOLOX model has {self.num_classes} classes") logger.info(f"YOLOX model has {self.num_classes} classes")
self.set_strides_grids() self.calculate_grids_strides()
def set_strides_grids(self):
grids = []
expanded_strides = []
strides = [8, 16, 32]
hsize_list = [self.h // stride for stride in strides]
wsize_list = [self.w // stride for stride in strides]
for hsize, wsize, stride in zip(hsize_list, wsize_list, strides):
xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
grids.append(grid)
shape = grid.shape[:2]
expanded_strides.append(np.full((*shape, 1), stride))
self.grids = np.concatenate(grids, 1)
self.expanded_strides = np.concatenate(expanded_strides, 1)
## Takes in class ID, confidence score, and array of [x, y, w, h] that describes detection position, ## Takes in class ID, confidence score, and array of [x, y, w, h] that describes detection position,
## returns an array that's easily passable back to Frigate. ## returns an array that's easily passable back to Frigate.