Fix the async API: reduce inference latency from 30ms to 10ms

This commit is contained in:
OmriAx 2025-03-05 14:17:45 +02:00
parent 9bdc2d4457
commit b8a20f5388

View File

@ -37,7 +37,6 @@ logger = logging.getLogger(__name__)
# ----------------- Inline Utility Functions ----------------- # # ----------------- Inline Utility Functions ----------------- #
def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray: def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarray:
""" """
Resize a NumPy array image with unchanged aspect ratio using padding. Resize a NumPy array image with unchanged aspect ratio using padding.
@ -72,7 +71,6 @@ def preprocess_tensor(image: np.ndarray, model_w: int, model_h: int) -> np.ndarr
return padded_image return padded_image
def extract_detections(input_data: list, threshold: float = 0.5) -> dict: def extract_detections(input_data: list, threshold: float = 0.5) -> dict:
""" """
(Legacy extraction function; not used by detect_raw below.) (Legacy extraction function; not used by detect_raw below.)
@ -260,12 +258,19 @@ class HailoDetector(DetectionApi):
) )
self.input_shape = self.inference_engine.get_input_shape() self.input_shape = self.inference_engine.get_input_shape()
logger.debug(f"[INIT] Model input shape: {self.input_shape}") logger.debug(f"[INIT] Model input shape: {self.input_shape}")
# Start the inference loop in a background thread
self.inference_thread = threading.Thread(target=self.inference_engine.run, daemon=True)
self.inference_thread.start()
except Exception as e: except Exception as e:
logger.error(f"[INIT] Failed to initialize HailoAsyncInference: {e}") logger.error(f"[INIT] Failed to initialize HailoAsyncInference: {e}")
raise raise
def set_path_and_url(self, path: str = None): def set_path_and_url(self, path: str = None):
if not path:
self.model_path = None
self.url = None
return
if self.is_url(path): if self.is_url(path):
self.url = path self.url = path
self.model_path = None self.model_path = None
@ -338,7 +343,7 @@ class HailoDetector(DetectionApi):
def detect_raw(self, tensor_input): def detect_raw(self, tensor_input):
logger.debug("[DETECT_RAW] Starting detection") logger.debug("[DETECT_RAW] Starting detection")
# Pre process the input tensor # Preprocess the input tensor
logger.debug(f"[DETECT_RAW] Starting pre processing") logger.debug(f"[DETECT_RAW] Starting pre processing")
tensor_input = self.preprocess(tensor_input) tensor_input = self.preprocess(tensor_input)
@ -347,12 +352,10 @@ class HailoDetector(DetectionApi):
tensor_input = np.expand_dims(tensor_input, axis=0) tensor_input = np.expand_dims(tensor_input, axis=0)
logger.debug(f"[DETECT_RAW] Expanded input shape to {tensor_input.shape}") logger.debug(f"[DETECT_RAW] Expanded input shape to {tensor_input.shape}")
# Enqueue input and a sentinel value # Enqueue input for asynchronous inference
self.input_queue.put(tensor_input) self.input_queue.put(tensor_input)
self.input_queue.put(None) # Sentinel value
# Run the inference engine # Wait for inference result from the output queue
self.inference_engine.run()
result = self.output_queue.get() result = self.output_queue.get()
if result is None: if result is None:
logger.error("[DETECT_RAW] No inference result received") logger.error("[DETECT_RAW] No inference result received")
@ -369,27 +372,20 @@ class HailoDetector(DetectionApi):
threshold = 0.4 threshold = 0.4
all_detections = [] all_detections = []
# Use the outer loop index to determine the class # Process each detection set
for class_id, detection_set in enumerate(infer_results): for class_id, detection_set in enumerate(infer_results):
if not isinstance(detection_set, np.ndarray) or detection_set.size == 0: if not isinstance(detection_set, np.ndarray) or detection_set.size == 0:
continue continue
logger.debug(f"[DETECT_RAW] Processing detection set {class_id} with shape {detection_set.shape}") logger.debug(f"[DETECT_RAW] Processing detection set {class_id} with shape {detection_set.shape}")
for det in detection_set: for det in detection_set:
# Expect at least 5 elements: [ymin, xmin, ymax, xmax, confidence]
if det.shape[0] < 5: if det.shape[0] < 5:
continue continue
score = float(det[4]) score = float(det[4])
if score < threshold: if score < threshold:
continue continue
if hasattr(self, "labels") and self.labels:
logger.debug(f"[DETECT_RAW] Detected class id: {class_id} -> {self.labels[class_id]}")
else:
logger.debug(f"[DETECT_RAW] Detected class id: {class_id}")
all_detections.append([class_id, score, det[0], det[1], det[2], det[3]]) all_detections.append([class_id, score, det[0], det[1], det[2], det[3]])
if len(all_detections) == 0: if len(all_detections) == 0:
return np.zeros((20, 6), dtype=np.float32) return np.zeros((20, 6), dtype=np.float32)
@ -402,11 +398,9 @@ class HailoDetector(DetectionApi):
pad = np.zeros((20 - detections_array.shape[0], 6), dtype=np.float32) pad = np.zeros((20 - detections_array.shape[0], 6), dtype=np.float32)
detections_array = np.vstack((detections_array, pad)) detections_array = np.vstack((detections_array, pad))
logger.debug(f"[DETECT_RAW] Processed detections: {detections_array}") logger.debug(f"[DETECT_RAW] Processed detections: {detections_array}")
return detections_array return detections_array
# Preprocess method using inline utility
def preprocess(self, image): def preprocess(self, image):
if isinstance(image, np.ndarray): if isinstance(image, np.ndarray):
# Process the tensor input and reintroduce the batch dimension. # Process the tensor input and reintroduce the batch dimension.
@ -415,8 +409,6 @@ class HailoDetector(DetectionApi):
else: else:
raise ValueError("Unsupported image format for preprocessing") raise ValueError("Unsupported image format for preprocessing")
# Close the Hailo device
def close(self): def close(self):
logger.debug("[CLOSE] Closing HailoDetector") logger.debug("[CLOSE] Closing HailoDetector")
try: try: