mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-05-09 15:05:26 +03:00
Compare commits
No commits in common. "30788b2256ac2c21945072530e118fb88623c666" and "e61ca05b6ae1fae0cdf64df21c09260a9f2dbdf1" have entirely different histories.
30788b2256
...
e61ca05b6a
36
docker/rocm/CMakeLists.txt
Normal file
36
docker/rocm/CMakeLists.txt
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.1)
|
||||||
|
|
||||||
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||||
|
|
||||||
|
if(NOT CMAKE_BUILD_TYPE)
|
||||||
|
set(CMAKE_BUILD_TYPE Release)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
project(migraphx_py)
|
||||||
|
|
||||||
|
find_package(pybind11 REQUIRED)
|
||||||
|
pybind11_add_module(migraphx_py migraphx_py.cpp)
|
||||||
|
|
||||||
|
include_directories(
|
||||||
|
/opt/rocm/include
|
||||||
|
/opt/rocm/lib/migraphx/include
|
||||||
|
)
|
||||||
|
|
||||||
|
target_link_directories(migraphx_py PRIVATE
|
||||||
|
"/opt/rocm/lib"
|
||||||
|
"/opt/rocm/lib/migraphx/lib"
|
||||||
|
)
|
||||||
|
target_link_libraries(migraphx_py PRIVATE
|
||||||
|
migraphx
|
||||||
|
migraphx_tf
|
||||||
|
migraphx_onnx
|
||||||
|
)
|
||||||
|
SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
|
||||||
|
|
||||||
|
set_target_properties(migraphx_py PROPERTIES OUTPUT_NAME "migraphx")
|
||||||
|
install(TARGETS migraphx_py
|
||||||
|
COMPONENT python
|
||||||
|
LIBRARY DESTINATION lib
|
||||||
|
)
|
||||||
@ -58,6 +58,8 @@ RUN cd /opt/rocm-$ROCM/lib && cp -dpr \
|
|||||||
migraphx \
|
migraphx \
|
||||||
# NOTE: Include rocRoller library (only ROCm 7.0+)
|
# NOTE: Include rocRoller library (only ROCm 7.0+)
|
||||||
librocroller*.so* \
|
librocroller*.so* \
|
||||||
|
# NOTE: Include libdnnl for migraphx CPU device type support
|
||||||
|
libdnnl*.so* \
|
||||||
/opt/rocm-dist/opt/rocm-$ROCM/lib/
|
/opt/rocm-dist/opt/rocm-$ROCM/lib/
|
||||||
|
|
||||||
# Copy ROCm HIP include (only ROCm 7.2+)
|
# Copy ROCm HIP include (only ROCm 7.2+)
|
||||||
@ -74,6 +76,18 @@ RUN mkdir -p /opt/rocm-dist/etc/ld.so.conf.d/
|
|||||||
RUN printf "/opt/rocm/lib\n/opt/rocm/lib/migraphx/lib" | tee /opt/rocm-dist/etc/ld.so.conf.d/rocm.conf
|
RUN printf "/opt/rocm/lib\n/opt/rocm/lib/migraphx/lib" | tee /opt/rocm-dist/etc/ld.so.conf.d/rocm.conf
|
||||||
RUN printf "/usr/lib/llvm-14/lib" | tee /opt/rocm-dist/etc/ld.so.conf.d/llvm.conf
|
RUN printf "/usr/lib/llvm-14/lib" | tee /opt/rocm-dist/etc/ld.so.conf.d/llvm.conf
|
||||||
|
|
||||||
|
# Build migraphx python bindings for our python version
|
||||||
|
# NOTE: AMDMIGraphX repo does not contain a tag for all ROCm versions, hence the fallback to lower patch versions
|
||||||
|
WORKDIR /tmp/migraphx_py
|
||||||
|
RUN apt install -qq -y --no-install-recommends g++ python3-dev python3-pybind11 cmake make
|
||||||
|
COPY docker/rocm/CMakeLists.txt .
|
||||||
|
RUN for i in 0 1 2; do \
|
||||||
|
VERSION=$(echo $ROCM | awk -F. -v d=$i '{OFS="."; $3-=d; print}') && \
|
||||||
|
wget https://raw.githubusercontent.com/ROCm/AMDMIGraphX/refs/tags/rocm-$VERSION/src/py/migraphx_py.cpp && break; \
|
||||||
|
done
|
||||||
|
RUN cmake . -DCMAKE_INSTALL_PREFIX=/opt/rocm-dist/opt/rocm-$ROCM && \
|
||||||
|
make install
|
||||||
|
|
||||||
#######################################################################
|
#######################################################################
|
||||||
FROM deps AS deps-prelim
|
FROM deps AS deps-prelim
|
||||||
|
|
||||||
@ -83,6 +97,8 @@ RUN apt update && \
|
|||||||
apt install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers && \
|
apt install -qq -y -t bookworm-backports mesa-va-drivers mesa-vulkan-drivers && \
|
||||||
# Install C++ standard library headers for HIPRTC kernel compilation fallback
|
# Install C++ standard library headers for HIPRTC kernel compilation fallback
|
||||||
apt install -qq -y libstdc++-12-dev libnuma1 && \
|
apt install -qq -y libstdc++-12-dev libnuma1 && \
|
||||||
|
# Install libomp for migraphx CPU device type support
|
||||||
|
apt install -qq -y libomp-dev && \
|
||||||
rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /opt/frigate
|
WORKDIR /opt/frigate
|
||||||
|
|||||||
@ -85,18 +85,10 @@ def apply_amd_compatibility_env_vars():
|
|||||||
logger.info(f"Setting AMD environment variables for {gfx_id} compatibility...")
|
logger.info(f"Setting AMD environment variables for {gfx_id} compatibility...")
|
||||||
|
|
||||||
configs = {
|
configs = {
|
||||||
("gfx902", "gfx909", "gfx90c", "gfx1035", "gfx1103"): {
|
("gfx902", "gfx909", "gfx90c"): {
|
||||||
"HSA_ENABLE_SDMA": "0", # Disable System Direct Memory Access for APU compatibility
|
"HSA_ENABLE_SDMA": "0", # Disable System Direct Memory Access for APU compatibility
|
||||||
"MIGRAPHX_DISABLE_MIOPEN_FUSION": "1", # Disable unsupported fusion optimization
|
|
||||||
},
|
|
||||||
("gfx902", "gfx909", "gfx90c"): { # Vega
|
|
||||||
"HSA_OVERRIDE_GFX_VERSION": "9.0.0", # Force compatible GFX version
|
"HSA_OVERRIDE_GFX_VERSION": "9.0.0", # Force compatible GFX version
|
||||||
},
|
"MIGRAPHX_DISABLE_MIOPEN_FUSION": "1", # Disable unsupported fusion optimization
|
||||||
("gfx1035"): { # 680M
|
|
||||||
"HSA_OVERRIDE_GFX_VERSION": "10.3.0", # Force compatible GFX version
|
|
||||||
},
|
|
||||||
("gfx1103"): { # 780M
|
|
||||||
"HSA_OVERRIDE_GFX_VERSION": "11.0.0", # Force compatible GFX version
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for gfx_ids, vars in configs.items():
|
for gfx_ids, vars in configs.items():
|
||||||
|
|||||||
263
frigate/detectors/plugins/migraphx.py
Normal file
263
frigate/detectors/plugins/migraphx.py
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
import ctypes
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from pydantic import Field
|
||||||
|
|
||||||
|
from frigate.const import MODEL_CACHE_DIR
|
||||||
|
from frigate.detectors.detection_api import DetectionApi
|
||||||
|
from frigate.detectors.detector_config import BaseDetectorConfig
|
||||||
|
from frigate.detectors.detector_utils import apply_amd_compatibility_env_vars
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
DETECTOR_KEY = "migraphx"
|
||||||
|
|
||||||
|
class migraphxDetectorConfig(BaseDetectorConfig):
|
||||||
|
type: Literal[DETECTOR_KEY]
|
||||||
|
device: str = Field(default="gpu", title="Device Type (gpu or cpu)")
|
||||||
|
conserve_cpu: bool = Field(default=True, title="Conserve CPU at the expense of latency")
|
||||||
|
fast_math: bool = Field(default=True, title="Optimize math functions to use faster approximate versions")
|
||||||
|
exhaustive_tune: bool = Field(default=False, title="Use exhaustive search to find the fastest generated kernels")
|
||||||
|
|
||||||
|
class migraphxDetector(DetectionApi):
|
||||||
|
type_key = DETECTOR_KEY
|
||||||
|
|
||||||
|
def __init__(self, detector_config: migraphxDetectorConfig):
|
||||||
|
super().__init__(detector_config)
|
||||||
|
|
||||||
|
apply_amd_compatibility_env_vars()
|
||||||
|
|
||||||
|
try:
|
||||||
|
sys.path.append("/opt/rocm/lib")
|
||||||
|
import migraphx
|
||||||
|
logger.info(f"migraphx: loaded migraphx module")
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
logger.error("migraphx: module loading failed, missing migraphx")
|
||||||
|
raise
|
||||||
|
|
||||||
|
assert detector_config.model.path is not None, "No model.path configured, please configure model.path"
|
||||||
|
assert detector_config.model.labelmap_path is not None, "No model.labelmap_path configured, please configure model.labelmap_path"
|
||||||
|
|
||||||
|
device = detector_config.device.lower()
|
||||||
|
assert device in ["gpu", "cpu"], "Invalid device set, must be gpu (default) or cpu"
|
||||||
|
|
||||||
|
try:
|
||||||
|
if device == "gpu":
|
||||||
|
if detector_config.conserve_cpu:
|
||||||
|
logger.info(f"migraphx: enabling hipDeviceScheduleYield to forcefully conserve CPU (conserve_cpu=true)")
|
||||||
|
ctypes.CDLL('/opt/rocm/lib/libamdhip64.so').hipSetDeviceFlags(4)
|
||||||
|
else:
|
||||||
|
# Default to hipDeviceScheduleAuto
|
||||||
|
ctypes.CDLL('/opt/rocm/lib/libamdhip64.so').hipSetDeviceFlags(0)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"migraphx: could not set hipSetDeviceFlags: {e}")
|
||||||
|
|
||||||
|
cache_dir = Path(MODEL_CACHE_DIR) / "migraphx"
|
||||||
|
cache_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
path = Path(detector_config.model.path)
|
||||||
|
filename = path.stem + ('_cpu' if device == 'cpu' else ('_tune' if detector_config.exhaustive_tune else ''))
|
||||||
|
mxr_cache_path = cache_dir / f"{filename}.mxr"
|
||||||
|
mxr_path = path.parent / f"{filename}.mxr"
|
||||||
|
|
||||||
|
if mxr_cache_path.exists():
|
||||||
|
logger.info(f"migraphx: loading compiled model from cache {mxr_cache_path}")
|
||||||
|
self.model = migraphx.load(str(mxr_cache_path))
|
||||||
|
|
||||||
|
elif mxr_path.exists():
|
||||||
|
logger.info(f"migraphx: loading compiled model from {mxr_path}")
|
||||||
|
self.model = migraphx.load(str(mxr_path))
|
||||||
|
|
||||||
|
else:
|
||||||
|
logger.info(f"migraphx: loading model from {path}")
|
||||||
|
if path.suffix == '.onnx':
|
||||||
|
self.model = migraphx.parse_onnx(str(path))
|
||||||
|
else:
|
||||||
|
raise Exception(f"migraphx: unknown model format {path}")
|
||||||
|
|
||||||
|
self.model_output_shapes = self.model.get_output_shapes()
|
||||||
|
self.model_is_nms = self.model_output_shapes[0].lens()[2] <= 7
|
||||||
|
assert self.model_is_nms is False, "migraphx does not currently support NMS models"
|
||||||
|
|
||||||
|
logger.info(f"migraphx: compiling the model... (fast_math: {detector_config.fast_math}, exhaustive_tune: {detector_config.exhaustive_tune})")
|
||||||
|
self.model.compile(
|
||||||
|
migraphx.get_target(device),
|
||||||
|
offload_copy=True,
|
||||||
|
fast_math=detector_config.fast_math,
|
||||||
|
exhaustive_tune=detector_config.exhaustive_tune
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"migraphx: saving compiled model into cache {mxr_cache_path}")
|
||||||
|
migraphx.save(self.model, str(mxr_cache_path))
|
||||||
|
|
||||||
|
self.model_param_name = self.model.get_parameter_names()[0]
|
||||||
|
self.model_input_shape_obj = self.model.get_parameter_shapes()[self.model_param_name]
|
||||||
|
self.model_input_shape = tuple(self.model_input_shape_obj.lens())
|
||||||
|
self.model_input_arg = migraphx.generate_argument(self.model_input_shape_obj)
|
||||||
|
self.model_input_array = np.frombuffer(self.model_input_arg, dtype=np.float32).reshape(self.model_input_shape)
|
||||||
|
|
||||||
|
logger.info(f"migraphx: model loaded (input: {self.model_input_shape})")
|
||||||
|
|
||||||
|
def preprocess(self, tensor_input):
|
||||||
|
|
||||||
|
# Ensure we have a 4D tensor
|
||||||
|
if tensor_input.ndim == 3:
|
||||||
|
tensor_input = np.expand_dims(tensor_input, axis=0)
|
||||||
|
|
||||||
|
target_n, target_c, target_h, target_w = self.model_input_shape
|
||||||
|
|
||||||
|
# Ensure input shape is NCHW
|
||||||
|
if tensor_input.shape[1] == target_c:
|
||||||
|
# Already (N, C, H, W), do nothing
|
||||||
|
pass
|
||||||
|
elif tensor_input.shape[3] == target_c:
|
||||||
|
# From (N, H, W, C) to (N, C, H, W)
|
||||||
|
tensor_input = tensor_input.transpose(0, 3, 1, 2)
|
||||||
|
elif tensor_input.shape[2] == target_c:
|
||||||
|
# From (N, H, C, W) to (N, C, H, W)
|
||||||
|
tensor_input = tensor_input.transpose(0, 2, 1, 3)
|
||||||
|
|
||||||
|
np.copyto(self.model_input_array, tensor_input)
|
||||||
|
|
||||||
|
def detect_raw(self, tensor_input):
|
||||||
|
self.preprocess(tensor_input)
|
||||||
|
|
||||||
|
outputs = self.model.run({self.model_param_name: self.model_input_arg})
|
||||||
|
|
||||||
|
tensor_output = np.frombuffer(
|
||||||
|
outputs[0],
|
||||||
|
dtype=np.float32
|
||||||
|
).reshape(outputs[0].get_shape().lens())
|
||||||
|
|
||||||
|
return self.optimized_process_yolo(tensor_output)
|
||||||
|
|
||||||
|
def optimized_process_yolo(
|
||||||
|
self,
|
||||||
|
tensor_output,
|
||||||
|
confidence_threshold=0.4,
|
||||||
|
intersection_over_union_threshold=0.4,
|
||||||
|
top_k=100
|
||||||
|
):
|
||||||
|
# Transpose the raw output so each row represents one candidate detection
|
||||||
|
# Typical YOLO format: [batch, features, candidates] -> [candidates, features]
|
||||||
|
candidate_detections = tensor_output[0].T
|
||||||
|
|
||||||
|
# Extract class probability scores
|
||||||
|
class_probability_matrix = candidate_detections[:, 4:]
|
||||||
|
|
||||||
|
# Identify the highest class score and its index for every candidate box
|
||||||
|
max_class_scores = np.max(class_probability_matrix, axis=1)
|
||||||
|
|
||||||
|
# Create a boolean mask to filter out low-confidence predictions immediately
|
||||||
|
confidence_mask = max_class_scores > confidence_threshold
|
||||||
|
|
||||||
|
# Early exit if no detections meet the minimum confidence requirements
|
||||||
|
if not np.any(confidence_mask):
|
||||||
|
return np.zeros((20, 6), dtype=np.float32)
|
||||||
|
|
||||||
|
# Filter the detections, scores, and class IDs using the boolean mask
|
||||||
|
filtered_detections = candidate_detections[confidence_mask]
|
||||||
|
filtered_scores = max_class_scores[confidence_mask]
|
||||||
|
|
||||||
|
# Limit detections to top_k, reducing the set
|
||||||
|
if len(filtered_scores) > top_k:
|
||||||
|
partition_idx = np.argpartition(-filtered_scores, top_k)[:top_k]
|
||||||
|
filtered_detections = filtered_detections[partition_idx]
|
||||||
|
filtered_scores = filtered_scores[partition_idx]
|
||||||
|
|
||||||
|
# Obtain class IDs of reduced set
|
||||||
|
filtered_class_ids = np.argmax(filtered_detections[:, 4:], axis=1)
|
||||||
|
|
||||||
|
# YOLO boxes are typically [center_x, center_y, width, height]
|
||||||
|
center_coordinates_and_dimensions = filtered_detections[:, :4]
|
||||||
|
|
||||||
|
# Pre-calculate an inversion scale to convert pixels to 0.0-1.0 range
|
||||||
|
# This replaces multiple division operations with a single multiplication later
|
||||||
|
normalization_scale_vector = np.array([
|
||||||
|
1.0 / self.width,
|
||||||
|
1.0 / self.height,
|
||||||
|
1.0 / self.width,
|
||||||
|
1.0 / self.height
|
||||||
|
], dtype=np.float32)
|
||||||
|
|
||||||
|
# Calculate half-widths and half-heights to find box corners
|
||||||
|
half_dimensions = center_coordinates_and_dimensions[:, 2:4] * 0.5
|
||||||
|
|
||||||
|
# Initialize a buffer for the corner-format boxes [x1, y1, x2, y2]
|
||||||
|
corner_format_boxes = np.empty_like(center_coordinates_and_dimensions)
|
||||||
|
|
||||||
|
# Calculate Top-Left corners (x1, y1)
|
||||||
|
corner_format_boxes[:, :2] = (center_coordinates_and_dimensions[:, :2] - half_dimensions)
|
||||||
|
|
||||||
|
# Calculate Bottom-Right corners (x2, y2)
|
||||||
|
corner_format_boxes[:, 2:4] = (center_coordinates_and_dimensions[:, :2] + half_dimensions)
|
||||||
|
|
||||||
|
# Transform pixel coordinates into normalized values (0.0 to 1.0)
|
||||||
|
normalized_boxes = corner_format_boxes * normalization_scale_vector
|
||||||
|
|
||||||
|
# Apply Non-Maximum Suppression (NMS) to remove redundant, overlapping boxes
|
||||||
|
# returns indices of detections to keep
|
||||||
|
nms_kept_indices = self.optimized_nms(
|
||||||
|
normalized_boxes,
|
||||||
|
filtered_scores,
|
||||||
|
intersection_over_union_threshold
|
||||||
|
)
|
||||||
|
|
||||||
|
# Limit the number of detections to the top 20
|
||||||
|
number_of_detections_to_save = min(len(nms_kept_indices), 20)
|
||||||
|
top_detection_indices = nms_kept_indices[:number_of_detections_to_save]
|
||||||
|
|
||||||
|
# Pre-allocate a fixed-size result array (20 detections, 6 columns each)
|
||||||
|
# Column structure: [class_id, confidence, y1, x1, y2, x2]
|
||||||
|
results = np.zeros((20, 6), dtype=np.float32)
|
||||||
|
|
||||||
|
# Bulk assign data into results using the NMS indices
|
||||||
|
# We explicitly swap X and Y during assignment to meet the [y1, x1, y2, x2] requirement
|
||||||
|
results[:number_of_detections_to_save, 0] = filtered_class_ids[top_detection_indices] # class_id
|
||||||
|
results[:number_of_detections_to_save, 1] = filtered_scores[top_detection_indices] # confidence
|
||||||
|
|
||||||
|
# Coordinate Mapping
|
||||||
|
results[:number_of_detections_to_save, 2] = normalized_boxes[top_detection_indices, 1] # y1
|
||||||
|
results[:number_of_detections_to_save, 3] = normalized_boxes[top_detection_indices, 0] # x1
|
||||||
|
results[:number_of_detections_to_save, 4] = normalized_boxes[top_detection_indices, 3] # y2
|
||||||
|
results[:number_of_detections_to_save, 5] = normalized_boxes[top_detection_indices, 2] # x2
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def optimized_nms(self, boxes, scores, iou_threshold):
|
||||||
|
x1 = boxes[:, 0]
|
||||||
|
y1 = boxes[:, 1]
|
||||||
|
x2 = boxes[:, 2]
|
||||||
|
y2 = boxes[:, 3]
|
||||||
|
areas = (x2 - x1) * (y2 - y1)
|
||||||
|
order = scores.argsort()[::-1]
|
||||||
|
keep = []
|
||||||
|
|
||||||
|
while order.size > 0:
|
||||||
|
i = order[0]
|
||||||
|
keep.append(i)
|
||||||
|
if order.size == 1: break
|
||||||
|
|
||||||
|
# Calculate intersection
|
||||||
|
xx1 = np.maximum(x1[i], x1[order[1:]])
|
||||||
|
yy1 = np.maximum(y1[i], y1[order[1:]])
|
||||||
|
xx2 = np.minimum(x2[i], x2[order[1:]])
|
||||||
|
yy2 = np.minimum(y2[i], y2[order[1:]])
|
||||||
|
|
||||||
|
w = np.maximum(0.0, xx2 - xx1)
|
||||||
|
h = np.maximum(0.0, yy2 - yy1)
|
||||||
|
inter = w * h
|
||||||
|
|
||||||
|
# IoU = Inter / (Area1 + Area2 - Inter)
|
||||||
|
ovr = inter / (areas[i] + areas[order[1:]] - inter)
|
||||||
|
|
||||||
|
# Filter indices
|
||||||
|
inds = np.where(ovr <= iou_threshold)[0]
|
||||||
|
order = order[inds + 1]
|
||||||
|
|
||||||
|
return keep
|
||||||
@ -39,7 +39,6 @@ class ONNXDetector(DetectionApi):
|
|||||||
path = detector_config.model.path
|
path = detector_config.model.path
|
||||||
logger.info(f"ONNX: loading {detector_config.model.path}")
|
logger.info(f"ONNX: loading {detector_config.model.path}")
|
||||||
|
|
||||||
# Import get_optimized_runner AFTER setting AMD compatibility env vars
|
|
||||||
from frigate.detectors.detection_runners import get_optimized_runner
|
from frigate.detectors.detection_runners import get_optimized_runner
|
||||||
|
|
||||||
self.runner = get_optimized_runner(
|
self.runner = get_optimized_runner(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user