// frigate/docker/rocm/migraphx/targets/gpu/jit/pooling.cpp

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include <algorithm>

#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
// Direct includes for helpers used below (find_permutation, reorder_shape,
// reorder_dims, each_args, interpolate_string, to_string_range); assumed to
// live in these MIGraphX headers rather than arriving only transitively.
#include <migraphx/functional.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
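
// HIP source for the generated pooling kernel. The ${...} fields (algo,
// group_size, op, window, stride, padding) are template placeholders filled
// in by interpolate_string() in compile_op() below.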
// NOLINTNEXTLINE
static const char* const pooling_kernel = R"__migraphx__(
#include <migraphx/kernels/pooling.hpp>
#include <migraphx/kernels/ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
MIGRAPHX_GLOBAL void pooling_kernel(void* in_data, void* output)
{
    transform_args(make_tensors(), rotate_last())(in_data, output)([](auto&&... xs) {
        pooling<${algo}, ${group_size}>(
            ${op},
            make_window(index_ints<${window}>{}, index_ints<${stride}>{}, index_ints<${padding}>{}),
            xs...);
    });
}
}
} // namespace migraphx
)__migraphx__";
struct pooling_compiler : compiler<pooling_compiler>
{
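    // Width of a subwave reduction: the largest power of two that is <= n and
    // strictly less than the device's wavefront size.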
    static std::size_t compute_subwave_size(context& ctx, std::size_t n)
    {
        std::size_t max_wavefront_size = ctx.get_current_device().get_wavefront_size();
        std::size_t wavefront_size     = 1;
        while(wavefront_size <= n and wavefront_size < max_wavefront_size)
            wavefront_size *= 2;
        return wavefront_size / 2;
    }
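
    // Describes how one pooling window is reduced. The defaults select a plain
    // per-lane reduction; the constructor below upgrades to a subwave or block
    // reduction depending on the window size.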
    struct algorithm
    {
        std::string name        = "reduce::lane";
        std::size_t reduce_size = 1;
        std::size_t block_size  = 256;
        std::size_t group_size  = 1;
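
        // Largest power of two (up to 2 * max_group_size) that evenly divides
        // the innermost output dimension. Not referenced on the current
        // compile path, which keeps the default group_size of 1.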
        static std::size_t compute_group_size(const shape& output)
        {
            auto n                           = output.lens().back();
            const std::size_t max_group_size = 32;
            std::size_t group_size           = 1;
            while((n % (group_size * 2) == 0) and group_size <= max_group_size)
                group_size *= 2;
            return group_size;
        }

        algorithm() {}
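
        // Choose a strategy from the input layout and pooling window: keep the
        // lane default unless the innermost dimension is packed; windows wider
        // than a wavefront get a block reduction, otherwise a subwave reduction
        // sized to the window.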
        algorithm(context& ctx, const shape& input, const std::vector<std::size_t>& window)
        {
            if(input.strides().back() != 1)
                return;
            std::size_t max_wavefront_size = ctx.get_current_device().get_wavefront_size();
            auto wsize                     = window.back();
            if(wsize > max_wavefront_size)
            {
                block_size  = compute_block_size(ctx, wsize, 256);
                reduce_size = block_size;
                name        = "reduce::block";
            }
            else
            {
                block_size  = max_wavefront_size;
                reduce_size = compute_subwave_size(ctx, wsize);
                name        = "reduce::subwave<" + to_string(reduce_size) + ">";
            }
        }
    };
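
    // Reorder the shapes and the matching per-dimension attributes (padding,
    // stride, window) into a common permutation so the kernel sees a single
    // consistent dimension order.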
    template <class... Ts>
    static void normalize(std::vector<shape>& inputs, Ts&... xs)
    {
        auto perm = find_permutation(inputs);
        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto s) {
            return reorder_shape(s, perm);
        });
        each_args([&](auto& dims) { dims = reorder_dims(dims, perm); }, xs...);
    }
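
    // Operator names handled by this compiler.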
    std::vector<std::string> names() const { return {"pooling"}; }

    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
    {
        hip_compile_options options;
        const auto& out_s      = inputs.back();
        options.inputs         = inputs;
        options.output         = out_s;
        options.kernel_name    = "pooling_kernel";
        options.virtual_inputs = inputs;

        auto ndim      = out_s.ndim();
        auto pool_ndim = ndim - 2;
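
        // Pooling attributes only cover the spatial dimensions, so prepend
        // defaults for the batch and channel dimensions to align each vector
        // with the full tensor rank.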
        auto read_value = [&](const std::string& name, std::size_t def) {
            if(v.contains(name))
            {
                std::vector<std::size_t> result(2, def);
                auto x = v.at(name).to_vector<std::size_t>();
                if(x.size() >= pool_ndim)
                    result.insert(result.end(), x.begin(), x.begin() + pool_ndim);
                return result;
            }
            else
            {
                std::vector<std::size_t> result(ndim, def);
                return result;
            }
        };

        auto padding = read_value("padding", 0);
        auto stride  = read_value("stride", 1);
        auto window  = read_value("lengths", 1);
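
        // "mode" may be stored as a string or as a pooling_mode enum; folding
        // count_include_pad into the name selects a distinct kernel functor
        // (e.g. average_include_pad_pool).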
        const auto& mode_v = v.at("mode");
        std::string mode =
            mode_v.is_string() ? mode_v.get_string() : to_string(mode_v.to<op::pooling_mode>());
        bool count_include_pad = v.get("count_include_pad", false);
        if(count_include_pad and mode == "average")
            mode = "average_include_pad";
        std::string op = mode + "_pool";
        if(mode == "lpnorm")
            op += "<" + v.at("lp_order").to<std::string>() + ">";
        algorithm algo{};
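
        // Launch reduce_size threads per output element, with group_size
        // elements sharing one group; with the defaults this is one thread
        // per output element.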
        options.set_launch_params(
            v,
            compute_global_for(ctx, (out_s.elements() / algo.group_size) * algo.reduce_size, 256),
            algo.block_size);
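
        // Canonicalize the virtual input layouts together with the
        // per-dimension attributes derived from them before they are baked
        // into the kernel source.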
        normalize(options.virtual_inputs, padding, stride, window);
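
        // Substitute the template placeholders; "${op}" expands to a
        // value-initialized functor such as max_pool{} or average_pool{}.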
        auto src = interpolate_string(pooling_kernel,
                                      {{"op", op + "{}"},
                                       {"algo", algo.name},
                                       {"group_size", to_string(algo.group_size)},
                                       {"window", to_string_range(window)},
                                       {"stride", to_string_range(stride)},
                                       {"padding", to_string_range(padding)}});

        return compile_hip_code_object(ctx, src, options);
    }
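
    // Called when the GPU target compiles a pooling instruction; forwards its
    // input shapes and attributes to compile_op().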
    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
    {
        return compile_op(ctx, to_shapes(ins->inputs()), op.to_value());
    }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx