diff --git a/docker/rocm/migraphx/targets/cpu/CMakeLists.txt b/docker/rocm/migraphx/targets/cpu/CMakeLists.txt
new file mode 100644
index 000000000..558e35387
--- /dev/null
+++ b/docker/rocm/migraphx/targets/cpu/CMakeLists.txt
@@ -0,0 +1,105 @@
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+
+include(CheckCXXCompilerFlag)
+
+add_library(migraphx_cpu
+    allocate.cpp
+    allocation_model.cpp
+    binary.cpp
+    concat.cpp
+    convolution.cpp
+    copy.cpp
+    deconvolution.cpp
+    dnnl.cpp
+    eltwise.cpp
+    erf.cpp
+    fmod.cpp
+    fuse_ops.cpp
+    gather.cpp
+    gemm.cpp
+    layernorm.cpp
+    logsoftmax.cpp
+    lowering.cpp
+    lrn.cpp
+    mod.cpp
+    preallocate.cpp
+    pooling.cpp
+    reduction.cpp
+    reorder.cpp
+    softmax.cpp
+    sub.cpp
+    target.cpp
+    write_literals.cpp
+)
+set_target_properties(migraphx_cpu PROPERTIES EXPORT_NAME cpu)
+rocm_set_soversion(migraphx_cpu ${MIGRAPHX_SO_VERSION})
+
+set(MIGRAPHX_ENABLE_ZENDNN Off CACHE BOOL "")
+
+if(MIGRAPHX_ENABLE_ZENDNN)
+    find_path(ZENDNN_INC_PATH zendnn.hpp)
+    find_library(ZENDNN_LIB amdZenDNN)
+    find_library(BLIS_LIB blis)
+else()
+    find_package(dnnl REQUIRED)
+endif()
+
+rocm_clang_tidy_check(migraphx_cpu)
+if(MIGRAPHX_ENABLE_ZENDNN)
+    target_compile_definitions(migraphx_cpu PRIVATE -DMIGRAPHX_ENABLE_ZENDNN)
+    target_include_directories(migraphx_cpu PRIVATE ${ZENDNN_INC_PATH})
+    message(STATUS "ZENDNN_LIB: ${ZENDNN_LIB}")
+    target_link_libraries(migraphx_cpu PRIVATE ${BLIS_LIB})
+    target_link_libraries(migraphx_cpu PRIVATE ${ZENDNN_LIB})
+else()
+    target_link_libraries(migraphx_cpu PUBLIC DNNL::dnnl)
+endif()
+target_link_libraries(migraphx_cpu PRIVATE migraphx)
+
+migraphx_generate_export_header(migraphx_cpu)
+
+find_package(OpenMP)
+if(WIN32)
+    target_link_libraries(migraphx_cpu PUBLIC libomp)
+    target_include_directories(migraphx_cpu PUBLIC ${OpenMP_CXX_INCLUDE_DIRS})
+    target_compile_options(migraphx_cpu PUBLIC ${OpenMP_CXX_FLAGS})
+else()
+    target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
+    # Add library path to rpath to work around issues with our broken packages
+    foreach(LIBRARY ${OpenMP_CXX_LIBRARIES})
+        if(LIBRARY MATCHES "libomp")
+            get_filename_component(LIBRARY_PATH "${LIBRARY}" PATH)
+            target_link_libraries(migraphx_cpu PUBLIC -Wl,-rpath=${LIBRARY_PATH} -Wl,-rpath-link=${LIBRARY_PATH})
+        endif()
+    endforeach()
+endif()
+
+rocm_install_targets(
+  PRIVATE
+  TARGETS migraphx_cpu
+  INCLUDE
+  ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
diff --git a/docker/rocm/migraphx/targets/cpu/allocate.cpp b/docker/rocm/migraphx/targets/cpu/allocate.cpp
new file mode 100644
index 000000000..938139c9b
--- /dev/null
+++ b/docker/rocm/migraphx/targets/cpu/allocate.cpp
@@ -0,0 +1,60 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace cpu {
+
+struct cpu_allocate : auto_register_op<cpu_allocate>
+{
+    shape s;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.s, "shape"));
+    }
+
+    std::string name() const { return "cpu::allocate"; }
+    shape compute_shape(const std::vector<shape>& inputs) const
+    {
+        check_shapes{inputs, *this}.has(0);
+        return s;
+    }
+    argument compute(context&, const shape& output_shape, const std::vector<argument>&) const
+    {
+        argument result{output_shape};
+        return result;
+    }
+};
+
+} // namespace cpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
diff --git a/docker/rocm/migraphx/targets/cpu/allocation_model.cpp b/docker/rocm/migraphx/targets/cpu/allocation_model.cpp
new file mode 100644
index 000000000..bd6833fb9
--- /dev/null
+++ b/docker/rocm/migraphx/targets/cpu/allocation_model.cpp
@@ -0,0 +1,46 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +std::string cpu_allocation_model::name() const { return "cpu::allocate"; } +operation cpu_allocation_model::allocate(const shape& s) const +{ + return make_op(name(), {{"shape", to_value(s)}}); +} + +operation cpu_allocation_model::preallocate(const shape& s, const std::string& id) const +{ + return make_op("cpu::preallocate", {{"shape", to_value(s)}, {"id", id}}); +} + +std::string cpu_allocation_model::copy() const { return "cpu::copy"; } + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/binary.cpp b/docker/rocm/migraphx/targets/cpu/binary.cpp new file mode 100644 index 000000000..e663f50e7 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/binary.cpp @@ -0,0 +1,83 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_binary : dnnl_op +{ + std::string algo; + template + static auto reflect(Self& self, F f) + { + return pack_join(self.reflect_base(self, f), pack(f(self.algo, "algo"))); + } + + std::string group() const { return this->name() + "::" + algo; } + + std::string name() const { return "dnnl::binary"; } + + shape compute_shape(std::vector inputs) const + { + // Compensate for allocation + inputs.pop_back(); + check_shapes{this->trim_post_op_inputs(inputs), *this}.has(2); + auto s0 = inputs.at(0); + auto s1 = inputs.at(1); + auto r = s0; + if(s0 != s1 or not s0.packed()) + { + if(s0.packed() != s1.packed()) + { + r = s0.packed() ? s0 : s1; + } + else if(s0.broadcasted() != s1.broadcasted()) + { + r = s0.broadcasted() ? 
s1.with_lens(s0.lens()) : s0.with_lens(s0.lens()); + } + else + { + r = {s0.type(), s0.lens()}; + } + } + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(r, inputs)); + return r; + } + + dnnl::binary::desc get_desc(const std::unordered_map& m) const + { + return {to_dnnl_algo(algo), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_1)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/concat.cpp b/docker/rocm/migraphx/targets/cpu/concat.cpp new file mode 100644 index 000000000..0c7cdc954 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/concat.cpp @@ -0,0 +1,67 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_concat : dnnl_extend_op +{ + std::vector arg_map(int size) const + { + std::vector result(size); + std::iota(result.begin(), result.end(), MIGRAPHX_DNNL_PREFIX(ARG_MULTIPLE_SRC)); + return result; + } + // Custom desc class since its missing in dnnl + struct desc + { + dnnl::memory::desc dst; + std::size_t axis = 1; + std::vector srcs; + }; + desc get_desc(const std::unordered_map& m) const + { + std::vector srcs; + srcs.reserve(m.size() - 1); + + for(auto i = 0; i < m.size() - 1; i++) + { + srcs.push_back(m.at(MIGRAPHX_DNNL_PREFIX(ARG_MULTIPLE_SRC) + i)); + } + return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), std::size_t(op.axis), srcs}; + } + + auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const + { + return dnnl::concat::primitive_desc(d.dst, d.axis, d.srcs, get_dnnl_context().engine, attr); + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/convolution.cpp b/docker/rocm/migraphx/targets/cpu/convolution.cpp new file mode 100644 index 000000000..42e533003 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/convolution.cpp @@ -0,0 +1,86 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_convolution + : dnnl_extend_op +{ + std::vector arg_map(int) const + { + return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)}; + } + + shape adjust_shape(const shape& x, int i, const shape& output) const + { + auto s = base_adjust_shape(x, output); + if(i == 1 and op.group > 1) + { + // TODO: Add support for transposed weights + if(not s.standard()) + MIGRAPHX_THROW("Weights for grouped convolution must be standard"); + auto lens = s.lens(); + lens.insert(lens.begin(), op.group); + lens.at(1) /= op.group; + return shape{s.type(), lens}; + } + return s; + } + + dnnl::convolution_forward::desc + get_desc(const std::unordered_map& m) const + { + // In DNNL dilation is zero-based + auto dilation = op.dilation; + std::transform( + dilation.begin(), dilation.end(), dilation.begin(), [](auto x) { return x - 1; }); + auto kdims = op.kdims(); + std::vector padding_l(op.padding.begin(), op.padding.begin() + kdims); + std::vector padding_r(op.padding.begin() + kdims, op.padding.end()); + return {dnnl::prop_kind::forward_inference, + dnnl::algorithm::convolution_auto, + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), + to_dnnl_dims(op.stride), + to_dnnl_dims(dilation), + to_dnnl_dims(padding_l), + to_dnnl_dims(padding_r)}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/copy.cpp b/docker/rocm/migraphx/targets/cpu/copy.cpp new file mode 100644 index 000000000..4c4af2b71 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/copy.cpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct cpu_copy : reduce_dims_base, auto_register_op +{ + template + static auto reflect(Self&, F) + { + return pack(); + } + + std::string name() const { return "cpu::copy"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(2); + return inputs.at(1); + } + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + argument result = get_arg(args, args.size() - 1); + + visit_all(result, get_arg(args, 0))([&](auto output, auto input) { + pointwise(output, input)(ctx, output.get_shape(), 1024, [](auto& y, auto x) { y = x; }); + }); + + return result.reshape(output_shape); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/deconvolution.cpp b/docker/rocm/migraphx/targets/cpu/deconvolution.cpp new file mode 100644 index 000000000..3398036e1 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/deconvolution.cpp @@ -0,0 +1,76 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_deconvolution + : dnnl_extend_op +{ + std::vector arg_map(int) const + { + return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)}; + } + + shape adjust_shape(const shape& x, int i, const shape& output) const + { + auto s = base_adjust_shape(x, output); + if(i == 1) + { + // The input and output channels are flipped for dnnl + auto lens = s.lens(); + std::swap(lens[0], lens[1]); + auto strides = s.strides(); + std::swap(strides[0], strides[1]); + return {s.type(), lens, strides}; + } + return s; + } + + dnnl::deconvolution_forward::desc + get_desc(const std::unordered_map& m) const + { + // In DNNL dilation is zero-based + auto dilation = op.dilation; + std::transform( + dilation.begin(), dilation.end(), dilation.begin(), [](auto x) { return x - 1; }); + return {dnnl::prop_kind::forward_inference, + dnnl::algorithm::deconvolution_direct, + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), + to_dnnl_dims(op.stride), + to_dnnl_dims(dilation), + to_dnnl_dims(op.padding), + to_dnnl_dims(op.padding)}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/dnnl.cpp b/docker/rocm/migraphx/targets/cpu/dnnl.cpp new file mode 100644 index 000000000..dc252cdfe --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/dnnl.cpp @@ -0,0 +1,205 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include + +#if defined(__GNUC__) && __GNUC__ <= 5 +namespace std { +#ifdef MIGRAPHX_ENABLE_ZENDNN +namespace dnnl = zendnn; +#endif +template <> +struct hash +{ + using argument_type = dnnl::algorithm; + using result_type = std::size_t; + result_type operator()(const argument_type& x) const noexcept + { + return std::hash>{}( + static_cast>(x)); + } +}; + +} // namespace std +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +dnnl_context& get_dnnl_context() +{ + static dnnl_context ctx{}; // NOLINT + return ctx; +} + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wswitch-enum" +#endif +dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t) +{ + using dt = dnnl::memory::data_type; + using st = shape::type_t; + switch(t) + { + case st::half_type: return dt::f16; + case st::float_type: return dt::f32; + case st::int32_type: return dt::s32; + case st::int8_type: return dt::s8; + case st::uint8_type: return dt::u8; + case st::fp8e4m3fnuz_type: MIGRAPHX_THROW("fp8e4m3fnuz unsupported in DNNL"); + default: MIGRAPHX_THROW("Unsupported data type"); + } +} +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +dnnl::memory::format_tag to_dnnl_memory_format_tag(std::size_t n) +{ + switch(n) + { + case 1: return dnnl::memory::format_tag::a; + case 2: return dnnl::memory::format_tag::ab; + case 3: return dnnl::memory::format_tag::abc; + case 4: return dnnl::memory::format_tag::abcd; + case 5: return dnnl::memory::format_tag::abcde; + case 6: return dnnl::memory::format_tag::abcdef; + default: MIGRAPHX_THROW("Unsupported tensor size: " + std::to_string(n)); + } +} + +dnnl::memory::desc to_dnnl_memory_desc(const shape& s) +{ + return {to_dnnl_dims(s.lens()), to_dnnl_memory_data_type(s.type()), to_dnnl_dims(s.strides())}; +} + +dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a) +{ + return {desc, get_dnnl_context().engine, a.data()}; +} + +dnnl::memory to_dnnl_memory(const argument& a) +{ + return to_dnnl_memory(to_dnnl_memory_desc(a.get_shape()), a); +} + +// clang-format off +#define MIGRAPHX_VISIT_DNNL_ALGO(m) \ + m(undef) \ + m(convolution_auto) \ + m(convolution_direct) \ + m(convolution_winograd) \ + m(deconvolution_direct) \ + m(deconvolution_winograd) \ + m(eltwise_relu) \ + m(eltwise_tanh) \ + m(eltwise_elu) \ + m(eltwise_square) \ + m(eltwise_abs) \ + m(eltwise_sqrt) \ + m(eltwise_swish) \ + m(eltwise_linear) \ + m(eltwise_bounded_relu) \ + m(eltwise_soft_relu) \ + m(eltwise_logistic) \ + m(eltwise_exp) \ + m(eltwise_gelu) \ + m(eltwise_gelu_tanh) \ + m(eltwise_gelu_erf) \ + m(eltwise_log) \ + m(eltwise_clip) \ + m(eltwise_pow) \ + m(eltwise_round) \ + m(eltwise_relu_use_dst_for_bwd) \ + m(eltwise_tanh_use_dst_for_bwd) \ + m(eltwise_elu_use_dst_for_bwd) \ + m(eltwise_sqrt_use_dst_for_bwd) \ + m(eltwise_logistic_use_dst_for_bwd) \ + m(eltwise_exp_use_dst_for_bwd) \ + m(lrn_across_channels) \ + m(lrn_within_channel) \ + m(pooling_max) \ + m(pooling_avg) \ + m(pooling_avg_include_padding) \ + m(pooling_avg_exclude_padding) \ + m(vanilla_rnn) \ + m(vanilla_lstm) \ + m(vanilla_gru) \ + m(lbr_gru) \ + m(binary_add) \ + m(binary_mul) \ + m(binary_max) \ + m(binary_min) \ + m(binary_div) \ + m(resampling_nearest) \ + m(resampling_linear) \ + m(reduction_max) \ + m(reduction_min) \ + m(reduction_sum) \ + m(reduction_mul) \ + m(reduction_mean) \ + m(reduction_norm_lp_max) \ + m(reduction_norm_lp_sum) \ + m(reduction_norm_lp_power_p_max) \ + m(reduction_norm_lp_power_p_sum) +// 
clang-format on + +const std::unordered_map& dnnl_algo_map() +{ + static const std::unordered_map m = { +#define MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR(x) {#x, dnnl::algorithm::x}, + MIGRAPHX_VISIT_DNNL_ALGO(MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR) +#undef MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR + }; + return m; +} + +dnnl::algorithm to_dnnl_algo(const std::string& name) +{ + if(dnnl_algo_map().count(name) == 0) + MIGRAPHX_THROW("Missing dnnl algo: " + name); + return dnnl_algo_map().at(name); +} + +const std::unordered_map& dnnl_algo_string_map() +{ + static const std::unordered_map m = { +#define MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR(x) {dnnl::algorithm::x, #x}, + MIGRAPHX_VISIT_DNNL_ALGO(MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR) +#undef MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR + }; + return m; +} + +std::string to_string(const dnnl::algorithm& algo) +{ + if(dnnl_algo_string_map().count(algo) == 0) + return "unknown_" + std::to_string(static_cast(algo)); + return dnnl_algo_string_map().at(algo); +} + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/eltwise.cpp b/docker/rocm/migraphx/targets/cpu/eltwise.cpp new file mode 100644 index 000000000..5b328cb7e --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/eltwise.cpp @@ -0,0 +1,73 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_eltwise : dnnl_op +{ + std::string algo; + float alpha = 0; + float beta = 0; + template + static auto reflect(Self& self, F f) + { + return pack_join(self.reflect_base(self, f), + pack(f(self.algo, "algo"), f(self.alpha, "alpha"), f(self.beta, "beta"))); + } + + std::string group() const { return this->name() + "::" + algo; } + + std::string name() const { return "dnnl::eltwise"; } + + shape compute_shape(std::vector inputs) const + { + // Compensate for allocation + inputs.pop_back(); + check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1).packed(); + auto s = inputs.at(0); + auto r = s; + if(not s.packed()) + r = shape{s.type(), s.lens()}; + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(r, inputs)); + return r; + } + + dnnl::eltwise_forward::desc get_desc(const std::unordered_map& m) const + { + return {dnnl::prop_kind::forward_inference, + to_dnnl_algo(algo), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), + alpha, + beta}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/erf.cpp b/docker/rocm/migraphx/targets/cpu/erf.cpp new file mode 100644 index 000000000..9fa34b4fa --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/erf.cpp @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +template struct cpu_unary; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/fmod.cpp b/docker/rocm/migraphx/targets/cpu/fmod.cpp new file mode 100644 index 000000000..ade453147 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/fmod.cpp @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +template struct cpu_binary; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/fuse_ops.cpp b/docker/rocm/migraphx/targets/cpu/fuse_ops.cpp new file mode 100644 index 000000000..a4f8fe78f --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/fuse_ops.cpp @@ -0,0 +1,134 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_DNNL_POST_OPS_WORKAROUND); + +MIGRAPHX_PRED_MATCHER(has_post_ops, instruction_ref ins) +{ + auto v = ins->get_operator().to_value(); + return v.contains("post_ops"); +} + +MIGRAPHX_PRED_MATCHER(without_post_ops, instruction_ref ins) +{ + auto v = ins->get_operator().to_value(); + return v.contains("post_ops") and v["post_ops"].empty(); +} + +bool workaround_dnnl_broken_post_ops(const operation& op, const operation& post_op) +{ + if(contains({"dnnl::dot", "dnnl::convolution"}, op.name())) + return true; + auto pv = post_op.to_value(); + if(not pv.at("post_ops").empty()) + return true; + auto v = op.to_value(); + auto last_op = v.at("post_ops").empty() ? v : v.at("post_ops").back(); + auto algo = last_op.contains("algo") ? last_op.at("algo").to() : op.name(); + auto post_algo = pv["algo"].to(); + if(starts_with(algo, "eltwise") and starts_with(post_algo, "eltwise")) + return true; + if(algo == post_algo) + return true; + return false; +} + +operation merge_post_ops(const operation& op, const operation& post_op) +{ + auto pv = post_op.to_value(); + auto v = op.to_value(); + v["post_ops"].push_back({{"algo", pv["algo"]}, + {"alpha", pv["alpha"].value_or(0.0f)}, + {"beta", pv["beta"].value_or(0.0f)}}); + auto post_ops = pv.at("post_ops"); + for(const auto& po : post_ops) + v["post_ops"].push_back(po); + return make_op(op.name(), v); +} + +struct find_post_ops +{ + context* ctx = nullptr; + match::any_matcher matcher() const + { + if(enabled(MIGRAPHX_DISABLE_DNNL_POST_OPS_WORKAROUND{})) + return match::name("dnnl::eltwise", + "dnnl::binary")(match::arg(0)(has_post_ops(), match::used_once())); + else + { + auto dnnl_binary = match::name("dnnl::binary")(without_post_ops(), match::used_once()); + return match::name("dnnl::eltwise")(without_post_ops(), match::arg(0)(dnnl_binary)); + } + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto x_ins = ins->inputs().front(); + auto x = x_ins->get_operator(); + + if(workaround_dnnl_broken_post_ops(x, ins->get_operator())) + return; + + auto op = merge_post_ops(x, ins->get_operator()); + auto inputs = x_ins->inputs(); + inputs.back() = ins->inputs().back(); + if(ins->name() == "dnnl::binary") + inputs.insert(std::prev(inputs.end()), ins->inputs().at(1)); + auto input_shapes = to_shapes(inputs); + auto new_shape = try_compute_shape(op, input_shapes); + if(new_shape.empty() or new_shape.front() != ins->get_shape()) + return; + auto info = compile(op, *ctx, new_shape.front(), input_shapes); + if(info.contains("impl") and starts_with(info.at("impl").to(), "ref:")) + return; + m.replace_instruction(ins, op, inputs); + } +}; + +void fuse_ops::apply(module& m) const +{ + for(std::size_t i = 0; i < 4; i++) + { + match::find_matches(m, find_post_ops{ctx}); + dead_code_elimination{}.apply(m); + } +} + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/gather.cpp b/docker/rocm/migraphx/targets/cpu/gather.cpp new file mode 100644 index 000000000..40bc556b9 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/gather.cpp @@ -0,0 +1,88 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct cpu_gather : auto_register_op +{ + op::gather op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "cpu::" + op.name(); } + shape compute_shape(std::vector inputs) const + { + // Compensate for allocation + inputs.pop_back(); + check_shapes(inputs, *this).standard(); + return migraphx::compute_shape(op, inputs); + } + + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + std::size_t nelements = output_shape.elements(); + auto lens = args[0].get_shape().lens(); + auto axis_dim_size = lens[op.axis]; + lens[op.axis] = args[1].get_shape().elements(); + shape out_comp{output_shape.type(), lens}; + + visit_all(args.back(), args[0])([&](auto output, auto input) { + args[1].visit([&](auto indices) { + const auto* indices_ptr = indices.data(); + auto* output_ptr = output.data(); + ctx.bulk_execute(nelements, 1024, [=](auto start, auto end) { + for(auto i = start; i < end; i++) + { + auto idx = out_comp.multi(i); + auto in_index = indices_ptr[idx[op.axis]]; + in_index = (in_index < 0) ? in_index + axis_dim_size : in_index; + idx[op.axis] = in_index; + output_ptr[i] = input(idx.begin(), idx.end()); + } + }); + }); + }); + + return args.back(); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/gemm.cpp b/docker/rocm/migraphx/targets/cpu/gemm.cpp new file mode 100644 index 000000000..50f42d5fe --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/gemm.cpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_gemm : dnnl_extend_op +{ + std::vector arg_map(int) const + { + return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), + MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS), + MIGRAPHX_DNNL_PREFIX(ARG_BIAS)}; + } + + template + void required(const check_shapes& cs) const + { + cs.not_broadcasted(); + } + + dnnl::matmul::desc get_desc(const std::unordered_map& m) const + { + return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/allocation_model.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/allocation_model.hpp new file mode 100644 index 000000000..4ee101331 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/allocation_model.hpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_ALLOCATION_MODEL_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_ALLOCATION_MODEL_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct cpu_allocation_model +{ + std::string name() const; + std::string copy() const; + operation allocate(const shape& s) const; + operation preallocate(const shape& s, const std::string& id) const; + bool needs_out_params() const { return false; } +}; + +} // namespace cpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/context.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/context.hpp new file mode 100644 index 000000000..461dbcb39 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/context.hpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP +#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct context +{ + void finish() const {} + + template + void bulk_execute(std::size_t n, std::size_t min_grain, F f) + { + cpu::parallel_for(n, min_grain, f); + } + + template + void bulk_execute(std::size_t n, F f) + { + this->bulk_execute(n, 256, f); + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/dnnl.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/dnnl.hpp new file mode 100644 index 000000000..b05cad852 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/dnnl.hpp @@ -0,0 +1,441 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_DNNL_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_DNNL_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef MIGRAPHX_ENABLE_ZENDNN +#include +#else +#include +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +#ifdef MIGRAPHX_ENABLE_ZENDNN +namespace dnnl = zendnn; +#define MIGRAPHX_CONCAT_PREFIX(b) ZENDNN_##b // NOLINT +#else +#define MIGRAPHX_CONCAT_PREFIX(b) DNNL_##b // NOLINT +#endif +#define MIGRAPHX_DNNL_PREFIX(b) MIGRAPHX_CONCAT_PREFIX(b) // NOLINT + +struct dnnl_context +{ + dnnl::engine engine; + dnnl::stream stream; + dnnl_context() : engine(dnnl::engine::kind::cpu, 0), stream(engine) {} +}; + +dnnl_context& get_dnnl_context(); + +dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t); + +dnnl::memory::format_tag to_dnnl_memory_format_tag(std::size_t n); + +template +inline dnnl::memory::dims to_dnnl_dims(R&& r) +{ + return {r.begin(), r.end()}; +} + +dnnl::memory::desc to_dnnl_memory_desc(const shape& s); + +dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a); + +dnnl::memory to_dnnl_memory(const argument& a); + +dnnl::algorithm to_dnnl_algo(const std::string& name); + +std::string to_string(const dnnl::algorithm& algo); + +struct post_op : reflect_equality, reflect_stream +{ + std::string algo; + float alpha = 0; + float beta = 0; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.algo, "algo"), f(self.alpha, "alpha"), f(self.beta, "beta")); + } +}; + +template +struct execute_wrapper +{ + F f; + argument operator()(context&, const std::vector& args) const { return f(args); } +}; + +template +execute_wrapper make_execute_wrapper(F f) +{ + return {std::move(f)}; +} + +template +struct dnnl_op : auto_register_op +{ + std::vector post_ops; + std::function& args)> execute; + + template + static auto reflect_base(Self& self, F f) + { + return pack(f(self.post_ops, "post_ops")); + } + + template + static auto reflect(Self& self, F f) + { + return reflect_base(self, f); + } + + std::string group() const + { + const auto& self = static_cast(*this); + return self.name(); + } + + value attributes() const + { + std::vector names; + std::transform(post_ops.begin(), post_ops.end(), std::back_inserter(names), [](auto&& op) { + return op.algo; + }); + const auto& self = static_cast(*this); + auto g = self.group(); + if(not names.empty()) + g += 
"<" + join_strings(names, ",") + ">"; + return {{"group", g}}; + } + + std::size_t get_extra_post_op_args() const + { + return std::count_if(post_ops.begin(), post_ops.end(), [](const auto& po) { + return contains(po.algo, "binary"); + }); + } + + static std::size_t get_binary_post_op_arg(std::size_t pos) + { + return MIGRAPHX_DNNL_PREFIX(ARG_ATTR_MULTIPLE_POST_OP)(pos) | // NOLINT + MIGRAPHX_DNNL_PREFIX(ARG_SRC_1); // NOLINT + } + + static std::vector to_shapes(const std::vector& args) + { + std::vector shapes(args.size()); + std::transform(args.begin(), args.end(), shapes.begin(), [](const argument& a) { + return a.get_shape(); + }); + return shapes; + } + static std::string impl(const Primitive& prim) + { + auto desc = prim.get_primitive_desc(); + const char* str = nullptr; +#ifdef MIGRAPHX_ENABLE_ZENDNN + zendnn_primitive_desc_query( + desc, zendnn_query_impl_info_str, 0, reinterpret_cast(&str)); +#else + dnnl_primitive_desc_query(desc, dnnl_query_impl_info_str, 0, reinterpret_cast(&str)); +#endif + return str == nullptr ? "" : str; + } + // Map arg index to arg in dnnl + std::vector arg_map(int size) const + { + std::vector result(size); + std::iota(result.begin(), result.end(), MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)); + return result; + } + shape base_adjust_shape(const shape& s, const shape& output) const + { + if(s.broadcasted()) + { + auto lens = s.lens(); + auto strides = s.strides(); + std::transform(strides.begin(), + strides.end(), + lens.begin(), + lens.begin(), + [](auto stride, auto len) -> std::size_t { + if(stride == 0) + return 1; + else + return len; + }); + // Use the permutation of the output + return output.with_lens(s.type(), lens); + } + return s; + } + template + void for_each_post_op(F f) const + { + int i = 0; + for(auto&& op : post_ops) + { + if(contains(op.algo, "binary")) + { + f(op, get_binary_post_op_arg(i)); + } + else + { + f(op, -1); + } + i++; + } + } + shape adjust_shape(const shape& s, int, const shape& output) const + { + return base_adjust_shape(s, output); + } + std::vector create_arg_map(std::size_t input_size) const + { + const auto& self = static_cast(*this); + auto npost_ops = get_extra_post_op_args(); + auto prim_input_size = input_size - npost_ops; + auto m = self.arg_map(prim_input_size); + for_each_post_op([&](auto&&, auto arg) { + if(arg < 0) + return; + m.push_back(arg); + }); + return m; + } + std::unordered_map + to_memory_desc(const shape& output_shape, const std::vector& inputs) const + { + const auto& self = static_cast(*this); + std::unordered_map result; + result[MIGRAPHX_DNNL_PREFIX(ARG_DST)] = + to_dnnl_memory_desc(self.adjust_shape(output_shape, inputs.size(), output_shape)); + auto m = create_arg_map(inputs.size()); + assert(m.size() >= inputs.size()); + for(int i = 0; i < inputs.size(); i++) + { + result[m[i]] = to_dnnl_memory_desc(self.adjust_shape(inputs[i], i, output_shape)); + } + return result; + } + dnnl::primitive_attr + get_primitive_attr(const std::unordered_map& m) const + { + dnnl::primitive_attr result; + dnnl::post_ops po; + for_each_post_op([&](auto&& op, auto arg) { + if(contains(op.algo, "binary_add")) + { + auto desc = m.at(arg); + if(desc == m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))) + po.append_sum(1.0f); + else + po.append_binary(to_dnnl_algo(op.algo), m.at(arg)); + } + else if(contains(op.algo, "binary")) + { + po.append_binary(to_dnnl_algo(op.algo), m.at(arg)); + } + else if(contains(op.algo, "eltwise")) + po.append_eltwise(1.0f, to_dnnl_algo(op.algo), op.alpha, op.beta); + else + MIGRAPHX_THROW("Unknown post op algo: 
" + op.algo); + }); + result.set_post_ops(po); + return result; + } + template + auto get_primitive_desc(const T& desc, const dnnl::primitive_attr& attr) const + -> decltype(typename Primitive::primitive_desc(desc, attr, get_dnnl_context().engine)) + { + return typename Primitive::primitive_desc(desc, attr, get_dnnl_context().engine); + } + Primitive get_primitive(const std::unordered_map& m) const + { + const auto& self = static_cast(*this); + auto desc = self.get_desc(m); + auto attr = MIGRAPHX_ASSERT_NO_THROW(this->get_primitive_attr(m)); + auto pd = self.get_primitive_desc(desc, attr); + return Primitive(pd); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + return execute(ctx, args); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } + value compile(context&, const shape& output_shape, std::vector inputs) + { + // Compensate for allocation + inputs.pop_back(); + auto md = to_memory_desc(output_shape, inputs); + auto prim = get_primitive(md); + auto impl_name = impl(prim); + return {{"impl", impl_name}}; + } + + void finalize(context&, const shape& output_shape, std::vector inputs) + { + // Compensate for allocation + inputs.pop_back(); + const auto& self = static_cast(*this); + auto name = self.name(); + auto md = to_memory_desc(output_shape, inputs); + auto prim = get_primitive(md); + auto arg_lookup = create_arg_map(inputs.size()); +#ifndef NDEBUG + auto prim_attr = get_primitive_attr(md); +#endif + execute = make_execute_wrapper([=](const std::vector& args) { +#ifndef NDEBUG + // Check that the memory descriptors have not changed + auto debug_args = args; + debug_args.pop_back(); + auto debug_md = to_memory_desc(output_shape, to_shapes(debug_args)); + for(auto&& p : debug_md) + { + if(md.count(p.first) == 0) + MIGRAPHX_THROW(name + + ": Missing memory descriptor for: " + std::to_string(p.first)); + if(p.second == md.at(p.first)) + continue; + MIGRAPHX_THROW(name + + ": Memory descriptor has changed for: " + std::to_string(p.first)); + } + // Check post_ops args are correct + auto pos = prim_attr.get_post_ops(); + auto prim_input_size = inputs.size() - this->get_extra_post_op_args(); + int j = 0; + for(int i = 0; i < pos.len(); i++) + { + auto arg = j + prim_input_size; + auto kind = pos.kind(i); + std::string mesg = + "Post op " + std::to_string(i) + "@" + std::to_string(arg) + ": "; + try + { + dnnl::algorithm algo; + dnnl::memory::desc mdesc; + float scale = 0; + float alpha = 0; + float beta = 0; + if(kind == dnnl::primitive::kind::binary) + { + pos.get_params_binary(i, algo, mdesc); + if(mdesc != md.at(arg_lookup.at(arg))) + MIGRAPHX_THROW(mesg + + "Memory descriptor doesn't match for binary post op"); + j++; + } + else if(kind == dnnl::primitive::kind::eltwise) + { + pos.get_params_eltwise(i, scale, algo, alpha, beta); + } + else if(kind == dnnl::primitive::kind::sum) + { + pos.get_params_sum(i, scale); + algo = dnnl::algorithm::binary_add; + } + else + { + MIGRAPHX_THROW("Unknown kind"); + } + if(to_dnnl_algo(post_ops[i].algo) != algo) + MIGRAPHX_THROW(mesg + "Algorithm doesn't match for post op " + + post_ops[i].algo + " != " + to_string(algo)); + } + catch(const dnnl::error& e) + { + MIGRAPHX_THROW(mesg + "Failed to get post ops argument " + ": " + e.what()); + } + } +#endif + std::unordered_map m; + m[MIGRAPHX_DNNL_PREFIX(ARG_DST)] = + to_dnnl_memory(md.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), args.back()); + for(int i = 0; i < args.size() - 1; i++) + m[arg_lookup[i]] = 
to_dnnl_memory(md.at(arg_lookup[i]), args[i]); + prim.execute(get_dnnl_context().stream, m); + return args.back(); + }); + } + std::vector trim_post_op_inputs(const std::vector& inputs) const + { + auto prim_input_size = inputs.size() - this->get_extra_post_op_args(); + return {inputs.begin(), inputs.begin() + prim_input_size}; + } +}; + +template +struct dnnl_extend_op : dnnl_op +{ + Op op; + + template + static auto reflect(Self& self, F f) + { + return pack_join(self.reflect_base(self, f), migraphx::reflect(self.op, f)); + } + + // dnnl has some issues with non-packed inputs + template + void required(const check_shapes& cs) const + { + cs.packed_or_broadcasted(); + } + + std::string name() const { return "dnnl::" + op.name(); } + shape compute_shape(std::vector inputs) const + { + const auto& self = static_cast(*this); + // Compensate for allocation + inputs.pop_back(); + self.required(check_shapes(inputs, self)); + auto r = migraphx::compute_shape(op, this->trim_post_op_inputs(inputs)); + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(r, inputs)); + return r; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/fuse_ops.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/fuse_ops.hpp new file mode 100644 index 000000000..e0918846a --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/fuse_ops.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP +#define MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace cpu { + +struct MIGRAPHX_CPU_EXPORT fuse_ops +{ + context* ctx = nullptr; + std::string name() const { return "cpu::fuse_ops"; } + void apply(module& m) const; +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/lowering.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/lowering.hpp new file mode 100644 index 000000000..d4b96c543 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/lowering.hpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP +#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace cpu { + +struct MIGRAPHX_CPU_EXPORT lowering +{ + std::string name() const { return "cpu::lowering"; } + void apply(module& m) const; +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/parallel.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/parallel.hpp new file mode 100644 index 000000000..cb3b9ed64 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/parallel.hpp @@ -0,0 +1,125 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_PARALLEL_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_PARALLEL_HPP + +// #define MIGRAPHX_DISABLE_OMP +#include +#include +#include +#ifdef MIGRAPHX_DISABLE_OMP +#include +#else + +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif +#include +#ifdef __clang__ +#pragma clang diagnostic pop +#endif + +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +#ifdef MIGRAPHX_DISABLE_OMP + +inline std::size_t max_threads() { return std::thread::hardware_concurrency(); } + +template +void parallel_for_impl(std::size_t n, std::size_t threadsize, F f) +{ + if(threadsize <= 1) + { + f(std::size_t{0}, n); + } + else + { + std::vector threads(threadsize); +// Using const here causes gcc 5 to ICE +#if(!defined(__GNUC__) || __GNUC__ != 5) + const +#endif + std::size_t grainsize = std::ceil(static_cast(n) / threads.size()); + + std::size_t work = 0; + std::generate(threads.begin(), threads.end(), [=, &work] { + auto result = joinable_thread([=]() mutable { + assert(work < n); + f(work, std::min(n, work + grainsize)); + }); + work += grainsize; + return result; + }); + // cppcheck-suppress unsignedLessThanZero + assert(work >= n); + } +} +#else + +inline std::size_t max_threads() { return omp_get_max_threads(); } + +template +void parallel_for_impl(std::size_t n, std::size_t threadsize, F f) +{ + if(threadsize <= 1) + { + f(std::size_t{0}, n); + } + else + { + std::size_t grainsize = std::ceil(static_cast(n) / threadsize); +#pragma omp parallel for num_threads(threadsize) schedule(static, 1) + for(std::size_t tid = 0; tid < threadsize; tid++) + { + std::size_t work = tid * grainsize; + assert(work < n); + f(work, std::min(n, work + grainsize)); + } + } +} +#endif +template +void parallel_for(std::size_t n, std::size_t min_grain, F f) +{ + const auto threadsize = std::min(max_threads(), n / min_grain); + parallel_for_impl(n, threadsize, f); +} + +template +void parallel_for(std::size_t n, F f) +{ + const int min_grain = 8; + parallel_for(n, min_grain, f); +} + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/pointwise.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/pointwise.hpp new file mode 100644 index 000000000..ece5498c8 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/pointwise.hpp @@ -0,0 +1,414 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct multi_index +{ + constexpr multi_index() = default; + + multi_index(const shape& s, std::size_t i) : n(s.lens().size()) + { + assert(n < max_size); + std::copy(s.lens().begin(), s.lens().end(), dims); + s.multi_copy(i, index, index + max_size); + } + + constexpr std::size_t size() const { return n; } + + constexpr std::size_t* begin() { return index; } + constexpr const std::size_t* begin() const { return index; } + + constexpr std::size_t* end() { return index + size(); } + constexpr const std::size_t* end() const { return index + size(); } + + std::size_t offset(const shape& s) const { return s.index(begin(), end()); } + + constexpr void carry() + { + std::size_t overflow = 0; + for(std::ptrdiff_t i = size() - 1; i > 0; i--) + { + auto z = index[i] + overflow; + // Reset overflow + overflow = 0; + // Compute overflow using while loop instead of mod + // overflow = z / dims[i]; + // z = z % dims[i]; + while(z >= dims[i]) + { + z -= dims[i]; + overflow += 1; + } + index[i] = z; + // Exit if there is no overflow + if(overflow == 0) + return; + } + index[0] += overflow; + } + + constexpr void increment(std::size_t i) + { + index[size() - 1] += i; + carry(); + } + + constexpr multi_index& operator+=(std::size_t i) + { + increment(i); + return *this; + } + + constexpr multi_index& operator++() + { + increment(1); + return *this; + } + multi_index operator++(int) // NOLINT + { + multi_index result = *this; + increment(1); + return result; + } + + private: + static const std::size_t max_size = 5; + std::size_t index[max_size] = {}; + std::size_t dims[max_size] = {}; + std::size_t n = 0; +}; + +struct reduce_dims_base +{ + std::vector reduce_shapes; + + void finalize(context&, const shape&, const std::vector& inputs) + { + reduce_shapes = reduce_dims(inputs); + } + + argument get_arg(const std::vector& args, std::size_t i) const + { + if(reduce_shapes.empty()) + return args[i]; + return args.at(i).reshape(reduce_shapes.at(i)); + } + + argument get_output() const + { + argument a{reduce_shapes[0]}; + return a; + } +}; + +template +struct vec +{ + using array_type = std::array; + using vector_type __attribute__((vector_size(N * sizeof(T)))) = T; + union + { 
+ array_type array; + vector_type vector; + }; + + static_assert(sizeof(array_type) == sizeof(vector_type), "Not the same size"); +}; + +template +constexpr std::integral_constant vec_size(const T&) +{ + return {}; +} + +template +constexpr std::integral_constant vec_size(const vec&) +{ + return {}; +} + +template +constexpr std::size_t vec_size() +{ + return decltype(vec_size(std::declval())){}; +} + +template () > 0))> +void vec_apply(F f, V& v, Vs... vs) +{ + assert(all_of({vec_size()...}, [&](auto n) { return n == vec_size(); })); + assert(vec_size() == v.array.size()); + for(std::size_t i = 0; i < vec_size(); i++) + f(v.array[i], vs.vector[i]...); +} + +template () == 0))> +void vec_apply(F f, V& v, Vs&... vs) +{ + f(v, vs...); +} + +inline std::size_t find_packed_len(const shape& s) +{ + for(std::size_t i = 0; i < s.lens().size(); i++) + { + if(s.lens()[i] > 1 and s.strides()[i] == 1) + { + return i; + } + } + return -1; +} + +template +shape vectorize(const shape& s) +{ + assert(s.standard() or s.broadcasted()); + auto lens = s.lens(); + if(s.broadcasted()) + { + auto n = find_packed_len(s); + assert(n != -1); + assert((lens[n] % N) == 0); + lens[n] /= N; + return {s.type(), lens, s.strides()}; + } + assert((lens.back() % N) == 0); + lens.back() /= N; + return {s.type(), lens}; +} + +template +tensor_view> vectorize(tensor_view tv) +{ + return {vectorize(tv.get_shape()), reinterpret_cast*>(tv.data())}; +} + +template +struct is_vector_type : std::false_type +{ +}; + +template <> +struct is_vector_type : std::true_type +{ +}; + +template +struct is_vector_tensor_view : and_{}...> +{ +}; + +template +bool is_vectorizable(const Xs&... xs) +{ + return all_of({xs...}, [](const auto& s) { + if(s.standard() and (s.lens().back() % N) == 0) + return true; + if(s.broadcasted()) + { + auto n = std::inner_product(s.lens().begin(), + s.lens().end(), + s.strides().begin(), + 0, + std::plus<>{}, + [&](auto len, auto stride) -> std::size_t { + if(stride > 0 and len == 1) + return 0; + return stride; + }); + if(n == 1) + { + auto i = find_packed_len(s); + assert(i != -1); + return (s.lens()[i] % N) == 0; + } + } + return false; + }); +} + +template {})> +auto auto_vectorize(const shape& base_shape, Ts... xs) +{ + return [=](auto f) { + if(is_vectorizable<32>(base_shape, xs.get_shape()...)) + f(vectorize<32>(base_shape), vectorize<32>(xs)...); + else if(is_vectorizable<8>(base_shape, xs.get_shape()...)) + f(vectorize<8>(base_shape), vectorize<8>(xs)...); + else + f(base_shape, xs...); + }; +} + +template {})> +auto auto_vectorize(const shape& base_shape, Ts... xs) +{ + return [=](auto f) { f(base_shape, xs...); }; +} + +template +bool is_standard_offset(const X& x, const Xs&... xs) +{ + if(all_of({x, xs...}, [](const auto& s) { return s.standard(); })) + return true; + if(all_of({x, xs...}, [](const auto& s) { return s.packed(); }) and + all_of({xs...}, [&](const auto& s) { return s == x; })) + return true; + return false; +} + +template +auto pointwise_apply(Ts... 
ts) +{ + return [=](context& ctx, const shape& base_shape, std::size_t min_grain, auto f) mutable { + if(is_standard_offset(ts.get_shape()...)) + { + ctx.bulk_execute(base_shape.elements(), min_grain, [=](auto start, auto end) mutable { + for(auto i = start; i < end; i++) + { + vec_apply(f, ts.data()[i]...); + } + }); + } + else + { + assert(base_shape.lens().size() <= 6); + ctx.bulk_execute(base_shape.elements(), min_grain, [=](auto start, auto end) mutable { + multi_index mi(base_shape, start); + for(auto i = start; i < end; i++) + { + vec_apply(f, ts.data()[mi.offset(ts.get_shape())]...); + ++mi; + } + }); + } + }; +} + +template +auto pointwise(Ts... ts) +{ + return [=](context& ctx, const shape& base_shape, std::size_t min_grain, auto f) mutable { + auto_vectorize(base_shape, ts...)( + [&](auto bs, auto... xs) { pointwise_apply(xs...)(ctx, bs, min_grain, f); }); + }; +} + +template +struct cpu_unary : reduce_dims_base, auto_register_op> +{ + Op op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "cpu::" + op.name(); } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(2); + const auto& s = inputs.at(0); + return {s.type(), s.lens()}; + } + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + argument result = get_arg(args, args.size() - 1); + + visit_all(result, get_arg(args, 0))([&](auto output, auto input) { + auto op2 = op; + pointwise(output, input)( + ctx, output.get_shape(), 1024, [op2](auto& y, auto x) { y = op2.apply()(x); }); + }); + + return result.reshape(output_shape); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +template +struct cpu_binary : reduce_dims_base, auto_register_op> +{ + Op op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "cpu::" + op.name(); } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(3); + const auto& s = inputs.at(0); + return {s.type(), s.lens()}; + } + + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + argument result = get_arg(args, args.size() - 1); + + visit_all(result, get_arg(args, 0), get_arg(args, 1))( + [&](auto output, auto input1, auto input2) { + auto op2 = op; + pointwise(output, input1, input2)( + ctx, output.get_shape(), 1024, [op2](auto& z, auto x, auto y) { + z = op2.apply()(x, y); + }); + }); + + return result.reshape(output_shape); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/target.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/target.hpp new file mode 100644 index 000000000..589b680fe --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/target.hpp @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct pass; +namespace cpu { + +struct MIGRAPHX_CPU_EXPORT target +{ + std::string name() const; + std::vector get_passes(migraphx::context& gctx, const compile_options&) const; + migraphx::context get_context() const { return context{}; } + argument copy_to(const argument& arg) const { return arg; } + argument copy_from(const argument& arg) const { return arg; } + argument allocate(const shape& s) const; +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/write_literals.hpp b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/write_literals.hpp new file mode 100644 index 000000000..3c23fb14f --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/write_literals.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_WRITE_LITERALS_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_WRITE_LITERALS_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct module; +namespace cpu { + +struct write_literals +{ + std::string name() const { return "cpu::write_literals"; } + void apply(module& m) const; +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/cpu/layernorm.cpp b/docker/rocm/migraphx/targets/cpu/layernorm.cpp new file mode 100644 index 000000000..0d19eb827 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/layernorm.cpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_layernorm : dnnl_op +{ + float epsilon = 1e-12f; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.epsilon, "epsilon")); + } + + std::string name() const { return "dnnl::layernorm"; } + + shape compute_shape(std::vector inputs) const + { + // Compensate for allocation + inputs.pop_back(); + check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1); + auto s = inputs.at(0); + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(s, inputs)); + return s; + } + + dnnl::layer_normalization_forward::desc + get_desc(const std::unordered_map& m) const + { + return {dnnl::prop_kind::forward_inference, + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + 1e-12f, + dnnl::normalization_flags::none}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/logsoftmax.cpp b/docker/rocm/migraphx/targets/cpu/logsoftmax.cpp new file mode 100644 index 000000000..e4bb88dc8 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/logsoftmax.cpp @@ -0,0 +1,44 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_logsoftmax : dnnl_extend_op +{ + dnnl::logsoftmax_forward::desc + get_desc(const std::unordered_map& m) const + { + int axis = this->op.axis; + return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/lowering.cpp b/docker/rocm/migraphx/targets/cpu/lowering.cpp new file mode 100644 index 000000000..a68eae820 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/lowering.cpp @@ -0,0 +1,502 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +template +T zero(const T&) +{ + return T(0); +} + +template +typename std::conditional_t{}, std::make_signed, std::enable_if>:: + type + make_signed(T x) +{ + return x; +} + +struct cpu_im2col +{ + op::im2col op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + static std::string name() { return "cpu::im2col"; } + shape compute_shape(const std::vector& inputs) const + { + return op.normalize_compute_shape(inputs); + } + + argument compute(context&, const shape& output_shape, std::vector args) const + { + argument result{output_shape}; + auto input_shape = args[0].get_shape(); + auto weights_shape = args[1].get_shape(); + visit_all(result, args[0])([&](auto col, auto input) { + const std::size_t& height = input_shape.lens()[2]; + const std::size_t& width = input_shape.lens()[3]; + const std::size_t& channels = weights_shape.lens()[1]; + const std::size_t& kernel_h = weights_shape.lens()[2]; + const std::size_t& kernel_w = weights_shape.lens()[3]; + const std::size_t& pad_h = op.padding[0]; + const std::size_t& pad_w = op.padding[1]; + const std::size_t& stride_h = op.stride[0]; + const std::size_t& stride_w = op.stride[1]; + + long kdiv2_h = long(kernel_h) / 2; + long kdiv2_w = long(kernel_w) / 2; + // calculate output sizes + const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1; + const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1; + // account for padding for the starting position of the input pixels + long iinput = kdiv2_h - long(pad_h); + // loop over output pixels (ioutput, joutput) + for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h) + { + long jinput = kdiv2_w - long(pad_w); + for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w) + { + // compute linear index for output + std::size_t ldx = ioutput * col_width + joutput; + std::size_t p = 0; + dfor(channels, + kernel_h, + kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) { + auto idx = iinput + long(koffset) - kdiv2_h; + auto jdx = jinput + long(loffset) - kdiv2_w; + col(ldx, p) = + ((idx >= 0) and (idx < height) and (jdx >= 0) and (jdx < width)) + ? 
input(0, c, idx, jdx) + : 0; + p++; + }); + } + } + }); + return result; + } +}; +MIGRAPHX_REGISTER_OP(cpu_im2col) + +struct cpu_op +{ + operation op = op::identity{}; + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "cpu::op"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + argument compute(context&, const shape& output_shape, const std::vector& args) const + { + return op.compute(output_shape, args); + } + value to_value() const + { + value v; + v["name"] = op.name(); + v["operator"] = op.to_value(); + return v; + } + void from_value(const value& v) + { + op = make_op(v.at("name").to(), v.at("operator")); + } + friend std::ostream& operator<<(std::ostream& os, const cpu_op& x) + { + os << "cpu::" << x.op; + return os; + } +}; +MIGRAPHX_REGISTER_OP(cpu_op) + +struct cpu_pad +{ + op::pad op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "cpu::pad"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + argument compute(context&, const shape& output_shape, std::vector args) const + { + assert(output_shape.standard()); + argument result{output_shape}; + result.visit([&](auto output) { + using type = typename decltype(output)::value_type; + std::fill(output.begin(), output.end(), pad_clamp(op.value)); + }); + + visit_all(result, args[0])([&](auto output, auto input) { + shape_for_each(input.get_shape(), [&](const auto& idx) { + std::vector new_idx(idx.size()); + std::transform( + idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) { + return i + j; + }); + output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end()); + }); + }); + + return result; + } +}; +MIGRAPHX_REGISTER_OP(cpu_pad) + +struct cpu_rnn_var_sl_last_output +{ + op::rnn_var_sl_last_output op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "cpu::rnn_var_sl_last_output"; } + + shape compute_shape(std::vector inputs) const + { + return op.compute_shape(std::move(inputs)); + } + + argument compute(const shape& output_shape, std::vector args) const + { + argument result{output_shape}; + auto out_comp_lens = args[0].get_shape().lens(); + out_comp_lens[0] = 1; + shape out_comp_s{output_shape.type(), out_comp_lens}; + + visit_all(result, args[0])([&](auto output, auto input) { + args[1].visit([&](auto seq_lens) { + par_for(output_shape.elements(), [&](auto i) { + auto idx = out_comp_s.multi(i); + auto b = idx[2]; + if(op.direction == op::rnn_direction::reverse or idx[1] == 1) + { + idx[0] = 0; + } + else + { + idx[0] = seq_lens[b] - 1; + } + output[i] = input(idx.begin(), idx.end()); + }); + }); + }); + + return result; + } +}; +MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output) + +struct cpu_apply +{ + module* modl; + std::unordered_map> apply_map{}; + instruction_ref last{}; + + void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true) + { + apply_map.emplace(op_name, [=](instruction_ref ins) { + auto&& op = ins->get_operator(); + if(allocate) + return replace(ins, make_op(cpu_name, op.to_value())); + return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs()); + }); + } + + void extend_dnnl_algos(const std::string& dnnl_name, + const std::vector>& algos) + { + for(auto&& pp : algos) + { + 
std::string op_name = pp.first; + std::string algo = pp.second; + apply_map.emplace(op_name, [=](instruction_ref ins) { + auto v = ins->get_operator().to_value(); + if(not v.is_object()) + return ins; + v["algo"] = algo; + auto op = make_op(dnnl_name, v); + return replace(ins, op); + }); + } + } + + template + auto fuse_match(M matcher, const operation& op, const std::vector& bind_inputs) + { + return match::make_match_finder(matcher, [=](auto&, const auto& r) { + auto ins = r.result; + std::vector inputs; + std::transform(bind_inputs.begin(), + bind_inputs.end(), + std::back_inserter(inputs), + [&](const auto& s) { return r.instructions[s]; }); + inputs.push_back(this->insert_allocation(ins, ins->get_shape())); + modl->replace_instruction(ins, op, inputs); + }); + } + + void init() + { + extend_dnnl_algos("dnnl::binary", + { + {"add", "binary_add"}, + {"div", "binary_div"}, + {"max", "binary_max"}, + {"min", "binary_min"}, + {"mul", "binary_mul"}, + }); + + extend_dnnl_algos("dnnl::eltwise", + { + {"abs", "eltwise_abs"}, + {"elu", "eltwise_elu"}, + {"exp", "eltwise_exp"}, + {"log", "eltwise_log"}, + {"relu", "eltwise_relu"}, + {"sqrt", "eltwise_sqrt"}, + {"tanh", "eltwise_tanh"}, + }); + + extend_dnnl_algos("dnnl::reduction", + { + {"reduce_max", "reduction_max"}, + {"reduce_mean", "reduction_mean"}, + {"reduce_min", "reduction_min"}, + {"reduce_sum", "reduction_sum"}, + }); + extend_op("concat", "dnnl::concat"); + extend_op("contiguous", "dnnl::reorder"); + extend_op("convolution", "dnnl::convolution"); +#ifndef MIGRAPHX_ENABLE_ZENDNN + extend_op("convolution_backwards", "dnnl::convolution_backwards"); + extend_op("dot", "dnnl::dot"); +#endif + extend_op("erf", "cpu::erf"); + extend_op("gather", "cpu::gather"); + extend_op("logsoftmax", "dnnl::logsoftmax"); + extend_op("lrn", "dnnl::lrn"); + extend_op("softmax", "dnnl::softmax"); + + extend_op("im2col", "cpu::im2col", false); + extend_op("leaky_relu", "cpu::leaky_relu", false); + extend_op("pad", "cpu::pad", false); + extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false); + } + + void apply() + { + init(); + // Apply fusion matchers first + match::find_matches(*modl, + fuse_match(match::gelu_erf(), + make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}), + {"x"}), + fuse_match(match::gelu_tanh(), + make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}), + {"x"}), + fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"})); + // Apply these operators first so the inputs can be const folded + for(auto it : iterator_for(*modl)) + { + // skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8 + // supported yet. + if(std::any_of(it->inputs().begin(), it->inputs().end(), [](const auto& i) { + return contains(fp8_types{}.get(), i->get_shape().type()); + })) + continue; + if(it->name() == "pow") + { + apply_pow(it); + } + } + for(auto it : iterator_for(*modl)) + { + // skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8 + // supported yet. 
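+            // Added editorial note, not part of the original source: a minimal illustration of
+            // the guard below, assuming fp8_types{}.get() returns the list of fp8 shape type
+            // enums. An instruction such as add(fp8, fp8) is skipped here and keeps its
+            // reference implementation, while add(float, float) falls through and is lowered
+            // to the oneDNN/DNNL operators registered in apply_map.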
+ if(std::any_of(it->inputs().begin(), it->inputs().end(), [](const auto& i) { + return contains(fp8_types{}.get(), i->get_shape().type()); + })) + continue; + if(it->name() == "pooling") + { + apply_pooling(it); + } + else if(it->name() == "reshape") + { + apply_reshape(it); + } + else if(apply_map.count(it->name()) > 0) + { + apply_map.at(it->name())(it); + } + } + } + + instruction_ref apply_pow(instruction_ref ins) const + { + auto beta = read_scalar(ins->inputs()[1]); + if(beta.empty()) + return ins; + return replace(ins, + make_op("dnnl::eltwise", + {{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}), + {ins->inputs().front()}); + } + + // TODO: update lowering to run the reference + // code when OneDNN can't execute pooling for a CPU + + // OneDNN has a limitation on padding size for pooling. see + // https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#doxid-dev-guide-convolution + + // padding = {2}; stride = {1}; lengths = {3} succeeds in oneDNN but + // padding = {2}; stride = {1}; lengths = {2} fails. + // Also, the referenced documentation contains a max. dimension size of 14 for the kernel + // ("weights tensor") that MIGraphX doesn't enforce. + instruction_ref apply_pooling(instruction_ref ins) const + { + auto&& op = ins->get_operator(); + auto v = op.to_value(); + if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and + not v["ceil_mode"].to() and + v["mode"].to() != op::pooling_mode::lpnorm) + return replace(ins, make_op("dnnl::pooling", op.to_value())); + return ins; + } + /* + Lowers reshape copy operator to reshape lazy by inserting contiguous operators around it. + Contiguous ops will later by removed by eliminate_contiguous pass. + */ + instruction_ref apply_reshape(instruction_ref ins) const + { + std::vector before_contiguous_args = ins->inputs(); + auto before_alloc = + insert_allocation(ins, before_contiguous_args.front()->get_shape().as_standard()); + before_contiguous_args.push_back(before_alloc); + auto before_contig = + modl->insert_instruction(ins, make_op("dnnl::reorder"), {before_contiguous_args}); + + auto new_lazy_reshape = modl->insert_instruction( + ins, + make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}), + before_contig); + + std::vector after_contiguous_args = {new_lazy_reshape}; + auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape()); + after_contiguous_args.push_back(after_alloc); + return modl->replace_instruction(ins, make_op("dnnl::reorder"), after_contiguous_args); + } + + template + static std::vector read_scalar(instruction_ref ins) + { + if(ins->name() == "contiguous") + return read_scalar(ins->inputs().front()); + if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar()) + return {}; + auto r = ins->eval(); + if(r.empty()) + return {}; + return {r.at()}; + } + + instruction_ref replace(instruction_ref ins, const operation& op) const + { + return replace(ins, op, ins->inputs()); + } + + instruction_ref + replace(instruction_ref ins, const operation& op, std::vector inputs) const + { + inputs.push_back(insert_allocation(ins, ins->get_shape())); + return modl->replace_instruction(ins, op, inputs); + } + + instruction_ref insert_allocation(instruction_ref ins, const shape& s) const + { + return modl->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}})); + } +}; + +void lowering::apply(module& m) const { cpu_apply{&m}.apply(); } + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // 
namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/lrn.cpp b/docker/rocm/migraphx/targets/cpu/lrn.cpp new file mode 100644 index 000000000..bd4c27129 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/lrn.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_lrn : dnnl_extend_op +{ + dnnl::lrn_forward::desc get_desc(const std::unordered_map& m) const + { + return {dnnl::prop_kind::forward_inference, + dnnl::algorithm::lrn_across_channels, + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), + this->op.size, + this->op.alpha, + this->op.beta, + this->op.bias}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/mod.cpp b/docker/rocm/migraphx/targets/cpu/mod.cpp new file mode 100644 index 000000000..e28bdb19d --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/mod.cpp @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#include
+#include
+#include
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace cpu {
+
+template struct cpu_binary;
+
+} // namespace cpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
diff --git a/docker/rocm/migraphx/targets/cpu/pooling.cpp b/docker/rocm/migraphx/targets/cpu/pooling.cpp
new file mode 100644
index 000000000..d10ed75a6
--- /dev/null
+++ b/docker/rocm/migraphx/targets/cpu/pooling.cpp
@@ -0,0 +1,83 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace cpu {
+
+struct dnnl_pooling : dnnl_extend_op
+{
+    std::vector arg_map(int) const { return {MIGRAPHX_DNNL_PREFIX(ARG_SRC)}; }
+
+    dnnl::algorithm get_algo() const
+    {
+        switch(op.mode)
+        {
+        case op::pooling_mode::max: return dnnl::algorithm::pooling_max;
+        case op::pooling_mode::average:
+            return op.count_include_pad ? dnnl::algorithm::pooling_avg_include_padding
+                                        : dnnl::algorithm::pooling_avg_exclude_padding;
+        case op::pooling_mode::lpnorm: MIGRAPHX_THROW("Lpnorm pooling mode not supported");
+        }
+        MIGRAPHX_THROW("Unknown pooling mode");
+    }
+
+    dnnl::pooling_v2_forward::desc
+    get_desc(const std::unordered_map& m) const
+    {
+        auto algo  = get_algo();
+        auto kdims = op.kdims();
+        std::vector padding_l(op.padding.begin(), op.padding.begin() + kdims);
+        std::vector padding_r(op.padding.begin() + kdims, op.padding.end());
+        // Note: It is not documented, but the default dilation seems to be 0 instead of 1.
+        // We need to offset dilations with -1.
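+        // Example (added editorial note, not in the original source; values are illustrative):
+        // MIGraphX dilations {1, 1} are passed to oneDNN as {0, 0}, and {2, 2} as {1, 1},
+        // since the transform below subtracts 1 from each dilation element.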
+ std::vector dilations; + std::transform(op.dilations.cbegin(), + op.dilations.cend(), + std::back_inserter(dilations), + [](size_t d) { return d - 1; }); + return {dnnl::prop_kind::forward_inference, + algo, + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), + to_dnnl_dims(op.stride), + to_dnnl_dims(op.lengths), + to_dnnl_dims(dilations), + to_dnnl_dims(padding_l), + to_dnnl_dims(padding_r)}; + } +}; + +} // namespace cpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/preallocate.cpp b/docker/rocm/migraphx/targets/cpu/preallocate.cpp new file mode 100644 index 000000000..d831a1942 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/preallocate.cpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct cpu_preallocate : auto_register_op +{ + shape s; + std::string id = ""; + argument data; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.s, "shape"), f(self.id, "id")); + } + + std::string name() const { return "cpu::preallocate"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(0); + return s; + } + argument compute(context&, const shape&, const std::vector&) const { return data; } + void finalize(context&, const shape&, const std::vector&) { data = argument(s); } + lifetime get_lifetime() const { return lifetime::global; } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/reduction.cpp b/docker/rocm/migraphx/targets/cpu/reduction.cpp new file mode 100644 index 000000000..e0a7517ee --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/reduction.cpp @@ -0,0 +1,73 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_reduction : dnnl_op +{ + std::string algo; + std::vector axes{}; + template + static auto reflect(Self& self, F f) + { + return pack_join(self.reflect_base(self, f), + pack(f(self.algo, "algo"), f(self.axes, "axes"))); + } + + std::string name() const { return "dnnl::reduction"; } + + shape compute_shape(std::vector inputs) const + { + // Compensate for allocation + inputs.pop_back(); + check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1).standard(); + auto s = inputs.at(0); + auto lens = s.lens(); + for(auto axis : axes) + { + lens[axis] = 1; + } + auto r = shape{s.type(), lens}; + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(r, inputs)); + return r; + } + + dnnl::reduction::desc get_desc(const std::unordered_map& m) const + { + return {to_dnnl_algo(algo), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), + m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), + 0, + 0}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/reorder.cpp b/docker/rocm/migraphx/targets/cpu/reorder.cpp new file mode 100644 index 000000000..c549a6013 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/reorder.cpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_reorder : dnnl_op +{ + std::string name() const { return "dnnl::reorder"; } + + shape adjust_shape(const shape& x, int, const shape&) const { return x; } + + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(2); + auto r = inputs.back(); + // Call to get_primitive to make sure an algo is available + this->get_primitive(this->to_memory_desc(r, inputs)); + return r; + } + // Custom desc class since its missing in dnnl + struct desc + { + dnnl::memory::desc src; + dnnl::memory::desc dst; + }; + desc get_desc(const std::unordered_map& m) const + { + return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))}; + } + + auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const + { + auto& engine = get_dnnl_context().engine; + return dnnl::reorder::primitive_desc(engine, d.src, engine, d.dst, attr); + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/softmax.cpp b/docker/rocm/migraphx/targets/cpu/softmax.cpp new file mode 100644 index 000000000..8c3610f23 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/softmax.cpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct dnnl_softmax : dnnl_extend_op +{ + dnnl::softmax_forward::desc get_desc(const std::unordered_map& m) const + { + int axis = this->op.axis; + return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis}; + } +}; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/sub.cpp b/docker/rocm/migraphx/targets/cpu/sub.cpp new file mode 100644 index 000000000..8f3436071 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/sub.cpp @@ -0,0 +1,36 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +template struct cpu_binary; + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/target.cpp b/docker/rocm/migraphx/targets/cpu/target.cpp new file mode 100644 index 000000000..e148aa5b6 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/target.cpp @@ -0,0 +1,122 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
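For reference, a plain-C++ sketch of what the softmax primitive wrapped by dnnl_softmax computes along its axis, specialized here to a 2-D row-wise case (axis == 1) for brevity. This is illustrative only and is not the DNNL code path.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Numerically stable softmax over each row of a rows x cols matrix stored
// row-major. Subtracting the row maximum before exponentiating avoids overflow.
void softmax_rows(std::vector<float>& data, std::size_t rows, std::size_t cols)
{
    for(std::size_t r = 0; r < rows; ++r)
    {
        float* row = data.data() + r * cols;
        float m    = *std::max_element(row, row + cols);
        float sum  = 0.0f;
        for(std::size_t c = 0; c < cols; ++c)
        {
            row[c] = std::exp(row[c] - m);
            sum += row[c];
        }
        for(std::size_t c = 0; c < cols; ++c)
            row[c] /= sum;
    }
}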
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +std::string target::name() const { return "cpu"; } + +// cppcheck-suppress constParameterReference +std::vector target::get_passes(migraphx::context& gctx, const compile_options&) const +{ + auto& ctx = any_cast(gctx); + std::set unsupported_types(shape::types().begin(), shape::types().end()); + std::set unsupported_ops{ + "all", "scatternd_add", "scatternd_mul", "scatternd_none"}; + unsupported_types.erase(shape::type_t::float_type); + return {normalize_ops{}, + rewrite_quantization{}, + dead_code_elimination{}, + eliminate_data_type{unsupported_types, shape::type_t::float_type, unsupported_ops}, + dead_code_elimination{}, + simplify_reshapes{}, + eliminate_convert{}, + eliminate_identity{}, + eliminate_pad{}, + dead_code_elimination{}, + rewrite_rnn{}, + dead_code_elimination{}, + eliminate_common_subexpression{}, + dead_code_elimination{}, + simplify_algebra{}, + simplify_reshapes{}, + eliminate_convert{}, + dead_code_elimination{}, + simplify_reshapes{}, + eliminate_convert{}, + dead_code_elimination{}, + simplify_algebra{}, + simplify_reshapes{}, + eliminate_convert{}, + dead_code_elimination{}, + propagate_constant{}, + dead_code_elimination{}, + auto_contiguous{}, + lowering{}, + eliminate_contiguous{"dnnl::reorder"}, + dead_code_elimination{}, + replace_allocate{cpu_allocation_model{}}, + dead_code_elimination{}, + adjust_allocation{cpu_allocation_model{}}, + dead_code_elimination{}, + fuse_ops{&ctx}, + dead_code_elimination{}, + write_literals{}, + dead_code_elimination{}, + memory_coloring{"cpu::allocate"}, + dead_code_elimination{}, + preallocate_param{"scratch", cpu_allocation_model{}}, + dead_code_elimination{}}; +} + +argument target::allocate(const shape& s) const { return fill_argument(s, 0); } + +MIGRAPHX_REGISTER_TARGET(target); + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/cpu/write_literals.cpp b/docker/rocm/migraphx/targets/cpu/write_literals.cpp new file mode 100644 index 000000000..0899df4e8 --- /dev/null +++ b/docker/rocm/migraphx/targets/cpu/write_literals.cpp @@ -0,0 +1,70 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
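The pass list returned by cpu::target::get_passes above is what runs when a program is compiled for the "cpu" target. A rough usage sketch with MIGraphX's internal C++ API follows; header paths and exact signatures are assumptions here, since the include lines are elided in this copy of the diff.

#include <migraphx/program.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/register_target.hpp>

int main()
{
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
    auto x   = mm->add_parameter("x", s);
    auto y   = mm->add_parameter("y", s);
    auto sum = mm->add_instruction(migraphx::make_op("add"), x, y);
    mm->add_return({sum});

    // Compiling for "cpu" runs the pipeline defined in target::get_passes above.
    p.compile(migraphx::make_target("cpu"));

    migraphx::parameter_map params;
    params["x"] = migraphx::generate_argument(s);
    params["y"] = migraphx::generate_argument(s);
    auto result = p.eval(params).back();
    (void)result;
}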
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace cpu { + +struct cpu_literal +{ + argument data; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.data, "data")); + } + + std::string name() const { return "cpu::literal"; } + + shape compute_shape(const std::vector&) const { return data.get_shape(); } + + argument compute(const shape&, const std::vector&) const { return data; } + + friend std::ostream& operator<<(std::ostream& os, const cpu_literal& x) + { + os << x.name(); + return os; + } +}; +MIGRAPHX_REGISTER_OP(cpu_literal); + +void write_literals::apply(module& m) const +{ + for(auto ins : iterator_for(m)) + { + if(ins->name() != "@literal") + continue; + m.replace_instruction(ins, cpu_literal{ins->get_literal().get_argument()}); + } +} + +} // namespace cpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/fpga/CMakeLists.txt b/docker/rocm/migraphx/targets/fpga/CMakeLists.txt new file mode 100644 index 000000000..11b47b9b2 --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/CMakeLists.txt @@ -0,0 +1,43 @@ +##################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+##################################################################################### + +add_library(migraphx_fpga + target.cpp + lowering.cpp + subgraph.cpp + vitis_ai_adapter.cpp +) + +set_target_properties(migraphx_fpga PROPERTIES EXPORT_NAME fpga) +rocm_set_soversion(migraphx_fpga ${MIGRAPHX_SO_VERSION}) + +rocm_clang_tidy_check(migraphx_fpga) +target_link_libraries(migraphx_fpga migraphx) + +rocm_install_targets( + PRIVATE + TARGETS migraphx_fpga + INCLUDE + ${CMAKE_CURRENT_SOURCE_DIR}/include +) diff --git a/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/context.hpp b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/context.hpp new file mode 100644 index 000000000..2c8242a76 --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/context.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_FPGA_CONTEXT_HPP +#define MIGRAPHX_GUARD_FPGA_CONTEXT_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace fpga { + +struct context +{ + int id = 0; + + void finish() const {} +}; + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_FPGA_CONTEXT_HPP diff --git a/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/lowering.hpp b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/lowering.hpp new file mode 100644 index 000000000..dc8a7bc6b --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/lowering.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_FPGA_LOWERING_HPP +#define MIGRAPHX_GUARD_FPGA_LOWERING_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace fpga { + +struct lowering +{ + context* ctx = nullptr; + std::string name() const { return "fpga::lowering"; } + void apply(module& m) const; +}; + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_FPGA_LOWERING_HPP diff --git a/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/subgraph.hpp b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/subgraph.hpp new file mode 100644 index 000000000..62f68b09d --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/subgraph.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP +#define MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace fpga { + +struct subgraph +{ + std::string name() const { return "fpga::subgraph"; } + void apply(module_pass_manager& mpm) const; +}; + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP diff --git a/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/target.hpp b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/target.hpp new file mode 100644 index 000000000..dbcb0bcff --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/target.hpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_FPGA_TARGET_HPP +#define MIGRAPHX_GUARD_FPGA_TARGET_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct pass; +namespace fpga { + +struct target +{ + std::string name() const; + std::vector get_passes(migraphx::context& ctx, const compile_options&) const; + migraphx::context get_context() const { return context{}; } + supported_segments find_supported(const_module_ref mod, support_metric m) const; + argument copy_to(const argument& arg) const { return arg; } + argument copy_from(const argument& arg) const { return arg; } + argument allocate(const shape& s) const; +}; + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_FPGA_TARGET_HPP diff --git a/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp new file mode 100644 index 000000000..64d2300c4 --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP +#define MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP + +#include + +#include +#include + +namespace vitis_ai { + +class x_model +{ + migraphx::shape shape; + + public: + migraphx::shape get_shape() const; + void set_shape(migraphx::shape); +}; + +x_model create_xmodel(migraphx::const_module_ref mod); + +migraphx::argument execute(const x_model& xmodel, + const migraphx::shape& output_shape, + std::vector& args); + +} // namespace vitis_ai + +#endif // MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP diff --git a/docker/rocm/migraphx/targets/fpga/lowering.cpp b/docker/rocm/migraphx/targets/fpga/lowering.cpp new file mode 100644 index 000000000..ad17dc8d1 --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/lowering.cpp @@ -0,0 +1,91 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "migraphx/fpga/vitis_ai_adapter.hpp" + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace fpga { + +struct fpga_vitis_op +{ + fpga_vitis_op() = default; + explicit fpga_vitis_op(vitis_ai::x_model model) : xmodel(std::move(model)){}; + + vitis_ai::x_model xmodel; + int dummy = 0; + + template + static auto reflect(Self& self, F f) + { + // return pack(f(self.xmodel, "xmodel")); + return pack(f(self.dummy, "dummy")); + } + + std::string name() const { return "fpga::vitis_ai"; } + + shape compute_shape(const std::vector& inputs) const + { + (void)inputs; + return xmodel.get_shape(); + } + + argument + compute(const context& ctx, const shape& output_shape, std::vector args) const + { + std::cout << "The context is " << ctx.id << std::endl; + return ::vitis_ai::execute(xmodel, output_shape, args); + } +}; +MIGRAPHX_REGISTER_OP(fpga_vitis_op) + +void lowering::apply(module& m) const +{ + auto* mod = &m; + + // test modifying the context from a pass + ctx->id = 2; + + for(auto it : iterator_for(*mod)) + { + if(it->name() == "fpga::vitis_placeholder") + { + assert(it->module_inputs().size() == 1); + auto xmodel = ::vitis_ai::create_xmodel(it->module_inputs()[0]); + mod->replace_instruction(it, fpga_vitis_op{xmodel}, it->inputs()); + } + } +} + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/fpga/subgraph.cpp b/docker/rocm/migraphx/targets/fpga/subgraph.cpp new file mode 100644 index 000000000..d0e09a5de --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/subgraph.cpp @@ -0,0 +1,133 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include + +#include +#include "migraphx/iterator.hpp" +#include +#include "migraphx/make_op.hpp" +#include "migraphx/module.hpp" +#include "migraphx/ranges.hpp" +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace fpga { + +struct fpga_placeholder_op +{ + fpga_placeholder_op() = default; + + int dummy = 0; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.dummy, "dummy")); + } + + std::string name() const { return "fpga::vitis_placeholder"; } + + shape compute_shape(const std::vector& inputs, std::vector mods) const + { + (void)inputs; + if(mods.size() != 1) + { + MIGRAPHX_THROW("should have one submodule."); + } + module_ref sm = mods.front(); + if(sm->get_output_shapes().size() != 1) + MIGRAPHX_THROW("Only one return"); + return sm->get_output_shapes().front(); + } +}; +MIGRAPHX_REGISTER_OP(fpga_placeholder_op) + +bool is_fpga_instr(migraphx::instruction_ref it) +{ + // assuming all instructions that aren't @param, @literal, or input data are fpga instrs + if(migraphx::starts_with(it->name(), "@")) + { + return false; + } + // no inputs to the instr means it's input data + if(it->inputs().empty()) + { + return false; + } + return true; +} + +void subgraph::apply(module_pass_manager& mpm) const +{ + auto& mod = mpm.get_module(); + auto* pm = mpm.create_module(mod.name() + ":fpga"); + pm->set_bypass(); + + migraphx::instruction_ref first = mod.end(); + migraphx::instruction_ref last; + std::vector literal_inputs; + for(auto it : iterator_for(mod)) + { + // assuming we want all the params/literals as inputs to the FPGA submodule + if(migraphx::starts_with(it->name(), "@param") or + migraphx::starts_with(it->name(), "@literal")) + { + literal_inputs.push_back(it); + } + if(is_fpga_instr(it)) + { + if(first == mod.end()) + { + first = it; + } + last = it; + } + } + + // TODO(varunsh): this code may be replaceable by code in the fuse_pointwise pass + + // assuming all FPGA instructions are in one contiguous range + pm->insert_instructions(pm->end(), first, std::next(last), {}); + migraphx::instruction_ref placeholder_ins; + for(auto it : iterator_for(mod)) + { + if(migraphx::starts_with(it->name(), "@return")) + { + placeholder_ins = mod.insert_instruction( + it, migraphx::make_op("fpga::vitis_placeholder"), literal_inputs, {pm}); + break; + } + } + + mod.replace_return({placeholder_ins}); +} + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/fpga/target.cpp b/docker/rocm/migraphx/targets/fpga/target.cpp new file mode 100644 index 000000000..570779fff --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/target.cpp @@ -0,0 +1,83 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
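subgraph::apply above assumes that every FPGA-eligible instruction forms one contiguous run and simply remembers the first and last hit while scanning the module. A standalone sketch of that pattern on a plain container (illustrative only; the real code iterates instruction_refs):

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

// Find the [first, last] pair of elements matching a predicate, assuming all
// matches are contiguous -- the same assumption subgraph::apply makes.
template <class Range, class Pred>
auto contiguous_match_range(Range& r, Pred pred)
{
    auto first = std::find_if(r.begin(), r.end(), pred);
    auto last  = first;
    for(auto it = first; it != r.end(); ++it)
    {
        if(pred(*it))
            last = it;
    }
    return std::make_pair(first, last);
}

// Example: with {"@param", "mul", "add", "@return"} and a predicate that
// rejects names starting with '@', the returned pair brackets {"mul", "add"}.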
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace fpga { + +std::string target::name() const { return "fpga"; } + +std::vector target::get_passes(migraphx::context& gctx, const compile_options&) const +{ + // not sure if all these passes are needed but they were copied from ref/ + auto& ctx = any_cast(gctx); + return {normalize_ops{}, + eliminate_pad{}, + dead_code_elimination{}, + insert_pad{}, + dead_code_elimination{}, + rewrite_rnn{}, + dead_code_elimination{}, + auto_contiguous{}, + dead_code_elimination{}, + subgraph{}, + dead_code_elimination{}, + lowering{&ctx}, + dead_code_elimination{}}; +} + +argument target::allocate(const shape& s) const { return fill_argument(s, 0); } + +supported_segments target::find_supported(const_module_ref mod, support_metric m) const +{ + (void)m; + + supported_segment instrs; + for(const auto ins : iterator_for(*mod)) + { + instrs.instructions.insert(ins); + } + instrs.metric = 1; // arbitrary value + return {instrs}; +} + +MIGRAPHX_REGISTER_TARGET(target); + +} // namespace fpga +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/fpga/vitis_ai_adapter.cpp b/docker/rocm/migraphx/targets/fpga/vitis_ai_adapter.cpp new file mode 100644 index 000000000..fa4ecdc68 --- /dev/null +++ b/docker/rocm/migraphx/targets/fpga/vitis_ai_adapter.cpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "migraphx/fpga/vitis_ai_adapter.hpp" + +#include "migraphx/module.hpp" + +#include "migraphx/stringutils.hpp" +namespace vitis_ai { + +migraphx::shape x_model::get_shape() const { return shape; }; + +void x_model::set_shape(migraphx::shape s) { shape = s; } + +x_model create_xmodel(migraphx::const_module_ref mod) +{ + std::cout << "Calling an external function: create_xmodel!\n"; + x_model xmodel; + xmodel.set_shape(migraphx::shape(mod->get_output_shapes())); + return xmodel; +} + +migraphx::argument execute(const x_model& xmodel, + const migraphx::shape& output_shape, + std::vector& args) +{ + (void)xmodel; + + std::cout << "Calling an external function: execute!\n"; + + std::cout << "Output Shape: " << output_shape << std::endl; + std::cout << "Args: " << args.size() << std::endl; + for(const auto& arg : args) + { + std::cout << " " << arg.get_shape() << std::endl; + } + std::cout << std::endl; + + migraphx::argument result{output_shape}; + + return result; +} + +} // namespace vitis_ai diff --git a/docker/rocm/migraphx/targets/gpu/CMakeLists.txt b/docker/rocm/migraphx/targets/gpu/CMakeLists.txt new file mode 100644 index 000000000..82cc1fb0a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/CMakeLists.txt @@ -0,0 +1,407 @@ +# #################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +# #################################################################################### + +find_package(hip REQUIRED) +if(NOT GPU_TARGETS) + set(fatal_msg "HIP package is broken and has no GPU_TARGETS. Please pass GPU_TARGETS to cmake.") + if(NOT WIN32) + set(fatal_msg "${fatal_msg}\nUse -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*') to build for your GPU.") + endif() + message(FATAL_ERROR ${fatal_msg}) +endif() + +if(MIGRAPHX_USE_MIOPEN) + find_package(miopen REQUIRED) + message(STATUS "MIGraphX is using MIOpen") +else() + message(STATUS "MIGraphX is not using MIOpen") +endif() + +if(MIGRAPHX_USE_ROCBLAS) + # rocblas + find_package(rocblas REQUIRED) + message(STATUS "MIGraphX build with rocBLAS") +else() + message(STATUS "MIGraphX build without rocBLAS") +endif() + +if(MIGRAPHX_USE_HIPBLASLT) + # hipblaslt + find_package(hipblaslt REQUIRED) + # Making hipblas required to workaround the broken hipblaslt package. 
+ find_package(hipblas REQUIRED) + message(STATUS "MIGraphx build with hipBLAS and hipBLASLt") +else() + message(STATUS "MIGraphX build without hipBLAS and hipBLASLt") +endif() + +if(MIGRAPHX_USE_COMPOSABLEKERNEL) + find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library) +endif() + +if(BUILD_DEV) + set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs") +else() + set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs") +endif() + +file(GLOB KERNEL_FILES CONFIGURE_DEPENDS + ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp) + +if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL) + list(REMOVE_ITEM KERNEL_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck.hpp) +endif() + +include(Embed) +add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/) + +configure_file(device/targets.hpp.in include/migraphx/gpu/device/targets.hpp) +file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp) +add_library(migraphx_device ${DEVICE_GPU_SRCS}) + +add_library(compile_for_gpu INTERFACE) +target_compile_features(compile_for_gpu INTERFACE cxx_std_17) +target_compile_options(compile_for_gpu INTERFACE -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fnative-half-arguments-and-returns) +target_link_options(compile_for_gpu INTERFACE -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored) +target_link_libraries(compile_for_gpu INTERFACE hip::device) +check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE) + +if(HAS_HIP_LAMBDA_HOST_DEVICE) + message(STATUS "Enable -fhip-lambda-host-device") + target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device) +endif() + +set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device) +rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION}) +rocm_clang_tidy_check(migraphx_device) +target_link_libraries(migraphx_device PUBLIC migraphx) +target_link_libraries(migraphx_device PRIVATE compile_for_gpu) +if(NOT MIGRAPHX_USE_MIOPEN AND NOT MIGRAPHX_USE_ROCBLAS) + target_link_libraries(migraphx_device INTERFACE hip::host) +endif() +target_include_directories(migraphx_device PUBLIC $) +target_include_directories(migraphx_device PRIVATE $) +target_include_directories(migraphx_device PRIVATE $) +target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes) +migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device) + +add_library(kernel_file_check EXCLUDE_FROM_ALL) + +foreach(KERNEL_FILE ${KERNEL_FILES}) + get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include \n") + target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp) +endforeach() + +target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256) +target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_WAVEFRONTSIZE=64) +target_include_directories(kernel_file_check PRIVATE $) +target_link_libraries(kernel_file_check compile_for_gpu) +if(MIGRAPHX_USE_COMPOSABLEKERNEL) + target_link_libraries(kernel_file_check composable_kernel::jit_library) +endif() + 
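# ---------------------------------------------------------------------------
# Illustrative configure invocation (not part of this file): the cache options
# checked above can be set on the command line, and GPU_TARGETS must be
# supplied when the HIP package cannot detect it, e.g.
#
#   cmake -DGPU_TARGETS=gfx90a \
#         -DMIGRAPHX_USE_MIOPEN=On -DMIGRAPHX_USE_ROCBLAS=On \
#         -DMIGRAPHX_USE_HIPBLASLT=Off -DMIGRAPHX_USE_COMPOSABLEKERNEL=Off ..
#
# The option names come from the if() checks in this file; the gfx target and
# On/Off values are examples only, and the defaults are defined elsewhere in
# the build.
# ---------------------------------------------------------------------------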
+rocm_clang_tidy_check(kernel_file_check) + +file(GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp) + +if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL) + list(REMOVE_ITEM JIT_GPU_SRCS + ${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm_softmax_gemm.cpp) +endif() + +if(MIGRAPHX_USE_MIOPEN) + set(MIOPEN_SRCS abs.cpp) +endif() + +add_library(migraphx_gpu + analyze_streams.cpp + allocation_model.cpp + argmax.cpp + argmin.cpp + code_object_op.cpp + compile_ops.cpp + compile_gen.cpp + compile_hip.cpp + compile_hip_code_object.cpp + compile_hipblaslt.cpp + compile_miopen.cpp + compile_pointwise.cpp + compiler.cpp + device_name.cpp + fuse_ck.cpp + fuse_mlir.cpp + fuse_ops.cpp + gemm_impl.cpp + hip.cpp + hipblaslt.cpp + hip_gemm_impl.cpp + kernel.cpp + lowering.cpp + logsoftmax.cpp + loop.cpp + lrn.cpp + mlir.cpp + multinomial.cpp + no_device.cpp + nonzero.cpp + pack_args.cpp + prefuse_ops.cpp + prepare_reduce.cpp + perfdb.cpp + pooling.cpp + problem_cache.cpp + reverse.cpp + rnn_variable_seq_lens.cpp + rocblas.cpp + schedule_model.cpp + sync_device.cpp + target.cpp + time_op.cpp + topk.cpp + write_literals.cpp + ${JIT_GPU_SRCS} + ${MIOPEN_SRCS} +) + +set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu) +migraphx_generate_export_header(migraphx_gpu) + +function(register_migraphx_gpu_ops PREFIX) + foreach(OP ${ARGN}) + register_op(migraphx_gpu HEADER migraphx/gpu/${OP}.hpp OPERATORS gpu::${PREFIX}${OP} INCLUDES migraphx/gpu/context.hpp) + endforeach() +endfunction() + +register_migraphx_gpu_ops(hip_ + argmax + argmin + logsoftmax + loop + multinomial + nonzero + prefix_scan_sum + reverse + topk +) +if (MIGRAPHX_USE_MIOPEN) +register_migraphx_gpu_ops(miopen_ + abs + contiguous + lrn + pooling +) +else() +register_migraphx_gpu_ops(miopen_ + contiguous +) +endif() +register_op(migraphx_gpu + HEADER migraphx/gpu/rnn_variable_seq_lens.hpp + OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output + INCLUDES migraphx/gpu/context.hpp) +if(MIGRAPHX_USE_ROCBLAS) + register_op(migraphx_gpu + HEADER migraphx/gpu/gemm.hpp + OPERATORS gpu::rocblas_gemm gpu::rocblas_gemm + INCLUDES migraphx/gpu/context.hpp) +endif() +if(MIGRAPHX_USE_HIPBLASLT) + register_op(migraphx_gpu + HEADER migraphx/gpu/hip_gemm.hpp + OPERATORS gpu::hip_gemm gpu::hip_gemm + INCLUDES migraphx/gpu/context.hpp) +endif() +if (MIGRAPHX_USE_MIOPEN) + register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp + OPERATORS gpu::miopen_convolution gpu::miopen_convolution gpu::miopen_convolution + INCLUDES migraphx/gpu/context.hpp) +endif() +rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION}) +rocm_clang_tidy_check(migraphx_gpu) + +set(MIGRAPHX_ENABLE_MLIR ON CACHE BOOL "") + +if(MIGRAPHX_ENABLE_MLIR) + # Find package rocMLIR + find_package(rocMLIR 1.0.0 CONFIG REQUIRED) + message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}") + target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR") + # Make this private to avoid multiple inclusions of LLVM symbols. + # TODO: Fix rocMLIR's library to hide LLVM internals. 
+ target_link_libraries(migraphx_gpu PRIVATE rocMLIR::rockCompiler) +endif() + +if(MIGRAPHX_USE_HIPRTC) + find_package(hiprtc REQUIRED) + message(STATUS "MIGraphX is using hipRTC") + target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1) + target_link_libraries(migraphx_gpu PUBLIC hiprtc::hiprtc) +else() + message(STATUS "MIGraphX is using HIP Clang") + + # Get flags needed to compile hip + include(TargetFlags) + target_flags(HIP_COMPILER_FLAGS hip::device) + + # Remove cuda arch flags + string(REGEX REPLACE "--cuda-gpu-arch=[a-z0-9]+ ?" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}") + string(REGEX REPLACE "--offload-arch=[a-z0-9:+-]+ ?" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}") + + # Skip library paths since hip will incorrectly treat it as a source file + string(APPEND HIP_COMPILER_FLAGS " ") + + if(WIN32) + string(REPLACE "\\" "/" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}") + endif() + foreach(_unused RANGE 2) + string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}") + endforeach() + + message(STATUS "Hip compiler flags: \"${HIP_COMPILER_FLAGS}\"") + target_compile_definitions(migraphx_gpu PRIVATE + -DMIGRAPHX_HIP_COMPILER="${CMAKE_CXX_COMPILER}" + -DMIGRAPHX_HIP_COMPILER_FLAGS="${HIP_COMPILER_FLAGS}" + ) + + if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER) + if(WIN32) + execute_process(COMMAND where ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER) + else() + execute_process(COMMAND which ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER) + endif() + string(STRIP "${MIGRAPHX_HIP_COMPILER_LAUNCHER}" MIGRAPHX_HIP_COMPILER_LAUNCHER) + target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_HIP_COMPILER_LAUNCHER="${MIGRAPHX_HIP_COMPILER_LAUNCHER}") + endif() +endif() + +target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_CXX_COMPILER="${CMAKE_CXX_COMPILER}") + +# Check miopen find mode api + +include(CheckLibraryExists) +if (MIGRAPHX_USE_MIOPEN) + get_target_property(MIOPEN_LOCATION MIOpen LOCATION) + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_MIOPEN=1) + check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API) + check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API) +else() +target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_MIOPEN=0) +endif() + +if(MIGRAPHX_USE_ROCBLAS) + get_target_property(ROCBLAS_LOCATION roc::rocblas LOCATION) + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=1) + # Beta API for automated GEMM tuning + check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API) + # rocblas FP8 API + check_library_exists(roc::rocblas "rocblas_gemm_strided_batched_ex3" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_FP8_BETA_API) +else() + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=0) +endif() + +if(MIGRAPHX_USE_HIPBLASLT) + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_HIPBLASLT=1) +else() + target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_HIPBLASLT=0) +endif() + +if(MIGRAPHX_USE_MIOPEN) + set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "") + + if(MIGRAPHX_USE_FIND_2_API) + check_library_exists(MIOpen "miopenSetFindOptionPreallocatedTensor" "${MIOPEN_LOCATION}" HAS_PREALLOCATION_API) + if(HAS_PREALLOCATION_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API -DMIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS) + else() + 
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API) + endif() + message(STATUS "MIGraphx is using Find-2.0 API of MIOpen") + else() + message(STATUS "MIGraphx is using legacy Find API in MIOpen") + endif() + + if(HAS_FIND_MODE_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API) + message(STATUS "MIGraphx is using Find Mode API of MIOpen") + else() + message(STATUS "MIOpen does not have find mode api") + endif() + + target_link_libraries(migraphx_gpu PUBLIC MIOpen) +endif() + +if(MIGRAPHX_USE_ROCBLAS) + if(HAS_ROCBLAS_TUNING_BETA_FEATURE_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) + message(STATUS "MIGraphx is using Beta API of rocBLAS") + else() + message(STATUS "rocBLAS does not have User Tuning Beta API") + endif() + + if(HAS_ROCBLAS_FP8_BETA_API) + target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_FP8_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS) + message(STATUS "MIGraphX is using Beta API of rocBLAS for FP8 computations") + else() + message(STATUS "rocBLAS does not have Fp8 Beta API") + endif() + + + target_link_libraries(migraphx_gpu PUBLIC roc::rocblas) +endif() + +if(MIGRAPHX_USE_HIPBLASLT) + target_link_libraries(migraphx_gpu PUBLIC roc::hipblaslt) +endif() + +if(WIN32) + # Temporary workaround on rocMLIR not exporting correctly libraries it depends on. + target_link_libraries(migraphx_gpu PRIVATE ntdll) +endif() + +target_link_libraries(migraphx_gpu PUBLIC migraphx) +if(NOT MIGRAPHX_USE_MIOPEN AND NOT MIGRAPHX_USE_ROCBLAS) + target_link_libraries(migraphx_gpu PUBLIC migraphx_device) +else() + target_link_libraries(migraphx_gpu PRIVATE migraphx_device) +endif() +target_link_libraries(migraphx_gpu PRIVATE migraphx_kernels) +if(MIGRAPHX_USE_COMPOSABLEKERNEL) + target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library) + target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1) +endif() + +add_subdirectory(driver) +add_subdirectory(hiprtc) + +rocm_install_targets( + PRIVATE + TARGETS migraphx_gpu migraphx_device compile_for_gpu + INCLUDE + ${CMAKE_CURRENT_SOURCE_DIR}/include +) diff --git a/docker/rocm/migraphx/targets/gpu/abs.cpp b/docker/rocm/migraphx/targets/gpu/abs.cpp new file mode 100644 index 000000000..8cd0a1d8b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/abs.cpp @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_MIOPEN +shape miopen_abs::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2).packed(); + return inputs.at(0); +} + +argument miopen_abs::compute(context& ctx, + const shape& output_shape, + const std::vector& args) const +{ + float alpha = 1; + float beta = 0; + auto x_desc = make_tensor(args[0].get_shape()); + auto y_desc = make_tensor(output_shape); + miopenActivationForward(ctx.get_stream().get_miopen(), + ad.get(), + &alpha, + x_desc.get(), + args[0].implicit(), + &beta, + y_desc.get(), + args[1].implicit()); + + return args[1]; +} + +void miopen_abs::finalize(context&, const shape&, const std::vector&) { ad = make_abs(); } +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/allocation_model.cpp b/docker/rocm/migraphx/targets/gpu/allocation_model.cpp new file mode 100644 index 000000000..e5fd2cc27 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/allocation_model.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +std::string gpu_allocation_model::name() const { return "hip::allocate"; } +operation gpu_allocation_model::allocate(const shape& s) const +{ + return make_op(name(), {{"shape", to_value(s)}}); +} + +operation gpu_allocation_model::preallocate(const shape& s, const std::string& id) const +{ + return make_op("hip::hip_allocate_memory", {{"shape", to_value(s)}, {"id", id}}); +} + +std::string gpu_allocation_model::copy() const { return "hip::copy"; } + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/analyze_streams.cpp b/docker/rocm/migraphx/targets/gpu/analyze_streams.cpp new file mode 100644 index 000000000..e08c89d82 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/analyze_streams.cpp @@ -0,0 +1,82 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct hip_stream_model +{ + std::size_t max_stream = 0; + std::unordered_map ins2stream{}; + std::size_t get_nstream() const { return max_stream + 1; } + std::size_t get_stream(migraphx::instruction_ref ins) const { return ins2stream.at(ins); } + std::size_t get_event_id(migraphx::instruction_ref ins) const + { + auto v = ins->get_operator().to_value(); + return v["event"].to(); + } + bool has_stream(migraphx::instruction_ref ins) const { return ins2stream.count(ins) > 0; } + bool is_record(migraphx::instruction_ref ins) const + { + return ins->name() == "gpu::record_event"; + } + bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "gpu::wait_event"; } +}; + +stream_model make_stream_model(const module& m) +{ + hip_stream_model hsm; + std::size_t stream = 0; + for(auto ins : iterator_for(m)) + { + if(ins->name() == "gpu::set_stream") + { + auto v = ins->get_operator().to_value(); + stream = v["stream"].to(); + hsm.max_stream = std::max(stream, hsm.max_stream); + } + if(ins->get_operator().is_context_free()) + continue; + if(contains({"hip::hip_allocate_memory", "hip::hip_copy_literal", "@param"}, ins->name())) + continue; + hsm.ins2stream[ins] = stream; + } + return hsm; +} + +std::vector analyze_streams(const module& m) +{ + return migraphx::analyze_streams(m, make_stream_model(m)); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/argmax.cpp b/docker/rocm/migraphx/targets/gpu/argmax.cpp new file mode 100644 index 000000000..b5f720295 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/argmax.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
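make_stream_model above walks the module once, updating the current stream whenever it sees gpu::set_stream and tagging each remaining instruction with that stream. A toy, string-based rendering of the same bookkeeping (illustrative only; the real pass also skips context-free instructions, allocations, literal copies, and parameters):

#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Toy version of make_stream_model: instructions are (name, stream-arg) pairs;
// "set_stream" changes the current stream, and every entry is tagged with the
// stream that is current when it is reached.
std::map<std::size_t, std::size_t>
assign_streams(const std::vector<std::pair<std::string, std::size_t>>& instrs)
{
    std::map<std::size_t, std::size_t> ins2stream; // instruction index -> stream
    std::size_t stream = 0;
    for(std::size_t i = 0; i < instrs.size(); ++i)
    {
        if(instrs[i].first == "set_stream")
            stream = instrs[i].second;
        ins2stream[i] = stream;
    }
    return ins2stream;
}

// Example: {"set_stream",0}, {"conv",0}, {"set_stream",1}, {"gemm",0}
// tags "conv" with stream 0 and "gemm" with stream 1.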
+ */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_argmax::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2); + return op.normalize_compute_shape({inputs.at(0)}); +} + +argument hip_argmax::compute(context& ctx, const shape&, const std::vector& args) const +{ + auto n_dim = args.front().get_shape().lens().size(); + int64_t tuned_axis = tune_axis(n_dim, op.axis, op.name()); + device::argmax( + ctx.get_stream().get(), args.back(), args.front(), tuned_axis, op.select_last_index); + return args.back(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/argmin.cpp b/docker/rocm/migraphx/targets/gpu/argmin.cpp new file mode 100644 index 000000000..02c44e29f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/argmin.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_argmin::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2); + return op.normalize_compute_shape({inputs.at(0)}); +} + +argument hip_argmin::compute(context& ctx, const shape&, const std::vector& args) const +{ + auto n_dim = args.front().get_shape().lens().size(); + int64_t tuned_axis = tune_axis(n_dim, op.axis, op.name()); + device::argmin( + ctx.get_stream().get(), args.back(), args.front(), tuned_axis, op.select_last_index); + return args.back(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/code_object_op.cpp b/docker/rocm/migraphx/targets/gpu/code_object_op.cpp new file mode 100644 index 000000000..98a580dc4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/code_object_op.cpp @@ -0,0 +1,67 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
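hip_argmax and hip_argmin above pass the operator's axis through tune_axis before calling the device kernel. tune_axis itself is not part of this diff; the sketch below assumes the conventional behaviour of mapping a possibly negative axis into [0, n_dim) and rejecting out-of-range values.

#include <cstdint>
#include <stdexcept>
#include <string>

// Assumed behaviour of tune_axis as used by hip_argmax/hip_argmin: normalize a
// negative axis and fail loudly when it is out of range. Sketch only.
std::int64_t normalize_axis(std::int64_t axis, std::int64_t n_dim, const std::string& op_name)
{
    std::int64_t tuned = axis < 0 ? axis + n_dim : axis;
    if(tuned < 0 or tuned >= n_dim)
        throw std::runtime_error(op_name + ": axis out of range");
    return tuned;
}

// Example: normalize_axis(-1, 4, "argmax") == 3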
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_REGISTER_OP(code_object_op); + +shape code_object_op::compute_shape(std::vector inputs) const +{ + std::transform(inputs.begin(), inputs.end(), inputs.begin(), [](const shape& s) { + return s.normalize_standard(); + }); + auto einputs = expected_inputs; + std::transform(einputs.begin(), einputs.end(), einputs.begin(), [](const shape& s) { + return s.normalize_standard(); + }); + if(not migraphx::equal(flatten(einputs), flatten(inputs), &shape::is_compatible)) + MIGRAPHX_THROW("Input shapes have changed: [" + to_string_range(einputs) + "] -> [" + + to_string_range(inputs) + "]"); + return output; +} +argument +code_object_op::compute(context& ctx, const shape&, const std::vector& args) const +{ + auto fargs = flatten(args); + std::vector kargs(fargs.size()); + std::transform( + fargs.begin(), fargs.end(), kargs.begin(), [](const argument& a) { return a.data(); }); + auto [start, stop] = ctx.get_perf_events(); + k.launch(ctx.get_stream().get(), global, local, std::move(kargs), start, stop); + return args[get_output_arg(args.size())]; +} +void code_object_op::finalize(context&, const shape&, const std::vector&) +{ + assert(not code_object.empty()); + k = kernel(code_object, symbol_name); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_gen.cpp b/docker/rocm/migraphx/targets/gpu/compile_gen.cpp new file mode 100644 index 000000000..82ff3c4a2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_gen.cpp @@ -0,0 +1,576 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
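
// Illustrative sketch (not part of the patch): before launching, code_object_op::compute above
// reduces every flattened argument to an untyped device pointer, since a compiled code object
// only receives raw kernel arguments; shape compatibility was already verified in compute_shape.
// A stand-alone picture of that step with a stand-in argument type (hypothetical names):
#include <vector>

struct toy_argument
{
    void* ptr = nullptr;
    void* data() const { return ptr; }
};

std::vector<void*> to_kernel_args(const std::vector<toy_argument>& args)
{
    std::vector<void*> kargs;
    kargs.reserve(args.size());
    for(const auto& a : args)
        kargs.push_back(a.data()); // kernels see only raw device pointers
    return kargs;
}
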
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace gen { + +static std::vector vector_sizes(const std::vector& inputs) +{ + // If all inputs are half then only use half2 + if(std::all_of(inputs.begin(), inputs.end(), [](const auto& s) { + return s.type() == shape::half_type; + })) + return {2}; + return {4, 2}; +} + +vectorize vectorize::elements(std::size_t axis, + const std::vector& inputs, + const std::vector& sizes) +{ + // disable vectorization for fp8 types + if(std::any_of(inputs.begin(), inputs.end(), [&](auto ishape) { + return contains(fp8_types{}.get(), ishape.type()); + })) + return {1, axis}; + if(std::all_of( + inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; })) + return {1, axis}; + std::vector max_vec_size; + std::transform(inputs.begin(), + inputs.end(), + std::back_inserter(max_vec_size), + [&](const auto& input) -> std::size_t { + auto stride = input.strides()[axis]; + auto len = input.lens()[axis]; + if(not contains({0, 1}, stride)) + return 1; + if(len == 1 and input.elements() > sizes.front()) + return sizes.front(); + auto it = std::find_if(sizes.begin(), sizes.end(), [&](auto vsize) { + // The len is divisible by the size and all the strides are divisible by + // the size + return (len % vsize) == 0 and + std::all_of( + input.strides().begin(), input.strides().end(), [&](auto i) { + return contains({0, 1}, i) or i % vsize == 0; + }); + }); + if(it != sizes.end()) + return *it; + return 1; + }); + return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis}; +} + +vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector& inputs) +{ + // disable vectorization for fp8 types + if(std::any_of(inputs.begin(), inputs.end(), [&](auto ishape) { + return contains(fp8_types{}.get(), ishape.type()); + })) + return {1, axis}; + if(inputs.empty()) + return {1, axis}; + std::size_t n = std::max_element(inputs.begin(), + inputs.end(), + by(std::less<>{}, [](const auto& s) { return s.elements(); })) + ->elements(); + std::size_t max_global = ctx.get_current_device().get_cu_count() * + ctx.get_current_device().get_max_workitems_per_cu(); + std::size_t over = n / max_global; + bool broadcasted = + std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); }); + std::vector sizes; + if(broadcasted and over > 8) + sizes.push_back(8); + if(over > 4) + sizes.push_back(4); + sizes.push_back(2); + return elements(axis, inputs, sizes); +} + +vectorize vectorize::elements(std::size_t axis, const std::vector& inputs) +{ + return elements(axis, inputs, vector_sizes(inputs)); +} + +std::string vectorize::str() const +{ + return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()"; +} + +preload preload::broadcasts(std::size_t axis, const std::vector& inputs) +{ + const 
std::size_t max_lds_bytes = 4096; + std::vector result(inputs.size()); + std::vector preloaded; + auto idxs = range(inputs.size()); + std::copy_if(idxs.begin(), idxs.end(), std::back_inserter(preloaded), [&](auto i) { + return inputs[i].strides()[axis] == 0; + }); + std::sort(preloaded.begin(), preloaded.end(), by(std::less<>{}, [&](auto i) { + return inputs[i].bytes(); + })); + + std::size_t bytes = 0; + for(auto i : preloaded) + { + const auto& input = inputs[i]; + bytes += input.bytes(); + if(bytes > max_lds_bytes) + break; + result[i] = true; + } + return {result}; +} + +std::string preload::str() const +{ + std::vector bool_strs; + std::transform(args.begin(), std::prev(args.end()), std::back_inserter(bool_strs), [](bool b) { + if(b) + return "true"; + return "false"; + }); + return "auto_preload(idx)"; +} + +bool preload::is_preloading() const +{ + return std::accumulate(args.begin(), args.end(), false, std::logical_or<>{}); +} + +static std::size_t integer_divide_ceil(std::size_t x, std::size_t y) +{ + return (x + y - std::size_t{1}) / y; +} + +static std::size_t compute_tile_factor(std::size_t r, std::size_t max_size = 64) +{ + std::size_t n = 1; + auto factors = make_array(2, 3, 5, 7, 11); + while(n < max_size) + { + // NOLINTNEXTLINE(readability-qualified-auto) + auto it = std::find_if(factors.begin(), factors.end(), [&](auto d) { return r % d == 0; }); + if(it == factors.end()) + break; + r /= *it; + n *= *it; + } + return n; +} + +tile tile::elements(const std::vector& inputs, std::size_t noutputs) +{ + tile result; + auto ndim = inputs.front().ndim(); + std::vector faxes; + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(faxes), MIGRAPHX_LIFT(find_fast_axis)); + result.axis = std::accumulate(faxes.begin(), faxes.end(), ndim, MIGRAPHX_LIFT(std::min)); + if(result.axis >= (ndim - 1)) + return {}; + auto select = [&](auto m) { + return [&, m](std::size_t faxis, shape input) { + if(input.broadcasted()) + return none; + if(faxis < (ndim - 1)) + return m; + return none; + }; + }; + std::transform(faxes.begin(), + faxes.end() - noutputs, + inputs.begin(), + std::back_inserter(result.args), + select(load)); + std::transform(faxes.end() - noutputs, + faxes.end(), + inputs.end() - noutputs, + std::back_inserter(result.args), + select(store)); + + auto nargs = std::count_if( + result.args.begin(), result.args.end(), [](auto m) { return m != mode::none; }); + // TODO: Handle tiling more than one arguments + if(nargs != 1) + return {}; + + const auto& s = inputs.front(); + auto dim1 = compute_tile_factor(s.lens()[result.axis]); + auto dim2 = compute_tile_factor(s.lens().back(), 4096 / dim1); + if(dim1 == 1 or dim2 == 1) + return {}; + + result.inner = s.lens(); + std::fill(result.inner.begin(), result.inner.end(), 1); + result.inner[result.axis] = dim1; + result.inner.back() = dim2; + + result.outer = s.lens(); + result.outer[result.axis] /= dim1; + result.outer.back() /= dim2; + + auto tile_size = dim1 * dim2; + result.ntiles = s.elements() / tile_size; + // equivalent to dim1 * (dim2 + 1) to avoid bank conflicts + auto tile_bytes = (tile_size + dim1) * s.type_size(); + if(tile_bytes > 65536) + return {}; + + result.block_size = std::min(256, integer_divide_ceil(tile_size / 4, 64) * 64); + return result; +} + +std::string tile::str() const +{ + if(args.empty()) + return "transform_args()"; + std::vector strs; + std::transform(args.begin(), args.end(), std::back_inserter(strs), [](mode m) { + switch(m) + { + case load: return "tile::load"; + case store: return 
"tile::store"; + case none: return "tile::none"; + } + MIGRAPHX_THROW("Invalid mode"); + }); + const std::string auto_tile = "auto_tile<${modes}>(${inner}, ${outer})"; + return interpolate_string(auto_tile, + {{"modes", join_strings(strs, ", ")}, + {"inner", generate_index_ints(inner)}, + {"outer", generate_index_ints(outer)}}); +} + +std::size_t find_fast_axis(const shape& input) +{ + if(input.scalar()) + return input.ndim() - 1; + if(input.broadcasted()) + { + auto stride_it = std::min_element( + input.strides().begin(), input.strides().end(), by(std::less<>{}, [](std::size_t i) { + if(i == 0) + return std::numeric_limits::max(); + return i; + })); + return stride_it - input.strides().begin(); + } + auto permutation = invert_permutation(find_permutation(input)); + auto it = std::max_element(permutation.begin(), permutation.end()); + return it - permutation.begin(); +} + +std::size_t find_fast_axis(const std::vector& inputs) +{ + auto permutation = invert_permutation(find_permutation(inputs)); + auto it = std::max_element(permutation.begin(), permutation.end()); + return it - permutation.begin(); +} + +std::string make_transformer_args(std::vector transformers) +{ + return join_strings(std::move(transformers), ", "); +} + +static void generate_pointwise(cpp_generator& gg, + const module& pm, + const std::string& name, + bool always_return_tuple = false) +{ + module m = pm; + run_passes(m, {rewrite_quantization{}, optimize_module{}}); + m.sort(); + cpp_generator g; + g.always_return_tuple(always_return_tuple); + g.fmap([](const std::string& fname) { return "migraphx::" + fname; }); + g.add_point_op("where", "${function:where}(${0}, ${1}, ${2})"); + g.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})"); + g.add_point_op("sign", "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))"); + g.add_point_op("equal", "migraphx::abs(${0} == ${1})"); + g.add_point_op("less", "migraphx::abs(${0} < ${1})"); + g.add_point_op("greater", "migraphx::abs(${0} > ${1})"); + g.add_point_op("not", "migraphx::abs(not ${0})"); + // Add explict conversions + g.fresult( + [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; }); + gg.create_function(g.generate_module(m) + .set_attributes({"__device__", "__attribute__((const))"}) + .set_generic_types(m) + .set_name(name)); +} +std::string generate_pointwise(const module& pm, const std::string& name, bool always_return_tuple) +{ + cpp_generator g; + generate_pointwise(g, pm, name, always_return_tuple); + return g.str(); +} + +std::string reduce_op::str() const +{ + return write + "(r.reduce(" + reduction + ", " + init + ", " + read + ")(" + + join_strings(inputs, ", ") + "))"; +} +void reduce_op::set(const std::string& name, const shape& input, const shape& output) +{ + assert(input.type() != shape::tuple_type); + assert(output.type() != shape::tuple_type); + if(name == "reduce_sum") + { + reduction = "op::sum{}"; + } + else if(name == "reduce_mean") + { + auto reduce_elements = input.elements() / output.elements(); + auto reduce_type = input.type(); + reduction = "op::sum{}"; + std::string mean = "op::mean<" + std::to_string(reduce_elements) + ">{}"; + // Use float accumulator when reduction size is too large for half + if(reduce_type == shape::half_type and reduce_elements > 16384) + read = "compose(" + mean + ", op::convert_to{})"; + else if(contains({shape::float_type, shape::half_type, shape::double_type}, reduce_type)) + read = mean; + else + write = mean; + } + else if(name == "reduce_max") + { + 
reduction = "op::max{}"; + init = "lowest{}"; + } + else if(name == "reduce_min") + { + reduction = "op::min{}"; + init = "highest{}"; + } + else if(name == "reduce_prod") + { + reduction = "op::product{}"; + init = "1"; + } + else if(name == "reduce_any") + { + reduction = "op::logical_or{}"; + init = "bool{false}"; + } + else if(name == "reduce_all") + { + reduction = "op::logical_and{}"; + init = "bool{true}"; + } + else + { + MIGRAPHX_THROW("Unsupported reduce"); + } +} + +void reduce_op::set(instruction_ref ins, const operation& op) +{ + if(op.name() == "gpu::parallel_reduce") + { + auto rop = from_value(op.to_value().at("op")); + auto input = ins->inputs().front()->get_shape(); + auto output = ins->get_shape().sub_shapes().front(); + set(rop.name(), input, output); + read = "compose(array_apply(" + read + "), MIGRAPHX_LIFT(make_array))"; + } + else + { + set(op.name(), ins->inputs().front()->get_shape(), ins->get_shape()); + } +} +std::string reduce_op::generate(instruction_ref ins, const std::vector& x) +{ + reduce_op r{x}; + r.set(ins, ins->get_operator()); + return r.str(); +} + +static bool use_lazy_inner(instruction_ref ins) +{ + if(ins->outputs().size() != 1) + return false; + // When the inputs are broadcasted, it means the lambda will capture SGPRs + // when doing block/wave reduction. This can cause register spilling in + // the compiler when the lambda is evaluated at a later time although it + // shouldn't. Instead, use `inner` to workaround this issue in the + // compiler. + if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](instruction_ref input) { + return input->get_shape().broadcasted(); + })) + return false; + auto output = ins->outputs().front(); + return contains(output->name(), "reduce") or output->name() == "@return"; +} + +void preload_params(module& m) +{ + for(auto ins : iterator_for(m)) + { + if(ins->name() != "@param") + continue; + if(ins->outputs().size() <= 1) + continue; + auto id = m.insert_instruction(std::next(ins), make_op("identity"), ins); + m.replace_instruction(ins, id); + } +} + +std::string generate_reduce(module m, const std::string& name) +{ + preload_params(m); + run_passes(m, {optimize_module{}, prepare_reduce{}, optimize_module{}}); + m.sort(); + cpp_generator g; + g.always_return_tuple(); + auto param_shapes = m.get_parameter_shapes(); + auto max_shape = + std::max_element(param_shapes.begin(), + param_shapes.end(), + by(std::less<>{}, [](const auto& p) { return p.second.elements(); })); + auto ilens = max_shape->second.lens(); + std::size_t i = 0; + auto f = g.generate_module(m, [&](instruction_ref ins, const auto& names) { + if(contains(ins->name(), "reduce")) + { + return reduce_op::generate(ins, cpp_generator::to_args(ins->inputs(), names)); + } + if(ins->name() == "pointwise") + { + auto pointwise_name = "pointwise" + std::to_string(i); + i++; + generate_pointwise(g, *ins->module_inputs().front(), pointwise_name); + std::vector tensors; + std::copy_if(ins->inputs().begin(), + ins->inputs().end(), + std::back_inserter(tensors), + [&](auto input) { + return input->get_shape().lens() == ilens and + not input->get_shape().broadcasted(); + }); + auto inner_names = names; + for(auto input : ins->inputs()) + { + if(input->name() != "@param") + continue; + if(contains(tensors, input)) + continue; + inner_names[input] += "[out_idx]"; + } + for(auto input : tensors) + inner_names[input] += "_lambda_param"; + auto call_function = + pointwise_name + "(" + + join_strings(cpp_generator::to_args(ins->inputs(), inner_names), ", ") + ")"; 
+ if(tensors.empty()) + return call_function; + const std::string inner_template = + "r.${inner}([=](${params}) { return ${call}; })(${args})"; + std::string inner_name = use_lazy_inner(ins) ? "lazy_inner" : "inner"; + auto args = cpp_generator::to_args(tensors, names); + auto params = cpp_generator::to_args(tensors, inner_names); + std::transform( + params.begin(), params.end(), params.begin(), [](auto s) { return "auto " + s; }); + return interpolate_string(inner_template, + {{"inner", inner_name}, + {"params", join_strings(params, ", ")}, + {"args", join_strings(args, ", ")}, + {"call", call_function}}); + } + if(ins->name() == "multibroadcast") + { + return names.at(ins->inputs().front()); + } + if(ins->name() == "get_tuple_elem") + { + const auto& x = names.at(ins->inputs().front()); + auto index = ins->get_operator().to_value()["index"].to(); + return interpolate_string("${x}[${index}]", + {{"x", x}, {"index", std::to_string(index)}}); + } + if(ins->name() == "identity") + { + const auto& x = names.at(ins->inputs().front()); + return "r.inner(op::id{})(" + x + ")"; + } + MIGRAPHX_THROW("Unknown operator: " + ins->name()); + }); + f.set_attributes({"__device__", "__attribute__((const))"}).set_generic_types(m).set_name(name); + f.add_generic_param("r"); + f.add_generic_param("out_idx"); + f.unused_param("out_idx"); + g.create_function(f); + return g.str(); +} + +static std::vector get_op_names(const module& m) +{ + std::vector result; + for(auto& ins : m) + { + if(starts_with(ins.name(), "@")) + continue; + if(contains({"multibroadcast", "contiguous", "identity"}, ins.name())) + continue; + if(ins.name() == "pointwise") + { + auto names = get_op_names(*ins.module_inputs().front()); + result.insert(result.end(), names.begin(), names.end()); + } + else + { + result.push_back(ins.name()); + } + } + return result; +} + +std::string generate_name_from_ops(const module& m, const std::string& postname) +{ + auto op_names = get_op_names(m); + if(not postname.empty()) + op_names.push_back(postname); + if(op_names.empty()) + return "noop"; + return join_strings(op_names, "_"); +} + +} // namespace gen +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_hip.cpp b/docker/rocm/migraphx/targets/gpu/compile_hip.cpp new file mode 100644 index 000000000..58b518725 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_hip.cpp @@ -0,0 +1,406 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
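
// Illustrative sketch (not part of the patch): the per-input rule in vectorize::elements in
// compile_gen.cpp above picks the largest candidate width that divides the axis length while
// every stride is 0, 1, or a multiple of that width (fp8 inputs disable vectorization entirely).
// A simplified standalone version of that check for a single input (hypothetical helper name):
#include <cstddef>
#include <vector>

std::size_t pick_vector_width(const std::vector<std::size_t>& lens,
                              const std::vector<std::size_t>& strides,
                              std::size_t axis,
                              const std::vector<std::size_t>& candidates)
{
    for(std::size_t v : candidates) // e.g. {4, 2}, or just {2} when all inputs are half
    {
        if(lens[axis] % v != 0)
            continue;
        bool strides_ok = true;
        for(std::size_t s : strides)
            if(s != 0 and s != 1 and s % v != 0)
                strides_ok = false;
        if(strides_ok)
            return v;
    }
    return 1; // fall back to scalar accesses
}
// pick_vector_width({8, 64}, {64, 1}, 1, {4, 2}) -> 4; with strides {64, 3} it would return 1
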
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef MIGRAPHX_USE_HIPRTC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +#include +#include +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG_SYM); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC); + +#ifdef MIGRAPHX_USE_HIPRTC + +std::string hiprtc_error(hiprtcResult err, const std::string& msg) +{ + return "hiprtc: " + (hiprtcGetErrorString(err) + (": " + msg)); +} + +void hiprtc_check_error(hiprtcResult err, const std::string& msg, const std::string& ctx) +{ + if(err != HIPRTC_SUCCESS) + throw make_exception(ctx, hiprtc_error(err, msg)); +} + +// NOLINTNEXTLINE +#define MIGRAPHX_HIPRTC(...) \ + hiprtc_check_error(__VA_ARGS__, #__VA_ARGS__, MIGRAPHX_MAKE_SOURCE_CTX()) + +#define MIGRAPHX_HIPRTC_THROW(error, msg) MIGRAPHX_THROW(hiprtc_error(error, msg)) + +// Workaround hiprtc's broken API +void hiprtc_program_destroy(hiprtcProgram prog) { hiprtcDestroyProgram(&prog); } +using hiprtc_program_ptr = MIGRAPHX_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy); + +template +hiprtc_program_ptr hiprtc_program_create(Ts... xs) +{ + hiprtcProgram prog = nullptr; + auto result = hiprtcCreateProgram(&prog, xs...); + hiprtc_program_ptr p{prog}; + if(result != HIPRTC_SUCCESS) + MIGRAPHX_HIPRTC_THROW(result, "Create program failed."); + return p; +} + +struct hiprtc_program +{ + struct string_array + { + std::deque strings{}; + std::vector c_strs{}; + + string_array() {} + string_array(const string_array&) = delete; + + std::size_t size() const { return strings.size(); } + + const char** data() { return c_strs.data(); } + + void push_back(std::string s) + { + strings.push_back(std::move(s)); + c_strs.push_back(strings.back().c_str()); + } + }; + + hiprtc_program_ptr prog = nullptr; + string_array headers{}; + string_array include_names{}; + std::string cpp_src = ""; + std::string cpp_name = ""; + + hiprtc_program(const std::string& src, const std::string& name = "main.cpp") + : cpp_src(src), cpp_name(name) + { + create_program(); + } + + hiprtc_program(std::vector srcs) + { + for(auto&& src : srcs) + { + if(ends_with(src.path, ".cpp")) + { + cpp_src = std::move(src.content); + cpp_name = std::move(src.path); + } + else + { + headers.push_back(std::move(src.content)); + include_names.push_back(std::move(src.path)); + } + } + create_program(); + } + + void create_program() + { + assert(not cpp_src.empty()); + assert(not cpp_name.empty()); + assert(headers.size() == include_names.size()); + prog = hiprtc_program_create(cpp_src.c_str(), + cpp_name.c_str(), + headers.size(), + headers.data(), + include_names.data()); + } + + void compile(const std::vector& options, bool quiet = false) const + { + if(enabled(MIGRAPHX_TRACE_HIPRTC{})) + std::cout << "hiprtc " << join_strings(options, " ") << " " << cpp_name << std::endl; + std::vector c_options; + std::transform(options.begin(), + options.end(), + std::back_inserter(c_options), + [](const std::string& s) { return 
s.c_str(); }); + auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data()); + auto prog_log = log(); + if(not prog_log.empty() and not quiet) + { + std::cerr << prog_log << std::endl; + } + if(result != HIPRTC_SUCCESS) + MIGRAPHX_HIPRTC_THROW(result, "Compilation failed."); + } + + std::string log() const + { + std::size_t n = 0; + MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n)); + if(n == 0) + return {}; + std::string buffer(n, '\0'); + MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data())); + assert(buffer.back() != 0); + return buffer; + } + + std::vector get_code_obj() const + { + std::size_t n = 0; + MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n)); + std::vector buffer(n); + MIGRAPHX_HIPRTC(hiprtcGetCode(prog.get(), buffer.data())); + return buffer; + } +}; + +std::vector> compile_hip_src_with_hiprtc(std::vector srcs, + const std::vector& params, + const std::string& arch) +{ + hiprtc_program prog(std::move(srcs)); + auto options = params; + options.push_back("-DMIGRAPHX_USE_HIPRTC=1"); + if(enabled(MIGRAPHX_GPU_DEBUG{})) + options.push_back("-DMIGRAPHX_DEBUG"); + if(std::none_of(options.begin(), options.end(), [](const std::string& s) { + return starts_with(s, "--std=") or starts_with(s, "-std="); + })) + options.push_back("-std=c++17"); + options.push_back("-fno-gpu-rdc"); + options.push_back("-O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3")); + options.push_back("-Wno-cuda-compat"); + options.push_back("--offload-arch=" + arch); + prog.compile(options); + return {prog.get_code_obj()}; +} + +bool hip_has_flags(const std::vector& flags) +{ + hiprtc_program prog{" "}; + + std::string src = " "; + src_file input{"main.cpp", src}; + std::vector srcs = {input}; + + try + { + std::string arch = "gfx900"; + compile_hip_src(srcs, flags, arch); + return true; + } + catch(...) 
+ { + return false; + } +} + +std::vector> compile_hip_src(const std::vector& srcs, + const std::vector& params, + const std::string& arch) +{ + std::vector hsrcs{srcs.begin(), srcs.end()}; + if(enabled(MIGRAPHX_GPU_DUMP_SRC{})) + { + for(const auto& src : srcs) + { + if(src.path.extension() != ".cpp") + continue; + std::cout << std::string(src.content) << std::endl; + } + } + + auto fname = make_executable_filename("migraphx-hiprtc-driver"); + auto p = dynamic_loader::path(&compile_hip_src_with_hiprtc); + auto driver = p.parent_path() / fname; + + bool found = fs::exists(driver); + if(not found) + { + driver = p.parent_path().parent_path() / "bin" / fname; + found = fs::exists(driver); + } + + if(found) + { + value v; + v["srcs"] = to_value(hsrcs); + v["params"] = to_value(params); + v["arch"] = to_value(arch); + + tmp_dir td{}; + auto out = td.path / "output"; + + process(driver, {quote_string(out.string())}).write([&](auto writer) { + to_msgpack(v, writer); + }); + if(fs::exists(out)) + return {read_buffer(out)}; + } + return compile_hip_src_with_hiprtc(std::move(hsrcs), params, arch); +} + +#else // MIGRAPHX_USE_HIPRTC + +std::vector> +compile_hip_src_with_hiprtc(std::vector, // NOLINT + const std::vector&, // NOLINT + const std::string&) +{ + MIGRAPHX_THROW("Not using hiprtc"); +} + +bool is_hip_clang_compiler() +{ + static const auto result = fs::path{MIGRAPHX_HIP_COMPILER}.stem() == "clang++"; + return result; +} + +#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER + +bool has_compiler_launcher() +{ + static const auto result = fs::exists(MIGRAPHX_HIP_COMPILER_LAUNCHER); + return result; +} + +#endif + +src_compiler assemble(src_compiler compiler) +{ + compiler.out_ext = ".S"; + std::replace(compiler.flags.begin(), compiler.flags.end(), "-c", "-S"); + return compiler; +} + +std::vector> compile_hip_src(const std::vector& srcs, + const std::vector& params, + const std::string& arch) +{ + assert(not srcs.empty()); + + if(not is_hip_clang_compiler()) + MIGRAPHX_THROW("Unknown hip compiler: " MIGRAPHX_HIP_COMPILER); + + src_compiler compiler; + compiler.flags = params; + compiler.compiler = MIGRAPHX_HIP_COMPILER; +#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER + if(has_compiler_launcher()) + compiler.launcher = MIGRAPHX_HIP_COMPILER_LAUNCHER; +#endif + + if(std::none_of(params.begin(), params.end(), [](const std::string& s) { + return starts_with(s, "--std=") or starts_with(s, "-std="); + })) + compiler.flags.emplace_back("--std=c++17"); + compiler.flags.emplace_back(" -fno-gpu-rdc"); + if(enabled(MIGRAPHX_GPU_DEBUG_SYM{})) + compiler.flags.emplace_back("-g"); + compiler.flags.emplace_back("-c"); + compiler.flags.emplace_back("--offload-arch=" + arch); + compiler.flags.emplace_back("--cuda-device-only"); + compiler.flags.emplace_back("-O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " "); + + if(enabled(MIGRAPHX_GPU_DEBUG{})) + compiler.flags.emplace_back("-DMIGRAPHX_DEBUG"); + + compiler.flags.emplace_back("-Wno-unused-command-line-argument"); + compiler.flags.emplace_back("-Wno-cuda-compat"); + compiler.flags.emplace_back(MIGRAPHX_HIP_COMPILER_FLAGS); + + if(enabled(MIGRAPHX_GPU_DUMP_SRC{})) + { + for(const auto& src : srcs) + { + if(src.path.extension() != ".cpp") + continue; + std::cout << std::string(src.content) << std::endl; + } + } + + if(enabled(MIGRAPHX_GPU_DUMP_ASM{})) + { + + std::cout << assemble(compiler).compile(srcs).data() << std::endl; + } + + return {compiler.compile(srcs)}; +} + +bool hip_has_flags(const std::vector& flags) +{ + src_compiler compiler; + compiler.compiler = 
MIGRAPHX_HIP_COMPILER; + compiler.flags = flags; + compiler.flags.emplace_back("-x hip"); + compiler.flags.emplace_back("-c"); + compiler.flags.emplace_back("--offload-arch=gfx900"); + compiler.flags.emplace_back("--cuda-device-only"); + + std::string src; + src_file input{"main.cpp", src}; + + try + { + compiler.compile({input}); + return true; + } + catch(...) + { + return false; + } +} + +#endif // MIGRAPHX_USE_HIPRTC + +std::string enum_params(std::size_t count, std::string param) +{ + std::vector items(count); + transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); }); + return join_strings(items, ","); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_hip_code_object.cpp b/docker/rocm/migraphx/targets/gpu/compile_hip_code_object.cpp new file mode 100644 index 000000000..dfd18ad7d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_hip_code_object.cpp @@ -0,0 +1,215 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
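
// Illustrative sketch (not part of the patch): enum_params above expands a count and a prefix
// into the comma-separated parameter list that gets spliced into generated kernel source. A
// standalone equivalent (hypothetical name):
#include <cstddef>
#include <string>

std::string enum_params_sketch(std::size_t count, const std::string& param)
{
    std::string result;
    for(std::size_t i = 0; i < count; i++)
    {
        if(i > 0)
            result += ",";
        result += param + std::to_string(i);
    }
    return result; // enum_params_sketch(3, "void* p") -> "void* p0,void* p1,void* p2"
}
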
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +std::string generate_make_shape(const shape& s) +{ + return "make_shape(" + generate_index_ints(s.lens()) + ", " + generate_index_ints(s.strides()) + + ")"; +} + +static const char* const make_tensor_template = R"__migraphx__( +template<> +struct make_tensor<${n}> +{ + static __device__ auto apply(void* __restrict__ p) + { + return make_tensor_view(reinterpret_cast<${type}* __restrict__>(p), make_shape(${lens}, ${strides})); + } +}; +)__migraphx__"; + +std::string generate_make_tensor(std::size_t n, const shape& s) +{ + return interpolate_string(make_tensor_template, + {{"n", std::to_string(n)}, + {"type", shape::cpp_type(s.type())}, + {"lens", generate_index_ints(s.lens())}, + {"strides", generate_index_ints(s.strides())}}); +} + +std::string generate_args_hpp(const std::vector& inputs) +{ + std::string inner; + for(std::size_t i = 0; i < inputs.size(); i++) + { + inner += generate_make_tensor(i, inputs[i]); + } + const std::string args_hpp = R"__migraphx__( +#ifndef MIGRAPHX_GUARD_AUTO_ARGS_HPP +#define MIGRAPHX_GUARD_AUTO_ARGS_HPP + +#include +#include +#include + +namespace migraphx { + +__content__ + +} // namespace migraphx +#endif +)__migraphx__"; + return replace_string(args_hpp, "__content__", inner); +} + +static std::vector get_compiler_warnings() +{ + std::vector warnings = { + "-Weverything", + "-Wno-c++98-compat", + "-Wno-c++98-compat-pedantic", + "-Wno-conversion", + "-Wno-double-promotion", + "-Wno-exit-time-destructors", + "-Wno-extra-semi", + "-Wno-extra-semi-stmt", + "-Wno-float-conversion", + "-Wno-gnu-anonymous-struct", + "-Wno-gnu-zero-variadic-macro-arguments", + "-Wno-missing-prototypes", + "-Wno-nested-anon-types", + "-Wno-padded", + "-Wno-shorten-64-to-32", + "-Wno-sign-conversion", + "-Wno-sign-compare", + "-Wno-unused-command-line-argument", + "-Wno-weak-vtables", + "-Wno-c99-extensions", + }; + + if(hip_has_flags({"-Werror", "-Wunsafe-buffer-usage"})) + warnings.push_back("-Wno-unsafe-buffer-usage"); + return warnings; +} + +const std::vector& compiler_warnings() +{ + static std::vector warnings = get_compiler_warnings(); + return warnings; +} + +void hip_compile_options::set_launch_params( + const value& v, + const std::function& compute_global, + std::size_t default_local) +{ + local = v.get("local", default_local); + if(v.contains("global")) + global = v.at("global").to(); + else + global = compute_global(local); +} + +static bool hip_accept_non_uniform_wg() +{ + static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"}); + return non_uniform_wg; +} + +std::function +compute_global_for(context& ctx, std::size_t n, std::size_t over) +{ + assert(over > 0); + std::size_t max_global = ctx.get_current_device().get_cu_count() * + ctx.get_current_device().get_max_workitems_per_cu(); + return [n, over, max_global](std::size_t local) { + std::size_t num_elements = n; + if(not hip_accept_non_uniform_wg()) + { + num_elements = (1 + (n - 1) / local) * local; + } + std::size_t groups = 1 + (num_elements - 1) / local; + std::size_t max_blocks = max_global / local; + std::size_t nglobal = std::min(max_blocks * over, groups) * local; + return std::min(nglobal, num_elements); + }; +} + +std::size_t compute_block_size(context& ctx, std::size_t n, std::size_t max_block_size) +{ + const std::size_t min_block_size = ctx.get_current_device().get_wavefront_size(); + auto block_size = (((n - 1) / 
min_block_size + 1)) * min_block_size; + return std::min(std::max(min_block_size, block_size), max_block_size); +} + +operation +compile_hip_code_object(context& ctx, const std::string& content, hip_compile_options options) +{ + assert(options.global > 0); + assert(options.local > 0); + assert(not options.inputs.empty()); + assert(options.inputs.size() == options.virtual_inputs.size() or + options.virtual_inputs.empty()); + std::vector srcs = options.additional_src_files; + static auto kernels{::migraphx_kernels()}; + std::transform( + kernels.begin(), + kernels.end(), + std::back_inserter(srcs), + [](const std::pair& elem) { return src_file{elem}; }); + srcs.emplace_back("main.cpp", content); + auto args_hpp = + generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs); + srcs.emplace_back("args.hpp", args_hpp); + + if(options.global % options.local != 0 and hip_accept_non_uniform_wg()) + options.emplace_param("-fno-offload-uniform-block"); + else + assert(options.global % options.local == 0); + + options.emplace_param("-DMIGRAPHX_NGLOBAL=" + std::to_string(options.global)); + options.emplace_param("-DMIGRAPHX_NLOCAL=" + std::to_string(options.local)); + options.emplace_param("-DMIGRAPHX_WAVEFRONTSIZE=" + + std::to_string(ctx.get_current_device().get_wavefront_size())); + const auto& warnings = compiler_warnings(); + options.params.insert(options.params.end(), warnings.begin(), warnings.end()); + options.emplace_param("-ftemplate-backtrace-limit=0"); + options.emplace_param("-Werror"); + auto cos = compile_hip_src(srcs, options.params, get_device_name()); + if(cos.size() != 1) + MIGRAPHX_THROW("No code object"); + return code_object_op{value::binary{cos.front()}, + options.kernel_name, + options.global, + options.local, + options.inputs, + options.output, + options.output_arg}; +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_hipblaslt.cpp b/docker/rocm/migraphx/targets/gpu/compile_hipblaslt.cpp new file mode 100644 index 000000000..c320e6b7d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_hipblaslt.cpp @@ -0,0 +1,78 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
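
// Illustrative sketch (not part of the patch): compute_block_size above rounds the element count
// up to a multiple of the wavefront size and clamps the result between the wavefront size and a
// maximum block size. A standalone version with made-up numbers:
#include <algorithm>
#include <cstddef>

std::size_t block_size_for(std::size_t n, std::size_t wavefront, std::size_t max_block)
{
    std::size_t rounded = ((n - 1) / wavefront + 1) * wavefront; // round n up to a wavefront multiple
    return std::min(std::max(wavefront, rounded), max_block);
}
// block_size_for(1000, 64, 256) == 256 (1000 rounds up to 1024, then clamps to the maximum)
// block_size_for(100, 64, 1024) == 128 (100 rounds up to two wavefronts)
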
+ */ + +#if MIGRAPHX_USE_HIPBLASLT +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +static size_t compile(migraphx::context& ctx, operation& op, instruction_ref ins) +{ + auto v = op.compile(ctx, ins->get_shape(), to_shapes(ins->inputs())); + return v.get("workspace", 0); +} + +void compile_hipblaslt::apply(module& m) const +{ + assert(ctx); + for(auto ins : iterator_for(m)) + { + if(ins->name() != "gpu::hipblaslt_op") + continue; + auto op = any_cast(ins->get_operator()).op; + auto inputs = ins->inputs(); + + std::size_t ws = hipblaslt_workspace_size; + + auto alloc = m.insert_instruction( + ins, make_op("allocate", {{"shape", to_value(shape{shape::uint8_type, {ws}})}})); + inputs.insert(std::prev(inputs.end()), alloc); + m.replace_instruction(ins, op, inputs); + + // Calculate workspace size + ws = compile(*ctx, op, ins); + auto alloc_after = m.insert_instruction( + ins, make_op("allocate", {{"shape", to_value(shape{shape::uint8_type, {ws}})}})); + + // Replace the workspace size with actual worksapce size needed. + auto it = std::find(inputs.begin(), inputs.end(), alloc); + if(it != inputs.end()) + { + *it = alloc_after; // Replace `alloc` with `alloc_after` + } + m.replace_instruction(ins, op, inputs); + } +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_USE_HIPBLASLT diff --git a/docker/rocm/migraphx/targets/gpu/compile_miopen.cpp b/docker/rocm/migraphx/targets/gpu/compile_miopen.cpp new file mode 100644 index 000000000..583601bdd --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_miopen.cpp @@ -0,0 +1,89 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
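
// Illustrative sketch (not part of the patch): compile_hipblaslt above first splices a
// default-sized workspace buffer in as the second-to-last input (the last input is the output
// allocation), compiles the op to learn the real workspace requirement, then swaps in an
// allocation of that size. The container surgery itself is just an insert before the last
// element:
#include <iterator>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> inputs = {"a", "b", "output_alloc"};
    inputs.insert(std::prev(inputs.end()), "workspace_alloc");
    // inputs is now {"a", "b", "workspace_alloc", "output_alloc"}
}
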
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct miopen_op +{ + operation op = op::identity{}; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op")); + } + + std::string name() const { return "gpu::miopen_op"; } + + shape compute_shape(std::vector inputs) const + { + inputs.push_back(inputs.back()); + return op.compute_shape(inputs); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +MIGRAPHX_REGISTER_OP(miopen_op); + +std::size_t compile_miopen::compile(operation& op, instruction_ref ins) const +{ + auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs())); + return v.get("workspace", 0); +} + +void compile_miopen::apply(module& m) const +{ + assert(ctx); + for(auto ins : iterator_for(m)) + { + if(ins->name() != "gpu::miopen_op") + continue; + auto op = any_cast(ins->get_operator()).op; + std::size_t ws = 0; + ws = compile(op, ins); + auto inputs = ins->inputs(); + auto alloc = m.insert_instruction( + ins, make_op("allocate", {{"shape", to_value(shape{shape::int8_type, {ws}})}})); + inputs.insert(std::prev(inputs.end()), alloc); + + m.replace_instruction(ins, op, inputs); + } +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_ops.cpp b/docker/rocm/migraphx/targets/gpu/compile_ops.cpp new file mode 100644 index 000000000..cc5a7fc24 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_ops.cpp @@ -0,0 +1,332 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING); + +struct precompile_op +{ + operation op = op::identity{}; + std::size_t additional_args = 1; + bool ignore_modules = false; + std::optional output_shape = nullopt; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op"), + f(self.additional_args, "additional_args"), + f(self.ignore_modules, "ignore_modules"), + f(self.output_shape, "output_shape")); + } + + std::string name() const { return "gpu::precompile_op"; } + + shape compute_shape(std::vector inputs, const std::vector& mods) const + { + // Pop off additional args + inputs.resize(inputs.size() - additional_args); + if(output_shape.has_value()) + return output_shape.value(); + if(ignore_modules) + return op.compute_shape(inputs); + return op.compute_shape(inputs, mods); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +MIGRAPHX_REGISTER_OP(precompile_op); + +struct compiled_result +{ + compiler_replace replace; + instruction_ref ins; + + friend std::ostream& operator<<(std::ostream& os, const compiled_result& cr) + { + cr.replace.trace(os, cr.ins); + return os; + } +}; + +struct compile_plan +{ + context* ctx; + operation preop; + instruction_ref ins; + optional config = nullopt; + std::vector> results = {}; + void update_config(bool exhaustive) + { + config = get_tuning_config(*ctx, ins, preop, exhaustive); + } + template + void insert_compiles(Vector& compiles, const value& solution, std::size_t i) + { + compiles.emplace_back([=] { + try + { + results[i] = compiled_result{compile(*ctx, ins, preop, solution), ins}; + } + catch(const std::exception& e) + { + const auto trace_level = value_of(MIGRAPHX_TRACE_BENCHMARKING{}); + if(trace_level > 0) + std::cerr << "Exception in " + preop.name() + ": " + e.what() << std::endl; + results[i] = nullopt; + } + catch(...) 
+ { + results[i] = nullopt; + } + }); + } + + template + void add_compiles(Vector& compiles) + { + if(config.has_value()) + { + const auto& problem = config->problem; + if(auto sol = ctx->get_problem_cache().get(preop.name(), problem)) + { + auto solution = sol.value(); + // No solution yet until benchmarked so skip for now + if(solution.is_null()) + return; + results.resize(1); + insert_compiles(compiles, solution, 0); + } + else + { + ctx->get_problem_cache().mark(preop.name(), problem); + const auto& solutions = config->solutions; + if(solutions.empty()) + MIGRAPHX_THROW("No solutions provided for " + preop.name() + " with " + + to_string(problem)); + results.resize(solutions.size()); + for(auto i : range(solutions.size())) + { + auto solution = solutions[i]; + insert_compiles(compiles, solution, i); + } + } + } + else + { + results.resize(1); + insert_compiles(compiles, value{}, 0); + } + } + std::string problem_string() const + { + if(config) + return to_string(config->problem); + return ""; + } + + const compiled_result& benchmark() const + { + const auto trace_level = value_of(MIGRAPHX_TRACE_BENCHMARKING{}); + if(trace_level > 0 and not results.empty()) + { + std::cout << "Benchmarking " << preop.name() << ": " << results.size() << " configs" + << std::endl; + } + if(results.empty()) + MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " + + problem_string()); + if(results.size() == 1) + { + if(not results.front().has_value()) + MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " + + problem_string()); + return *results.front(); + } + if(not config) + MIGRAPHX_THROW("Multiple kernels without config for " + preop.name()); + if(trace_level > 1) + std::cout << "Problem: " << config->problem << std::endl; + std::vector times; + times.reserve(results.size()); + std::transform(results.begin(), + results.end(), + config->solutions.begin(), + std::back_inserter(times), + [&](const auto& cr, const auto& solution) { + if(trace_level > 1) + std::cout << "Benchmarking solution: " << solution << std::endl; + if(not cr.has_value()) + { + if(trace_level > 1) + std::cout << "No binary" << std::endl; + return std::numeric_limits::max(); + } + if(trace_level > 2) + std::cout << *cr << std::endl; + /* + create a small program with insturction being compiled and call "replace" + on that which would insert all the compiled code objects, prefills etc. 
+ necessary to run candidate code object + */ + program bench_prog; + auto* bench_mm = bench_prog.get_main_module(); + std::vector bench_ins_inputs; + + std::transform(cr->ins->inputs().begin(), + cr->ins->inputs().end(), + std::back_inserter(bench_ins_inputs), + [&](const auto& arg) { + return bench_mm->add_parameter( + std::to_string(bench_ins_inputs.size()), + arg->get_shape()); + }); + auto bench_ins = bench_mm->add_instruction( + cr->ins->get_operator(), bench_ins_inputs, cr->ins->module_inputs()); + cr->replace.replace(*bench_mm, bench_ins); + // do dead code elimination by directly removing instruction + bench_mm->remove_instruction(bench_ins); + auto t = time_program(*ctx, bench_prog, 20); + if(trace_level > 1) + std::cout << t << "ms" << std::endl; + return t; + }); + std::this_thread::sleep_for(std::chrono::milliseconds{50}); + auto i = std::distance(times.begin(), std::min_element(times.begin(), times.end())); + if(trace_level > 0) + std::cout << "Fastest solution: " << config->solutions.at(i) << std::endl; + ctx->get_problem_cache().insert(preop.name(), config->problem, config->solutions.at(i)); + if(not results[i].has_value()) + MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " + + problem_string()); + auto skipped = std::count_if( + results.begin(), results.end(), [](const auto& cr) { return not cr.has_value(); }); + if(skipped > 0) + std::cout << "Skipped " << skipped << " configs for " << preop.name() << std::endl; + + return *results[i]; + } + + void replace(module& m) const + { + const auto& cr = benchmark(); + cr.replace.replace(m, cr.ins); + } +}; + +template +void par_compile(std::size_t n, F f) +{ + if(n == 0) + return; + auto d = value_of(MIGRAPHX_GPU_COMPILE_PARALLEL{}); + if(d == 0) + d = n; + par_for(n, n / d, f); +} + +struct compile_manager +{ + std::vector cps; + bool exhaustive = false; + + template + void add_plan(Ts&&... xs) + { + cps.push_back({std::forward(xs)...}); + } + + void update_configs() + { + par_compile(cps.size(), [&](auto i) { cps[i].update_config(exhaustive); }); + } + + void compile(module& m) + { + std::vector> compiles; + for(auto& cp : cps) + { + cp.add_compiles(compiles); + } + par_compile(compiles.size(), [&](auto i) { compiles[i](); }); + + // Replace and/or benchmark + for(const auto& cp : cps) + { + if(cp.results.empty()) + continue; + cp.replace(m); + } + + // Remove compile_plan already executed + cps.erase(std::remove_if(cps.begin(), + cps.end(), + [](const auto& cp) { return not cp.results.empty(); }), + cps.end()); + } +}; + +void compile_ops::apply(module& m) const +{ + compile_manager cm; + cm.exhaustive = exhaustive_tune; + // Find all precompile ops + for(auto ins : iterator_for(m)) + { + if(ins->name() != "gpu::precompile_op") + continue; + operation preop = any_cast(ins->get_operator()).op; + cm.add_plan(ctx, preop, ins); + } + cm.update_configs(); + cm.compile(m); + // Compile already tuned configs + cm.compile(m); + assert(cm.cps.empty()); +} + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compile_pointwise.cpp b/docker/rocm/migraphx/targets/gpu/compile_pointwise.cpp new file mode 100644 index 000000000..ee682cf2c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compile_pointwise.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
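
// Illustrative sketch (not part of the patch): compile_plan::benchmark above times one small
// program per candidate solution and keeps the index of the minimum, which is then stored in the
// problem cache. The selection step reduces to:
#include <algorithm>
#include <iterator>
#include <vector>

int main()
{
    std::vector<double> times = {1.8, 0.9, 2.4}; // milliseconds per candidate (made-up numbers)
    auto best = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
    (void)best; // best == 1, so solutions.at(1) would be cached as the tuned config
}
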
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +operation +compile_pointwise(context& ctx, const std::vector& in_shapes, const_module_ref pm) +{ + auto pf = gen::generate_pointwise(*pm, "inner_pointwise", true); + std::string lambda = "MIGRAPHX_LIFT(inner_pointwise)"; + auto kernel_name = gen::generate_name_from_ops(*pm, "kernel"); + return gpu::compile_op("pointwise", + ctx, + in_shapes, + {{"lambda", lambda}, {"preamble", pf}, {"kernel", kernel_name}}); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/compiler.cpp b/docker/rocm/migraphx/targets/gpu/compiler.cpp new file mode 100644 index 000000000..3b3b786e2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/compiler.cpp @@ -0,0 +1,74 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +namespace { +struct compiler_handle +{ + compiler_compile compile; + compiler_compile_op compile_op; + compiler_tuning_config get_tuning_config; +}; +} // namespace + +auto& compiler_map() +{ + static std::unordered_map m; // NOLINT + return m; +} + +void register_compiler(const std::string& name, + compiler_compile c, + compiler_compile_op cop, + compiler_tuning_config ctg) +{ + compiler_map()[name] = {std::move(c), std::move(cop), std::move(ctg)}; +} + +bool has_compiler_for(const std::string& name) { return compiler_map().count(name) > 0; } +compiler_replace +compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) +{ + return compiler_map().at(op.name()).compile(ctx, ins, op, solution); +} +operation +compile_op(const std::string& name, context& ctx, const std::vector& inputs, const value& v) +{ + return compiler_map().at(name).compile_op(ctx, inputs, v); +} + +optional +get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) +{ + return compiler_map().at(op.name()).get_tuning_config(ctx, ins, op, exhaustive); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/argmax.cpp b/docker/rocm/migraphx/targets/gpu/device/argmax.cpp new file mode 100644 index 000000000..e71a1b955 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/argmax.cpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
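
// Illustrative sketch (not part of the patch): compiler.cpp above is a plain name-to-callbacks
// registry kept in a function-local static map. A stripped-down standalone version of the same
// pattern (all names hypothetical):
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

using compile_fn = std::function<std::string(int)>;

std::unordered_map<std::string, compile_fn>& registry()
{
    static std::unordered_map<std::string, compile_fn> m; // constructed on first use
    return m;
}

void register_backend(const std::string& name, compile_fn f) { registry()[name] = std::move(f); }

bool has_backend(const std::string& name) { return registry().count(name) > 0; }

int main()
{
    register_backend("pointwise", [](int n) { return "kernel_" + std::to_string(n); });
    if(has_backend("pointwise"))
        std::cout << registry().at("pointwise")(3) << "\n"; // prints "kernel_3"
}
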
+ */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void argmax(hipStream_t stream, + const argument& result, + const argument& arg, + int64_t axis, + bool select_last_index) +{ + if(select_last_index) + arg_op(argmax_op_last_index{}, stream, result, arg, axis); + else + arg_op(argmax_op_first_index{}, stream, result, arg, axis); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/argmin.cpp b/docker/rocm/migraphx/targets/gpu/device/argmin.cpp new file mode 100644 index 000000000..18338bc48 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/argmin.cpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void argmin(hipStream_t stream, + const argument& result, + const argument& arg, + int64_t axis, + bool select_last_index) +{ + if(select_last_index) + arg_op(argmin_op_last_index{}, stream, result, arg, axis); + else + arg_op(argmin_op_first_index{}, stream, result, arg, axis); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/contiguous.cpp b/docker/rocm/migraphx/targets/gpu/device/contiguous.cpp new file mode 100644 index 000000000..7d30aec54 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/contiguous.cpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void contiguous_nonstandard(hipStream_t stream, const argument& result, const argument& arg) +{ + shape s{result.get_shape().type(), result.get_shape().lens()}; + visit_all(result, arg)([&](auto output_v, auto input_v) { + hip_visit_views(output_v, input_v, s)([&](auto output, auto input, auto standard_shape) { + mi_gs_launch(stream, + standard_shape)([=](auto idx) __device__ { output[idx] = input[idx]; }); + }); + }); +} + +void contiguous_packed(hipStream_t stream, const argument& result, const argument& arg) +{ + index_int nelements = result.get_shape().elements(); + visit_all(result, arg)([&](auto output_v, auto input_v) { + const auto* input = device_cast(input_v.data()); + auto* output = device_cast(output_v.data()); + gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = input[i]; }); + }); +} + +void contiguous(hipStream_t stream, const argument& result, const argument& arg) +{ + if(result.get_shape() == arg.get_shape() and result.get_shape().packed()) + contiguous_packed(stream, result, arg); + else + contiguous_nonstandard(stream, result, arg); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/fill.cpp b/docker/rocm/migraphx/targets/gpu/device/fill.cpp new file mode 100644 index 000000000..ea6640b7e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/fill.cpp @@ -0,0 +1,40 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void fill(hipStream_t stream, const argument& result, unsigned long val) +{ + nary(stream, result)([=]() __device__ { return val; }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/array.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/array.hpp new file mode 100644 index 000000000..41d58e667 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/array.hpp @@ -0,0 +1,185 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op) \ + MIGRAPHX_DEVICE_CONSTEXPR hip_array& operator op(const hip_array& x) \ + { \ + for(index_int i = 0; i < N; i++) \ + d[i] op x[i]; \ + return *this; \ + } \ + MIGRAPHX_DEVICE_CONSTEXPR hip_array& operator op(const T& x) \ + { \ + for(index_int i = 0; i < N; i++) \ + d[i] op x; \ + return *this; \ + } \ + friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(hip_array x, const hip_array& y) \ + { \ + return x op y; \ + } \ + friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(hip_array x, const T& y) \ + { \ + return x op y; \ + } \ + friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(const T& y, hip_array x) \ + { \ + return x op y; \ + } + +template +struct hip_array +{ + T d[N]; + MIGRAPHX_DEVICE_CONSTEXPR T& operator[](index_int i) { return d[i]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](index_int i) const { return d[i]; } + + MIGRAPHX_DEVICE_CONSTEXPR T& front() { return d[0]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& front() const { return d[0]; } + + MIGRAPHX_DEVICE_CONSTEXPR T& back() { return d[N - 1]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& back() const { return d[N - 1]; } + + MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; } + MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; } + + MIGRAPHX_DEVICE_CONSTEXPR std::integral_constant size() const { return {}; } + + MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; } + MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; } + + MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); } + MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); } + + MIGRAPHX_DEVICE_CONSTEXPR T dot(const hip_array& x) const + { + T result = 0; + for(index_int i = 0; i < N; i++) + result += x[i] * d[i]; + return result; + } + + MIGRAPHX_DEVICE_CONSTEXPR T product() const + { + T result = 1; + for(index_int i = 0; i < N; i++) + result *= d[i]; + return result; + } + + MIGRAPHX_DEVICE_CONSTEXPR T single(index_int width = 100) const + { + T result = 0; + T a = 1; + for(index_int i = 0; i < N; i++) + { + result += d[N - i - 1] * a; + a *= width; + } + return result; + } + + MIGRAPHX_DEVICE_ARRAY_OP(+=, +) + MIGRAPHX_DEVICE_ARRAY_OP(*=, *) + MIGRAPHX_DEVICE_ARRAY_OP(/=, /) + MIGRAPHX_DEVICE_ARRAY_OP(%=, %) + MIGRAPHX_DEVICE_ARRAY_OP(&=, &) + MIGRAPHX_DEVICE_ARRAY_OP(|=, |) + MIGRAPHX_DEVICE_ARRAY_OP(^=, ^) + + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator==(const hip_array& x, const hip_array& y) + { + for(index_int i = 0; i < N; i++) + { + if(x[i] != y[i]) + return false; + } + return true; + } + + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator!=(const hip_array& x, const hip_array& y) + { + return not(x == y); + } + // This uses the product order rather than lexical order + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator<(const hip_array& x, const hip_array& y) + { + for(index_int i = 0; i < N; i++) + { + if(not(x[i] < y[i])) + return false; + } + return true; + } + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator>(const hip_array& x, const hip_array& y) + { + return y < x; + } + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator<=(const hip_array& x, const hip_array& y) + { + return (x < y) or (x == y); + } + friend MIGRAPHX_DEVICE_CONSTEXPR bool operator>=(const hip_array& x, const hip_array& y) + { + return (y < x) or 
(x == y); + } + + MIGRAPHX_DEVICE_CONSTEXPR hip_array carry(hip_array result) const + { + uint32_t overflow = 0; + for(std::ptrdiff_t i = result.size() - 1; i > 0; i--) + { + auto z = result[i] + overflow; + // Reset overflow + overflow = 0; + // Compute overflow using while loop instead of mod + while(z >= d[i]) + { + z -= d[i]; + overflow += 1; + } + result[i] = z; + } + result[0] += overflow; + return result; + } +}; + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/fast_div.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/fast_div.hpp new file mode 100644 index 000000000..70c355135 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/fast_div.hpp @@ -0,0 +1,70 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
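
// A standalone host-side sketch of the index normalization performed by hip_array::carry
// above: overflow is propagated from the innermost dimension outward by repeated
// subtraction rather than a modulo. The rank and dimension lengths below are hypothetical;
// this is an illustration of the behavior, not part of the patch.

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Host-side replica of hip_array::carry for a rank-3 index; `lens` plays the role of the
// hip_array contents (the per-dimension lengths) and `idx` is the possibly out-of-range
// index being normalized after a strided increment.
std::array<uint32_t, 3> carry(const std::array<uint32_t, 3>& lens, std::array<uint32_t, 3> idx)
{
    uint32_t overflow = 0;
    for(std::ptrdiff_t i = idx.size() - 1; i > 0; i--)
    {
        auto z = idx[i] + overflow;
        overflow = 0;
        // Subtract instead of % since the overflow per step is usually small
        while(z >= lens[i])
        {
            z -= lens[i];
            overflow += 1;
        }
        idx[i] = z;
    }
    idx[0] += overflow;
    return idx;
}

int main()
{
    // lens {2, 3, 4}: index {0, 2, 5} carries into the outer dimensions, giving {1, 0, 1}
    assert((carry({2, 3, 4}, {0, 2, 5}) == std::array<uint32_t, 3>{1, 0, 1}));
    return 0;
}
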
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FAST_DIV_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FAST_DIV_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +constexpr const uint64_t fast_div_shift = 42; +inline uint64_t encode_divisor(uint64_t divisor) +{ + if(divisor == 0) + return 0; + auto p = uint64_t{1} << fast_div_shift; + return (p + divisor - 1) / divisor; +} + +inline constexpr bool is_divisor_encodable(uint64_t i) +{ + return i < (uint64_t{1} << (fast_div_shift / 2)); +} + +MIGRAPHX_DEVICE_CONSTEXPR uint64_t fast_div(uint64_t dividend, uint64_t encoded_divisor) +{ + return (dividend * encoded_divisor) >> fast_div_shift; +} + +MIGRAPHX_DEVICE_CONSTEXPR uint64_t remainder(uint64_t result, uint64_t dividend, uint64_t divisor) +{ + return dividend - divisor * result; +} + +MIGRAPHX_DEVICE_CONSTEXPR uint64_t fast_mod(uint64_t dividend, + uint64_t divisor, + uint64_t encoded_divisor) +{ + return remainder(fast_div(dividend, encoded_divisor), dividend, divisor); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/float_equal.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/float_equal.hpp new file mode 100644 index 000000000..a5f18fc5a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/float_equal.hpp @@ -0,0 +1,74 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
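
// A standalone host-side sketch of the encoded-divisor trick defined in fast_div.hpp above:
// a divisor d is encoded once as ceil(2^42 / d), after which a division becomes a multiply
// and a shift. The identity fast_div(n, encode_divisor(d)) == n / d holds whenever both n
// and d are below 2^21, which is the bound is_divisor_encodable() enforces. The test values
// below are hypothetical; this sketch is not part of the patch.

#include <cassert>
#include <cstdint>
#include <initializer_list>

constexpr uint64_t shift = 42;

constexpr uint64_t encode_divisor(uint64_t d)
{
    return d == 0 ? 0 : ((uint64_t{1} << shift) + d - 1) / d;
}
constexpr uint64_t fast_div(uint64_t n, uint64_t e) { return (n * e) >> shift; }

int main()
{
    for(uint64_t d : {1, 3, 7, 1000, (1 << 20)})
    {
        const uint64_t e = encode_divisor(d);
        for(uint64_t n : {0, 1, 5, 999, 123456, (1 << 20)})
            assert(fast_div(n, e) == n / d);
    }
    return 0;
}
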
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +using common_type = typename std::common_type::type; + +template {})> +__device__ bool float_equal_device(T x, T y) +{ + return std::isfinite(x) and std::isfinite(y) and + std::nextafter(x, std::numeric_limits::lowest()) <= y and + std::nextafter(x, std::numeric_limits::max()) >= y; +} + +template <> +__device__ bool float_equal_device(__bf16 x, __bf16 y) // NOLINT(misc-definitions-in-headers) +{ + float xf = x; + float yf = y; + return std::isfinite(xf) and std::isfinite(yf) and + std::nextafter(xf, std::numeric_limits::lowest()) <= yf and + std::nextafter(xf, std::numeric_limits::max()) >= yf; +} + +template {})> +__device__ bool float_equal_device(T x, T y) +{ + return x == y; +} + +template +__device__ bool float_equal(T x, U y) +{ + return float_equal_device>(x, y); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/launch.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/launch.hpp new file mode 100644 index 000000000..573f57b3b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/launch.hpp @@ -0,0 +1,146 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +struct index +{ + index_int global = 0; + index_int local = 0; + index_int group = 0; + + __device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT + + __device__ index_int nlocal() const { return blockDim.x; } // NOLINT + + template + __device__ void global_stride(index_int n, F f) const + { + const auto stride = nglobal(); + for(index_int i = global; i < n; i += stride) + { + f(i); + } + } + + template + __device__ void local_stride(index_int n, F f) const + { + const auto stride = nlocal(); + for(index_int i = local; i < n; i += stride) + { + f(i); + } + } +}; + +template +__global__ void launcher(F f) +{ + index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT + f(idx); +} + +inline auto launch(hipStream_t stream, index_int global, index_int local) +{ + return [=](auto f) { + assert(local > 0); + assert(global > 0); + using f_type = decltype(f); + dim3 nblocks(global / local); + dim3 nthreads(local); + /* + hipGetLastError() returns error for the first failed HIP call that happened previously. + MIGraphX calls into various backend libraries and failed HIP calls can also happen there. + Calling hipGetLastError() would reset error code to hipSuccess, so that inside MIGraphX + failed call to hipLaunchKernelGGL() can be captured. + */ + hipError_t flush_call = hipGetLastError(); + (void)(flush_call); + // cppcheck-suppress migraphx-UseDeviceLaunch + hipLaunchKernelGGL((launcher), nblocks, nthreads, 0, stream, f); + hipError_t kernel_launch_status = hipGetLastError(); + if(kernel_launch_status != hipSuccess) + { + std::string message = hipGetErrorString(kernel_launch_status); + if(not contains(get_targets(), get_device_name())) + { + message += ". Trying to run a kernel for " + get_device_name() + + " but MIGraphX was built for targets " + get_targets_as_string() + + ". 
Please rebuild MIGraphX with -DGPU_TARGETS='" + get_device_name() + + "'."; + } + MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " + message); + } + }; +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto gs_invoke(F&& f, index_int i, index idx) -> decltype(f(i, idx)) +{ + return f(i, idx); +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto gs_invoke(F&& f, index_int i, index) -> decltype(f(i)) +{ + return f(i); +} + +inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024) +{ + index_int groups = (n + local - 1) / local; + // max possible number of blocks is set to 1B (1,073,741,824) + index_int nglobal = std::min(1073741824, groups) * local; + + return [=](auto f) { + launch(stream, nglobal, local)([=](auto idx) __device__ { + idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); }); + }); + }; +} + +#ifdef MIGRAPHX_USE_CLANG_TIDY +#define MIGRAPHX_DEVICE_SHARED +#else +#define MIGRAPHX_DEVICE_SHARED __shared__ +#endif + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp new file mode 100644 index 000000000..6be513a88 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/multi_index.hpp @@ -0,0 +1,164 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
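
// A standalone host-side model of the gs_launch mapping defined in launch.hpp above: the
// grid is rounded up to ceil(n / local) blocks (capped at 2^30), and every thread then walks
// the range with a grid-sized stride, so each element of [0, n) is visited exactly once even
// when the cap kicks in. The problem size below is hypothetical; this sketch is not part of
// the patch.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

int main()
{
    const uint64_t n       = 10000;
    const uint64_t local   = 1024;
    const uint64_t groups  = (n + local - 1) / local;
    const uint64_t nglobal = std::min<uint64_t>(1073741824, groups) * local;

    std::vector<int> visits(n, 0);
    for(uint64_t global = 0; global < nglobal; global++) // one iteration per "thread"
        for(uint64_t i = global; i < n; i += nglobal)    // the global_stride loop
            visits[i]++;

    assert(std::all_of(visits.begin(), visits.end(), [](int v) { return v == 1; }));
    return 0;
}
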
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_MULTI_INDEX_HPP +#define MIGRAPHX_GUARD_RTGLIB_MULTI_INDEX_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct multi_index +{ + using hip_index = hip_array; + hip_index id{}; + hip_index stride{}; + + MIGRAPHX_DEVICE_CONSTEXPR auto for_stride(hip_index n) const + { + // f should return void, but this helps with type deduction + return [=](auto f) -> decltype(f(hip_index{})) { + for(hip_index i = id; i < n; i = n.carry(i + stride)) + { + f(i); + } + }; + } +}; + +template +__device__ __host__ auto deduce_for_stride(ForStride fs) -> decltype(fs(id{})); + +MIGRAPHX_DEVICE_CONSTEXPR multi_index<1> make_multi_index(index_int i, index_int n) +{ + return {{i}, {n}}; +} + +template +MIGRAPHX_DEVICE_CONSTEXPR multi_index +make_multi_index(const hip_shape& s, index_int i, index_int n) +{ + return {s.multi(i), s.multi(n)}; +} + +template +MIGRAPHX_DEVICE_CONSTEXPR multi_index +make_multi_index(const hip_shape& s, index_int i, const hip_array& n) +{ + return {s.multi(i), n}; +} + +template +inline auto mi_nglobal(const hip_shape& s, index_int nlocal) +{ + assert(s.standard); + assert(s.elements() > 0); + index_int n = s.elements(); + index_int groups = (n + nlocal - 1) / nlocal; + // max possible number of blocks is set to 1B (1,073,741,824) + index_int nglobal = std::min(1073741824, groups) * nlocal; + + assert(groups > 0); + assert(nglobal > 0); + auto nglobal_multi = s.multi(nglobal); + + // Skip checking this, since this will cause metadata to not be generated + // for some unknown reason. + // + // assert(std::any_of(nglobal_multi.begin(), nglobal_multi.end(), [](auto x){return x>0;})); + + // cppcheck-suppress migraphx-RedundantLocalVariable + return nglobal_multi; +} + +template +inline auto mi_nlocal(const hip_shape& s, index_int local) +{ + assert(s.standard); + assert(s.elements() > 0); + auto nlocal_multi = s.multi(local); + + // Skip checking this, since this will cause metadata to not be generated + // for some unknown reason. 
+ // + // assert(std::any_of(nlocal_multi.begin(), nlocal_multi.end(), [](auto x){return x>0;})); + + // cppcheck-suppress migraphx-RedundantLocalVariable + return nlocal_multi; +} + +template +inline auto mi_launch(hipStream_t stream, const hip_shape& global, index_int nlocal = 1024) +{ + auto nglobal_multi = mi_nglobal(global, nlocal); + auto nglobal = global.index(nglobal_multi); + + return [=](auto f) { + launch(stream, nglobal, nlocal)([=](auto idx) __device__ { + auto midx = make_multi_index(global, idx.global, nglobal_multi); + f(idx, midx.for_stride(global.lens)); + }); + }; +} + +template +inline auto mi_launch(hipStream_t stream, + const hip_shape& global, + const hip_shape& local, + index_int nlocal = 1024) +{ + auto nglobal_multi = mi_nglobal(global, 1); + auto nglobal = global.index(nglobal_multi); + auto nlocal_multi = mi_nlocal(local, nlocal); + + return [=](auto f) { + launch(stream, nglobal * nlocal, nlocal)([=](auto idx) { + // TODO: Use fast div for nlocal + auto midx = make_multi_index(global, idx.global / nlocal, nglobal_multi); + auto lidx = make_multi_index(local, idx.local, nlocal_multi); + f(idx, midx.for_stride(global.lens), lidx.for_stride(local.lens)); + }); + }; +} + +template +inline auto mi_gs_launch(hipStream_t stream, const hip_shape& global, index_int nlocal = 1024) +{ + return [=](auto f) { + mi_launch(stream, global, nlocal)([=](auto, auto g) { g([&](auto i) { f(i); }); }); + }; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/nary.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/nary.hpp new file mode 100644 index 000000000..e9af38473 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/nary.hpp @@ -0,0 +1,473 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_NARY); + +// NOLINTNEXTLINE +#define MIGRAPHX_TRACE_NARY_FUNCTION \ + if(enabled(MIGRAPHX_TRACE_NARY{})) \ + std::cout << "nary device function: " << __PRETTY_FUNCTION__ << std::endl; + +template +constexpr auto pack(Ts... xs) +{ + return [=](auto f) { return f(xs...); }; +} + +template +auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + shape s{result.get_shape().type(), result.get_shape().lens()}; + hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) { + mi_gs_launch(stream, + standard_shape)([=](auto idx) __device__ { output[idx] = f(inputs[idx]...); }); + }); +} + +inline auto create_broadcast_index(index_int len, index_int stride) +{ + auto next_stride = stride * len; + auto e_next_stride = encode_divisor(next_stride); + auto e_stride = encode_divisor(stride); + return [=](auto i) __device__ { + // ( i % next_stride) / stride + return fast_div(i, e_stride) - len * fast_div(i, e_next_stride); + }; +} + +template +auto nary_nonstandard_packed_impl(hipStream_t stream, + F f, + const argument& result, + Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + auto arg_shape = make_array(args...).front().get_shape(); + auto perm = find_permutation(arg_shape); + auto s = reorder_shape(arg_shape, perm); + hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)( + [&](auto standard_shape, auto output, auto... inputs) { + mi_gs_launch(stream, standard_shape)( + [=](auto idx) __device__ { output[idx] = f(inputs[idx]...); }); + }); +} + +template +void nary_broadcast_vec_impl( + hipStream_t stream, F f, argument result, argument barg, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + const auto& output_shape = result.get_shape(); + const auto& b_shape = barg.get_shape(); + auto bdim = + std::distance(b_shape.strides().begin(), + std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) { + return x != 0; + })); + auto bdim_len = output_shape.lens()[bdim]; + auto bdim_stride = output_shape.strides()[bdim]; + auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride); + + const index_int vec_size = 4; + const index_int nlocal = 1024; + const index_int nglobal = 256 * nlocal; + const index_int bdim_vec_len = bdim_len / vec_size; + hip_vec_visit_all(result, barg, args...)( + [&](auto output, auto binput, auto... 
inputs) { + using type = typename decltype(output)::value_type; + const index_int nelements = output.size() / vec_size; + launch(stream, nglobal, nlocal)([=](auto idx) __device__ { + MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size]; + // Load bias into LDS + for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) + { + buffer[i] = binput.data()[i]; + } + __syncthreads(); + const auto* bp = as_pointer(buffer); + // Process the data + for(size_t i = idx.global; i < nelements; i += nglobal) + { + auto bidx = broadcast_idx(i * vec_size); + auto b = bp[bidx]; + auto out = output.data()[i]; + for(index_int j = 0; j < vec_size; j++) + { + out[j] = f(inputs.data()[i][j]..., b); + } + output.data()[i] = out; + } + }); + }); +} + +template +void nary_broadcast_impl(hipStream_t stream, F f, argument result, argument barg, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + const auto& output_shape = result.get_shape(); + const auto& b_shape = barg.get_shape(); + auto bdim = + std::distance(b_shape.strides().begin(), + std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) { + return x != 0; + })); + auto bdim_len = output_shape.lens()[bdim]; + auto bdim_stride = output_shape.strides()[bdim]; + auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride); + + const index_int nlocal = 1024; + const index_int nglobal = 256 * nlocal; + index_int nelements = result.get_shape().elements(); + hip_visit_all(result, barg, args...)([&](auto output, auto binput, auto... inputs) { + using type = typename decltype(output)::value_type; + launch(stream, nglobal, nlocal)([=](auto idx) __device__ { + MIGRAPHX_DEVICE_SHARED type buffer[2048]; + // Load bias into LDS + for(size_t i = idx.local; i < bdim_len; i += nlocal) + { + buffer[i] = binput.data()[i]; + } + __syncthreads(); + // Process the data + for(size_t i = idx.global; i < nelements; i += nglobal) + { + auto bidx = broadcast_idx(i); + auto b = buffer[bidx]; + output.data()[i] = f(inputs.data()[i]..., b); + } + }); + }); +} + +template +void nary_double_broadcast_vec_impl( + hipStream_t stream, F f, argument result, argument barg1, argument barg2, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + assert(barg1.get_shape().broadcasted()); + assert(barg2.get_shape().broadcasted()); + assert(barg1.get_shape() == barg2.get_shape()); + const auto& output_shape = result.get_shape(); + const auto& b_shape = barg1.get_shape(); + auto bdim = + std::distance(b_shape.strides().begin(), + std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) { + return x != 0; + })); + auto bdim_len = output_shape.lens()[bdim]; + auto bdim_stride = output_shape.strides()[bdim]; + auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride); + + const index_int vec_size = 4; + const index_int nlocal = 1024; + const index_int nglobal = 256 * nlocal; + const index_int bdim_vec_len = bdim_len / vec_size; + hip_vec_visit_all(result, barg1, barg2, args...)( + [&](auto output, auto binput1, auto binput2, auto... 
inputs) { + using type = typename decltype(output)::value_type; + const index_int nelements = output.size() / vec_size; + launch(stream, nglobal, nlocal)([=](auto idx) __device__ { + MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size]; + // Load bias into LDS + for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) + { + buffer[i] = binput1.data()[i]; + } + for(size_t i = idx.local; i < bdim_vec_len; i += nlocal) + { + buffer[i + bdim_vec_len] = binput2.data()[i]; + } + __syncthreads(); + const auto* bp = as_pointer(buffer); + // Process the data + for(size_t i = idx.global; i < nelements; i += nglobal) + { + auto bidx = broadcast_idx(i * vec_size); + auto b1 = bp[bidx]; + auto b2 = bp[bidx + bdim_len]; + auto out = output.data()[i]; + for(index_int j = 0; j < vec_size; j++) + { + out[j] = f(inputs.data()[i][j]..., b2, b1); + } + output.data()[i] = out; + } + }); + }); +} + +template +void nary_double_broadcast_impl( + hipStream_t stream, F f, argument result, argument barg1, argument barg2, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + assert(barg1.get_shape().broadcasted()); + assert(barg2.get_shape().broadcasted()); + assert(barg1.get_shape() == barg2.get_shape()); + const auto& output_shape = result.get_shape(); + const auto& b_shape = barg1.get_shape(); + auto bdim = + std::distance(b_shape.strides().begin(), + std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) { + return x != 0; + })); + auto bdim_len = output_shape.lens()[bdim]; + auto bdim_stride = output_shape.strides()[bdim]; + auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride); + + const index_int nlocal = 1024; + const index_int nglobal = 256 * nlocal; + index_int nelements = result.get_shape().elements(); + hip_visit_all(result, barg1, barg2, args...)( + [&](auto output, auto binput1, auto binput2, auto... inputs) { + using type = typename decltype(output)::value_type; + launch(stream, nglobal, nlocal)([=](auto idx) __device__ { + MIGRAPHX_DEVICE_SHARED type buffer[2048]; + // Load bias into LDS + for(size_t i = idx.local; i < bdim_len; i += nlocal) + { + buffer[i] = binput1.data()[i]; + } + for(size_t i = idx.local; i < bdim_len; i += nlocal) + { + buffer[i + bdim_len] = binput2.data()[i]; + } + __syncthreads(); + // Process the data + for(size_t i = idx.global; i < nelements; i += nglobal) + { + auto bidx = broadcast_idx(i); + auto b1 = buffer[bidx]; + auto b2 = buffer[bidx + bdim_len]; + output.data()[i] = f(inputs.data()[i]..., b2, b1); + } + }); + }); +} + +template +void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + const auto& output_shape = result.get_shape(); + visit_all(result, args...)([&](auto output, auto... inputs) { + using type = device_type>; + const index_int vec_size = 4; + auto data = pack_vec<4>(device_cast(inputs.data())...); + auto* outp = as_vec<4>(device_cast(output.data())); + gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) __device__ { + vec out = outp[i]; + data( + [&](auto... xs) { + for(index_int j = 0; j < vec_size; j++) + { + out[j] = f(xs[j]...); + } + }, + i); + outp[i] = out; + }); + }); +} + +template +void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + index_int nelements = result.get_shape().elements(); + hip_pointer_visit_all(result, args...)([&](auto output, auto... 
inputs) { + gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = f(inputs[i]...); }); + }); +} + +template +void nary_impl(hipStream_t stream, F f, argument result, Arguments... args) +{ + MIGRAPHX_TRACE_NARY_FUNCTION + const auto shapes = make_array(args.get_shape()...); + const bool standard = all_of(shapes, [](const shape& s) { return s.standard(); }); + const bool packed = + all_of(shapes, [](const shape& s) { return s.packed() and not s.broadcasted(); }); + const bool same_shapes = + all_of(shapes, [&](const shape& s) { return s == result.get_shape(); }); + const bool same_input_shapes = all_of(shapes, [&](const shape& s) { return s == shapes[0]; }); + if((result.get_shape().standard() and standard) or (packed and same_shapes)) + nary_standard_impl(stream, f, result, args...); + else if(packed and same_input_shapes) + nary_nonstandard_packed_impl(stream, f, result, args...); + else + nary_nonstandard_nonpacked_impl(stream, f, result, args...); +} + +template +auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args) +{ + return [=](auto f) { nary_nonstandard_nonpacked_impl(stream, f, result, args...); }; +} + +template +auto nary_standard(hipStream_t stream, argument result, Arguments... args) +{ + return [=](auto f) { nary_standard_impl(stream, f, result, args...); }; +} + +template +bool broadcastable(bool& divisible_by_4, + index_int max_size, + const argument& result, + const argument& barg, + const Arguments&... args) +{ + divisible_by_4 = false; + auto bshape = barg.get_shape(); + const bool standard = + all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); }); + const bool same_shapes = + all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); }); + // TODO: Check result and args shape is the same + if(standard and same_shapes and bshape.broadcasted() and not bshape.scalar()) + { + auto not_zero = [](auto x) { return x != 0; }; + const auto& strides = bshape.strides(); + auto b_it = std::find_if(strides.begin(), strides.end(), not_zero); + auto b_idx = std::distance(strides.begin(), b_it); + auto b_len = result.get_shape().lens()[b_idx]; + auto b_stride = result.get_shape().strides()[b_idx]; + assert(bshape.lens()[b_idx] == b_len); + if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero) and + is_divisor_encodable(b_stride * b_len)) + { + + divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and + (front_args(args...).get_shape().elements() % 4 == 0); + return true; + } + } + return false; +} + +inline bool broadcastable(bool& divisible_by_4, index_int, const argument&, const argument&) +{ + divisible_by_4 = false; + return false; +} + +// Nullary +inline auto nary(hipStream_t stream, argument result) +{ + return [=](auto f) { nary_standard_impl(stream, f, result); }; +} + +// Unary +inline auto nary(hipStream_t stream, argument result, argument arg) +{ + return [=](auto f) { nary_impl(stream, f, result, arg); }; +} + +// Binary +inline auto nary(hipStream_t stream, argument result, argument arg, argument barg) +{ + return [=](auto f) { + bool divisible_by_4 = false; + if(broadcastable(divisible_by_4, 2048, result, barg, arg)) + { + if(divisible_by_4) + nary_broadcast_vec_impl(stream, f, result, barg, arg); + else + nary_broadcast_impl(stream, f, result, barg, arg); + } + else + { + nary_impl(stream, f, result, arg, barg); + } + }; +} + +template +auto nary(hipStream_t stream, argument result, Arguments... 
args) +{ + static_assert(sizeof...(args) > 2, "Args needs to be greater than 2"); + return [=](auto f) { + auto barg1 = back_args(args...); + bool fallback1 = pop_back_args(args...)([&](auto&&... args2) { + auto barg2 = back_args(args2...); + bool fallback2 = + barg2.get_shape() != barg1.get_shape() or not barg2.get_shape().broadcasted() or + pop_back_args(args2...)([&](auto&&... args3) { + bool divisible_by_4 = false; + if(broadcastable(divisible_by_4, 1024, result, barg2, args3...)) + { + if(divisible_by_4) + nary_double_broadcast_vec_impl( + stream, f, result, barg1, barg2, args3...); + else + nary_double_broadcast_impl(stream, f, result, barg1, barg2, args3...); + return false; + } + return true; + }); + if(not fallback2) + return false; + bool divisible_by_4 = false; + if(broadcastable(divisible_by_4, 2048, result, barg1, args2...)) + { + if(divisible_by_4) + nary_broadcast_vec_impl(stream, f, result, barg1, args2...); + else + nary_broadcast_impl(stream, f, result, barg1, args2...); + return false; + } + return true; + }); + if(fallback1) + nary_impl(stream, f, result, args...); + }; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp new file mode 100644 index 000000000..ae796c66e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp @@ -0,0 +1,311 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
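
// A standalone host-side check of the identity behind create_broadcast_index in nary.hpp
// above: for a broadcast dimension of length len and stride stride, the bias element that
// feeds output index i is (i % (len * stride)) / stride, and the kernel computes the same
// value with two encoded-divisor divisions. The sizes below are hypothetical and chosen to
// stay within the encodable range; this sketch is not part of the patch.

#include <cassert>
#include <cstdint>

constexpr uint64_t shift = 42;
constexpr uint64_t encode(uint64_t d) { return ((uint64_t{1} << shift) + d - 1) / d; }
constexpr uint64_t fdiv(uint64_t n, uint64_t e) { return (n * e) >> shift; }

int main()
{
    const uint64_t len = 8, stride = 64;
    const uint64_t e_stride = encode(stride);
    const uint64_t e_next   = encode(stride * len);
    for(uint64_t i = 0; i < 4096; i++)
    {
        const uint64_t reference = (i % (stride * len)) / stride;
        const uint64_t fast      = fdiv(i, e_stride) - len * fdiv(i, e_next);
        assert(fast == reference);
    }
    return 0;
}
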
+ */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +#ifdef MIGRAPHX_NO_DPP + +template {})> +__device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f) +{ + using type = decltype(f(deduce_for_stride(fs))); + MIGRAPHX_DEVICE_SHARED type buffer[N]; + type x = init; + fs([&](auto i) { x = op(x, f(i)); }); + buffer[idx.local] = x; + __syncthreads(); + + for(index_int s = 1; s < idx.nlocal(); s *= 2) + { + const index_int index = 2 * s * idx.local; + if(index + s < idx.nlocal()) + { + buffer[index] = op(buffer[index], buffer[index + s]); + } + __syncthreads(); + } + return buffer[0]; +} + +#else +constexpr unsigned int dpp_row_shr(unsigned int x) { return 0x110u | x; } + +constexpr unsigned int dpp_row_bcast(unsigned int x) +{ + unsigned int y = 0; + switch(x) + { + case 15: y = 0x142; break; + case 31: y = 0x143; break; + default: throw std::runtime_error("Unknown bcast"); + } + return y; +} + +template +__device__ T dpp_mov(T& x) +{ + static const index_int n = sizeof(T) < 4 ? 1 : sizeof(T) / 4; + union type + { + uint32_t reg[n]; + T data; + }; + type output{}; + type input{}; + // cppcheck-suppress unreadVariable + input.data = x; + for(index_int i = 0; i < n; i++) + { + output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl); + } + return output.data; +} + +template +__device__ void dpp_reduce(T& in, Op op) +{ + T out{}; + out = dpp_mov(in); + in = op(in, out); + out = dpp_mov(in); + in = op(in, out); + out = dpp_mov(in); + in = op(in, out); + out = dpp_mov(in); + in = op(in, out); +#if __AMDGCN_WAVEFRONT_SIZE == 64 + out = dpp_mov(in); + in = op(in, out); + out = dpp_mov(in); + in = op(in, out); +#endif +} + +__device__ inline void dpp_reduce(float& x, sum) +{ +#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK) + x = 1; +#else + __asm__ volatile("s_nop 4\n" + "v_add_f32 %0 %0 %0 row_shr:1\n" + "s_nop 1\n" + "v_add_f32 %0 %0 %0 row_shr:2\n" + "s_nop 1\n" + "v_add_f32 %0 %0 %0 row_shr:4 bank_mask:0xe\n" + "s_nop 1\n" + "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n" + "s_nop 1\n" +#if __AMDGCN_WAVEFRONT_SIZE == 64 + "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n" + "s_nop 1\n" + "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n" +#endif + "s_nop 1\n" + : "=v"(x) + : "0"(x)); +#endif +} + +template {})> +__device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f) +{ + +#if __AMDGCN_WAVEFRONT_SIZE == 32 + constexpr index_int nthreads = 16; +#else + constexpr index_int nthreads = 64; +#endif + using type = decltype(f(deduce_for_stride(fs))); + MIGRAPHX_DEVICE_SHARED type buffer[N / nthreads]; + type x = init; + fs([&](auto i) { x = op(x, f(i)); }); + dpp_reduce(x, op); + + const auto ldsidx = idx.local / nthreads; + if((idx.local % nthreads) == nthreads - 1) + { + buffer[ldsidx] = x; + } + __syncthreads(); + + type y = init; + for(index_int i = 0; i < idx.nlocal() / nthreads; i++) + { + y = op(y, buffer[i]); + } + return y; +} +#endif +template +__device__ auto block_reduce(index idx, Op op, T init, index_int n, F f) +{ + auto midx = make_multi_index(idx.local, idx.nlocal()); + // Workaround hcc, create a local array + auto fs = midx.id; + fs[0] = n; + return block_reduce( + idx, op, init, midx.for_stride(fs), [&](auto mi) __device__ { return f(mi[0]); }); +} +constexpr index_int compute_block_size(index_int n, index_int max_block_size) 
+{ + size_t block_size = 64; + while(block_size < max_block_size and block_size < n) + block_size *= 2; + return block_size; +} + +inline std::vector get_reduce_lens(const std::vector& input_lens, + const std::vector& output_lens) +{ + std::vector reduce_lens; + std::transform(output_lens.begin(), + output_lens.end(), + input_lens.begin(), + std::back_inserter(reduce_lens), + [](auto x, auto y) -> index_int { + if(x == y) + return 1; + else + return y; + }); + return reduce_lens; +} + +template +void reduce_multi_impl(hipStream_t stream, + const argument& result, + const argument& arg, + Op op, + T init, + Input read_input, + Output read_output, + const shape& reduce_slice) +{ + hip_visit_all(result, arg, reduce_slice)([&](auto output, auto input, auto reduce_shape) { + auto relements = reduce_slice.elements(); + + const index_int max_block_size = 256; + const index_int block_size = compute_block_size(relements, max_block_size); + mi_launch(stream, output.get_shape(), reduce_shape, block_size)( + [=](auto idx, auto global, auto local) __device__ { + global([&](auto i) __device__ { + auto r = + block_reduce(idx, op, init, local, [&](auto j) __device__ { + return read_input(input[i + j]); + }); + if(idx.local == 0) + output[i] = read_output(r); + }); + }); + }); +} + +template +void reduce_standard_impl(hipStream_t stream, + const argument& result, + const argument& arg, + Op op, + T init, + Input read_input, + Output read_output, + index_int relements) +{ + hip_visit_all(result, arg)([&](auto output, auto input) { + auto nelements = result.get_shape().elements(); + + const index_int max_block_size = 256; + const index_int block_size = compute_block_size(relements, max_block_size); + gs_launch(stream, nelements * block_size, block_size)([=](auto i, auto idx) __device__ { + const auto out_idx = i / block_size; + const auto base_idx = out_idx * relements; + auto r = block_reduce(idx, op, init, relements, [&](auto j) __device__ { + return read_input(input.data()[base_idx + j]); + }); + if(idx.local == 0) + output.data()[out_idx] = read_output(r); + }); + }); +} + +template +void reduce(hipStream_t stream, + const argument& result, + const argument& arg, + Op op, + T init, + Input read_input, + Output read_output) +{ + auto&& output_shape = result.get_shape(); + auto&& input_shape = arg.get_shape(); + auto input_lens = input_shape.lens(); + auto output_lens = output_shape.lens(); + assert(output_lens.size() == input_lens.size()); + if(input_shape.standard() and output_shape.standard() and + output_lens.back() != input_lens.back() and + std::equal(output_lens.begin(), std::prev(output_lens.end()), input_lens.begin())) + { + reduce_standard_impl( + stream, result, arg, op, init, read_input, read_output, input_lens.back()); + } + else + { + std::vector reduce_lens = get_reduce_lens(input_lens, output_lens); + shape reduce_slice{output_shape.type(), reduce_lens}; + reduce_multi_impl(stream, result, arg, op, init, read_input, read_output, reduce_slice); + } +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_NO_DPP diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce_ops.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce_ops.hpp new file mode 100644 index 000000000..6bafb0d08 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/reduce_ops.hpp @@ -0,0 +1,111 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 
Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP +#define MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +struct sum +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return x + y; + } +}; + +struct product +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return x * y; + } +}; + +struct id +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const + { + return x; + } +}; + +struct mean +{ + size_t item_num = 1; + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const + { + return x / static_cast(item_num); + } +}; + +struct max +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return (x > y) ? x : y; + } +}; + +struct min +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return (x < y) ? x : y; + } +}; + +struct lowest +{ + template + __device__ __host__ operator T() const + { + return device_cast(std::numeric_limits>::lowest()); + } +}; + +struct highest +{ + template + __device__ __host__ operator T() const + { + return device_cast(std::numeric_limits>::max()); + } +}; + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/scan.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/scan.hpp new file mode 100644 index 000000000..5a66f7f73 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/scan.hpp @@ -0,0 +1,97 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_DEVICE_SCAN_HPP +#define MIGRAPHX_GUARD_DEVICE_SCAN_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template {})> +__device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output) +{ + using type = decltype(input(deduce_for_stride(fs))); + MIGRAPHX_DEVICE_SHARED type buffer[2][N]; + type x = init; + fs([&](auto i) { + index_int iout = 0; + index_int iin = 1; + if(idx.local == 0) + buffer[iout][idx.local] = op(input(i), x); + else + buffer[iout][idx.local] = input(i); + __syncthreads(); + for(index_int s = 1; s < idx.nlocal(); s *= 2) + { + iout = 1 - iout; + iin = 1 - iin; + if(idx.local >= s) + { + buffer[iout][idx.local] = op(buffer[iin][idx.local], buffer[iin][idx.local - s]); + } + else + { + buffer[iout][idx.local] = buffer[iin][idx.local]; + } + __syncthreads(); + } + x = buffer[iout][idx.nlocal() - 1]; + output(i, buffer[iout][idx.local]); + }); +} + +template +__device__ void block_scan(index idx, Op op, T init, index_int n, Input input, Output output) +{ + block_scan( + idx, + op, + init, + [&](auto f) -> decltype(f(index_int{})) { return idx.local_stride(n, f); }, + input, + output); +} + +template +constexpr auto reverse_scan(index_int n, F f) +{ + return [=](auto i, auto&&... xs) { return f(n - i - 1, xs...); }; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_DEVICE_SCAN_HPP diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/shape.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/shape.hpp new file mode 100644 index 000000000..66f065c78 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/shape.hpp @@ -0,0 +1,120 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
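
// A standalone host-side model of the block_scan kernel in scan.hpp above: a double-buffered
// Hillis-Steele inclusive scan over one tile of lanes, with the running carry folded into
// lane 0 before the sweep, as the device code folds x into the first element of each stride
// iteration. The input below is hypothetical; this sketch is not part of the patch.

#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

std::vector<uint64_t> tile_scan(const std::vector<uint64_t>& in, uint64_t carry)
{
    const size_t n = in.size();
    std::vector<std::vector<uint64_t>> buffer(2, std::vector<uint64_t>(n));
    for(size_t local = 0; local < n; local++)
        buffer[0][local] = (local == 0) ? in[local] + carry : in[local];

    size_t iout = 0;
    for(size_t s = 1; s < n; s *= 2)
    {
        const size_t iin = iout;
        iout             = 1 - iout;
        for(size_t local = 0; local < n; local++)
            buffer[iout][local] = (local >= s)
                                      ? buffer[iin][local] + buffer[iin][local - s]
                                      : buffer[iin][local];
    }
    return buffer[iout]; // lane nlocal - 1 holds the carry for the next tile
}

int main()
{
    const std::vector<uint64_t> in = {3, 1, 4, 1, 5, 9, 2, 6};
    const auto out                 = tile_scan(in, 0);

    std::vector<uint64_t> expected(in.size());
    std::partial_sum(in.begin(), in.end(), expected.begin()); // inclusive prefix sum
    assert(out == expected);
    return 0;
}
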
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct hip_shape +{ + using hip_index = hip_array; + hip_index lens = {}; + hip_index strides = {}; + hip_array divs = {}; + bool standard = false; + + __device__ __host__ hip_shape() = default; + + hip_shape(const shape& s) : standard(s.standard()) + { + assert(s.lens().size() == N); + assert(s.strides().size() == N); + std::copy(s.lens().begin(), s.lens().end(), lens.begin()); + std::copy(s.strides().begin(), s.strides().end(), strides.begin()); + assert(std::all_of(s.lens().begin(), s.lens().end(), &is_divisor_encodable)); + std::transform(s.lens().begin(), s.lens().end(), divs.begin(), &encode_divisor); + } + + MIGRAPHX_DEVICE_CONSTEXPR index_int elements() const { return lens.product(); } + + MIGRAPHX_DEVICE_CONSTEXPR index_int index(hip_index x) const { return x.dot(strides); } + + MIGRAPHX_DEVICE_CONSTEXPR index_int index(std::initializer_list x) const + { + index_int idx = 0; + for(index_int i = 0; i < x.size(); i++) + idx += *(x.begin() + i) * strides[i]; + return idx; + } + + MIGRAPHX_DEVICE_CONSTEXPR index_int index(index_int i) const + { + if(this->standard) + return i; + else + { + const index_int rank = this->lens.size(); + index_int s = 1; + index_int result = 0; + for(index_int j = 0; j < this->lens.size(); j++) + { + const index_int k = rank - j - 1; + const index_int stride = this->strides[k]; + const index_int len = this->lens[k]; + const index_int slen = s * len; + const index_int idx = (i % slen) / s; + result += stride * idx; + s = slen; + } + return result; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR hip_index multi(index_int idx) const + { + hip_index result; + index_int tidx = idx; + for(std::ptrdiff_t is = result.size() - 1; is > 0; is--) + { + // result[is] = tidx % lens[is]; + // tidx = tidx / lens[is]; + auto q = fast_div(tidx, divs[is]); + result[is] = remainder(q, tidx, lens[is]); + tidx = q; + } + result[0] = tidx; + return result; + } +}; + +template +hip_shape make_hip_shape(const shape& x) +{ + return x; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp 
b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp new file mode 100644 index 000000000..2b85cb89d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp @@ -0,0 +1,76 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +using hip_tensor_index = hip_array; + +template +struct hip_tensor_descriptor +{ + __device__ __host__ hip_tensor_descriptor() = default; + + hip_tensor_descriptor(const shape& s) + { + std::copy(s.lens().begin(), s.lens().end(), lens); + std::copy(s.strides().begin(), s.strides().end(), strides); + } + + __device__ __host__ hip_tensor_index multi(index_int idx) const + { + hip_tensor_index result{}; + index_int tidx = idx; + for(index_int is = 0; is < NDim; is++) + { + result[is] = tidx / strides[is]; + tidx = tidx % strides[is]; + } + + return result; + } + __device__ __host__ index_int linear(hip_tensor_index s) const + { + index_int idx = 0; + for(index_int i = 0; i < NDim; i++) + idx += s[i] * strides[i]; + return idx; + } + index_int lens[NDim] = {}; + index_int strides[NDim] = {}; +}; + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp new file mode 100644 index 000000000..8be3908a4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp @@ -0,0 +1,82 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
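The index arithmetic in hip_shape::index/multi and hip_tensor_descriptor above is the usual strided row-major conversion between a linear offset and a multi-index. A host-side sketch of that math follows; the names are illustrative, and the fast_div/encode_divisor optimization is deliberately omitted.

#include <array>
#include <cstdint>
#include <iostream>

// Linear index -> multi-index, walking from the fastest-moving dimension.
template <std::size_t N>
std::array<std::uint32_t, N> multi_index(std::uint32_t i, const std::array<std::uint32_t, N>& lens)
{
    std::array<std::uint32_t, N> result{};
    for(std::size_t k = N; k > 0; --k)
    {
        result[k - 1] = i % lens[k - 1];
        i /= lens[k - 1];
    }
    return result;
}

// Multi-index -> linear index, the dot product with the strides.
template <std::size_t N>
std::uint32_t linear_index(const std::array<std::uint32_t, N>& idx,
                           const std::array<std::uint32_t, N>& strides)
{
    std::uint32_t r = 0;
    for(std::size_t k = 0; k < N; ++k)
        r += idx[k] * strides[k];
    return r;
}

int main()
{
    std::array<std::uint32_t, 3> lens{2, 3, 4};
    std::array<std::uint32_t, 3> strides{12, 4, 1};   // packed (standard) strides for lens
    auto m = multi_index<3>(17, lens);                // 17 -> {1, 1, 1}
    std::cout << linear_index<3>(m, strides) << '\n'; // prints 17 again
}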
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct hip_tensor_view +{ + using value_type = T; + using hip_index = typename hip_shape::hip_index; + __device__ __host__ hip_tensor_view() = default; + __host__ hip_tensor_view(tensor_view x) : d(x.data()), s(x.get_shape()) {} + __host__ hip_tensor_view(T* x, const shape& ss) : d(x), s(ss) {} + + MIGRAPHX_DEVICE_CONSTEXPR const hip_shape& get_shape() const { return s; } + + MIGRAPHX_DEVICE_CONSTEXPR index_int size() const { return s.elements(); } + + MIGRAPHX_DEVICE_CONSTEXPR value_type* data() const { return d; } + + template + MIGRAPHX_DEVICE_CONSTEXPR value_type& operator[](U i) const + { + return d[s.index(i)]; + } + + MIGRAPHX_DEVICE_CONSTEXPR value_type* begin() const { return d; } + + MIGRAPHX_DEVICE_CONSTEXPR value_type* end() const { return d + size(); } + + private: + value_type* d = nullptr; + hip_shape s{}; +}; + +template +hip_tensor_view make_hip_view(const shape& s, T* x) +{ + return {x, s}; +} + +template +hip_tensor_view make_hip_view(tensor_view x) +{ + return {x}; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/types.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/types.hpp new file mode 100644 index 000000000..c9f2e3d7c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/types.hpp @@ -0,0 +1,213 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +using index_int = std::uint32_t; + +#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT + +template +using vec = T __attribute__((ext_vector_type(N))); + +template +__device__ __host__ T* as_pointer(vec* x) +{ + return reinterpret_cast(x); +} + +template +__device__ __host__ vec* as_vec(T* x) +{ + return reinterpret_cast*>(x); +} + +template +tensor_view> as_vec(tensor_view x) +{ + return {x.get_shape(), as_vec(x.data())}; +} + +template +auto pack_vec(Ts... xs) +{ + return [=](auto f, index_int n) { return f(as_vec(xs)[n]...); }; +} + +using gpu_half = __fp16; +using gpu_bf16 = __bf16; + +namespace detail { +template +struct device_type +{ + using type = T; +}; + +template +struct device_type> +{ + using type = vec::type, N>; +}; + +template <> +struct device_type +{ + using type = gpu_half; +}; + +template <> +struct device_type +{ + using type = gpu_bf16; +}; + +template +struct host_type +{ + using type = T; +}; + +template <> +struct host_type +{ + using type = half; +}; + +template <> +struct host_type +{ + using type = bf16; +}; + +} // namespace detail + +template +using host_type = typename detail::host_type::type; + +template +using device_type = typename detail::device_type::type; + +template +host_type host_cast(T x) +{ + return reinterpret_cast&>(x); +} + +template +host_type* host_cast(T* x) +{ + return reinterpret_cast*>(x); +} + +template +__device__ __host__ device_type device_cast(const T& x) +{ + return reinterpret_cast&>(x); +} + +template +__device__ __host__ device_type* device_cast(T* x) +{ + return reinterpret_cast*>(x); +} + +template +__device__ __host__ tensor_view> device_cast(tensor_view x) +{ + return {x.get_shape(), reinterpret_cast*>(x.data())}; +} + +template +__device__ __host__ T to_hip_type(T x) +{ + return x; +} + +// Hip doens't support __fp16 and __bf16 +inline __device__ __host__ float to_hip_type(gpu_half x) { return x; } +inline __device__ __host__ float to_hip_type(gpu_bf16 x) { return x; } + +template +struct is_floating_point : std::is_floating_point +{ +}; + +template <> +struct is_floating_point<__fp16> : std::true_type +{ +}; + +template +struct is_signed : std::is_signed +{ +}; + +template <> +struct is_signed<__fp16> : std::true_type +{ +}; + +template +struct is_arithmetic : std::is_arithmetic +{ +}; + +template <> +struct is_arithmetic<__fp16> : std::true_type +{ +}; + +// Redo for __bf16 +template <> +struct is_floating_point<__bf16> : std::true_type +{ +}; +template <> +struct is_signed<__bf16> : std::true_type +{ +}; +template <> +struct is_arithmetic<__bf16> : std::true_type +{ +}; + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git 
a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/vector.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/vector.hpp new file mode 100644 index 000000000..93fe06b0c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/vector.hpp @@ -0,0 +1,99 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct hip_vector +{ + MIGRAPHX_DEVICE_CONSTEXPR hip_vector() = default; + MIGRAPHX_DEVICE_CONSTEXPR hip_vector(index_int s) : len(s) {} + template + __device__ __host__ hip_vector(Iterator start, Iterator last) + { + auto it = std::copy(start, last, d); + len = std::distance(d, it); + } + + __device__ __host__ hip_vector(std::initializer_list x) + { + std::copy(x.begin(), x.end(), d); + len = x.size(); + } + + MIGRAPHX_DEVICE_CONSTEXPR T& operator[](index_int i) { return d[i]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](index_int i) const { return d[i]; } + + MIGRAPHX_DEVICE_CONSTEXPR T& front() { return d[0]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& front() const { return d[0]; } + + MIGRAPHX_DEVICE_CONSTEXPR T& back() { return d[size() - 1]; } + MIGRAPHX_DEVICE_CONSTEXPR const T& back() const { return d[size() - 1]; } + + MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; } + MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; } + + MIGRAPHX_DEVICE_CONSTEXPR index_int size() const { return len; } + + MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; } + MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; } + + MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); } + MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); } + + template + MIGRAPHX_DEVICE_CONSTEXPR void push_back(U&& x) + { + d[len] = static_cast(x); + len++; + } + + private: + T d[N] = {}; + index_int len = 0; +}; + +template +hip_vector to_hip_vector(const std::vector& x) +{ + hip_vector result(x.size()); + std::copy(x.begin(), x.end(), result.begin()); + return result; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git 
a/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/visit.hpp b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/visit.hpp new file mode 100644 index 000000000..78f28a552 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/include/migraphx/gpu/device/visit.hpp @@ -0,0 +1,245 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +constexpr void visit_tensor_size(index_int n, F f) +{ + switch(n) + { + case 1: { + f(std::integral_constant{}); + break; + } + case 2: { + f(std::integral_constant{}); + break; + } + case 3: { + f(std::integral_constant{}); + break; + } + case 4: { + f(std::integral_constant{}); + break; + } + case 5: { + f(std::integral_constant{}); + break; + } + default: throw std::runtime_error("Tensor dims " + std::to_string(n) + " out of range"); + } +} + +inline shape get_shape(const shape& x) { return x; } + +template +auto get_shape(const T& x) -> decltype(x.get_shape()) +{ + return x.get_shape(); +} + +template +struct is_hip_type : std::false_type +{ +}; + +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; +template <> +struct is_hip_type : std::true_type +{ +}; + +template {})> +void hip_visitor_invoke(T as, V&& v) +{ + v(as); +} + +template {})> +void hip_visitor_invoke(T, V&&) +{ + MIGRAPHX_THROW(std::string("Unsupported data type on GPU: ") + __PRETTY_FUNCTION__); +} + +template +auto hip_visitor(V v) +{ + return [=](auto as) { hip_visitor_invoke(as, v); }; +} + +template +void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... 
xs) +{ + std::initializer_list types = {get_shape(xs).type()...}; + if(not std::all_of( + types.begin(), types.end(), [&](migraphx::shape::type_t t) { return t == s.type(); })) + MIGRAPHX_THROW("Types must be the same"); + std::initializer_list ranks = {static_cast(get_shape(xs).ndim())...}; + if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); })) + MIGRAPHX_THROW("Ranks must be the same"); + visit_tensor_size(s.ndim(), [&](auto ndim) { + s.visit_type(hip_visitor([&](auto as) { v(f(xs, ndim, as)...); })); + }); +} + +template +void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs) +{ + std::initializer_list ranks = {static_cast(get_shape(xs).ndim())...}; + if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); })) + MIGRAPHX_THROW("Ranks must be the same"); + visit_tensor_size(s.ndim(), [&](auto ndim) { v(f(xs, ndim)...); }); +} + +template +struct hip_convert +{ + F f; + template + auto operator()(RawData x, N ndim, As as) const + -> decltype(make_hip_view(x.get_shape(), f(as.from(x.data())))) + { + return make_hip_view(x.get_shape(), f(as.from(x.data()))); + } + + template + auto operator()(const shape& s, N ndim, As) const + { + return make_hip_shape(s); + } +}; + +template +hip_convert make_hip_convert(F f) +{ + return {f}; +} + +template +struct hip_convert_view +{ + F f; + template + auto operator()(tensor_view x, N ndim) const + { + return make_hip_view(f(x)); + } + + template + auto operator()(const shape& s, N ndim) const + { + return make_hip_shape(s); + } +}; + +template +hip_convert_view make_hip_convert_view(F f) +{ + return {f}; +} + +template +auto hip_visit_all(T&& x, Ts&&... xs) +{ + return [&](auto f) { + hip_visit_all_impl( + get_shape(x), make_hip_convert([](auto* p) { return device_cast(p); }), f, x, xs...); + }; +} + +template +auto hip_vec_visit_all(T&& x, Ts&&... xs) +{ + return [&](auto f) { + auto sx = get_shape(x); + auto lens = sx.lens(); + assert(lens.back() % N == 0); + assert(sx.strides().back() == 1); + lens.back() /= N; + shape vec_sx{sx.type(), lens}; + hip_visit_all_impl(vec_sx, + make_hip_convert([](auto* p) { return as_vec(device_cast(p)); }), + f, + x, + xs...); + }; +} + +template +auto hip_pointer_visit_all(T&& x, Ts&&... xs) +{ + return [&](auto f) { visit_all(x, xs...)([&](auto... vs) { f(device_cast(vs.data())...); }); }; +} + +template +auto hip_visit_views(T&& x, Ts&&... xs) +{ + return [&](auto f) { + hip_visit_views_impl(get_shape(x), + make_hip_convert_view([](auto v) { return device_cast(v); }), + f, + x, + xs...); + }; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/device/logsoftmax.cpp b/docker/rocm/migraphx/targets/gpu/device/logsoftmax.cpp new file mode 100644 index 000000000..f2dd6148b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/logsoftmax.cpp @@ -0,0 +1,80 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
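The visit_tensor_size switch in visit.hpp above is the standard trick for turning a runtime tensor rank into a compile-time constant so that templates such as hip_shape can be instantiated per rank. A minimal host-side sketch of the same pattern, with visit_rank as an illustrative name:

#include <iostream>
#include <stdexcept>
#include <type_traits>

// Dispatch a runtime value to a compile-time integral_constant, mirroring visit_tensor_size.
template <class F>
void visit_rank(unsigned n, F f)
{
    switch(n)
    {
    case 1: f(std::integral_constant<unsigned, 1>{}); break;
    case 2: f(std::integral_constant<unsigned, 2>{}); break;
    case 3: f(std::integral_constant<unsigned, 3>{}); break;
    default: throw std::runtime_error("rank out of range");
    }
}

int main()
{
    visit_rank(2, [](auto rank) {
        // rank() is a constant expression here, usable as a template argument
        std::cout << "compile-time rank: " << rank() << '\n';
    });
}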
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis) +{ + auto batch_lens = result.get_shape().lens(); + index_int batch_item_num = batch_lens[axis]; + batch_lens[axis] = 1; + migraphx::shape batch_shape{result.get_shape().type(), batch_lens}; + + hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) { + const index_int max_block_size = 256; + const index_int block_size = compute_block_size(batch_item_num, max_block_size); + gs_launch(stream, + batch_shape.elements() * block_size, + block_size)([=](auto i, auto idx) __device__ { + auto data_idx = batch.multi(i / block_size); + using type = device_type>; + type init = lowest(); + + auto batch_max = block_reduce( + idx, max{}, init, batch_item_num, [&](auto j) __device__ { + data_idx[axis] = j; + return input[data_idx]; + }); + + auto batch_sum = + block_reduce(idx, sum{}, 0, batch_item_num, [&](auto j) __device__ { + data_idx[axis] = j; + auto val = input[data_idx] - batch_max; + return ::exp(to_hip_type(val)); + }); + + auto log_batch_sum = ::log(to_hip_type(batch_sum)) + batch_max; + + idx.local_stride(batch_item_num, [&](auto j) __device__ { + data_idx[axis] = j; + output[data_idx] = input[data_idx] - log_batch_sum; + }); + }); + }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/multinomial.cpp b/docker/rocm/migraphx/targets/gpu/device/multinomial.cpp new file mode 100644 index 000000000..e7a89d7f1 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/multinomial.cpp @@ -0,0 +1,90 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
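The logsoftmax kernel above computes, per batch slice, logsoftmax(x_i) = x_i - (max_x + log(sum_j exp(x_j - max_x))), using one block reduction for the maximum and one for the sum of exponentials. A plain host reference of that formula, useful for checking, might look like the following; log_softmax here is an illustrative helper, not the device code.

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Numerically stable log-softmax: subtract the max before exponentiating.
std::vector<double> log_softmax(const std::vector<double>& x)
{
    double m = *std::max_element(x.begin(), x.end());
    double s = 0.0;
    for(double v : x)
        s += std::exp(v - m);
    double log_sum = std::log(s) + m;
    std::vector<double> out;
    out.reserve(x.size());
    for(double v : x)
        out.push_back(v - log_sum);
    return out;
}

int main()
{
    for(double v : log_softmax({1.0, 2.0, 3.0}))
        std::cout << v << ' '; // approx: -2.4076 -1.4076 -0.4076
}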
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value) +{ + Iterator it; + typename std::iterator_traits::difference_type count; + typename std::iterator_traits::difference_type step; + count = std::distance(first, last); + + while(count > 0) + { + it = first; + step = count / 2; + std::advance(it, step); + if(not(value < *it)) + { + first = ++it; + count -= step + 1; + } + else + count = step; + } + return first; +} + +void multinomial(hipStream_t stream, + const argument& result, + const argument& arg0, + const argument& arg1) +{ + size_t batch_size = arg0.get_shape().lens().front(); + size_t class_size = arg0.get_shape().lens().back(); + size_t sample_size = result.get_shape().lens().back(); + + visit_all(arg0, arg1)([&](auto cdf_host, auto dist_host) { + result.visit([&](auto output_host) { + hip_visit_views(cdf_host, dist_host, output_host)( + [&](auto cdf, auto dist, auto output) { + gs_launch(stream, batch_size * sample_size)([=](auto i) __device__ { + auto idx = output.get_shape().multi(i); + auto cdf_begin = cdf.begin() + (idx.front() * class_size); + auto cdf_end = cdf_begin + class_size; + auto* sample_iter = + upper_bound(cdf_begin, cdf_end, dist[i] * *(std::prev(cdf_end))); + output[i] = std::distance(cdf_begin, sample_iter); + }); + }); + }); + }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/nonzero.cpp b/docker/rocm/migraphx/targets/gpu/device/nonzero.cpp new file mode 100644 index 000000000..223713390 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/nonzero.cpp @@ -0,0 +1,77 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
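The multinomial kernel above performs inverse-transform sampling: each uniform random value is scaled by the last (total) CDF entry of its batch and located with upper_bound. A host-side sketch of that single step, with sample_class as an illustrative name:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Given a running (possibly unnormalized) CDF for one batch and u in [0, 1),
// return the first class whose CDF entry exceeds u * total, as the kernel does.
std::size_t sample_class(const std::vector<double>& cdf, double u)
{
    double scaled = u * cdf.back();
    auto it = std::upper_bound(cdf.begin(), cdf.end(), scaled);
    return static_cast<std::size_t>(std::distance(cdf.begin(), it));
}

int main()
{
    std::vector<double> cdf{0.1, 0.4, 1.0}; // from class probabilities {0.1, 0.3, 0.6}
    std::cout << sample_class(cdf, 0.05) << ' '   // 0
              << sample_class(cdf, 0.35) << ' '   // 1
              << sample_class(cdf, 0.90) << '\n'; // 2
}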
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data) +{ + auto s = arg_data.get_shape(); + auto elem_num = s.elements(); + auto out_elem_num = result.get_shape().elements(); + + // call the prefix_sum function to do a prefix_sum to compute + // index in the output. Only 1 block can be used since we have + // only one prefix sum + const index_int block_size = 256; + hip_visit_all(arg_data, s)([&](auto input, auto si) { + const auto* in_ptr = device_cast(input.data()); + auto* ptr = result.cast(); + gs_launch(stream, block_size, block_size)([=](auto, auto idx) __device__ { + // fill all output to 0 first + idx.local_stride(out_elem_num, [&](auto j) { ptr[j] = 0; }); + + block_scan( + idx, + sum{}, + 0, + elem_num, + [&](auto j) { return (float_equal(in_ptr[j], 0)) ? 0 : 1; }, + [&](auto j, auto x) { + auto out_loc = x - 1; + if(float_equal(in_ptr[j], 0)) + return; + + auto index = si.multi(j); + for(size_t k = 0; k < index.size(); ++k) + { + ptr[k * elem_num + out_loc] = index[k]; + } + }); + }); + }); + + return result; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/prefix_scan_sum.cpp b/docker/rocm/migraphx/targets/gpu/device/prefix_scan_sum.cpp new file mode 100644 index 000000000..9518f5b45 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/prefix_scan_sum.cpp @@ -0,0 +1,143 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
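The nonzero kernel above is a stream compaction: the 0/1 mask of the input is prefix-summed with block_scan, and each nonzero element writes its coordinates into output slot x - 1, where x is its inclusive running count. A sequential, one-dimensional host sketch of the same bookkeeping (the kernel additionally uses float_equal and writes a full multi-index per element):

#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    std::vector<float> in{0.f, 3.f, 0.f, 5.f, 7.f};
    std::vector<int> positions; // output slot assigned to each nonzero input index
    int running = 0;
    for(std::size_t j = 0; j < in.size(); ++j)
    {
        running += (in[j] != 0.f) ? 1 : 0; // inclusive prefix sum of the mask
        if(in[j] != 0.f)
            positions.push_back(running - 1); // same "out_loc = x - 1" as the kernel
    }
    for(int p : positions)
        std::cout << p << ' '; // prints: 0 1 2
}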
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void prefix_scan_sum(hipStream_t stream, + const argument& result, + const argument& arg, + int32_t axis, + bool exclusive, + bool reverse) +{ + const index_int max_block_size = 256; + const index_int n = arg.get_shape().lens()[axis]; + auto rlens = result.get_shape().lens(); + rlens[axis] = 1; + + hip_visit_all(result, arg, result.get_shape().with_lens(rlens))( + [=](auto output, auto input, auto rshape) { + const index_int block_size = compute_block_size(rshape.elements(), max_block_size); + if(reverse and exclusive) + { + gs_launch(stream, rshape.elements() * block_size, block_size)( + [=](auto i, auto idx) __device__ { + const auto ridx = rshape.multi(i / block_size); + auto compute_idx = [&](auto j) { + auto k = ridx; + k[axis] = j; + return k; + }; + block_scan( + idx, + sum{}, + 0, + n, + reverse_scan(n, [&](auto j) { return input[compute_idx(j)]; }), + reverse_scan(n, [&](auto j, auto x) { + if(j == n - 1) + output[compute_idx(j)] = 0; + if(j > 0) + output[compute_idx(j - 1)] = x; + })); + }); + } + else if(reverse) + { + gs_launch(stream, rshape.elements() * block_size, block_size)( + [=](auto i, auto idx) __device__ { + const auto ridx = rshape.multi(i / block_size); + auto compute_idx = [&](auto j) { + auto k = ridx; + k[axis] = j; + return k; + }; + block_scan( + idx, + sum{}, + 0, + n, + reverse_scan(n, [&](auto j) { return input[compute_idx(j)]; }), + reverse_scan(n, [&](auto j, auto x) { output[compute_idx(j)] = x; })); + }); + } + else if(exclusive) + { + gs_launch(stream, rshape.elements() * block_size, block_size)( + [=](auto i, auto idx) __device__ { + const auto ridx = rshape.multi(i / block_size); + auto compute_idx = [&](auto j) { + auto k = ridx; + k[axis] = j; + return k; + }; + block_scan( + idx, + sum{}, + 0, + n, + [&](auto j) { return input[compute_idx(j)]; }, + [&](auto j, auto x) { + auto k = j + 1; + if(j == 0) + output[compute_idx(0)] = 0; + if(k < n) + output[compute_idx(k)] = x; + }); + }); + } + else + { + gs_launch(stream, rshape.elements() * block_size, block_size)( + [=](auto i, auto idx) __device__ { + const auto ridx = rshape.multi(i / block_size); + auto compute_idx = [&](auto j) { + auto k = ridx; + k[axis] = j; + return k; + }; + block_scan( + idx, + sum{}, + 0, + n, + [&](auto j) { return input[compute_idx(j)]; }, + [&](auto j, auto x) { output[compute_idx(j)] = x; }); + }); + } + }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/reverse.cpp b/docker/rocm/migraphx/targets/gpu/device/reverse.cpp new file mode 100644 index 000000000..5d5831127 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/reverse.cpp @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
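The prefix_scan_sum implementation above covers the four inclusive/exclusive by forward/reverse combinations by wrapping block_scan's input and output callbacks with reverse_scan and an index shift. A small host example of what each variant produces for {1, 2, 3, 4}; std::partial_sum and std::exclusive_scan are used purely for illustration.

#include <iostream>
#include <numeric>
#include <vector>

// Expected results for {1, 2, 3, 4}:
//   inclusive:          1 3 6 10
//   exclusive:          0 1 3 6
//   reverse inclusive:  10 9 7 4
//   reverse exclusive:  9 7 4 0
int main()
{
    std::vector<int> in{1, 2, 3, 4}, out(4);

    std::partial_sum(in.begin(), in.end(), out.begin()); // inclusive scan
    for(int v : out)
        std::cout << v << ' ';
    std::cout << '\n';

    std::exclusive_scan(in.begin(), in.end(), out.begin(), 0); // exclusive scan (C++17)
    for(int v : out)
        std::cout << v << ' ';
    std::cout << '\n';
}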
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "migraphx/gpu/device/visit.hpp" +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +argument +reverse(hipStream_t stream, argument result, argument arg1, const std::vector& axes) +{ + auto s = arg1.get_shape(); + // auto lens = s.lens(); + std::vector axis_len(axes.begin(), axes.end()); + shape sa{shape::float_type, axis_len}; + std::size_t nelements = s.elements(); + visit_all(result, arg1)([&](auto output1, auto input1) { + hip_visit_views(output1, input1, s)([&](auto output, auto input, auto hs) { + hip_visit_views(sa)([&](auto daxes) { + auto lens = hs.lens; + gs_launch(stream, nelements)([=](auto i) __device__ { + auto idx = hs.multi(i); + auto in_idx = idx; + for(auto axis : daxes.lens) + in_idx[axis] = lens[axis] - 1 - idx[axis]; + output[idx] = input[in_idx]; + }); + }); + }); + }); + + return result; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/rnn_variable_seq_lens.cpp b/docker/rocm/migraphx/targets/gpu/device/rnn_variable_seq_lens.cpp new file mode 100644 index 000000000..6d21c702f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/rnn_variable_seq_lens.cpp @@ -0,0 +1,140 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void rnn_var_sl_shift_sequence(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl) +{ + auto output_shape = result.get_shape(); + int64_t max_len = output_shape.lens()[0]; + visit_all(result, arg_hs)([&](auto output, auto input) { + const auto* in_data = device_cast(input.data()); + auto* out_data = device_cast(output.data()); + auto out_s = make_hip_shape<3>(output_shape); + arg_sl.visit([&](auto sl) { + const auto* sl_data = device_cast(sl.data()); + gs_launch(stream, output_shape.elements(), 256)([=](auto i) __device__ { + auto idx = out_s.multi(i); + auto t = idx[0]; + auto b = idx[1]; + auto l = sl_data[b]; + auto val = in_data[0]; + val = 0; + if(t >= max_len - l) + { + auto in_idx = idx; + in_idx[0] -= (max_len - l); + val = in_data[out_s.index(in_idx)]; + } + out_data[i] = val; + }); + }); + }); +} + +void rnn_var_sl_shift_output(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl, + bool is_reverse) +{ + auto output_shape = result.get_shape(); + int64_t max_len = output_shape.lens()[0]; + visit_all(result, arg_hs)([&](auto output, auto input) { + const auto* in_data = device_cast(input.data()); + auto* out_data = device_cast(output.data()); + auto out_s = make_hip_shape<4>(output_shape); + arg_sl.visit([&](auto sl) { + const auto* sl_data = device_cast(sl.data()); + gs_launch(stream, output_shape.elements(), 256)([=](auto i) __device__ { + auto idx = out_s.multi(i); + auto t = idx[0]; + auto d = idx[1]; + auto b = idx[2]; + auto l = sl_data[b]; + auto val = in_data[0]; + val = 0; + if(t < l) + { + int offset = (d == 1 or is_reverse) ? 
1 : 0; + auto in_idx = idx; + in_idx[0] += offset * (max_len - l); + val = in_data[out_s.index(in_idx)]; + } + out_data[i] = val; + }); + }); + }); +} + +void rnn_var_sl_last_output(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl, + bool is_reverse) +{ + auto input_shape = arg_hs.get_shape(); + auto out_comp_lens = input_shape.lens(); + out_comp_lens[0] = 1; + shape out_comp_shape{input_shape.type(), out_comp_lens}; + + visit_all(result, arg_hs)([&](auto output, auto input) { + const auto* in_data = device_cast(input.data()); + auto* out_data = device_cast(output.data()); + arg_sl.visit([&](auto sl) { + const auto* sl_data = device_cast(sl.data()); + auto in_s = make_hip_shape<4>(input_shape); + auto out_s = make_hip_shape<4>(out_comp_shape); + gs_launch(stream, result.get_shape().elements(), 256)([=](auto i) __device__ { + auto idx = out_s.multi(i); + auto d = idx[1]; + auto b = idx[2]; + auto l = sl_data[b]; + if(is_reverse or d == 1) + { + idx[0] = 0; + } + else + { + idx[0] = l - 1; + } + out_data[i] = in_data[in_s.index(idx)]; + }); + }); + }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/targets.cpp b/docker/rocm/migraphx/targets/gpu/device/targets.cpp new file mode 100644 index 000000000..0b1853db7 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/targets.cpp @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +static std::vector parse_targets() { return split_string(MIGRAPHX_GPU_TARGETS, ';'); } + +const std::vector& get_targets() +{ + static auto result = parse_targets(); + return result; +} + +std::string get_targets_as_string() { return join_strings(get_targets(), ", "); } + +static int get_device_id() +{ + int device; + auto status = hipGetDevice(&device); + if(status != hipSuccess) + MIGRAPHX_THROW("No device"); + return device; +} + +std::string get_device_name() +{ + hipDeviceProp_t props{}; + auto status = hipGetDeviceProperties(&props, get_device_id()); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to get device properties"); + return props.gcnArchName; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device/targets.hpp.in b/docker/rocm/migraphx/targets/gpu/device/targets.hpp.in new file mode 100644 index 000000000..0a0e19aba --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/targets.hpp.in @@ -0,0 +1,52 @@ +/* +* The MIT License (MIT) +* +* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ +#ifndef MIGRAPHX_GUARD_DEVICE_TARGETS_CPP +#define MIGRAPHX_GUARD_DEVICE_TARGETS_CPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { +#define MIGRAPHX_GPU_TARGETS "@GPU_TARGETS@" // NOLINT + +MIGRAPHX_DEVICE_EXPORT +const std::vector& get_targets(); + +MIGRAPHX_DEVICE_EXPORT +std::string get_targets_as_string(); + +MIGRAPHX_DEVICE_EXPORT +std::string get_device_name(); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_DEVICE_TARGETS_CPP + + diff --git a/docker/rocm/migraphx/targets/gpu/device/topk.cpp b/docker/rocm/migraphx/targets/gpu/device/topk.cpp new file mode 100644 index 000000000..2168af94e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device/topk.cpp @@ -0,0 +1,239 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct hip_heap_vector +{ + MIGRAPHX_DEVICE_CONSTEXPR hip_heap_vector(T* val, index_int n, Index v_idx, Compare comp) + : data(val), size(n), data_index(v_idx), compare(comp) + { + make_heap(size); + } + + MIGRAPHX_DEVICE_CONSTEXPR void try_push(const T val) + { + if(compare(val, data[data_index(0)])) + return; + + pop_heap(size - 1); + data[data_index(size - 1)] = val; + push_heap(size - 1); + } + + MIGRAPHX_DEVICE_CONSTEXPR void sort() { sort_heap(size); } + + private: + MIGRAPHX_DEVICE_CONSTEXPR inline static void swap(T& v1, T& v2) noexcept + { + T v = v1; + v1 = v2; + v2 = v; + } + + MIGRAPHX_DEVICE_CONSTEXPR inline void heapify_down(index_int n, index_int index) + { + while(index < n) + { + auto pre_index = index; + index_int l = 2 * index + 1; + index_int r = 2 * index + 2; + + if(l < n and compare(data[data_index(l)], data[data_index(index)])) + { + index = l; + } + + if(r < n and compare(data[data_index(r)], data[data_index(index)])) + { + index = r; + if(compare(data[data_index(l)], data[data_index(r)])) + { + index = l; + } + } + + if(index == pre_index) + { + break; + } + + swap(data[data_index(index)], data[data_index(pre_index)]); + } + } + + MIGRAPHX_DEVICE_CONSTEXPR inline void heapify_up(index_int index) + { + while(index > 0) + { + auto parent_idx = (index - 1) / 2; + + if(not compare(data[data_index(index)], data[data_index(parent_idx)])) + { + break; + } + + swap(data[data_index(index)], data[data_index(parent_idx)]); + index = parent_idx; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR inline void make_heap(index_int n) + { + for(int j = n / 2 - 1; j >= 0; --j) + { + heapify_down(n, j); + } + } + + MIGRAPHX_DEVICE_CONSTEXPR inline void push_heap(index_int loc) { heapify_up(loc); } + + MIGRAPHX_DEVICE_CONSTEXPR inline void pop_heap(index_int loc) + { + swap(data[data_index(0)], data[data_index(loc)]); + heapify_down(loc, 0); + } + + MIGRAPHX_DEVICE_CONSTEXPR inline void sort_heap(index_int n) + { + for(int j = n - 1; j > 0; --j) + { + swap(data[data_index(0)], data[data_index(j)]); + heapify_down(j, 0); + } + } + + T* data = nullptr; + index_int size; + Index data_index; + Compare compare; +}; + +template +__device__ hip_heap_vector +make_heap(T* data, index_int n, Index idx, Compare compare) +{ + return {data, n, 
idx, compare}; +} + +template +std::vector topk(hipStream_t stream, + const argument& val_res, + const argument& ind_res, + const argument& arg, + int64_t k, + int64_t axis, + Compare compare) +{ + auto in_s = arg.get_shape(); + auto in_lens = in_s.lens(); + auto out_s = val_res.get_shape(); + auto axis_dim = in_s.lens()[axis]; + auto comp_lens = in_lens; + comp_lens[axis] = 1; + shape comp_s{in_s.type(), comp_lens}; + std::size_t elem_num = comp_s.elements(); + + hip_visit_all(val_res, arg, out_s, in_s, comp_s)( + [&](auto out_val, auto input, auto oss, auto iss, auto css) { + auto* data = device_cast(input.data()); + auto* out = device_cast(out_val.data()); + auto* const ind = ind_res.cast(); + gs_launch(stream, elem_num)([=](auto i) __device__ { + auto idx = css.multi(i); + + auto in_idx = [&](int ii) { + auto iidx = idx; + iidx[axis] = ii; + return iss.index(iidx); + }; + + auto out_idx = [&](int ii) { + auto iidx = idx; + iidx[axis] = ii; + return oss.index(iidx); + }; + + auto data_compare = [=](auto ii, auto jj) { + return compare(data[in_idx(ii)], data[in_idx(jj)]); + }; + + for(int j = 0; j < k; ++j) + { + ind[out_idx(j)] = j; + } + + auto hp = make_heap(ind, k, out_idx, data_compare); + for(int j = k; j < axis_dim; ++j) + { + hp.try_push(j); + } + hp.sort(); + + for(int j = 0; j < k; ++j) + { + out[out_idx(j)] = data[in_idx(ind[out_idx(j)])]; + } + }); + }); + + return {val_res, ind_res}; +} + +argument topk_largest(hipStream_t stream, + const argument& val_res, + const argument& ind_res, + const argument& arg, + int64_t k, + int64_t axis) +{ + return {topk(stream, val_res, ind_res, arg, k, axis, std::less<>{})}; +} + +argument topk_smallest(hipStream_t stream, + const argument& val_res, + const argument& ind_res, + const argument& arg, + int64_t k, + int64_t axis) +{ + return {topk(stream, val_res, ind_res, arg, k, axis, std::greater<>{})}; +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/device_name.cpp b/docker/rocm/migraphx/targets/gpu/device_name.cpp new file mode 100644 index 000000000..c717742e2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/device_name.cpp @@ -0,0 +1,68 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
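The topk kernel above keeps a k-element heap whose root is the weakest of the current candidates, replaces the root whenever a better element arrives, and finally heap-sorts the survivors. A host-side sketch of the same selection strategy using std::priority_queue in place of hip_heap_vector; topk_largest_sketch is an illustrative name.

#include <functional>
#include <iostream>
#include <queue>
#include <vector>

// Keep the k largest values seen so far; the min-heap root is the current worst candidate.
std::vector<int> topk_largest_sketch(const std::vector<int>& data, std::size_t k)
{
    std::priority_queue<int, std::vector<int>, std::greater<int>> heap;
    for(int v : data)
    {
        if(heap.size() < k)
            heap.push(v);
        else if(v > heap.top())
        {
            heap.pop(); // drop the weakest candidate, like try_push replacing the root
            heap.push(v);
        }
    }
    std::vector<int> result;
    while(not heap.empty())
    {
        result.push_back(heap.top()); // ascending order; reverse for descending
        heap.pop();
    }
    return result;
}

int main()
{
    for(int v : topk_largest_sketch({5, 1, 9, 3, 7, 2}, 3))
        std::cout << v << ' '; // prints: 5 7 9
}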
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +int get_device_id() +{ + int device; + auto status = hipGetDevice(&device); + if(status != hipSuccess) + MIGRAPHX_THROW("No device"); + return device; +} + +std::string get_device_name() +{ + hipDeviceProp_t props{}; + auto status = hipGetDeviceProperties(&props, get_device_id()); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to get device properties"); + return props.gcnArchName; +} + +bool gfx_has_fp8fnuz_intrinsics() +{ + const auto device_name = trim(split_string(get_device_name(), ':').front()); + return (starts_with(device_name, "gfx94")); +} + +bool gfx_has_fp8ocp_intrinsics() +{ + const auto device_name = trim(split_string(get_device_name(), ':').front()); + bool is_navi_with_fp8ocp = starts_with(device_name, "gfx12") and device_name >= "gfx1200"; + bool is_mi_with_fp8ocp = starts_with(device_name, "gfx9") and device_name >= "gfx950"; + return (is_navi_with_fp8ocp or is_mi_with_fp8ocp); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/driver/CMakeLists.txt b/docker/rocm/migraphx/targets/gpu/driver/CMakeLists.txt new file mode 100644 index 000000000..ae9b9a685 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/CMakeLists.txt @@ -0,0 +1,31 @@ +##################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +##################################################################################### + +file(GLOB GPU_DRIVER_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) +add_executable(gpu-driver + ${GPU_DRIVER_SRCS} +) +rocm_clang_tidy_check(gpu-driver) +target_include_directories(gpu-driver PRIVATE include) +target_link_libraries(gpu-driver PRIVATE migraphx_gpu) diff --git a/docker/rocm/migraphx/targets/gpu/driver/action.cpp b/docker/rocm/migraphx/targets/gpu/driver/action.cpp new file mode 100644 index 000000000..ea71afdf1 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/action.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +auto& action_map() +{ + static std::unordered_map m; + return m; +} + +action_function get_action(const std::string& name) +{ + if(action_map().count(name) == 0) + MIGRAPHX_THROW("Missing action: " + name); + return action_map().at(name); +} + +void register_action(const std::string& name, const action_function& a) { action_map()[name] = a; } + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/driver/compile_op.cpp b/docker/rocm/migraphx/targets/gpu/driver/compile_op.cpp new file mode 100644 index 000000000..5caae2a79 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/compile_op.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +struct compile_op : action +{ + static void apply(const parser& p, const value& v) + { + context ctx; + auto inputs = p.parse_shapes(v.at("inputs")); + auto op = gpu::compile_op(v.at("name").to(), ctx, inputs, v); + auto t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); + std::cout << op << " -> " << op.compute_shape(inputs) << ": " << t << "ms" << std::endl; + std::cout << std::endl; + } +}; + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/action.hpp b/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/action.hpp new file mode 100644 index 000000000..172419e7c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/action.hpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP +#define MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +using action_function = std::function; + +action_function get_action(const std::string& name); +void register_action(const std::string& name, const action_function& a); + +struct auto_register_action +{ + template + static void apply() + { + const auto& name = get_type_name(); + register_action(name.substr(name.rfind("::") + 2), + [](auto&&... xs) { T::apply(std::forward(xs)...); }); + } +}; + +template +using action = auto_register; + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP diff --git a/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/parser.hpp b/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/parser.hpp new file mode 100644 index 000000000..d5995eeb5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/include/migraphx/gpu/driver/parser.hpp @@ -0,0 +1,68 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP +#define MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP + +#include +#include + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +[[noreturn]] void error(const std::string& msg); + +struct parser +{ + parser() = default; + + template + T get(const value& v, const std::string& key, const T& default_value) const + { + return v.get(key, settings.get(key, default_value)); + } + + shape parse_shape(const value& v) const; + + std::vector parse_shapes(const value& v) const; + + void load_settings(const value& v); + + static void process(const value& v); + + private: + value settings = value::object{}; +}; + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP diff --git a/docker/rocm/migraphx/targets/gpu/driver/main.cpp b/docker/rocm/migraphx/targets/gpu/driver/main.cpp new file mode 100644 index 000000000..c61e447db --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/main.cpp @@ -0,0 +1,44 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +using namespace migraphx; // NOLINT +using namespace migraphx::gpu; // NOLINT +using namespace migraphx::gpu::driver; // NOLINT + +int main(int argc, char const* argv[]) +{ + std::vector args(argv, argv + argc); + if(args.size() < 2) + { + std::cout << "Usage: gpu-driver " << std::endl; + std::abort(); + } + auto v = from_json_string(convert_to_json(read_string(args[1]))); + parser::process(v); +} diff --git a/docker/rocm/migraphx/targets/gpu/driver/parser.cpp b/docker/rocm/migraphx/targets/gpu/driver/parser.cpp new file mode 100644 index 000000000..c84d00580 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/parser.cpp @@ -0,0 +1,81 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +[[noreturn]] void error(const std::string& msg) +{ + std::cout << msg << std::endl; + std::abort(); +} + +shape parser::parse_shape(const value& v) const +{ + auto lens = get(v, "lens", std::vector{}); + auto strides = get(v, "strides", std::vector{}); + auto type = shape::parse_type(get(v, "type", "float")); + if(strides.empty()) + return shape{type, lens}; + else + return shape{type, lens, strides}; +} + +std::vector parser::parse_shapes(const value& v) const +{ + std::vector result; + std::transform( + v.begin(), v.end(), std::back_inserter(result), [&](auto&& x) { return parse_shape(x); }); + return result; +} + +void parser::load_settings(const value& v) +{ + if(v.contains("settings")) + settings = v.at("settings"); +} + +void parser::process(const value& v) +{ + if(not v.is_object()) + error("Input is not an object"); + parser p{}; + p.load_settings(v); + for(auto&& pp : v) + { + if(pp.get_key() == "settings") + continue; + get_action(pp.get_key())(p, pp.without_key()); + } +} + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/driver/precompile_op.cpp b/docker/rocm/migraphx/targets/gpu/driver/precompile_op.cpp new file mode 100644 index 000000000..2aec2a2d3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/precompile_op.cpp @@ -0,0 +1,84 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +struct precompile_op : action +{ + static program create_preop_program(const operation& preop, std::vector inputs) + { + program p; + auto* mm = p.get_main_module(); + std::vector args; + inputs.pop_back(); + transform(inputs, range(inputs.size()), std::back_inserter(args), [&](auto input, auto i) { + return mm->add_parameter("x" + std::to_string(i), input); + }); + mm->add_instruction(preop, args); + return p; + } + + static operation get_code_object(const program& p) + { + MIGRAPHX_TIDY_CONST auto* mm = p.get_main_module(); + auto it = std::find_if(mm->begin(), mm->end(), [](const auto& ins) { + return (ins.name() == "gpu::code_object"); + }); + if(it == mm->end()) + MIGRAPHX_THROW("Failed to create code object"); + return it->get_operator(); + } + static void apply(const parser& p, const value& v) + { + context ctx; + auto inputs = p.parse_shapes(v.at("inputs")); + auto name = v.at("name").to(); + auto preop = make_op(name); + if(v.contains("fields")) + preop.from_value(v.at("fields")); + bool exhaustive = v.get("exhaustive", false); + auto prog = create_preop_program(preop, inputs); + run_passes(prog, {lowering{}, compile_ops{&ctx, exhaustive}}); + auto op = get_code_object(prog); + auto t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); + std::cout << preop << ": " << t << "ms" << std::endl; + } +}; + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/driver/run_op.cpp b/docker/rocm/migraphx/targets/gpu/driver/run_op.cpp new file mode 100644 index 000000000..d5575a933 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/driver/run_op.cpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace driver { + +struct run_op : action +{ + static void apply(const parser& p, const value& v) + { + context ctx; + auto inputs = p.parse_shapes(v.at("inputs")); + auto name = v.at("name").to(); + if(not contains(name, "::")) + name = "gpu::" + name; + auto op = make_op(name); + if(v.contains("fields")) + op.from_value(v.at("fields")); + auto t = time_op(ctx, op, inputs, p.get(v, "iterations", 100)); + std::cout << op << " -> " << op.compute_shape(inputs) << ": " << t << "ms" << std::endl; + } +}; + +} // namespace driver +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/fuse_ck.cpp b/docker/rocm/migraphx/targets/gpu/fuse_ck.cpp new file mode 100644 index 000000000..bf9a269f3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/fuse_ck.cpp @@ -0,0 +1,217 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct module; + +namespace gpu { + +struct ck_gemm +{ + operation op = make_op("dot"); + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op")); + } + + std::string name() const { return "gpu::ck_gemm"; } + + void check_gemm_shape(const shape& s) const + { + if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1)) + MIGRAPHX_THROW("Invalid shape for ck_gemm"); + } + + shape compute_shape(std::vector inputs, const std::vector& mods) const + { + check_shapes{inputs, *this}.same_ndims(); + if(inputs.size() < 2) + MIGRAPHX_THROW(name() + ": should have at least two inputs."); + auto a = inputs[0]; + auto b = inputs[1]; + for(const auto& input : inputs) + check_gemm_shape(input); + auto r = op.compute_shape({a, b}); + if(mods.empty()) + return r; + return r.with_type(mods.front()->get_output_shapes().front().type()); + } + + static bool is_ck_supported_type(shape::type_t t) + { + return contains({shape::half_type, shape::int8_type, shape::int32_type}, t); + } +}; +MIGRAPHX_REGISTER_OP(ck_gemm); + +struct ck_gemm_softmax_gemm : gemm_softmax_gemm +{ + std::string name() const { return "gpu::ck_gemm_softmax_gemm"; } +}; +MIGRAPHX_REGISTER_OP(ck_gemm_softmax_gemm); + +namespace { + +MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins) +{ + if(ins->name() != "dot" and ins->name() != "quant_dot") + return false; + if(not ck_gemm::is_ck_supported_type(ins->get_shape().type())) + return false; + auto a = ins->inputs().front()->get_shape(); + auto b = ins->inputs().back()->get_shape(); + auto m = a.lens()[a.lens().size() - 2]; + auto n = b.lens().back(); + auto k = a.lens().back(); + auto batch_size = std::accumulate( + a.lens().rbegin() + 2, a.lens().rend(), std::size_t{1}, std::multiplies()); + // Integer gemms must be divisible by 4 in ck + if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type())) + { + if(m % 4 != 0) + return false; + if(n % 4 != 0) + return false; + if(k % 4 != 0) + return false; + } + auto device_name = trim(split_string(get_device_name(), ':').front()); + if(starts_with(device_name, "gfx94")) + { + if(ins->get_shape().type() == shape::half_type) + { + if(batch_size >= 64) + return m < 2048 or k <= 64 or n <= 384 or n >= 2048; + return true; + } + return true; + } + return k <= 2048; +} + +struct find_ck_gemm_pointwise +{ + // Find a gemm followed by a pointwise operation. 
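+    // The matcher skips an optional "contiguous" op between the (quant_)dot and its pointwise
+    // consumer. apply() bails out on unsupported element types and "capture" inputs, reorders
+    // the pointwise submodule parameters so the gemm output becomes the first parameter, and
+    // finally replaces the pointwise instruction with a ck_gemm that also takes the gemm inputs.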
+ auto matcher() const + { + auto gemm = match::skip(match::name("contiguous"))( + match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm"))); + return match::name("pointwise")(match::any_of[match::inputs()](gemm.bind("x"))); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm_ins = r.instructions["gemm"]; + auto x_ins = r.instructions["x"]; // input after contiguous + auto* pm = ins->module_inputs().front(); + auto names = pm->get_parameter_names(); + std::sort(names.begin(), names.end()); + auto inputs = ins->inputs(); + auto gemm_it = std::find(inputs.begin(), inputs.end(), x_ins); + auto gemm_idx = gemm_it - inputs.begin(); + if(gemm_ins->get_shape().type() != shape::int32_type and + ins->get_shape().type() != gemm_ins->get_shape().type()) + return; + if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) { + return not ck_gemm::is_ck_supported_type(input->get_shape().type()); + })) + return; + if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) { + return not input->inputs().empty() and input->inputs().front()->name() == "capture"; + })) + return; + if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) { + return not input->inputs().empty() and input->inputs().front()->name() == "capture"; + })) + return; + assert(gemm_it != inputs.end()); + if(gemm_idx != 0) + { + auto first_param = pm->get_parameter(names[0]); + auto gemm_param = pm->get_parameter(names[gemm_idx]); + auto new_gemm_param = pm->add_parameter(names[0] + "_0", gemm_param->get_shape()); + auto new_first_param = + pm->add_parameter(names[gemm_idx] + "_0", first_param->get_shape()); + pm->replace_instruction(gemm_param, new_gemm_param); + pm->replace_instruction(first_param, new_first_param); + pm->remove_instruction(first_param); + pm->remove_instruction(gemm_param); + } + inputs.erase(gemm_it); + inputs.insert(inputs.begin(), gemm_ins->inputs().begin(), gemm_ins->inputs().end()); + + mpm.get_module().replace_instruction(ins, ck_gemm{gemm_ins->get_operator()}, inputs, {pm}); + } +}; + +struct find_ck_gemm +{ + auto matcher() const { return match::name("dot", "quant_dot")(is_ck_gemm().bind("gemm")); } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + mpm.get_module().replace_instruction(ins, ck_gemm{ins->get_operator()}, ins->inputs()); + } +}; + +struct find_ck_gemm_softmax_gemm +{ + auto matcher() const { return match::name("gpu::pre_gemm_softmax_gemm"); } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + auto v = ins->get_operator().to_value(); + assert(v.contains("scale")); + auto scale = v.at("scale").to(); + mpm.get_module().replace_instruction( + ins, ck_gemm_softmax_gemm{migraphx::make_op("dot"), scale}, ins->inputs()); + } +}; + +} // namespace + +void fuse_ck::apply(module_pass_manager& mpm) const +{ + match::find_matches(mpm, find_ck_gemm_softmax_gemm{}, find_ck_gemm_pointwise{}); + match::find_matches(mpm, find_ck_gemm{}); +} + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/fuse_mlir.cpp b/docker/rocm/migraphx/targets/gpu/fuse_mlir.cpp new file mode 100644 index 000000000..519955c21 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/fuse_mlir.cpp @@ -0,0 +1,1106 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_EXTRA_MLIR); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR_INPUT_FUSION); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_MLIR_REDUCE_FUSION); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_MLIR); +/** + * @brief Declares a new MIGraphX environment variable which forces to generate + * only specific MLIR operations. + * + * The variable, if defined, forces MIGraphX to use only specific operations + * with MLIR regardless of the underlying GPU architecture. The variable accepts + * a list of operations separated by comma. The variable recognizes the following + * operations: "fused", "convolution", "dot". If the variable is not defined MIGraphX + * will decide by itself which operations to delegate to MLIR. The variable is + * intended to be primarily used by rocMLIR developers. 
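+ *
+ * Illustrative example (syntax as described above, not an exhaustive list):
+ *   MIGRAPHX_MLIR_USE_SPECIFIC_OPS=dot,convolution restricts MLIR to dot and convolution
+ *   kernels, while a leading '!' or '~' negates an entry (see is_negated_op below), e.g.
+ *   MIGRAPHX_MLIR_USE_SPECIFIC_OPS=!fused.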
+ */ +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_USE_SPECIFIC_OPS); + +bool mlir_enabled() +{ +#ifdef MIGRAPHX_MLIR + const bool mlir_disabled = enabled(MIGRAPHX_DISABLE_MLIR{}); + return not mlir_disabled; +#else + return false; +#endif +} + +namespace { +struct requested +{ +}; +struct rejected +{ +}; +} // namespace + +static bool is_negated_op(const std::string& s) +{ + if(s.empty()) + return false; + return contains({'!', '~'}, s[0]); +} + +template +static std::vector get_usage() +{ + static const auto options = + split_string(string_value_of(MIGRAPHX_MLIR_USE_SPECIFIC_OPS{}, ""), ','); + static const bool enabled = std::is_same{}; + std::vector result; + auto remove_not_symbol = [&](const std::string& s) { + if(is_negated_op(s)) + return s.substr(1); + return s; + }; + transform_if( + options.begin(), + options.end(), + std::back_inserter(result), + [&](const std::string& option) { + if(option.empty()) + return false; + if(is_negated_op(option)) + return not enabled; + return enabled; + }, + remove_not_symbol); + return result; +} + +template +static bool specific_op(std::string_view option, bool fallback = false) +{ + static const auto options = get_usage(); + if(options.empty()) + return fallback; + if(contains(option, "fused") and contains(options, "fused")) + return true; + return contains(options, option); +} + +bool mlir_attention_enabled(context* ctx) +{ +#ifdef MIGRAPHX_MLIR + if(not mlir_enabled()) + return false; + if(specific_op("attention")) + return false; + // Enable attention by default for mi300 + if(ctx != nullptr and starts_with(ctx->get_current_device().get_gfx_name(), "gfx94")) + return true; + return specific_op("attention"); +#else + return false; +#endif +} + +#ifdef MIGRAPHX_MLIR + +struct mlir_op +{ + std::string name() const { return "gpu::mlir_op"; } + operation op = make_op("convolution"); + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op")); + } + + // Check if the shape can be created from a transpose/broadcast/slice + static bool is_mlir_compatible(const shape& s) + { + if(s.standard() or s.packed() or s.scalar() or s.ndim() == 1) + return true; + auto ns = reorder_shape(s, find_permutation(s)); + std::vector stride_ratios; + auto last = std::find(ns.strides().begin(), ns.strides().end(), 0); + if(*std::prev(last) != 1) + return false; + std::adjacent_difference(ns.strides().begin(), + last, + std::back_inserter(stride_ratios), + [](auto y, auto x) -> std::size_t { + assert(y != 0); + if((x % y) != 0) + return 0; + return x / y; + }); + return std::equal(stride_ratios.begin() + 1, + stride_ratios.end(), + ns.lens().begin() + 1, + [](auto ratio, auto len) { return ratio >= len; }); + } + + shape compute_shape(const std::vector& inputs, const std::vector& mods) const + { + module_ref mod = mods[0]; + check_shapes{inputs, *this}.has_at_least(1); + if(mods.size() != 1) + MIGRAPHX_THROW("should have one submodule."); + + if(not std::all_of(inputs.begin(), inputs.end(), &is_mlir_compatible)) + MIGRAPHX_THROW("Shape is not mlir compatible."); + + auto result = + mod->compute_shapes(inputs, {.name = name(), .strict_type = true, .strict_lens = true}); + if(result.size() == 1) + return result.front(); + return shape{result}; + } +}; +MIGRAPHX_REGISTER_OP(mlir_op); + +namespace { + +const auto& reshaper_names() +{ + // clang-format off + static const std::unordered_set names = { + "slice", + "transpose", + "multibroadcast", + "broadcast", + "contiguous", + "reshape", + "lazy_reshape", + "squeeze", + "flatten", + "unsqueeze" + }; + 
// clang-format on + return names; +} + +std::tuple> +get_fusable_input_op_stream(instruction_ref lower_input) +{ + instruction_ref upper_input = lower_input; + std::vector op_stream; + while(contains(reshaper_names(), upper_input->name())) + { + operation op = upper_input->get_operator(); + op_stream.push_back(op); + upper_input = upper_input->inputs().at(0); + } + return {upper_input, op_stream}; +} + +void fuse_input_ops(module_ref mm, + const std::vector& inputs, + std::unordered_map* map_ins) +{ + assert(map_ins != nullptr); + size_t input_cnt = mm->get_parameters().size(); + for(instruction_ref input : inputs) + { + if(contains(*map_ins, input)) + continue; + auto [upper_input, op_stream] = get_fusable_input_op_stream(input); + if(not contains(*map_ins, upper_input)) + (*map_ins)[upper_input] = + mm->add_parameter(param_name(input_cnt++), upper_input->get_shape().as_standard()); + instruction_ref prev_input = (*map_ins)[upper_input]; + for(const auto& op : reverse(op_stream)) + { + prev_input = mm->add_instruction(op, {prev_input}); + } + (*map_ins)[input] = prev_input; + } +} + +std::tuple> +fuse_input_ops_and_gemm_based_op(module_ref mm, + const std::vector& gemm_based_op_inputs, + const operation& gemm_based_op) +{ + std::vector top_inputs; + std::vector imm_inputs; + size_t input_cnt = 0; + for(instruction_ref input : gemm_based_op_inputs) + { + auto [upper_input, op_stream] = get_fusable_input_op_stream(input); + top_inputs.push_back(upper_input); + instruction_ref prev_input = + mm->add_parameter(param_name(input_cnt++, "y"), upper_input->get_shape().as_standard()); + for(const auto& op : reverse(op_stream)) + { + prev_input = mm->add_instruction(op, {prev_input}); + } + imm_inputs.push_back(prev_input); + } + instruction_ref new_gemm_based_op = mm->add_instruction(gemm_based_op, imm_inputs); + return {new_gemm_based_op, top_inputs}; +} + +enum class mlir_mode +{ + all, + fast, + int8, + none +}; + +auto is_mlir_dot(mlir_mode mode) +{ + return match::make_basic_pred_matcher([=](instruction_ref ins) { + if(mode == mlir_mode::none) + return false; + if(ins->name() != "dot" and ins->name() != "quant_dot") + return false; + // dot operation where (FP8 * FP8 = FP8) is not available in MLIR. rocBLAS/hipBLASLt should + // have the support for it. + if(contains(fp8_types{}.get(), ins->get_shape().type())) + return false; + if(mode != mlir_mode::fast) + return true; + auto a = ins->inputs().front()->get_shape(); + auto b = ins->inputs().back()->get_shape(); + // auto m = a.lens()[a.lens().size() - 2]; + // auto n = b.lens().back(); + auto k = a.lens().back(); + // Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy + // to avoid poor-performing GEMM kernels from MLIR + // To-do: Investigate a more precise strategy + return k <= 1024; + }); +} + +auto is_mlir_conv(mlir_mode mode) +{ + return match::make_basic_pred_matcher([=](instruction_ref ins) { + if(mode == mlir_mode::none) + return false; + if(ins->name() != "convolution" and ins->name() != "quant_convolution") + return false; + auto input = ins->inputs().front()->get_shape(); + value v = ins->get_operator().to_value(); + auto group = v.at("group").to(); + // Avoid MLIR assertion: Index < Length && "Invalid index!" 
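+        // (grouped convolutions are therefore only offloaded to MLIR when the input is 4-D)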
+ if(ins->get_shape().lens().size() != 4 and group > 1) + return false; + std::set supported_types = fp8_types{}.get(); + supported_types.insert(shape::int8_type); + if(contains(supported_types, input.type())) + return true; + if(mode == mlir_mode::all) + return true; + // No winograd for group convolution + if(group > 1) + return true; + auto w = ins->inputs().at(1)->get_shape(); + if(w.lens().size() != 4) + return true; + if(w.lens()[2] != w.lens()[3]) + return true; + return (w.lens()[3] % 3) != 0; + }); +} + +std::unordered_map +create_param_map_with_literals(module_ref mm, const module* pm, const shape& shape) +{ + std::unordered_map ins_map; + for(auto ins : iterator_for(*pm)) + { + if(ins->name() != "@literal") + { + continue; + } + literal r = ins->get_literal(); + instruction_ref literal = mm->add_literal(r); + instruction_ref mbcast = + mm->add_instruction(make_op("multibroadcast", {{"out_lens", shape.lens()}}), literal); + ins_map[ins] = mbcast; + } + return ins_map; +} + +instruction_ref unroll_pointwise(module& main_mod, + instruction_ref pos, + const operation& op, + const std::vector& inputs, + const std::vector& mod_args) +{ + if(op.name() == "pointwise") + { + auto* sub_pm = mod_args.front(); + auto param_map_2 = create_param_map_with_literals( + &main_mod, sub_pm, op.compute_shape(to_shapes(inputs), mod_args)); + return main_mod.insert_inline(pos, *sub_pm, inputs, ¶m_map_2) + .front(); // cppcheck-suppress returnDanglingLifetime; + } + return main_mod.insert_instruction(pos, op, inputs, mod_args); +} + +// Whitelist supported fusion options, including imposing type constraints +// for cases where MLIR only supports an operation (usually a pointwise function) +// on particular types. +bool is_pointwise_op_supported_by_mlir(const instruction& i) +{ + using type_t = shape::type_t; + const auto& name = i.name(); + const auto result_type = i.get_shape().type(); + const std::initializer_list allowed_types = {type_t::float_type, + type_t::bf16_type, + type_t::half_type, + type_t::fp8e4m3fnuz_type, + type_t::fp8e5m2fnuz_type, + type_t::fp8e4m3fn_type, + type_t::fp8e5m2_type, + type_t::int8_type, + type_t::uint8_type, + type_t::int32_type, + type_t::uint32_type, + type_t::bool_type}; + // Preliminary type check. + if(not contains(allowed_types, result_type)) + { + return false; + } + const std::initializer_list any_type_ops = {"@literal", "@param", "@return"}; + const std::initializer_list no_bool_ops = { + "convolution", + "quant_convolution", + "dot", + "quant_dot", + "add", + "clip", + "relu", + "sub", + "mul", + "div", + "pow", + "where", + "quantizelinear", + "dequantizelinear", + "abs", + "neg", + }; + const std::initializer_list fp_only_ops = { + "ceil", + "erf", + "exp", + "floor", + "log", + "recip", + "sqrt", + "rsqrt", + "sigmoid", + "softmax", + "tanh", + }; + std::set float_types = {type_t::float_type, + type_t::half_type, + type_t::bf16_type, + type_t::fp8e4m3fnuz_type, + type_t::fp8e5m2fnuz_type, + type_t::fp8e4m3fn_type, + type_t::fp8e5m2_type}; + bool is_float = contains(float_types, result_type); + if(contains(any_type_ops, name)) + return true; + if(result_type != type_t::bool_type and contains(no_bool_ops, name)) + return true; + if(is_float and contains(fp_only_ops, name)) + return true; + // Only conversions between floating types are known to be unambigiously + // supported. 
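+    // i.e. "convert" is accepted only when the result is a non-fp8 float type and every input
+    // is float, half, or bf16.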
+ if(is_float and name == "convert") + { + if(contains(fp8_types{}.get(), result_type)) + { + return false; + } // else + return std::all_of(i.inputs().begin(), i.inputs().end(), [](const auto& arg) { + return contains({type_t::float_type, type_t::half_type, type_t::bf16_type}, + arg->get_shape().type()); + }); + } + return false; +} + +bool is_reduce_op_supported_by_mlir(const instruction& i) +{ + using type_t = shape::type_t; + const auto& name = i.name(); + const auto result_type = i.get_shape().type(); + const std::initializer_list allowed_types = {type_t::float_type, + type_t::half_type, + type_t::bf16_type, + type_t::fp8e4m3fnuz_type, + type_t::fp8e5m2fnuz_type, + type_t::fp8e4m3fn_type, + type_t::fp8e5m2_type}; + + // Preliminary type check. + if(not contains(allowed_types, result_type)) + { + return false; + } + const std::initializer_list reduce_ops = {"reduce_mean", "reduce_sum"}; + return contains(reduce_ops, i.name()); +} + +// A separate function so we can remove operators that are supported by mlir +// but not supported for an input fusion. +bool is_pointwise_op_supported_by_mlir_for_input(const instruction& i) +{ + return is_pointwise_op_supported_by_mlir(i); +} + +MIGRAPHX_PRED_MATCHER(mlir_split_reduce, instruction_ref ins) +{ + if(ins->name() != "split_fused_reduce") + return false; + auto* mod_arg = ins->module_inputs().front(); + auto supported_reshapes = reshaper_names(); + supported_reshapes.erase("slice"); + std::unordered_set builtins = {"@param", "@literal", "@return"}; + for(const auto i : iterator_for(*mod_arg)) + { + if(is_reduce(*i)) + { + if(not is_reduce_op_supported_by_mlir(*i)) + return false; + } + else if(i->name() == "pointwise") + { + if(not std::all_of(i->module_inputs().front()->begin(), + i->module_inputs().front()->end(), + &is_pointwise_op_supported_by_mlir)) + return false; + } + else if(not contains(reshaper_names(), i->name()) and not contains(builtins, i->name())) + { + return false; + } + } + return true; +} + +MIGRAPHX_PRED_MATCHER(mlir_pointwise, instruction_ref ins) +{ + if(ins->name() != "pointwise") + return false; + auto* pm = ins->module_inputs().front(); + return std::all_of(pm->begin(), pm->end(), &is_pointwise_op_supported_by_mlir); +} + +MIGRAPHX_PRED_MATCHER(mlir_input_pointwise, instruction_ref ins) +{ + if(ins->name() != "pointwise") + return false; + auto* pm = ins->module_inputs().front(); + return std::all_of(pm->begin(), pm->end(), &is_pointwise_op_supported_by_mlir_for_input); +} + +std::vector mlir_contiguous(module_pass_manager& mpm, + const std::vector& inputs) +{ + std::vector result; + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(result), [&](instruction_ref input) { + if(input->get_shape().packed() or input->get_shape().broadcasted()) + return input; + return mpm.get_module().insert_instruction( + std::next(input), make_op("contiguous"), input); + }); + return result; +} + +struct find_mlir_split_reduce +{ + mlir_mode conv_mode = mlir_mode::none; + mlir_mode dot_mode = mlir_mode::none; + auto matcher() const + { + auto dot_or_conv = match::name("gpu::mlir_op"); + // TODO: Handle reshapes inbetween + return mlir_split_reduce()(match::any_of[match::inputs()](dot_or_conv.bind("gemm"))); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto reduce_ins = r.result; + auto gemm_ins = r.instructions["gemm"]; + assert(gemm_ins->get_shape().sub_shapes().empty()); + auto* rm = reduce_ins->module_inputs().front(); + auto names = rm->get_parameter_names(); + 
std::sort(names.begin(), names.end()); + module_ref gemm_old_mm = gemm_ins->module_inputs().front(); + module_ref mm = mpm.create_module(gemm_old_mm->name() + "_" + rm->name(), *gemm_old_mm); + // remove last return instruction + if(std::prev(mm->end())->name() == "@return") + { + mm->remove_instruction(std::prev(mm->end())); + } + mm->set_bypass(); + std::unordered_map param_map; + param_map[gemm_ins] = std::prev(mm->end()); + bool gemm_has_multi_outs = gemm_ins->outputs().size() > 1; + auto return_vals = mm->fuse(*rm, reduce_ins->inputs(), ¶m_map, &unroll_pointwise); + if(gemm_has_multi_outs) + { + return_vals.insert(return_vals.end(), param_map[gemm_ins]); + } + mm->add_return(return_vals); + std::vector inputs; + std::copy_if(reduce_ins->inputs().begin(), + reduce_ins->inputs().end(), + std::back_inserter(inputs), + [&](auto input) { return input != gemm_ins; }); + inputs.insert(inputs.end(), gemm_ins->inputs().begin(), gemm_ins->inputs().end()); + if(gemm_has_multi_outs) + { + auto fused_ins = mpm.get_module().insert_instruction( + reduce_ins, mlir_op{gemm_ins->get_operator()}, mlir_contiguous(mpm, inputs), {mm}); + auto dot_ins = mpm.get_module().insert_instruction( + reduce_ins, + migraphx::make_op("get_tuple_elem", {{"index", return_vals.size() - 1}}), + fused_ins); + + mpm.get_module().replace_instruction(gemm_ins, dot_ins); + for(const auto& outs : reduce_ins->outputs()) + { + assert(outs->get_operator().name() == "get_tuple_elem"); + mpm.get_module().replace_instruction(outs, outs->get_operator(), fused_ins); + } + } + else + { + mpm.get_module().replace_instruction( + reduce_ins, mlir_op{gemm_ins->get_operator()}, mlir_contiguous(mpm, inputs), {mm}); + } + } +}; + +struct find_mlir_fused_ops +{ + mlir_mode conv_mode = mlir_mode::none; + mlir_mode dot_mode = mlir_mode::none; + auto matcher() const + { + auto reshapes = reshaper_names(); + // slice is not supported + reshapes.erase("slice"); + auto dot_or_conv = match::skip(match::name(reshapes))( + match::any_of(is_mlir_dot(dot_mode), is_mlir_conv(conv_mode)).bind("gemm_based_op")); + return mlir_pointwise()(match::any_of[match::inputs()](dot_or_conv.bind("x"))); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto pw_ins = r.result; + auto gemm_based_op = r.instructions["gemm_based_op"]; + auto x_ins = r.instructions["x"]; // input to pointwise after reshaper op stream + auto* pm = pw_ins->module_inputs().front(); + auto pw_inputs = pw_ins->inputs(); + // only of one of the inputs to pointwise module should be dependent on conv/gemm that is + // being fused, otherwise it can create invalid graph transformation + if(std::any_of(pw_inputs.begin(), pw_inputs.end(), [&](const auto& i) { + return i != x_ins and reaches(gemm_based_op, i); + })) + return; + auto names = pm->get_parameter_names(); + std::sort(names.begin(), names.end()); + module_ref mm = mpm.create_module("mlir_" + pm->name()); + mm->set_bypass(); + auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op( + mm, gemm_based_op->inputs(), gemm_based_op->get_operator()); + std::unordered_map param_map = + create_param_map_with_literals(mm, pm, pw_ins->get_shape()); + auto [upper_input, op_stream] = get_fusable_input_op_stream(x_ins); + assert(upper_input == gemm_based_op); + auto prev_input = anchor_op; + for(const auto& op : reverse(op_stream)) + { + prev_input = mm->add_instruction(op, {prev_input}); + } + assert(prev_input->get_shape().lens() == x_ins->get_shape().lens()); + param_map[x_ins] = prev_input; // this is to 
avoid adding parameter for gemm/conv reshaped + // input to pointwise in new fused module + bool gemm_has_multi_outs = gemm_based_op->outputs().size() > 1; + auto reshaped_gemm = x_ins; + std::vector reshapes_vec; + while(reshaped_gemm != gemm_based_op) + { + reshapes_vec.push_back(reshaped_gemm); + gemm_has_multi_outs = gemm_has_multi_outs or reshaped_gemm->outputs().size() > 1; + reshaped_gemm = reshaped_gemm->inputs().at(0); + } + reshapes_vec.push_back(reshaped_gemm); + + auto return_vals = mm->fuse(*pm, pw_ins->inputs(), ¶m_map); + if(gemm_has_multi_outs) + { + return_vals.insert(return_vals.begin(), anchor_op); + } + mm->add_return(return_vals); + + std::vector inputs; + std::copy_if(pw_ins->inputs().begin(), + pw_ins->inputs().end(), + std::back_inserter(inputs), + [&](auto input) { return input != x_ins; }); + inputs.insert(inputs.end(), top_inputs.begin(), top_inputs.end()); + if(gemm_has_multi_outs) + { + auto fused_ins = mpm.get_module().insert_instruction( + pw_ins, mlir_op{gemm_based_op->get_operator()}, mlir_contiguous(mpm, inputs), {mm}); + mpm.get_module().replace_instruction( + pw_ins, migraphx::make_op("get_tuple_elem", {{"index", 1}}), fused_ins); + auto dot_ins = mpm.get_module().insert_instruction( + pw_ins, migraphx::make_op("get_tuple_elem", {{"index", 0}}), fused_ins); + // move all the reshape instructions and original GEMM instruction after the fused op to + // avoid generating invalid migraphx program + for(const auto& orig_i : reverse(reshapes_vec)) + { + mpm.get_module().move_instruction(orig_i, pw_ins); + } + mpm.get_module().replace_instruction(gemm_based_op, dot_ins); + } + else + { + mpm.get_module().replace_instruction( + pw_ins, mlir_op{gemm_based_op->get_operator()}, mlir_contiguous(mpm, inputs), {mm}); + } + } +}; + +template +struct find_mlir_standalone_op +{ + mlir_mode mode = mlir_mode::none; + std::size_t* counter = nullptr; + auto matcher() const { return Matcher(mode); } + + std::string get_count() const + { + if(counter == nullptr) + MIGRAPHX_THROW("Invalid counter"); + return std::to_string((*counter)++); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto gemm_based_op = r.result; + // enable only for fp32/fp16/i8/fp8 types + if(std::any_of(gemm_based_op->inputs().begin(), gemm_based_op->inputs().end(), [&](auto i) { + return not contains({shape::type_t::float_type, + shape::type_t::half_type, + shape::type_t::bf16_type, + shape::type_t::int8_type, + shape::type_t::fp8e4m3fnuz_type, + shape::type_t::fp8e5m2fnuz_type, + shape::type_t::fp8e4m3fn_type, + shape::type_t::fp8e5m2_type}, + i->get_shape().type()); + })) + return; + std::string module_name = "mlir_" + gemm_based_op->name() + get_count(); + if(mpm.get_module().name() != "main") + module_name = mpm.get_module().name() + ":" + module_name; + module_ref mm = mpm.create_module(module_name); + mm->set_bypass(); + auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op( + mm, gemm_based_op->inputs(), gemm_based_op->get_operator()); + mm->add_return({anchor_op}); + mpm.get_module().replace_instruction(gemm_based_op, + mlir_op{gemm_based_op->get_operator()}, + mlir_contiguous(mpm, top_inputs), + {mm}); + } +}; + +using find_mlir_standalone_convolution_op = find_mlir_standalone_op<&is_mlir_conv>; +using find_mlir_standalone_dot_op = find_mlir_standalone_op<&is_mlir_dot>; + +struct find_mlir_standalone_attention_op +{ + mlir_mode dot_mode = mlir_mode::none; + + auto matcher() const + { + auto gemm1 = + 
match::skip(match::name("contiguous"))(match::used_once(), is_mlir_dot(dot_mode)) + .bind("gemm1"); + auto fused_reduce = + match::name("fused_reduce")(match::used_once(), + match::any_of[match::inputs()]( + match::skip(match::name("reshape").bind("rsp"))(gemm1))) + .bind("fused_reduce"); + return is_mlir_dot(dot_mode)(match::arg(0)(fused_reduce)).bind("gemm2"); + } + + std::unordered_map + invert_map_ins(const std::unordered_map& map_ins) const + { + std::unordered_map inverse_map; + for(auto const& [key, value] : map_ins) + { + assert(not contains(inverse_map, value)); + inverse_map[value] = key; + } + return inverse_map; + } + + auto finalize_attention_module(module_ref m) const + { + eliminate_common_subexpression{}.apply(*m); + dead_code_elimination{}.apply(*m); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto gemm2 = r.instructions["gemm2"]; + auto fused_reduce = r.instructions["fused_reduce"]; + auto gemm1 = r.instructions["gemm1"]; + + auto axes = fused_reduce->get_operator().to_value()["axes"]; + if(axes.size() != 1) + return; + + module m_attn; + std::unordered_map map_main_to_mattn; + + // Add first gemm and fuse any input shape ops + module fuse_gemm1; + auto [anchor_op, top_inputs] = + fuse_input_ops_and_gemm_based_op(&fuse_gemm1, gemm1->inputs(), gemm1->get_operator()); + fuse_gemm1.add_return({anchor_op}); + m_attn.add_params(top_inputs, &map_main_to_mattn); + + std::unordered_map map_gemm1_to_mattn(map_main_to_mattn); + auto m_gemm1 = m_attn.fuse(fuse_gemm1, top_inputs, &map_gemm1_to_mattn).front(); + map_main_to_mattn[gemm1] = m_gemm1; + + if(contains(r.instructions, "rsp")) + { + auto rsp = r.instructions["rsp"]; + auto m_rsp = m_attn.add_instruction(rsp->get_operator(), {m_gemm1}); + map_main_to_mattn[rsp] = m_rsp; + } + // Add pointwise-softmax, unroll any pointwise modules back to base ops + m_attn.add_params(fused_reduce->inputs(), &map_main_to_mattn); + std::unordered_map map_mfr_to_mattn(map_main_to_mattn); + auto pw_softmax = m_attn + .fuse(*fused_reduce->module_inputs().front(), + fused_reduce->inputs(), + &map_mfr_to_mattn, + &unroll_pointwise) + .front(); + + // fused_reduce submodule should end with a softmax + auto result = match::match_instruction(m_attn, pw_softmax, match::softmax()); + if(result.result != pw_softmax) + return; + + // Insert explict softmax op - required for MLIR + auto softmax_in = result.instructions["x"]; + auto softmax = m_attn.insert_instruction( + std::next(softmax_in), make_op("softmax", {{"axis", axes.front()}}), softmax_in); + map_main_to_mattn[fused_reduce] = softmax; + + // all preceeding ops should be fusable ops + if(not std::all_of(m_gemm1, softmax, [](auto i) { + return (is_pointwise_op_supported_by_mlir(i) or + contains(reshaper_names(), i.name())); + })) + return; + + // Add second gemm and fuse any input shape ops + module fuse_gemm2; + auto [anchor_op2, top_inputs2] = + fuse_input_ops_and_gemm_based_op(&fuse_gemm2, gemm2->inputs(), gemm2->get_operator()); + fuse_gemm2.add_return({anchor_op2}); + m_attn.add_params(top_inputs2, &map_main_to_mattn); + + std::unordered_map map_gemm2_to_mattn(map_main_to_mattn); + auto m_gemm2 = m_attn.fuse(fuse_gemm2, top_inputs2, &map_gemm2_to_mattn).front(); + map_main_to_mattn[gemm2] = m_gemm2; + + // Fuse any succeeding pointwise module + if(contains(r.instructions, "trailing_pm")) + { + auto trailing_pm_ins = r.instructions["trailing_pm"]; + auto lit_map = create_param_map_with_literals( + &m_attn, trailing_pm_ins->module_inputs().front(), 
trailing_pm_ins->get_shape()); + m_attn.add_params(trailing_pm_ins->inputs(), &map_main_to_mattn); + map_main_to_mattn.insert(lit_map.begin(), lit_map.end()); + std::unordered_map map_pm_to_mattn(map_main_to_mattn); + auto fused_pw_outs = m_attn + .fuse(*trailing_pm_ins->module_inputs().front(), + trailing_pm_ins->inputs(), + &map_pm_to_mattn) + .front(); + map_main_to_mattn[trailing_pm_ins] = fused_pw_outs; + m_attn.add_return({fused_pw_outs}); + } + else + { + m_attn.add_return({m_gemm2}); + } + + finalize_attention_module(&m_attn); + auto map_mattn_to_main = invert_map_ins(map_main_to_mattn); + auto new_inputs = m_attn.get_inputs(map_mattn_to_main); + + module_ref mpm_attn = mpm.create_module( + "mlir_attn_" + fused_reduce->module_inputs().front()->name(), std::move(m_attn)); + mpm_attn->set_bypass(); + + mpm.get_module().replace_instruction( + r.result, mlir_op{gemm1->get_operator()}, mlir_contiguous(mpm, new_inputs), {mpm_attn}); + } +}; + +struct find_mlir_attention_fused_ops : public find_mlir_standalone_attention_op +{ + auto matcher() const + { + auto standalone_matcher = find_mlir_standalone_attention_op::matcher(); + return mlir_pointwise()( + match::any_of[match::inputs()](standalone_matcher).bind("trailing_pm")); + ; + } +}; + +struct find_pointwise_mlir +{ + auto supported_pointwise() const { return mlir_input_pointwise(match::used_once()); } + + auto matcher() const + { + return match::name("gpu::mlir_op")(match::any_of[match::inputs()](supported_pointwise())); + } + + static bool is_simple_op(const_module_ref pm, std::initializer_list op_names) + { + auto last = std::prev(pm->end()); + assert(last->name() == "@return"); + if(last->inputs().size() != 1) + return false; + auto rins = last->inputs().front(); + auto op_ins = std::find_if(pm->begin(), pm->end(), [](const instruction& x) { + return not contains({"@param", "@literal", "broadcast", "multibroadcast"}, x.name()); + }); + if(op_ins != rins) + return false; + return contains(op_names, op_ins->name()); + } + + static instruction_ref insert_pointwise(module& m, + instruction_ref ins, + const operation& op, + const std::vector& inputs, + const std::vector& mod_args) + { + // Only used in assert + (void)mod_args; + assert(mod_args.empty()); + return insert_common_op(m, ins, op, inputs, {.common_type = false}); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + + auto* mm = ins->module_inputs().front(); + std::vector pws; + std::copy_if( + ins->inputs().begin(), + ins->inputs().end(), + std::back_inserter(pws), + [&](instruction_ref input) { + if(not match::instruction_matches(mpm.get_module(), input, supported_pointwise())) + return false; + auto* pm = input->module_inputs().front(); + if(input->inputs().size() > 1 and not is_simple_op(pm, {"dequantizelinear"})) + { + if(not enabled(MIGRAPHX_ENABLE_MLIR_INPUT_FUSION{})) + return false; + } + return true; + }); + if(pws.empty()) + return; + + std::string module_name; + std::transform( + pws.begin(), pws.end(), join_back_inserter(module_name), [](instruction_ref pw) { + return pw->module_inputs().front()->name() + ":"; + }); + module_name += mm->name(); + module_ref m = mpm.create_module(module_name); + m->set_bypass(); + + std::unordered_map map_ins; + for(auto pw : pws) + { + auto* pm = pw->module_inputs().front(); + fuse_input_ops(m, pw->inputs(), &map_ins); + auto rins = m->fuse(*pm, pw->inputs(), &map_ins, &insert_pointwise).front(); + map_ins[pw] = rins; + } + + auto ret = m->fuse(*mm, ins->inputs(), 
&map_ins); + m->add_return({ret}); + + auto inputs = find_inputs(map_ins, &mpm.get_module(), m); + mpm.get_module().replace_instruction( + ins, ins->get_operator(), mlir_contiguous(mpm, inputs), {m}); + } +}; + +struct find_unpack_int4_mlir_op +{ + auto matcher() const + { + return match::name("gpu::mlir_op")( + match::any_of[match::inputs()](match::name("unpack_int4").bind("unpack_int4"))); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + auto* mm = ins->module_inputs().front(); + module_ref nm = mpm.create_module("int4:" + mm->name()); + nm->set_bypass(); + + std::vector x_in; + std::unordered_map map_ins; + int ct = 0; + + for(auto input : ins->inputs()) + { + if(input->get_operator().name() == "unpack_int4") + { + auto unpack_input = input->inputs()[0]; + instruction_ref t_ins = + nm->add_parameter(param_name(++ct), unpack_input->get_shape().as_standard()); + map_ins[input] = nm->add_instruction(input->get_operator(), t_ins); + x_in.push_back(unpack_input); + } + else + { + map_ins[input] = + nm->add_parameter(param_name(++ct), input->get_shape().as_standard()); + x_in.push_back(input); + } + } + auto ret = nm->fuse(*mm, ins->inputs(), &map_ins); + nm->add_return({ret}); + mpm.get_module().replace_instruction(ins, ins->get_operator(), x_in, {nm}); + } +}; + +} // namespace + +#endif // MIGRAPHX_MLIR + +void fuse_mlir::apply(module_pass_manager& mpm) const +{ +#ifdef MIGRAPHX_MLIR + std::size_t counter = 0; + const auto& device_name = ctx == nullptr ? "" : ctx->get_current_device().get_gfx_name(); + const bool is_navi = starts_with(device_name, "gfx11") or starts_with(device_name, "gfx12"); + + auto get_mode = [&](std::string_view option, mlir_mode m1, mlir_mode m2 = mlir_mode::fast) { + if(specific_op(option)) + return mlir_mode::none; + if(specific_op(option)) + return mlir_mode::all; + if(is_navi) + return mlir_mode::all; + return std::max(m1, m2); + }; + + // Attention offloads; default disabled + if(mlir_attention_enabled(ctx) or enable_extra) + { + match::find_matches(mpm, find_mlir_attention_fused_ops{mlir_mode::all}); + mpm.run_pass(dead_code_elimination{}); + match::find_matches(mpm, find_mlir_standalone_attention_op{mlir_mode::all}); + mpm.run_pass(dead_code_elimination{}); + } + + match::find_matches( + mpm, + find_mlir_fused_ops{.conv_mode = get_mode("fused_convolution", mlir_mode::fast), + .dot_mode = get_mode("fused_dot", mlir_mode::fast)}); + + match::find_matches( + mpm, + find_mlir_standalone_convolution_op{.mode = get_mode("convolution", mlir_mode::fast), + .counter = &counter}, + find_mlir_standalone_dot_op{.mode = get_mode("dot", mlir_mode::fast), .counter = &counter}); + + mpm.run_pass(dead_code_elimination{}); + if(enabled(MIGRAPHX_ENABLE_MLIR_REDUCE_FUSION{})) + { + match::find_matches( + mpm, + find_mlir_split_reduce{.conv_mode = get_mode("fused_convolution", mlir_mode::fast), + .dot_mode = get_mode("fused_dot", mlir_mode::fast)}); + } + + match::find_matches(mpm, find_pointwise_mlir{}); + match::find_matches(mpm, find_unpack_int4_mlir_op{}); + +#else + (void)mpm; +#endif +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/fuse_ops.cpp b/docker/rocm/migraphx/targets/gpu/fuse_ops.cpp new file mode 100644 index 000000000..5e93ccf5e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/fuse_ops.cpp @@ -0,0 +1,1060 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPBLASLT_GEMM) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_MIOPEN_FUSION) +#if MIGRAPHX_USE_MIOPEN +struct fusion +{ + using op_t = miopenFusionOpDescriptor_t; + shared fp; + + // Used as a temporary hack to keep descriptor references alive + std::vector> storage; + + template + auto keep_alive(T x) + { + auto result = share(std::move(x)); + storage.push_back(result); + return result; + } + + fusion() = default; + + fusion(const shape& input) + { + assert(input.standard()); + auto t = make_tensor(input); + fp = make_fusion_plan(t); + assert(fp); + keep_alive(std::move(t)); + } + + bool empty() const { return fp == nullptr; } + + op_t operator[](std::size_t i) const + { + assert(fp); + op_t result; + auto status = miopenFusionPlanGetOp(fp.get(), i, &result); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("Failed retrieving operator at " + std::to_string(i)); + return result; + } + + auto get() const + { + assert(fp); + return fp.get(); + } + + op_t create_bias(const shape& bias) + { + assert(fp); + op_t result; + auto b = shape{bias.type(), {1, bias.lens().at(1), 1, 1}}; + auto t = keep_alive(make_tensor(b)); + auto status = miopenCreateOpBiasForward(fp.get(), &result, t.get()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("Creating operator failed"); + return result; + } + + op_t create_relu() + { + assert(fp); + op_t result; + auto status = miopenCreateOpActivationForward(fp.get(), &result, miopenActivationRELU); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("Creating operator failed"); + return result; + } + + op_t create_conv(const op::convolution& op, const shape& weights) + { + assert(fp); + op_t result; + auto cd = keep_alive(make_conv(op)); + auto t = keep_alive(make_tensor(weights)); + auto status = miopenCreateOpConvForward(fp.get(), &result, cd.get(), t.get()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("Creating operator failed"); + return result; + } + + shape get_workspace(context&) + { + // assert(fp); + // TODO: Use zero workspace for now + std::size_t ws_size = 0; + // int algo_count = 1; + // miopenConvFwdAlgorithm_t algo; + // miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, 
&algo_count, &algo); + // miopenFusionPlanGetWorkSpaceSize(ctx.get_stream().get_miopen(), fp.get(), &ws_size, + // algo); + return shape{shape::int8_type, {ws_size}}; + } + + bool compile(context& ctx) + { + assert(fp); + return miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get()) == + miopenStatusSuccess; + } + + argument execute(context& ctx, + const fused_operator_args& fargs, + const argument& x, + const argument& y) const + { + assert(fp); + auto x_td = make_tensor(x.get_shape()); + auto y_td = make_tensor(y.get_shape()); + auto status = miopenExecuteFusionPlan(ctx.get_stream().get_miopen(), + fp.get(), + x_td.get(), + x.implicit(), + y_td.get(), + y.implicit(), + fargs.get()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("Failed to execute fusion plan"); + return y; + } +}; +#endif + +const std::unordered_set& get_supported_archs() +{ + static std::unordered_set supported_archs{ + "gfx900", "gfx906", "gfx908", "gfx1030", "gfx940"}; + return supported_archs; +} +#if MIGRAPHX_USE_MIOPEN +MIGRAPHX_PRED_MATCHER(bias_shape, instruction_ref ins) +{ + auto&& s = ins->get_shape(); + return s.broadcasted() and s.strides().size() == 4 and s.strides()[0] == 0 and + s.strides()[1] != 0 and s.strides()[2] == 0 and s.strides()[3] == 0; +} + +MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins) +{ + const auto device_name = trim(split_string(get_device_name(), ':').front()); + if(not contains(get_supported_archs(), device_name)) + return false; + if(enabled(MIGRAPHX_DISABLE_MIOPEN_FUSION{})) + return false; + if(ins->name() != "gpu::convolution") + return false; + if(ins->get_shape().type() != shape::float_type) + return false; + auto wei = ins->inputs().at(1)->get_shape(); + assert(wei.lens().size() == 4); + auto miopen_conv_op = ins->get_operator().to_value(); + auto algo = miopen_conv_op.at("algo").to(); + auto conv_op = from_value(miopen_conv_op["op"]); + if(conv_op.group > 1) + return false; + if(wei.lens()[1] > 512 and algo != miopenConvolutionFwdAlgoWinograd) + return false; + + // Do not fuse non-symmetric input + auto input_lens = ins->inputs().at(0)->get_shape().lens(); + if(input_lens[2] != input_lens[3] or wei.lens()[2] != wei.lens()[3]) + return false; + + // Dont fuse winograd for non-3x3s since there is no fused windograd for those configs + if(algo == miopenConvolutionFwdAlgoWinograd and wei.lens()[2] != 3 and wei.lens()[3] != 3 and + contains({{1, 1}}, conv_op.stride)) + return false; + return contains({{0, 0, 0, 0}, {1, 1, 1, 1}, {2, 2, 2, 2}}, conv_op.padding) and + contains({{0, 0}, {1, 1}}, conv_op.stride) and contains({{1, 1}}, conv_op.dilation); +} +#endif + +void move_broadcasted_back(std::vector& args) +{ + // Ensure the last arguments is the broadcasted one + auto last = std::prev(args.end()); + auto it = + std::find_if(args.begin(), last, [](auto arg) { return arg->get_shape().broadcasted(); }); + if(it != last) + std::swap(*it, *std::prev(last)); +} + +void move_standard_front(std::vector& args) +{ + // Ensure the first arguments is the standard one + auto last = std::prev(args.end()); + auto it = + std::find_if(args.begin(), last, [](auto arg) { return arg->get_shape().standard(); }); + if(it != last) + std::swap(*it, args.front()); +} + +auto gpu_name(const std::string& s) { return match::name("gpu::" + s); } + +namespace { +#if MIGRAPHX_USE_MIOPEN +struct miopen_fusion +{ + struct fuse_op_data + { + operation op; + float alpha = 1; + float beta = 0; + }; + struct fuse_op : fuse_op_data, reflect_equality, reflect_stream + { + template + static 
auto reflect(Self& self, F f) + { + return pack(f(self.op, "op"), f(self.alpha, "alpha"), f(self.beta, "beta")); + } + }; + std::vector ops = {}; + fusion f = {}; + std::function&)> execute; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.ops, "ops")); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } + + value compile(context& ctx, const shape&, std::vector inputs) + { + // Compensate for allocation + inputs.pop_back(); + std::size_t i = 0; + f = fusion(inputs[i]); + i++; + std::vector&)>> + invokers; + for(auto&& fop : ops) + { + if(i > inputs.size()) + { + f = {}; + return {}; + } + if(fop.op.name() == "convolution") + { + auto* mop = f.create_conv(any_cast(fop.op), inputs[i]); + invokers.push_back( + [=](const fused_operator_args& fargs, const std::vector& args) { + miopenSetOpArgsConvForward( + fargs.get(), mop, &fop.alpha, &fop.beta, args[i].implicit()); + }); + i++; + } + else if(fop.op.name() == "add") + { + auto* mop = f.create_bias(inputs[i]); + invokers.push_back( + [=](const fused_operator_args& fargs, const std::vector& args) { + miopenSetOpArgsBiasForward( + fargs.get(), mop, &fop.alpha, &fop.beta, args[i].implicit()); + }); + i++; + } + else if(fop.op.name() == "relu") + { + auto* mop = f.create_relu(); + invokers.push_back([=](const fused_operator_args& fargs, + const std::vector&) { + miopenSetOpArgsActivForward(fargs.get(), mop, &fop.alpha, &fop.beta, 0, 0, 0); + }); + } + else + { + f = {}; + return {}; + } + } + if(not f.compile(ctx)) + { + f = {}; + return {}; + } + execute = [invokers](context& c, const fusion& ff, const std::vector& args) { + auto fargs = make_fused_args(); + for(auto&& invoker : invokers) + invoker(fargs, args); + ff.execute(c, fargs, args.front(), args.back()); + }; + return {{"workspace", f.get_workspace(ctx).bytes()}}; + } + void finalize(context& ctx, const shape& output_shape, const std::vector& inputs) + { + if(not f.empty()) + return; + auto v = compile(ctx, output_shape, inputs); + if(not v.is_object()) + MIGRAPHX_THROW("Failed to compile fusion plan"); + } + std::string name() const { return "gpu::miopen_fusion"; } + shape compute_shape(const std::vector& inputs) const + { + if(ops.empty()) + return {}; + // TODO: Check number of arguments + return ops.front().op.compute_shape({inputs[0], inputs[1]}); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + execute(ctx, f, args); + return args.back(); + } +}; +MIGRAPHX_REGISTER_OP(miopen_fusion) + +struct miopen_conv_bias +{ + op::convolution op; + fusion fp = {}; + fusion::op_t conv = {}; + fusion::op_t bias = {}; + + template + static auto reflect(Self& self, F f) + { + return op::convolution::reflect(self.op, f); + } + + std::string name() const { return "gpu::conv_bias"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(5); + // TODO: Check slices + return op.normalize_compute_shape({inputs.at(0), inputs.at(1)}); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + auto fargs = make_fused_args(); + float alpha = 1; + float beta = 0; + miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit()); + miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit()); + return fp.execute(ctx, fargs, args[0], args[4]); + } + + void finalize(context& ctx, const shape&, const std::vector& inputs) + { + fp = fusion(inputs[0]); + conv = fp.create_conv(op, inputs[1]); + 
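// note (editor, inferred from apply_conv_bias defined below): inputs are laid out as {x, weights, workspace, bias, output allocation} + 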
bias = fp.create_bias(inputs[3]); + if(not fp.compile(ctx)) + MIGRAPHX_THROW("Failed to compile fusion plan"); + } + + shape get_workspace(context& ctx) { return fp.get_workspace(ctx); } + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +MIGRAPHX_REGISTER_OP(miopen_conv_bias) + +struct miopen_conv_bias_relu +{ + op::convolution op; + fusion fp = {}; + fusion::op_t conv = {}; + fusion::op_t bias = {}; + fusion::op_t relu = {}; + + template + static auto reflect(Self& self, F f) + { + return op::convolution::reflect(self.op, f); + } + + std::string name() const { return "gpu::conv_bias_relu"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(5); + // TODO: Check slices + return op.normalize_compute_shape({inputs.at(0), inputs.at(1)}); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + auto fargs = make_fused_args(); + float alpha = 1; + float beta = 0; + miopenSetOpArgsConvForward(fargs.get(), conv, &alpha, &beta, args[1].implicit()); + miopenSetOpArgsBiasForward(fargs.get(), bias, &alpha, &beta, args[3].implicit()); + miopenSetOpArgsActivForward(fargs.get(), relu, &alpha, &beta, 0, 0, 0); + return fp.execute(ctx, fargs, args[0], args[4]); + } + void finalize(context& ctx, const shape&, const std::vector& inputs) + { + fp = fusion(inputs[0]); + conv = fp.create_conv(op, inputs[1]); + bias = fp.create_bias(inputs[3]); + relu = fp.create_relu(); + fp.compile(ctx); + } + + shape get_workspace(context& ctx) { return fp.get_workspace(ctx); } + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +MIGRAPHX_REGISTER_OP(miopen_conv_bias_relu) + +template +auto conv_bias(Ms... ms) +{ + return match::name("gpu::add")( + match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"), + fusable_conv(match::used_once()).bind("conv")), + ms...); +} + +template +void apply_conv_bias(context& ctx, module& m, const match::matcher_result& r) +{ + auto conv_ins = r.instructions["conv"]; + auto bias_ins = r.instructions["bias"]; + auto ins = r.result; + auto input_ins = conv_ins->inputs().at(0); + auto weights_ins = conv_ins->inputs().at(1); + auto conv_op = from_value((conv_ins->get_operator()).to_value()["op"]); + auto alloc_ins = ins->inputs().back(); + auto old_ws_ins = conv_ins->inputs().at(2); + + Op cb{conv_op}; + // TODO: Insert ws allocation + auto ws = cb.get_workspace(ctx); + (void)ws; + m.replace_instruction(ins, cb, input_ins, weights_ins, old_ws_ins, bias_ins, alloc_ins); +} +#endif + +template +inline auto precompile_name(Strings... 
names) // NOLINT +{ + return match::make_basic_pred_matcher([=](instruction_ref ins) { + if(ins->name() != "gpu::precompile_op") + return false; + auto op = from_value(ins->get_operator().to_value().at("op")); + return (contains({names...}, op.name())); + }); +} + +#if MIGRAPHX_USE_MIOPEN +struct find_conv_bias +{ + context* ctx = nullptr; + auto matcher() const + { + auto relu = match::name(std::unordered_set{"gpu::relu"}); + return conv_bias(match::none_of(match::output(relu))); + } + + void apply(module& m, const match::matcher_result& r) const + { + apply_conv_bias(*ctx, m, r); + } +}; + +struct find_conv_bias_relu +{ + context* ctx = nullptr; + auto matcher() const { return match::name("gpu::relu")(match::arg(0)(conv_bias())); } + + void apply(module& m, const match::matcher_result& r) const + { + apply_conv_bias(*ctx, m, r); + } +}; +struct find_conv_pointwise +{ + context* ctx = nullptr; + auto matcher() const + { + return precompile_name("pointwise")( + match::nargs(3), + match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"), + fusable_conv(match::used_once()).bind("conv"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto conv_ins = r.instructions["conv"]; + auto bias_ins = r.instructions["bias"]; + auto ins = r.result; + auto input_ins = conv_ins->inputs().at(0); + auto weights_ins = conv_ins->inputs().at(1); + auto conv_op = from_value(conv_ins->get_operator().to_value()["op"]); + auto alloc_ins = ins->inputs().back(); + + module_ref pm = ins->module_inputs().front(); + + miopen_fusion op{}; + op.ops.push_back({{conv_op}}); + for(auto&& i : *pm) + { + if(i.name()[0] == '@') + continue; + op.ops.push_back({{i.get_operator()}}); + } + std::vector inputs = {input_ins, weights_ins, bias_ins, alloc_ins}; + auto v = op.compile(*ctx, ins->get_shape(), to_shapes(inputs)); + if(not v.is_object()) + return; + m.replace_instruction(ins, op, inputs); + } +}; +#endif + +#if MIGRAPHX_USE_ROCBLAS or MIGRAPHX_USE_HIPBLASLT +struct gemm_pointwise +{ + // TODO: Move to matcher.hpp + static auto match_param(const std::string& name) + { + return match::make_basic_pred_matcher([=](auto ins) { + if(ins->name() != "@param") + return false; + auto p = any_cast(ins->get_operator()); + return p.parameter == name; + }); + } + + template + static auto match_mul_const(M m, const std::string& var) + { + return match::name("mul")(match::either_arg(0, 1)(match::name("@literal").bind(var), m)) + .bind(var + "_mul"); + } + + static auto match_add(const std::string& input, const std::string& output) + { + auto param = match::name("@param"); + auto add = match::name("add")(match::args(param, param)); + auto inner_mul = match::any_of(match_mul_const(match_param(input), "alpha"), + match_mul_const(match_param(output), "beta")); + auto mul_add = match::name("add")(match::either_arg(0, 1)(inner_mul, param)); + auto add_mul = match_mul_const(add, "gamma"); + return match::name("@return")(match::args(match::any_of(add, mul_add, add_mul))); + } + + static auto match_mul(const std::string& input) + { + auto mul = match_mul_const(match_param(input), "alpha"); + return match::name("@return")(match::args(mul)); + } + + static float get_float(instruction_ref ins) { return ins->get_literal().at(); } + + template + static bool update_gemm(Gemm& gemm, module_ref pm, unsigned input) + { + auto names = pm->get_parameter_names(); + std::sort(names.begin(), names.end()); + if(names.size() == 1) + { + auto mr = match::match_instruction(*pm, std::prev(pm->end()), match_mul(names[input])); + 
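// not a plain scale-by-literal module, so there is nothing to fold into alpha + 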
if(mr.result == pm->end()) + return false; + gemm.alpha *= get_float(mr.instructions["alpha"]); + return true; + } + else if(names.size() == 2) + { + unsigned output = input == 0 ? 1 : 0; + auto mr = match::match_instruction( + *pm, std::prev(pm->end()), match_add(names[input], names[output])); + if(mr.result == pm->end()) + return false; + if(contains(mr.instructions, "alpha_mul")) + gemm.alpha *= get_float(mr.instructions["alpha"]); + else if(contains(mr.instructions, "beta_mul")) + gemm.beta *= get_float(mr.instructions["beta"]); + else if(contains(mr.instructions, "gamma_mul")) + { + gemm.alpha *= get_float(mr.instructions["gamma"]); + gemm.beta *= get_float(mr.instructions["gamma"]); + } + return true; + } + else + { + return false; + } + } +}; +#endif + +#if MIGRAPHX_USE_ROCBLAS +struct find_rocblas_gemm_pointwise : gemm_pointwise +{ + auto matcher() const + { + auto gemm_op = match::name("gpu::gemm")(match::nargs(3), match::used_once()).bind("gemm"); + auto binary_op = match::all_of( + match::nargs(3), + match::either_arg(0, 1)( + match::any_of(match::standard_shape(), match::is_constant()).bind("c"), gemm_op)); + auto unary_op = match::all_of(match::nargs(2), match::arg(0)(gemm_op)); + return precompile_name("pointwise")(match::any_of(binary_op, unary_op)); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm_ins = r.instructions["gemm"]; + + auto gemm = any_cast>(gemm_ins->get_operator()); + + // Already fused gemm + if(not float_equal(gemm.beta, 0)) + return; + if(ins->inputs().size() == 3) + gemm.beta = 1; + + if(not update_gemm( + gemm, ins->module_inputs().front(), ins->inputs().front() == gemm_ins ? 0 : 1)) + return; + + auto inputs = gemm_ins->inputs(); + inputs.pop_back(); + + if(ins->inputs().size() == 3) + { + auto c_ins = r.instructions["c"]; + shape s = c_ins->get_shape(); + // const-fold input if not standard shape since rocblas can't handle it + // Updated for a case where "standard" shape has out-of-sequence strides + if(not s.standard()) + { + auto c = make_op("contiguous"); + auto l = c.compute(c.compute_shape({c_ins->get_shape()}), {c_ins->eval()}); + c_ins = m.add_literal(l.get_shape(), l.data()); + } + inputs.push_back(c_ins); + } + + inputs.push_back(ins->inputs().back()); + + m.replace_instruction(ins, gemm, inputs); + } +}; +#endif + +#if MIGRAPHX_USE_HIPBLASLT +struct find_hipblas_gemm_pointwise : gemm_pointwise +{ + auto matcher() const + { + auto gemm_op = + match::name("gpu::hipblaslt_op")(match::nargs(3), match::used_once()).bind("hip_gemm"); + auto binary_op = match::all_of( + match::nargs(3), + match::either_arg(0, 1)( + match::any_of(match::standard_shape(), match::is_constant()).bind("c"), gemm_op)); + auto unary_op = match::all_of(match::nargs(2), match::arg(0)(gemm_op)); + return precompile_name("pointwise")(match::any_of(binary_op, unary_op)); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm_ins = r.instructions["hip_gemm"]; + + auto gemm_op = any_cast(gemm_ins->get_operator()).op; + + if(gemm_op.name() != "gpu::hip_gemm") + return; + + auto gemm = any_cast>(gemm_op); + + // Already fused gemm + if(not float_equal(gemm.beta, 0)) + return; + if(ins->inputs().size() == 3) + gemm.beta = 1; + if(not update_gemm( + gemm, ins->module_inputs().front(), ins->inputs().front() == gemm_ins ? 
0 : 1)) + { + return; + } + auto inputs = gemm_ins->inputs(); + inputs.pop_back(); + if(ins->inputs().size() == 3) + { + auto c_ins = r.instructions["c"]; + shape s = c_ins->get_shape(); + // const-fold input if not standard shape + // Updated for a case where "standard" shape has out-of-sequence strides + if(not s.standard()) + { + auto c = make_op("contiguous"); + auto l = c.compute(c.compute_shape({c_ins->get_shape()}), {c_ins->eval()}); + c_ins = m.add_literal(l.get_shape(), l.data()); + } + inputs.push_back(c_ins); + } + inputs.push_back(ins->inputs().back()); + + operation new_gemm_op = gemm; + auto new_ins = m.insert_instruction( + ins, make_op("gpu::hipblaslt_op", {{"op", to_value(new_gemm_op)}}), inputs); + m.replace_instruction(ins, new_ins); + } +}; +#endif + +struct contiguous_transpose_gemm +{ + template + static bool is_swapped(const Vector& perm, std::size_t i, std::size_t j) + { + if(i >= perm.size() or j >= perm.size()) + return false; + auto perm2 = perm; + std::iota(perm2.begin(), perm2.end(), 0); + std::swap(perm2[i], perm2[j]); + return perm2 == perm; + } +}; + +struct find_contiguous_transpose_rocblas_gemm : contiguous_transpose_gemm +{ + auto matcher() const + { + return match::name("gpu::contiguous")(match::arg(0)( + match::name("transpose")( + match::arg(0)(match::name("gpu::gemm")(match::used_once()).bind("gemm"))) + .bind("transpose"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm = r.instructions["gemm"]; + auto alloc = gemm->inputs().back(); + auto transpose = r.instructions["transpose"]; + auto perm = transpose->get_operator().to_value()["permutation"].to_vector(); + auto iperm = invert_permutation(perm); + + if(perm.size() < 3) + return; + + if(not is_swapped(perm, perm.size() - 3, perm.size() - 2)) + return; + + auto lens = gemm->get_shape().lens(); + if(lens.size() > 3 and + not std::all_of(lens.begin(), lens.end() - 3, [](auto i) { return i == 1; })) + return; + + auto gemmv = gemm->get_operator().to_value(); + gemmv["trans_batch"] = 1; + + auto s = shape{alloc->get_shape().type(), reorder_dims(alloc->get_shape().lens(), iperm)}; + auto new_alloc = m.insert_instruction(gemm, make_op("allocate", {{"shape", to_value(s)}})); + auto alloc_transpose = + m.insert_instruction(gemm, make_op("transpose", {{"permutation", perm}}), new_alloc); + + auto inputs = gemm->inputs(); + inputs.back() = alloc_transpose; + auto new_gemm = m.insert_instruction(gemm, make_op("gpu::gemm", gemmv), inputs); + auto gemm_transpoe = m.insert_instruction(gemm, transpose->get_operator(), new_gemm); + + m.replace_instruction(ins, gemm_transpoe); + } +}; + +#if MIGRAPHX_USE_HIPBLASLT +struct find_contiguous_transpose_hip_gemm : contiguous_transpose_gemm +{ + auto matcher() const + { + return match::name("gpu::contiguous")(match::arg(0)( + match::name("transpose")( + match::arg(0)( + match::name("gpu::hipblaslt_op")(match::used_once()).bind("hip_gemm"))) + .bind("transpose"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm_ins = r.instructions["hip_gemm"]; + auto gemm_op = any_cast(gemm_ins->get_operator()).op; + + if(gemm_op.name() != "gpu::hip_gemm") + return; + + auto gemm = any_cast>(gemm_op); + + auto alloc = gemm_ins->inputs().back(); + auto transpose = r.instructions["transpose"]; + auto perm = transpose->get_operator().to_value()["permutation"].to_vector(); + auto iperm = invert_permutation(perm); + + if(perm.size() < 3) + return; + + if(not is_swapped(perm, 
perm.size() - 3, perm.size() - 2)) + return; + + auto lens = gemm_ins->get_shape().lens(); + if(lens.size() > 3 and + not std::all_of(lens.begin(), lens.end() - 3, [](auto i) { return i == 1; })) + return; + + gemm.trans_batch = 1; + + auto s = shape{alloc->get_shape().type(), reorder_dims(alloc->get_shape().lens(), iperm)}; + auto new_alloc = + m.insert_instruction(gemm_ins, make_op("allocate", {{"shape", to_value(s)}})); + + auto alloc_transpose = m.insert_instruction( + gemm_ins, make_op("transpose", {{"permutation", perm}}), new_alloc); + + auto inputs = gemm_ins->inputs(); + inputs.back() = alloc_transpose; + operation new_gemm_op = gemm; + auto new_gemm = m.insert_instruction( + gemm_ins, make_op("gpu::hipblaslt_op", {{"op", to_value(new_gemm_op)}}), inputs); + + auto gemm_transpoe = m.insert_instruction(gemm_ins, transpose->get_operator(), new_gemm); + + m.replace_instruction(ins, gemm_transpoe); + } +}; +#endif + +struct find_commutative_broadcast +{ + auto matcher() const + { + return match::name("gpu::add", "gpu::mul")(match::arg(1)(match::broadcast_shape())); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto args = ins->inputs(); + move_broadcasted_back(args); + + m.replace_instruction(ins, ins->get_operator(), args); + } +}; +} // namespace + +struct find_contiguous +{ + auto matcher() const { return match::name("gpu::contiguous"); } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + + m.replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(make_op("contiguous"))}}), + ins->inputs()); + } +}; + +struct find_contiguous_layout_pointwise +{ + auto matcher() const + { + auto cont_pw = precompile_name("pointwise")(match::any_of[match::inputs()]( + match::name("gpu::contiguous")(match::used_once()).bind("layout_ins"))); + auto layout_pw = precompile_name("pointwise")(match::any_of[match::inputs()]( + precompile_name("layout")(match::used_once()).bind("layout_ins"))); + return match::any_of(cont_pw, layout_pw); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto pw_ins = r.result; + auto layout_ins = r.instructions["layout_ins"]; + auto layout_input = layout_ins->inputs().front(); + auto pw_ins_inputs = pw_ins->inputs(); + replace(pw_ins_inputs, layout_ins, layout_input); + // Ensure the output shape of the pointwise module retains the memory layout + auto pw_op_val = pw_ins->get_operator().to_value(); + pw_op_val["output_shape"] = to_value(pw_ins->get_shape()); + + auto new_ins = m.insert_instruction( + pw_ins, make_op(pw_ins->name(), pw_op_val), pw_ins_inputs, pw_ins->module_inputs()); + m.replace_instruction(pw_ins, new_ins); + } +}; + +struct find_pointwise_layout_contiguous +{ + auto matcher() const + { + auto is_layout = precompile_name("layout")( + match::arg(0)(match::used_once(), precompile_name("pointwise"))); + auto is_contiguous = match::name("gpu::contiguous")( + match::arg(0)(match::used_once(), precompile_name("pointwise"))); + return match::any_of(is_layout, is_contiguous); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto pw = ins->inputs().front(); + auto alloc = ins->inputs().back(); + auto args = pw->inputs(); + args.back() = alloc; + + // Ensure the output shape of the pointwise module retains the memory layout + auto pw_op_val = pw->get_operator().to_value(); + pw_op_val["output_shape"] = to_value(ins->get_shape()); + + m.replace_instruction(ins, make_op(pw->name(), 
pw_op_val), args, pw->module_inputs()); + } +}; + +struct find_layernorm_pointwise +{ + auto matcher() const + { + return precompile_name("pointwise")(match::arg(0)( + precompile_name("gpu::prelayernorm", "gpu::preadd_layernorm").bind("layernorm"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto pw_ins = r.result; + auto layernorm = r.instructions["layernorm"]; + if(not layernorm->module_inputs().empty()) + return; + auto* pm = pw_ins->module_inputs().front(); + auto pw_inputs = pw_ins->inputs(); + auto ln_pos = std::find(pw_inputs.begin(), pw_inputs.end(), layernorm); + assert(ln_pos != pw_inputs.end()); + pw_inputs.erase(ln_pos); + auto inputs = layernorm->inputs(); + inputs.pop_back(); + inputs.insert(inputs.end(), pw_inputs.begin(), pw_inputs.end()); + + // Ensure the output shape retains the memory layout + auto layernorm_op_val = layernorm->get_operator().to_value(); + layernorm_op_val["output_shape"] = to_value(pw_ins->get_shape()); + + m.replace_instruction(pw_ins, make_op(layernorm->name(), layernorm_op_val), inputs, {pm}); + } +}; + +struct find_concat_pointwise +{ + auto matcher() const + { + return precompile_name("pointwise")( + match::arg(0)(precompile_name("concat").bind("concat"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto concat = r.instructions["concat"]; + if(not concat->module_inputs().empty()) + return; + + // TODO: Handle type conversions + if(ins->get_shape().type() != concat->get_shape().type()) + return; + + auto* pm = ins->module_inputs().front(); + auto inputs = concat->inputs(); + inputs.pop_back(); + inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end()); + + auto op = concat->get_operator(); + op.from_value({{"additional_args", ins->inputs().size() - 1}, + {"ignore_modules", true}, + {"output_shape", to_value(ins->get_shape())}}); + + m.replace_instruction(ins, op, inputs, {pm}); + } +}; + +void fuse_ops::apply(module& m) const +{ + match::find_matches(m, find_pointwise_layout_contiguous{}, find_contiguous_layout_pointwise{}); + run_passes(m, {dead_code_elimination{}}); +#if MIGRAPHX_USE_MIOPEN + match::find_matches(m, find_conv_pointwise{ctx}, find_conv_bias_relu{ctx}, find_conv_bias{ctx}); + run_passes(m, {dead_code_elimination{}}); +#endif +#if MIGRAPHX_USE_ROCBLAS + match::find_matches(m, find_rocblas_gemm_pointwise{}); +#endif +#if MIGRAPHX_USE_HIPBLASLT + match::find_matches(m, find_hipblas_gemm_pointwise{}); +#endif + match::find_matches(m, + find_layernorm_pointwise{}, + find_concat_pointwise{}, + find_contiguous_transpose_rocblas_gemm{}, +#if MIGRAPHX_USE_HIPBLASLT + find_contiguous_transpose_hip_gemm{}, +#endif + find_commutative_broadcast{}); + match::find_matches(m, find_contiguous{}); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/gemm_impl.cpp b/docker/rocm/migraphx/targets/gpu/gemm_impl.cpp new file mode 100644 index 000000000..d0f750a25 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/gemm_impl.cpp @@ -0,0 +1,708 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +using microseconds = std::chrono::duration; + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_ROCBLAS +/* +Regular rocBLAS API takes compute_type as `rocblas_datatype` enum value v/s "ex3" BETA API takes it +as `rocblas_computetype` enum value. `rb_compute_type` is faciliator to implictly cast integer enum +value to required type that can be used inside `common_args` generator. +*/ +struct rb_compute_type +{ + int type = 0; + rb_compute_type(rocblas_datatype t) : type(static_cast(t)) {} + rb_compute_type(rocblas_computetype t) : type(static_cast(t)) {} + operator rocblas_datatype() const { return static_cast(type); } + operator rocblas_computetype() const { return static_cast(type); } +}; + +// Convert rocBLAS datatypes to equivalent Migraphx data types +rocblas_datatype get_type(shape::type_t type) +{ + switch(type) + { + case shape::double_type: return rocblas_datatype_f64_r; + case shape::float_type: return rocblas_datatype_f32_r; + case shape::half_type: return rocblas_datatype_f16_r; + case shape::int8_type: return rocblas_datatype_i8_r; + case shape::uint8_type: return rocblas_datatype_u8_r; + case shape::int32_type: return rocblas_datatype_i32_r; + case shape::uint32_type: return rocblas_datatype_u32_r; + case shape::fp8e4m3fnuz_type: return rocblas_datatype_f8_r; + case shape::fp8e5m2fnuz_type: return rocblas_datatype_bf8_r; + case shape::fp8e4m3fn_type: + case shape::fp8e5m2_type: + case shape::tuple_type: + case shape::bool_type: + case shape::uint16_type: + case shape::int16_type: + case shape::int64_type: + case shape::uint64_type: MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!"); + case shape::bf16_type: return rocblas_datatype_bf16_r; + } + + MIGRAPHX_THROW("ROCBLAS_GEMM: data type not supported!"); +} + +void blas_shape(const shape& in_shape) +{ + if(in_shape.lens().size() < 2) + return; + auto s = in_shape.normalize_standard(); + if(std::none_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 1; })) + MIGRAPHX_THROW("GPU_GEMM: needs to have one matrix stride as 1"); + if(std::any_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 0; })) + MIGRAPHX_THROW("GPU_GEMM: matrix dimensions can't be broadcasted"); + if(s.lens().size() < 3) + return; + shape batch_shape{s.type(), + {s.lens().begin(), s.lens().end() - 2}, + 
{s.strides().begin(), s.strides().end() - 2}}; + auto batch_shapes = reduce_dims({batch_shape}); + if(batch_shapes.front().lens().size() != 1) + MIGRAPHX_THROW("GPU_GEMM: Batch dimension is not collapsible"); +} + +shape transpose_batch(const shape& s, unsigned trans_batch) +{ + if(trans_batch == 0) + return s; + if(s.lens().size() < 3) + return s; + auto batch = s.lens().size() - 3; + std::vector perm(s.lens().size()); + std::iota(perm.begin(), perm.end(), 0); + std::swap(perm[batch], perm[batch + trans_batch]); + return shape::from_permutation(s.type(), s.lens(), perm); +} + +/** + * Returns results of rocblas_status_success, rocblas_status_perf_degraded, + * or rocblas_status_invalid_value. Caller + * is expected to check for invalid index. Any other result causes an exception. + * + */ +template +auto rocblas_invoke(F f, Pack p, Ts... xs) +{ + return p([=](auto... ws) { + auto status = f(ws..., xs...); + if(status != rocblas_status_success and status != rocblas_status_invalid_value) + { + if(status == rocblas_status_perf_degraded) + { + std::cerr << "WARNING: degraded perf. in rocBLAS call" << std::endl; + } + else + MIGRAPHX_THROW("rocblas_invoke: rocBLAS call failed with status " + + std::to_string(status)); + } + return status; + }); +} + +static bool is_transposed(const shape& s) +{ + if(s.transposed()) + { + return s.strides().back() != 1; + } + + if(not s.broadcasted() and s.strides() != s.as_standard().strides()) + { + auto perm = find_permutation(s); + return not std::is_sorted(perm.begin(), perm.end()); + } + + return false; +} + +static rocblas_int get_batch_stride(const shape& s) +{ + // This value is not needed for non-strided inputs + if(s.strides().size() < 3) + return 0; + else + return s.strides()[s.strides().size() - 3]; +} + +/** + * Wrapper for multiple rocBLAS calls. The constructor creates parameters for + * these calls based on data shapes and other values contained in the associated + * instruction and operation. + * + * The template parameter T is not the type of the matrix data but of the weighting + * coefficients alpha and beta (these are float in rocBLAS internals) + */ +template +struct gemm_impl +{ + gemm_impl(const shape& output_shape, + const std::vector& input_shapes, + T alpha_param, + T beta_param, + bool compute_fp32_flag) + : alpha(alpha_param), + beta(beta_param), + is_3inputs(input_shapes.size() == 4), + compute_fp32(compute_fp32_flag) + { + if(not is_3inputs) + { + beta = 0; + } + + // Create lambdas that will cast alpha, beta to the output shape's type + // and retain the values being pointed to + output_shape.visit_type([&](auto as) { + auto alpha_r = as(alpha); + auto beta_r = as(beta); + if(compute_fp32) + { + get_alpha = [=] { return α }; + get_beta = [=] { return β }; + } + else + { + get_alpha = [=] { return &alpha_r; }; + get_beta = [=] { return &beta_r; }; + } + }); + + transa = is_transposed(input_shapes[0]); + transb = is_transposed(input_shapes[1]); + auto n_dim = output_shape.lens().size(); + auto dim_0 = n_dim - 2; + auto dim_1 = n_dim - 1; + // Leading dimensions of matrices + lda = input_shapes[0].strides()[transa ? dim_1 : dim_0]; + ldb = input_shapes[1].strides()[transb ? dim_1 : dim_0]; + ldc = input_shapes[2].strides()[dim_0]; + ldd = is_3inputs ? 
input_shapes[3].strides()[dim_0] : ldc; + + arg_type = get_type(input_shapes[0].type()); + output_type = get_type(input_shapes[2].type()); + if(output_type == rocblas_datatype_i8_r or output_type == rocblas_datatype_u8_r) + { + output_type = rocblas_datatype_i32_r; + } + compute_type = rb_compute_type{output_type}; + if(compute_fp32) + { + if(arg_type == rocblas_datatype_f16_r or arg_type == rocblas_datatype_bf16_r) + compute_type = rocblas_datatype_f32_r; + } + if(arg_type == rocblas_datatype_f8_r) + { + assert(get_type(input_shapes[1].type()) == rocblas_datatype_f8_r); + compute_type = rocblas_compute_type_f32; + } + + auto a_lens = input_shapes[0].lens(); + auto b_lens = input_shapes[1].lens(); + + auto out_lens = output_shape.lens(); + m = out_lens[dim_0]; + n = out_lens[dim_1]; + k = input_shapes[0].lens()[dim_1]; + + a_stride = get_batch_stride(input_shapes[0]); + b_stride = get_batch_stride(input_shapes[1]); + c_stride = get_batch_stride(input_shapes[2]); + d_stride = is_3inputs ? get_batch_stride(input_shapes[3]) : c_stride; + num_matrices = std::accumulate( + out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies()); + strided_batched = num_matrices > 1; + if(strided_batched and b_stride == 0 and input_shapes[0].standard()) + { + // If the batch dimension of B is broadcasted, then we can + // multiply m by the batch_size and use rocblas_gemm_ex + // instead of rocblas_gemm_strided_batched_ex. + m *= num_matrices; + strided_batched = false; + } + } + + void run(context& ctx, const std::vector& input_args, int32_t solution_idx = 0) const + { +#ifdef MIGRAPHX_USE_ROCBLAS_FP8_API + if(rocblas_fp8_available() and + std::any_of(input_args.begin(), input_args.end(), [](const auto i) { + return i.get_shape().type() == migraphx::shape::fp8e4m3fnuz_type; + })) + { + if(strided_batched) + { + auto common_args = + create_strided_batched_args_common(ctx, compute_type, input_args); + rocblas_invoke(&rocblas_gemm_strided_batched_ex3, + common_args, + rocblas_gemm_algo_standard, + solution_idx, + gemm_flags); + } + else + { + auto common_args = create_gemm_ex_args_common(ctx, compute_type, input_args); + rocblas_invoke(&rocblas_gemm_ex3, + common_args, + rocblas_gemm_algo_standard, + solution_idx, + gemm_flags); + } + } + else +#endif + { + if(strided_batched) + { + auto common_args = + create_strided_batched_args_common(ctx, compute_type, input_args); + rocblas_invoke(&rocblas_gemm_strided_batched_ex, + common_args, + rocblas_gemm_algo_solution_index, + solution_idx, + gemm_flags); + } + else + { + auto common_args = create_gemm_ex_args_common(ctx, compute_type, input_args); + rocblas_invoke(&rocblas_gemm_ex, + common_args, + rocblas_gemm_algo_solution_index, + solution_idx, + gemm_flags); + } + } + } + +#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API + auto validate(context& ctx, const std::vector& input_shapes, int32_t solution_idx) const + { + // Create dummy arguments for the shapes, and call the overloaded method + std::vector input_args; + unsigned long seed = 0; + std::transform(input_shapes.begin(), + input_shapes.end(), + std::back_inserter(input_args), + [&](const shape& x) { + return to_gpu(generate_argument(x, seed++, random_mode::random)); + }); + return validate(ctx, input_args, solution_idx); + } + + /** + * Checks a particular solution for validity by running it with the flag + * rocblas_gemm_flags_check_solution_index (could be invalid if this model was + * tuned with a different rocBLAS version) + * + * @return Returns either solution_idx if valid, or else the default 
value 0 + * if not. The default does not mean list index 0, but tells the picker + * to choose a solution. + */ + int32_t + validate(context& ctx, const std::vector& input_args, int32_t solution_idx) const + { + rocblas_status_ check_valid(rocblas_status_success); + + if(strided_batched) + { + auto common_args = create_strided_batched_args_common(ctx, compute_type, input_args); + check_valid = rocblas_invoke(&rocblas_gemm_strided_batched_ex, + common_args, + rocblas_gemm_algo_solution_index, + solution_idx, + rocblas_gemm_flags_check_solution_index); + } + else + { + auto common_args = create_gemm_ex_args_common(ctx, compute_type, input_args); + check_valid = rocblas_invoke(&rocblas_gemm_ex, + common_args, + rocblas_gemm_algo_solution_index, + solution_idx, + rocblas_gemm_flags_check_solution_index); + } + + if(check_valid == rocblas_status_invalid_value) + { + std::cerr << "WARNING: tuned solution is invalid; reverting to default" << std::endl; + return 0; + } + return solution_idx; + } +#endif + + /** + * Helper method to create that subset of a long rocBLAS argument list that is common + * to multiple "...strided_batched..." calls. + * + * The rocblas_gemm API handles inputs and output matrices as + * column-major format. When doing a C = A * B, we actually do + * C^T = (B^T) * (A^T). That is the reason we input args[1] as + * A and args[0] as B in calling the rocblas_gemm. + * + */ + auto create_strided_batched_args_common(context& ctx, + rb_compute_type rbcompute_type, + const std::vector& args) const + { + return pack(ctx.get_stream().get_rocblas(), + transb ? rocblas_operation_transpose : rocblas_operation_none, + transa ? rocblas_operation_transpose : rocblas_operation_none, + n, + m, + k, + get_alpha(), + args[1].data(), + arg_type, + ldb, + b_stride, + args[0].data(), + arg_type, + lda, + a_stride, + get_beta(), + args[2].data(), + output_type, + ldc, + c_stride, + is_3inputs ? args[3].data() : args[2].data(), + output_type, + ldd, + d_stride, + num_matrices, + rbcompute_type); + } + /** + * Helper method to create that subset of a long rocBLAS argument list that is common + * to multiple "gemm_ex..." calls. + * + * The rocblas_gemm API handles inputs and output matrices as + * column-major format. When doing a C = A * B, we actually do + * C^T = (B^T) * (A^T). That is the reason we input args[1] as + * A and args[0] as B in calling the rocblas_gemm. + * + * */ + auto create_gemm_ex_args_common(context& ctx, + rb_compute_type rbcompute_type, + const std::vector& args) const + { + return pack(ctx.get_stream().get_rocblas(), + transb ? rocblas_operation_transpose : rocblas_operation_none, + transa ? rocblas_operation_transpose : rocblas_operation_none, + n, + m, + k, + get_alpha(), + args[1].data(), + arg_type, + ldb, + args[0].data(), + arg_type, + lda, + get_beta(), + args[2].data(), + output_type, + ldc, + is_3inputs ? args[3].data() : args[2].data(), + output_type, + ldd, + rbcompute_type); + } + +#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API + /** + * Find best rocBLAS solution: Get list of solutions and try them all, returning the index + * of the fastest one. 
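+ *
+ * Each candidate index is run once as a warm-up and then timed over hot_calls
+ * back-to-back invocations; the candidate with the lowest average host time wins.
+ *
+ * Rough usage sketch (mirrors gemm_finalize_impl further below; the <float>
+ * instantiation here is only an example):
+ *
+ *   auto g = gemm_impl<float>(output_shape, input_shapes, alpha, beta, compute_fp32);
+ *   int32_t best = g.tune(ctx, input_shapes);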
+ */ + int tune(context& ctx, const std::vector& input_shapes) const + { + // tuning meta parameters + const int hot_calls = 40; + unsigned long seed = 0; + std::vector input_args; + std::transform(input_shapes.begin(), + input_shapes.end(), + std::back_inserter(input_args), + [&](const shape& x) { + return to_gpu(generate_argument(x, seed++, random_mode::random)); + }); + + // Get the solutions list in 2 rocBLAS steps: + // 1. Find out how many solutions there are and allocate the array + // 2. Get the solutions + // + rocblas_int list_size = 0; + std::vector solution_indices; + rb_compute_type rbcompute_type = compute_type; + // rocblas_gemm_get_solutions() API requires compute_type as rocblas_datatype. Convert + // manually for FP8 + if(arg_type == rocblas_datatype_f8_r) + { + rbcompute_type = rocblas_datatype_f32_r; + } + if(strided_batched) + { + auto common_args = create_strided_batched_args_common(ctx, rbcompute_type, input_args); + rocblas_invoke(&rocblas_gemm_strided_batched_ex_get_solutions, + common_args, + rocblas_gemm_algo_solution_index, + gemm_flags, + nullptr, + &list_size); + solution_indices.resize(list_size); + + auto common_sol_args = + create_strided_batched_args_common(ctx, rbcompute_type, input_args); + rocblas_invoke(&rocblas_gemm_strided_batched_ex_get_solutions, + common_sol_args, + rocblas_gemm_algo_solution_index, + gemm_flags, + solution_indices.data(), + &list_size); + } + else + { + auto common_args = create_gemm_ex_args_common(ctx, rbcompute_type, input_args); + rocblas_invoke(&rocblas_gemm_ex_get_solutions, + common_args, + rocblas_gemm_algo_solution_index, + gemm_flags, + nullptr, + &list_size); + solution_indices.resize(list_size); + + auto common_sol_args = create_gemm_ex_args_common(ctx, rbcompute_type, input_args); + rocblas_invoke(&rocblas_gemm_ex_get_solutions, + common_sol_args, + rocblas_gemm_algo_solution_index, + gemm_flags, + solution_indices.data(), + &list_size); + } + + double best_time = std::numeric_limits::max(); + double first_time = -1; + // Initialize to default solution index + rocblas_int best_sol = 0; + for(auto sol : solution_indices) + { + // Warmup: the first call to an op. may not be representative since there is + // more time taken initializing caches, etc. so we won't time it. + run(ctx, input_args, sol); + double host_time = time([&] { + for([[maybe_unused]] int hc : range(hot_calls)) + run(ctx, input_args, sol); + ctx.finish(); + }); + + host_time /= hot_calls; + + // dev/evaluation only: track time for first solution. 
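+ // (first_time is only used in the "Winning GEMM solution" log line after the loop)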
+ if(first_time < 0) + first_time = host_time; + + // track current best + if(host_time < best_time) + { + best_sol = sol; + best_time = host_time; + } + } + std::cout << "Winning GEMM solution: " << best_sol << " in " << best_time << " ms, beats " + << first_time << "ms" << std::endl; + std::this_thread::sleep_for(std::chrono::milliseconds{50}); + return best_sol; + } +#endif + private: + size_t num_matrices = 0; + rocblas_int m = 0; + rocblas_int n = 0; + rocblas_int k = 0; + bool transa = false; + bool transb = false; + T alpha = 0; + T beta = 0; + + std::function get_alpha{}; + std::function get_beta{}; + rocblas_gemm_flags gemm_flags = rocblas_gemm_flags_none; + rocblas_int lda = 0; + rocblas_int ldb = 0; + rocblas_int ldc = 0; + rocblas_int ldd = 0; + rocblas_int a_stride = 0; + rocblas_int b_stride = 0; + rocblas_int c_stride = 0; + rocblas_int d_stride = 0; + rocblas_datatype arg_type = rocblas_datatype_f32_r; + rb_compute_type compute_type = rocblas_datatype_f32_r; + rocblas_datatype output_type = rocblas_datatype_f32_r; + bool strided_batched = true; + bool is_3inputs = true; + bool compute_fp32 = true; +}; // gemm_impl + +void gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + float alpha, + float beta, + bool compute_fp32, + int32_t solution_idx) +{ + std::vector input_shapes; + std::transform(args.begin(), + args.end(), + std::back_inserter(input_shapes), + [](const argument& x) { return x.get_shape().normalize_standard(); }); + auto gemm_item = gemm_impl(output_shape, input_shapes, alpha, beta, compute_fp32); + gemm_item.run(ctx, args, solution_idx); +} + +void gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + int32_t alpha, + int32_t beta, + bool compute_fp32, + int32_t solution_idx) +{ + std::vector input_shapes; + std::transform(args.begin(), + args.end(), + std::back_inserter(input_shapes), + [](const argument& x) { return x.get_shape().normalize_standard(); }); + auto gemm_item = gemm_impl(output_shape, input_shapes, alpha, beta, compute_fp32); + gemm_item.run(ctx, args, solution_idx); +} + +static value gemm_problem(const shape& output_shape, std::vector input_shapes) +{ + input_shapes.push_back(output_shape); + return to_value(input_shapes); +} + +#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API +static void gemm_save_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + int32_t solution_idx) +{ + ctx.get_problem_cache().insert( + "rocblas", gemm_problem(output_shape, input_shapes), solution_idx); +} +#endif + +int32_t gemm_default_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes) +{ + auto sol = ctx.get_problem_cache().get("rocblas", gemm_problem(output_shape, input_shapes)); + if(sol.has_value()) + return sol->to(); + return 0; +} + +/** + * Decides if the tune() or validate() method is appropriate and calls it. + * Return value is the chosen solution index, or 0 to let picker choose it. + */ +template +int32_t gemm_finalize_impl(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + T alpha, + T beta, + bool compute_fp32, + int32_t solution_idx) +{ +#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API + + // This code should be called only if either the environment var. 
+ // MIGRAPHX_ENABLE_GEMM_TUNING, or option --exhaustive-tune, is set + + if(solution_idx == 0) + { + auto gemm_item = gemm_impl(output_shape, input_shapes, alpha, beta, compute_fp32); + solution_idx = gemm_item.tune(ctx, input_shapes); + gemm_save_solution(ctx, output_shape, input_shapes, solution_idx); + } + else + { + // If a tuned solution index is already given, don't tune again but validate + // in case the data was tuned with a different rocBLAS version + auto gemm_item = gemm_impl(output_shape, input_shapes, alpha, beta, compute_fp32); + solution_idx = gemm_item.validate(ctx, input_shapes, solution_idx); + } +#else + (void)ctx, (void)output_shape, (void)input_shapes; + (void)alpha, (void)beta, (void)compute_fp32; +#endif + return solution_idx; +} + +int32_t gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + bool compute_fp32, + int32_t solution_idx) +{ + return gemm_finalize_impl( + ctx, output_shape, input_shapes, alpha, beta, compute_fp32, solution_idx); +} + +int32_t gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + int32_t alpha, + int32_t beta, + bool compute_fp32, + int32_t solution_idx) +{ + return gemm_finalize_impl( + ctx, output_shape, input_shapes, alpha, beta, compute_fp32, solution_idx); +} +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/hip.cpp b/docker/rocm/migraphx/targets/gpu/hip.cpp new file mode 100644 index 000000000..0fb7deb93 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/hip.cpp @@ -0,0 +1,330 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#if MIGRAPHX_USE_MIOPEN +#include +#endif +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_REGISTER_OP(hip_allocate) +MIGRAPHX_REGISTER_OP(hip_fill) +MIGRAPHX_REGISTER_OP(hip_sync_stream) +MIGRAPHX_REGISTER_OP(hip_copy_to_gpu) +MIGRAPHX_REGISTER_OP(hip_copy_from_gpu) +MIGRAPHX_REGISTER_OP(hip_copy) +MIGRAPHX_REGISTER_OP(hip_allocate_memory) +MIGRAPHX_REGISTER_OP(hip_copy_literal) + +using hip_ptr = MIGRAPHX_MANAGE_PTR(void, hipFree); +using hip_host_ptr = MIGRAPHX_MANAGE_PTR(void, hipHostUnregister); + +std::string hip_error(int error) { return hipGetErrorString(static_cast(error)); } + +bool is_device_ptr(const void* ptr) +{ + hipPointerAttribute_t attr; + auto status = hipPointerGetAttributes(&attr, ptr); + if(status != hipSuccess) + return false; + return attr.type == hipMemoryTypeDevice; +} + +std::size_t get_available_gpu_memory() +{ + size_t free; + size_t total; + auto status = hipMemGetInfo(&free, &total); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed getting available memory: " + hip_error(status)); + return free; +} + +void* get_device_ptr(void* hptr) +{ + void* result = nullptr; + auto status = hipHostGetDevicePointer(&result, hptr, 0); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed getting device pointer: " + hip_error(status)); + return result; +} + +struct host_ptr_cache +{ + std::unordered_map> cache; + std::mutex m; + std::shared_ptr get(void* ptr) + { + std::lock_guard lock(m); + auto it = cache.find(ptr); + if(it != cache.end()) + return it->second.lock(); + return nullptr; + } + + void put(const std::shared_ptr& p) + { + std::lock_guard lock(m); + cache[p.get()] = p; + } +}; + +static host_ptr_cache& get_host_ptr_cache() +{ + static host_ptr_cache cache; + return cache; +} + +std::shared_ptr allocate_gpu(std::size_t sz, bool host = false) +{ + if(sz > get_available_gpu_memory()) + MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz)); + void* alloc_ptr = nullptr; + auto status = host ? 
hipHostMalloc(&alloc_ptr, sz) : hipMalloc(&alloc_ptr, sz); + if(status != hipSuccess) + { + if(host) + MIGRAPHX_THROW("Gpu allocation failed: " + hip_error(status)); + else + return allocate_gpu(sz, true); + } + assert(alloc_ptr != nullptr); + std::shared_ptr result = share(hip_ptr{alloc_ptr}); + if(host) + { + get_host_ptr_cache().put(result); + } + return result; +} + +std::shared_ptr register_on_gpu(void* ptr, std::size_t sz) +{ + std::shared_ptr result = get_host_ptr_cache().get(ptr); + if(result) + { + return result; + } + auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped); + if(status != hipSuccess) + MIGRAPHX_THROW("Gpu register failed: " + hip_error(status)); + result = share(hip_host_ptr{ptr}); + get_host_ptr_cache().put(result); + return result; +} + +template +std::vector read_from_gpu(const void* x, std::size_t sz) +{ + gpu_sync(); + std::vector result(sz); + assert(not is_device_ptr(result.data())); + if(not is_device_ptr(x)) + { + MIGRAPHX_THROW( + "read_from_gpu() requires Src buffer to be on the GPU, Copy from gpu failed\n"); + } + auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost); + if(status != hipSuccess) + MIGRAPHX_THROW("Copy from gpu failed: " + hip_error(status)); // NOLINT + return result; +} + +std::shared_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false) +{ + gpu_sync(); + auto result = allocate_gpu(sz, host); + assert(is_device_ptr(result.get())); + assert(not is_device_ptr(x)); + auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice); + if(status != hipSuccess) + MIGRAPHX_THROW("Copy to gpu failed: " + hip_error(status)); + return result; +} + +template +hip_ptr write_to_gpu(const T& x) +{ + using type = typename T::value_type; + auto size = x.size() * sizeof(type); + return write_to_gpu(x.data(), size); +} + +argument allocate_gpu(const shape& s, bool host) +{ + auto p = allocate_gpu(s.bytes() + 1, host); + return {s, [p]() mutable { return reinterpret_cast(p.get()); }}; +} + +argument register_on_gpu(const argument& arg) +{ + auto arg_shared = arg.share(); + auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes()); + auto s = arg_shared.get_shape(); + return {s, [p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }}; +} + +argument to_gpu(const argument& arg, bool host) +{ + argument result; + arg.visit( + [&](auto x) { + auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host); + result = {x.get_shape(), p}; + }, + [&](const auto& xs) { + std::vector args; + std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) { + return to_gpu(x, host); + }); + result = argument{args}; + }); + return result; +} + +argument from_gpu(const argument& arg) +{ + argument result; + arg.visit( + [&](auto x) { + using type = typename decltype(x)::value_type; + auto v = read_from_gpu(arg.data(), x.get_shape().bytes() / sizeof(type)); + // cppcheck-suppress returnDanglingLifetime + result = {x.get_shape(), [v]() mutable { return v.data(); }}; + }, + [&](const auto& xs) { + std::vector args; + std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) { + return from_gpu(x); + }); + result = argument{args}; + }); + + return result; +} + +void set_device(std::size_t id) +{ + auto status = hipSetDevice(id); + if(status != hipSuccess) + MIGRAPHX_THROW("Error setting device"); +} + +void gpu_sync() +{ + auto status = hipDeviceSynchronize(); + if(status != hipSuccess) + MIGRAPHX_THROW("hip device synchronization failed: " + 
hip_error(status)); +} + +void gpu_sync(const context& ctx) { ctx.finish(); } + +void hip_async_memset(context& ctx, const argument& dst, int value) +{ + std::size_t dst_size = dst.get_shape().bytes(); + auto status = hipMemsetAsync(dst.data(), value, dst_size, ctx.get_stream().get()); + if(status != hipSuccess) + MIGRAPHX_THROW("Gpu fill failed: " + hip_error(status)); +} + +void hip_async_copy(context& ctx, const argument& src, const argument& dst, hipMemcpyKind kind) +{ + std::size_t src_size = src.get_shape().bytes(); + std::size_t dst_size = dst.get_shape().bytes(); + if(src_size > dst_size) + MIGRAPHX_THROW("Not enough memory available in destination to do copy"); + auto status = hipMemcpyAsync(dst.data(), src.data(), src_size, kind, ctx.get_stream().get()); + if(status != hipSuccess) + MIGRAPHX_THROW("Gpu copy failed: " + hip_error(status)); +} + +void gpu_copy(context& ctx, const argument& src, const argument& dst) +{ + // Workaround: Use contiguous as hip's memcpy is broken + device::contiguous(ctx.get_stream().get(), dst, src); + // hip_async_copy(ctx, src, dst, hipMemcpyDeviceToDevice); +} + +void copy_to_gpu(context& ctx, const argument& src, const argument& dst) +{ + if(src.get_shape() == dst.get_shape() and dst.get_shape().packed()) + { + hip_async_copy(ctx, src, dst, hipMemcpyHostToDevice); + } + else + { + gpu_copy(ctx, register_on_gpu(src), dst); + } +} + +void copy_from_gpu(context& ctx, const argument& src, const argument& dst) +{ + if(src.get_shape() == dst.get_shape() and dst.get_shape().packed()) + { + hip_async_copy(ctx, src, dst, hipMemcpyDeviceToHost); + } + else + { + gpu_copy(ctx, src, register_on_gpu(dst)); + } +} + +argument get_preallocation(context& ctx, const std::string& id) +{ + return ctx.get_current_device().preallocations.at(id); +} + +void gpu_fill(context& ctx, const argument& dst, int value) +{ + if(dst.get_sub_objects().empty()) + { + // TODO: Handle non-packed tensor when value is not 0 + assert(dst.get_shape().packed() and value == 0); + hip_async_memset(ctx, dst, value); + } + else + { + for(const auto& arg : dst.get_sub_objects()) + gpu_fill(ctx, arg, value); + } +} + +void store_preallocated_param(context& ctx, const std::string& id, const argument& a) +{ + ctx.get_current_device().preallocations[id] = a; +} + +// clang-format off +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/hip_gemm_impl.cpp b/docker/rocm/migraphx/targets/gpu/hip_gemm_impl.cpp new file mode 100644 index 000000000..966927da7 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/hip_gemm_impl.cpp @@ -0,0 +1,754 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#if MIGRAPHX_USE_HIPBLASLT +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using microseconds = std::chrono::duration; + +hipDataType compute_to_hip_type(hipblasComputeType_t type) +{ + switch(type) + { + case HIPBLAS_COMPUTE_32F: return HIP_R_32F; + case HIPBLAS_COMPUTE_32I: return HIP_R_32I; + case HIPBLAS_COMPUTE_16F: + case HIPBLAS_COMPUTE_64F: + case HIPBLAS_COMPUTE_32I_PEDANTIC: + case HIPBLAS_COMPUTE_16F_PEDANTIC: + case HIPBLAS_COMPUTE_32F_PEDANTIC: + case HIPBLAS_COMPUTE_64F_PEDANTIC: + case HIPBLAS_COMPUTE_32F_FAST_16F: + case HIPBLAS_COMPUTE_32F_FAST_16BF: + case HIPBLAS_COMPUTE_32F_FAST_TF32: + MIGRAPHX_THROW("HIPBLAS_GEMM: conversion from hipComputeType_t to hipDataType failed"); + } +} + +// Convert hipBLAS datatypes to equivalent MIGraphX data types +hipDataType get_type_hipblas(shape::type_t type) +{ + switch(type) + { + case shape::double_type: return HIP_R_64F; + case shape::float_type: return HIP_R_32F; + case shape::half_type: return HIP_R_16F; + case shape::int8_type: return HIP_R_8I; + case shape::uint8_type: return HIP_R_8U; + case shape::int32_type: return HIP_R_32I; + case shape::uint32_type: return HIP_R_32U; + case shape::fp8e4m3fnuz_type: return HIP_R_8F_E4M3_FNUZ; + case shape::fp8e5m2fnuz_type: + return HIP_R_8F_E5M2_FNUZ; +// TODO can remove this preprocessor conditional when hip verison defaults to have these types +#ifdef ROCM_USE_FLOAT8 + case shape::fp8e4m3fn_type: return HIP_R_8F_E4M3; + case shape::fp8e5m2_type: return HIP_R_8F_E5M2; +#else + case shape::fp8e4m3fn_type: + case shape::fp8e5m2_type: +#endif + case shape::tuple_type: + case shape::bool_type: + case shape::uint16_type: + case shape::int16_type: + case shape::int64_type: + case shape::uint64_type: MIGRAPHX_THROW("HIPBLAS_GEMM: data type not supported!"); + case shape::bf16_type: return HIP_R_16BF; + } + + MIGRAPHX_THROW("HIPBLAS_GEMM: data type not supported!"); +} + +void blas_shape_hip(const shape& in_shape) +{ + if(in_shape.lens().size() < 2) + return; + auto s = in_shape.normalize_standard(); + if(std::none_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 1; })) + MIGRAPHX_THROW("GPU_GEMM: needs to have one matrix stride as 1"); + if(std::any_of(s.strides().end() - 2, s.strides().end(), [](auto i) { return i == 0; })) + MIGRAPHX_THROW("GPU_GEMM: matrix dimensions can't be broadcasted"); + if(s.lens().size() < 3) + return; + shape batch_shape{s.type(), + {s.lens().begin(), s.lens().end() - 2}, + {s.strides().begin(), s.strides().end() - 2}}; + auto batch_shapes = reduce_dims({batch_shape}); + if(batch_shapes.front().lens().size() != 1) + MIGRAPHX_THROW("GPU_GEMM: Batch dimension is not collapsible"); +} + +shape transpose_batch_hip(const shape& s, unsigned trans_batch) +{ + if(trans_batch == 0) + return s; + if(s.lens().size() < 3) + return s; + auto batch = s.lens().size() - 3; + std::vector perm(s.lens().size()); + std::iota(perm.begin(), perm.end(), 
0); + std::swap(perm[batch], perm[batch + trans_batch]); + return shape::from_permutation(s.type(), s.lens(), perm); +} + +static bool is_transposed_hip(const shape& s) { return s.transposed() and s.strides().back() != 1; } + +static int32_t get_batch_stride_hip(const shape& s) +{ + // This value is not needed for non-strided inputs + if(s.strides().size() < 3) + return 0; + else + return s.strides()[s.strides().size() - 3]; +} + +/** + * Wrapper for multiple hipBLASLt calls. The constructor creates parameters for + * these calls based on data shapes and other values contained in the associated + * instruction and operation. + */ +struct hip_gemm_impl +{ + hip_gemm_impl(const shape& output_shape, + const std::vector& input_shapes, + float alpha_param, + float beta_param) + : alpha(alpha_param), beta(beta_param), is_3inputs(input_shapes.size() == 5) + { + if(not is_3inputs) + { + beta = 0; + } + + // Create lambdas that will cast alpha, beta to the output shape's type + // and retain the values being pointed to + output_shape.visit_type([&](auto as) { + if(as.is_integral()) + { + int32_t alpha_r = int32_t(alpha); + int32_t beta_r = int32_t(beta); + get_alpha = [=] { return &alpha_r; }; + get_beta = [=] { return &beta_r; }; + } + else + { + get_alpha = [=] { return α }; + get_beta = [=] { return β }; + } + }); + + transa = is_transposed_hip(input_shapes[0]); + transb = is_transposed_hip(input_shapes[1]); + op_a = transa ? HIPBLAS_OP_T : HIPBLAS_OP_N; + op_b = transb ? HIPBLAS_OP_T : HIPBLAS_OP_N; + + auto n_dim = output_shape.lens().size(); + auto dim_0 = n_dim - 2; + auto dim_1 = n_dim - 1; + // Leading dimensions of matrices + lda = input_shapes[0].strides()[transa ? dim_1 : dim_0]; + ldb = input_shapes[1].strides()[transb ? dim_1 : dim_0]; + ldc = is_3inputs ? input_shapes[2].strides()[dim_0] : input_shapes[3].strides()[dim_0]; + ldd = is_3inputs ? input_shapes[4].strides()[dim_0] : ldc; + + auto out_lens = output_shape.lens(); + m = out_lens[dim_0]; + n = out_lens[dim_1]; + k = input_shapes[0].lens()[dim_1]; + + a_stride = get_batch_stride_hip(input_shapes[0]); + b_stride = get_batch_stride_hip(input_shapes[1]); + c_stride = is_3inputs ? get_batch_stride_hip(input_shapes[2]) + : get_batch_stride_hip(input_shapes[3]); + d_stride = is_3inputs ? get_batch_stride_hip(input_shapes[4]) : c_stride; + num_matrices = std::accumulate( + out_lens.rbegin() + 2, out_lens.rend(), std::size_t{1}, std::multiplies()); + + arg_type = get_type_hipblas(input_shapes[0].type()); + output_type = is_3inputs ? 
get_type_hipblas(input_shapes[4].type()) + : get_type_hipblas(input_shapes[3].type()); + + if(arg_type == HIP_R_8I or arg_type == HIP_R_8U) + { + compute_type = HIPBLAS_COMPUTE_32I; + } + else + { + compute_type = HIPBLAS_COMPUTE_32F; + } + if(op_a == HIPBLAS_OP_T) + { + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_a, arg_type, m, k, lda); }); + } + else + { + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_a, arg_type, k, m, lda); }); + } + if(op_b == HIPBLAS_OP_T) + { + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_b, arg_type, k, n, ldb); }); + } + else + { + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_b, arg_type, n, k, ldb); }); + } + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_c, output_type, n, m, ldc); }); + + if(is_3inputs) + { + hipblaslt_invoke( + [&]() { return hipblasLtMatrixLayoutCreate(&mat_d, output_type, n, m, ldd); }); + } + if(num_matrices > 1) + { + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute(mat_a, + HIPBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &num_matrices, + sizeof(num_matrices)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute(mat_b, + HIPBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &num_matrices, + sizeof(num_matrices)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute(mat_c, + HIPBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &num_matrices, + sizeof(num_matrices)); + }); + + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute( + mat_a, + HIPBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &a_stride, + sizeof(a_stride)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute( + mat_b, + HIPBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &b_stride, + sizeof(b_stride)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute( + mat_c, + HIPBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &c_stride, + sizeof(c_stride)); + }); + + if(is_3inputs) + { + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute(mat_d, + HIPBLASLT_MATRIX_LAYOUT_BATCH_COUNT, + &num_matrices, + sizeof(num_matrices)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatrixLayoutSetAttribute( + mat_d, + HIPBLASLT_MATRIX_LAYOUT_STRIDED_BATCH_OFFSET, + &d_stride, + sizeof(d_stride)); + }); + } + } + hipblaslt_invoke([&]() { + return hipblasLtMatmulDescCreate( + &hipblaslt_desc, compute_type, compute_to_hip_type(compute_type)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatmulDescSetAttribute( + hipblaslt_desc, HIPBLASLT_MATMUL_DESC_TRANSB, &op_a, sizeof(int32_t)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatmulDescSetAttribute( + hipblaslt_desc, HIPBLASLT_MATMUL_DESC_TRANSA, &op_b, sizeof(int32_t)); + }); + + // Transfer ownership of raw pointers to managed pointers. 
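+        // The managed_* members are MIGRAPHX_MANAGE_PTR RAII wrappers (see the member
+        // declarations below), so the hipblasLt matmul descriptor and matrix layouts created
+        // above are destroyed automatically when this hip_gemm_impl object goes out of scope.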
+ managed_hipblaslt_desc.reset(hipblaslt_desc); + managed_mat_a.reset(mat_a); + managed_mat_b.reset(mat_b); + managed_mat_c.reset(mat_c); + if(is_3inputs) + { + managed_mat_d.reset(mat_d); + } + } + + ~hip_gemm_impl() {} + + struct solution + { + solution() : handle(nullptr), preference(nullptr) {} + + auto get_hipblaslt_preference() + { + if(hbltpreference == nullptr) + { + hbltpreference = create_hipblaslt_preference_ptr(); + } + assert(hbltpreference.get() != nullptr); + return hbltpreference.get(); + } + + void init(context& ctx) + { + if(handle == nullptr) + { + handle = ctx.get_stream().get_hipblaslt(); + preference = get_hipblaslt_preference(); + } + } + + auto& get_result(context& ctx, hip_gemm_impl& gemm, int32_t idx) + { + init(ctx); + if(idx == 0) + { + // use default solution + const int n_sol = 1; + int returned_algo_count; + heuristic_result.resize(n_sol); + uint64_t max_workspace = std::numeric_limits::max(); + hipblaslt_invoke([&]() { + return hipblasLtMatmulPreferenceSetAttribute( + preference, + HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES, + &max_workspace, + sizeof(uint64_t)); + }); + hipblaslt_invoke([&]() { + return hipblasLtMatmulAlgoGetHeuristic(handle, + gemm.hipblaslt_desc, + gemm.mat_b, + gemm.mat_a, + gemm.mat_c, + gemm.is_3inputs ? gemm.mat_d + : gemm.mat_c, + preference, + n_sol, + heuristic_result.data(), + &returned_algo_count); + }); + + if(returned_algo_count != n_sol) + { + std::cout << "less solution found! request: " << n_sol + << ", found: " << returned_algo_count << std::endl; + } + } + else + { + // query for the solutions. 1st as the best. + std::vector algo_index = {idx}; + hipblaslt_invoke([&]() { + return hipblaslt_ext::getAlgosFromIndex(handle, algo_index, heuristic_result); + }); + assert(heuristic_result.size() == 1); + } + return heuristic_result; + } + + private: + hipblasLtHandle_t handle; + hipblasLtMatmulPreference_t preference; + std::vector heuristic_result; + shared hbltpreference = nullptr; + } solution; + + /** + * Helper method to create that subset of a long hipblaslt argument list that is common + * to multiple "hipblasLtMatmul" calls. + * + * The hipblaslt GEMM API handles inputs and output matrices as + * column-major format. When doing a C = A * B, we actually do + * C^T = (B^T) * (A^T). That is the reason we input args[1] as + * A and args[0] as B in calling the hipblaslt GEMM. + * + * */ + auto create_hipblaslt_args_common(context& ctx, + const std::vector& args, + int32_t solution_idx) + { + auto* algo = &solution.get_result(ctx, *this, solution_idx)[0].algo; + size_t workspace_size = ((is_3inputs ? args[3] : args[2]).get_shape()).bytes(); + return pack(ctx.get_stream().get_hipblaslt(), + hipblaslt_desc, + get_alpha(), // alpha + args[1].data(), // A + mat_b, // Adesc + args[0].data(), // B + mat_a, // Bdesc + get_beta(), // beta + is_3inputs ? args[2].data() : args[3].data(), // C + mat_c, // Cdesc + is_3inputs ? args[4].data() : args[3].data(), // D + is_3inputs ? mat_d : mat_c, // Ddesc + algo, // algo + is_3inputs ? args[3].data() : args[2].data(), // workspace + workspace_size, // workspaceSizeInBytes + ctx.get_stream().get() // stream + ); + } + + auto create_hipblaslt_supporting_args_common(context& ctx, + const std::vector& args, + hipblasLtMatmulAlgo_t& algo, + size_t& workspace_size) const + { + (void)(args); + return pack(ctx.get_stream().get_hipblaslt(), + hipblaslt_desc, + get_alpha(), + mat_b, + mat_a, + get_beta(), + mat_c, + is_3inputs ? 
mat_d : mat_c, + algo, + workspace_size); + } + + void + run(context& ctx, const std::vector& input_args, int32_t solution_idx = 0) // const + { + auto common_args = create_hipblaslt_args_common(ctx, input_args, solution_idx); + hipblaslt_invoke(&hipblasLtMatmul, common_args); + } + + auto + validate(context& ctx, const std::vector& input_shapes, int32_t solution_idx) // const + { + // Create dummy arguments for the shapes, and call the overloaded method + std::vector input_args; + std::transform(input_shapes.begin(), + input_shapes.end(), + std::back_inserter(input_args), + [](const shape& x) { return to_gpu(generate_argument(x)); }); + + return validate(ctx, input_args, solution_idx); + } + + /** + * Checks a particular solution for validity by running it (could be invalid if this model was + * tuned with a different hipBLASLt version) + * + * @return Returns either solution_idx if valid, or else the default value 0 + * if not. The default does not mean list index 0, but tells the picker + * to choose a solution. + */ + int32_t + validate(context& ctx, const std::vector& input_args, int32_t solution_idx) // const + { + auto common_args = create_hipblaslt_args_common(ctx, input_args, solution_idx); + auto check_valid = hipblaslt_invoke(&hipblasLtMatmul, common_args, false); + if(check_valid != HIPBLAS_STATUS_SUCCESS) + { + std::cerr << "WARNING: tuned solution is invalid; reverting to default" << std::endl; + return 0; + } + return solution_idx; + } + + /** + * Get workspace size for the solution index: Gets algo from the solution index, + * and calls matmulIsAlgoSupported() to get the workspace size. + */ + + size_t get_workspace_size(context& ctx, + const std::vector& input_shapes, + int32_t solution_idx) const + { + size_t workspace_size = hipblaslt_workspace_size; + std::vector input_args; + std::transform(input_shapes.begin(), + input_shapes.end(), + std::back_inserter(input_args), + [](const shape& x) { return to_gpu(generate_argument(x)); }); + + std::vector algo_index = {solution_idx}; + std::vector heuristic_result; + + hipblaslt_invoke([&]() { + return hipblaslt_ext::getAlgosFromIndex( + ctx.get_stream().get_hipblaslt(), algo_index, heuristic_result); + }); + assert(heuristic_result.size() == 1); + + auto algo = heuristic_result[0].algo; + size_t ret_workspace_size = 0; + auto supporting_args = + create_hipblaslt_supporting_args_common(ctx, input_args, algo, ret_workspace_size); + + auto status = + hipblaslt_invoke(&hipblaslt_ext::matmulIsAlgoSupported, supporting_args, false); + + // If algo is supported, update the workspace size to the actual size needed. + // Otherwise, use the default workspace size. + if(status == HIPBLAS_STATUS_SUCCESS) + { + // TODO: Remove this check once issues with '0' workspace size are resolved. + // Temporarily, we use the approach where, if the returned workspace size is '0', + // we use the default workspace size. + // Otherwise, we use the returned workspace size. + if(ret_workspace_size != 0) + workspace_size = ret_workspace_size; + } + return workspace_size; + } + + /** + * Find best hipBLASLt solution: Get list of solutions and try them all, returning the index + * of the fastest one. 
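+     * Each candidate solution is warmed up with one untimed call and then timed over a fixed
+     * number of hot calls; the candidate with the lowest average host time per call is returned.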
+ */ + int tune(context& ctx, const std::vector& input_shapes) // const + { + // tuning meta parameters + const int hot_calls = 40; + + std::vector input_args; + std::transform(input_shapes.begin(), + input_shapes.end(), + std::back_inserter(input_args), + [](const shape& x) { return to_gpu(generate_argument(x)); }); + + std::vector result; + hipblaslt_invoke([&]() { + return hipblaslt_ext::getAllAlgos(ctx.get_stream().get_hipblaslt(), + hipblaslt_ext::GemmType::HIPBLASLT_GEMM, + op_a, + op_b, + arg_type, + arg_type, + output_type, + output_type, + compute_type, + result); + }); + std::vector solution_indices; + int returned_algo_count = result.size(); + for(int i = 0; i < returned_algo_count; i++) + { + auto algo = result[i].algo; + size_t ret_workspace_size = 0; + auto supporting_args = + create_hipblaslt_supporting_args_common(ctx, input_args, algo, ret_workspace_size); + try + { + hipblaslt_invoke(&hipblaslt_ext::matmulIsAlgoSupported, supporting_args); + solution_indices.push_back(hipblaslt_ext::getIndexFromAlgo(algo)); + } + catch(...) + { + // algo is not supported, continue in that case + continue; + } + } + + double best_time = std::numeric_limits::max(); + double first_time = -1; + + // Initialize to default solution index + int32_t best_sol = 0; + // If no valid/supported solution is returned, use hipblasLtMatmulAlgoGetHeuristic + // to get an algo and use solution index from that algo. + if(solution_indices.empty()) + { + auto algo = solution.get_result(ctx, *this, 0)[0].algo; + solution_indices.push_back(hipblaslt_ext::getIndexFromAlgo(algo)); + } + for(auto sol : solution_indices) + { + // Warmup: the first call to an op. may not be representative since there is + // more time taken initializing caches, etc. so we won't time it. + run(ctx, input_args, sol); + double host_time = time([&] { + for([[maybe_unused]] int hc : range(hot_calls)) + run(ctx, input_args, sol); + ctx.finish(); + }); + + host_time /= hot_calls; + + // dev/evaluation only: track time for first solution. 
+ if(first_time < 0) + first_time = host_time; + + // track current best + if(host_time < best_time) + { + best_sol = sol; + best_time = host_time; + } + } + + std::cout << "Winning GEMM solution: " << best_sol << " in " << best_time << " ms, beats " + << first_time << "ms" << std::endl; + return best_sol; + } + + // hipblaslt + size_t num_matrices = 0; + uint64_t m = 0; + uint64_t n = 0; + uint64_t k = 0; + bool transa = false; + bool transb = false; + float alpha = 0; + float beta = 0; + std::function get_alpha{}; + std::function get_beta{}; + + int64_t lda = 0; + int64_t ldb = 0; + int64_t ldc = 0; + int64_t ldd = 0; + int64_t a_stride = 0; + int64_t b_stride = 0; + int64_t c_stride = 0; + int64_t d_stride = 0; + bool is_3inputs = true; + + hipDataType arg_type = HIP_R_32F; + hipblasComputeType_t compute_type = HIPBLAS_COMPUTE_32F; + hipDataType output_type = HIP_R_32F; + hipblasLtMatmulDesc_t hipblaslt_desc; + hipblasOperation_t op_a; + hipblasOperation_t op_b; + using hipblaslt_matrix_layout = MIGRAPHX_MANAGE_PTR(hipblasLtMatrixLayout_t, + hipblasLtMatrixLayoutDestroy); + using hipblaslt_mat_mul_desc = MIGRAPHX_MANAGE_PTR(hipblasLtMatmulDesc_t, + hipblasLtMatmulDescDestroy); + hipblaslt_matrix_layout managed_mat_a, managed_mat_b, managed_mat_c, managed_mat_d; + hipblaslt_mat_mul_desc managed_hipblaslt_desc; + hipblasLtMatrixLayout_t mat_a, mat_b, mat_c, mat_d; + hipblasLtHandle_t handle; + hipblasLtMatmulPreference_t preference; +}; // hip_gemm_impl + +void hip_gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + float alpha, + float beta, + int32_t solution_idx) +{ + std::vector input_shapes; + std::transform(args.begin(), + args.end(), + std::back_inserter(input_shapes), + [](const argument& x) { return x.get_shape().normalize_standard(); }); + auto gemm_item = hip_gemm_impl(output_shape, input_shapes, alpha, beta); + gemm_item.run(ctx, args, solution_idx); +} + +static value hip_gemm_problem(const shape& output_shape, std::vector input_shapes) +{ + input_shapes.push_back(output_shape); + return to_value(input_shapes); +} + +static void hip_gemm_save_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + int32_t solution_idx) +{ + ctx.get_problem_cache().insert( + "hipblaslt", hip_gemm_problem(output_shape, input_shapes), solution_idx); +} + +int32_t hip_gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + int32_t solution_idx) +{ + auto gemm_item = hip_gemm_impl(output_shape, input_shapes, alpha, beta); + if(solution_idx == 0) + { + solution_idx = gemm_item.tune(ctx, input_shapes); + hip_gemm_save_solution(ctx, output_shape, input_shapes, solution_idx); + } + // If a tuned solution index is already given, don't tune again but validate + // in case the data was tuned with a different hipBLASLt version. 
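+    // validate() re-runs the stored solution once and falls back to the default index (0)
+    // if hipBLASLt no longer accepts it.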
+ else + { + solution_idx = gemm_item.validate(ctx, input_shapes, solution_idx); + } + return solution_idx; +} + +int32_t hip_gemm_default_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes) +{ + auto sol = + ctx.get_problem_cache().get("hipblaslt", hip_gemm_problem(output_shape, input_shapes)); + if(sol.has_value()) + return sol->to(); + return 0; +} + +size_t hip_gemm_workspace_size(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + int32_t solution_idx) +{ + auto gemm_item = hip_gemm_impl(output_shape, input_shapes, alpha, beta); + return gemm_item.get_workspace_size(ctx, input_shapes, solution_idx); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_USE_HIPBLASLT diff --git a/docker/rocm/migraphx/targets/gpu/hipblaslt.cpp b/docker/rocm/migraphx/targets/gpu/hipblaslt.cpp new file mode 100644 index 000000000..47a9e9273 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/hipblaslt.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_HIPBLASLT +// for hipblaslt only +static const size_t workspace_size = hipblaslt_workspace_size; + +hipblaslt_handle_ptr create_hipblaslt_handle_ptr() +{ + hipblasLtHandle_t handle; + hipblasLtCreate(&handle); + return hipblaslt_handle_ptr{handle}; +} + +hipblaslt_preference_ptr create_hipblaslt_preference_ptr() +{ + hipblasLtMatmulPreference_t preference; + hipblasLtMatmulPreferenceCreate(&preference); + hipblaslt_invoke([&]() { + return hipblasLtMatmulPreferenceSetAttribute(preference, + HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES, + &workspace_size, + sizeof(workspace_size)); + }); + return hipblaslt_preference_ptr{preference}; +} + +bool hipblaslt_supported() +{ + const auto device_name = trim(split_string(get_device_name(), ':').front()); + // hipblaslt is supported for MI200 and above, and Navi3x and above. 
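+    // gfx90a: MI200 series; gfx94x: MI300 series; gfx110x: Navi 3x (RDNA3); gfx120x: Navi 4x (RDNA4).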
+ return (device_name == "gfx90a" or + (starts_with(device_name, "gfx94") and device_name >= "gfx940") or + starts_with(device_name, "gfx110") or starts_with(device_name, "gfx120")); +} + +#endif // MIGRAPHX_USE_HIPBLASLT + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/hiprtc/CMakeLists.txt b/docker/rocm/migraphx/targets/gpu/hiprtc/CMakeLists.txt new file mode 100644 index 000000000..a8cb3cec0 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/hiprtc/CMakeLists.txt @@ -0,0 +1,40 @@ +##################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +##################################################################################### + +add_executable(migraphx-hiprtc-driver + main.cpp +) +rocm_clang_tidy_check(migraphx-hiprtc-driver) +# On Windows, the driver's default 1MB stack size is not enough - increasing to 4MB. +set(STACK_SIZE 4194304) +if(MSVC) + target_link_options(migraphx-hiprtc-driver PRIVATE /STACK:${STACK_SIZE}) +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_SIMULATE_ID MATCHES "MSVC") + target_link_options(migraphx-hiprtc-driver PRIVATE -Xlinker /stack:${STACK_SIZE}) +endif() +target_link_libraries(migraphx-hiprtc-driver PRIVATE migraphx_gpu) +add_dependencies(migraphx_all_targets migraphx-hiprtc-driver) +rocm_install_targets( + TARGETS migraphx-hiprtc-driver +) diff --git a/docker/rocm/migraphx/targets/gpu/hiprtc/main.cpp b/docker/rocm/migraphx/targets/gpu/hiprtc/main.cpp new file mode 100644 index 000000000..d443ce49e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/hiprtc/main.cpp @@ -0,0 +1,92 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#endif + +std::vector read_stdin() +{ +#ifdef _WIN32 + // Set stream translation mode to BINARY to suppress translations. + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setmode?view=msvc-170 + auto old_mode = _setmode(_fileno(stdin), _O_BINARY); + if(old_mode == -1) + MIGRAPHX_THROW(std::strerror(errno)); +#endif + std::vector result; + std::array buffer{}; + std::size_t len = 0; + while((len = std::fread(buffer.data(), 1, buffer.size(), stdin)) > 0) + { + if(std::ferror(stdin) != 0 and std::feof(stdin) == 0) + MIGRAPHX_THROW(std::strerror(errno)); + + result.insert(result.end(), buffer.data(), buffer.data() + len); + } +#ifdef _WIN32 + // Reset to the previously set translation mode. + _setmode(_fileno(stdin), old_mode); +#endif + return result; +} + +int main(int argc, char const* argv[]) +{ + if(argc < 2 or migraphx::contains({"-h", "--help", "-v", "--version"}, std::string(argv[1]))) + { + std::cout << "USAGE:" << std::endl; + std::cout << " "; + std::cout << "Used internally by migraphx to compile hip programs out-of-process." + << std::endl; + std::exit(0); + } + std::string output_name = argv[1]; + try + { + auto v = migraphx::from_msgpack(read_stdin()); + std::vector srcs; + migraphx::from_value(v.at("srcs"), srcs); + auto out = + migraphx::gpu::compile_hip_src_with_hiprtc(std::move(srcs), + v.at("params").to_vector(), + v.at("arch").to()); + if(not out.empty()) + migraphx::write_buffer(output_name, out.front()); + } + catch(const std::exception& err) + { + std::cout << err.what() << std::endl; + } +} diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/abs.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/abs.hpp new file mode 100644 index 000000000..1a9f4b878 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/abs.hpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_ABS_HPP +#define MIGRAPHX_GUARD_RTGLIB_ABS_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; +#if MIGRAPHX_USE_MIOPEN + +struct miopen_abs +{ + op::abs op; + shared ad; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::abs"; } + shape compute_shape(const std::vector& inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + void finalize(context&, const shape&, const std::vector&); + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/allocation_model.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/allocation_model.hpp new file mode 100644 index 000000000..249901f23 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/allocation_model.hpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_GPU_ALLOCATION_MODEL_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct MIGRAPHX_GPU_EXPORT gpu_allocation_model +{ + std::string name() const; + std::string copy() const; + operation allocate(const shape& s) const; + operation preallocate(const shape& s, const std::string& id) const; + bool needs_out_params() const { return true; } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/analyze_streams.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/analyze_streams.hpp new file mode 100644 index 000000000..cf3d2ee42 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/analyze_streams.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_ANALYZE_STREAMS_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace gpu { + +MIGRAPHX_GPU_EXPORT std::vector analyze_streams(const module& m); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmax.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmax.hpp new file mode 100644 index 000000000..e05678fa8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmax.hpp @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_ARGMAX_HPP +#define MIGRAPHX_GUARD_RTGLIB_ARGMAX_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_argmax +{ + op::argmax op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::argmax"; } + shape compute_shape(const std::vector& inputs) const; + argument compute(context& ctx, const shape&, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmin.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmin.hpp new file mode 100644 index 000000000..071eb525e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/argmin.hpp @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_ARGMIN_HPP +#define MIGRAPHX_GUARD_RTGLIB_ARGMIN_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_argmin +{ + op::argmin op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::argmin"; } + shape compute_shape(const std::vector& inputs) const; + argument compute(context& ctx, const shape&, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/ck.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/ck.hpp new file mode 100644 index 000000000..18d4dce25 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/ck.hpp @@ -0,0 +1,165 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_CK_HPP +#define MIGRAPHX_GUARD_GPU_CK_HPP + +#include +#include +#include +#include +#include + +#include "ck/host/device_gemm_multiple_d.hpp" +#include "ck/host/device_batched_gemm_softmax_gemm.hpp" + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +#ifndef _WIN32 +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_LOG_CK_GEMM); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_CK_DEBUG); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TUNE_CK); +#endif + +// NOLINTNEXTLINE +const char* const disable_warning_pragma = R"__migraphx__( +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Weverything" +${content} +#pragma clang diagnostic pop +)__migraphx__"; + +template +std::string ck_disable_warnings(P p) +{ + return interpolate_string(disable_warning_pragma, + {{"content", std::string{p.data(), p.size()}}}); +} + +static std::unordered_map create_ck_header_strings() +{ + std::unordered_map result; + auto ck_headers = ck::host::GetHeaders(); + + std::transform( + ck_headers.begin(), ck_headers.end(), std::inserter(result, result.begin()), [&](auto& p) { + return std::pair(p.first, ck_disable_warnings(p.second)); + }); + return result; +} + +static std::vector create_ck_headers() +{ + static const auto& header_strings = create_ck_header_strings(); + std::vector srcs; + std::transform(header_strings.begin(), + header_strings.end(), + std::back_inserter(srcs), + [&](auto& p) { return src_file{p}; }); + return srcs; +} + +static inline const std::vector& ck_headers() +{ + static const auto& headers = create_ck_headers(); + return headers; +} + +inline bool transposed_matrix(const shape& s) { return s.strides().back() != 1; } + +inline ck::host::DataType get_type(const shape& s) +{ + if(s.type() == shape::half_type) + return ck::host::DataType::Half; + else if(s.type() == shape::float_type) + return ck::host::DataType::Float; + else if(s.type() == shape::int8_type) + return ck::host::DataType::Int8; + else if(s.type() == shape::int32_type) + return ck::host::DataType::Int32; + MIGRAPHX_THROW("Unsupported ck type"); +} + +inline std::size_t get_batch_count(const shape& s) +{ + return std::accumulate( + s.lens().rbegin() + 2, s.lens().rend(), std::size_t{1}, std::multiplies()); +} + +inline void fold_batch_dims(shape& s) +{ + auto lens = s.lens(); + if(lens.size() <= 2) + return; + auto batch_count = get_batch_count(s); + auto m1 = lens.at(lens.size() - 2); + auto m2 = lens.at(lens.size() - 1); + if(transposed_matrix(s)) + s = shape{s.type(), {m1, m2 * batch_count}}; + else + s = shape{s.type(), {m1 * batch_count, m2}}; +} + +inline void remove_batch_dims(shape& s) +{ + auto lens = s.lens(); + if(lens.size() <= 2) + return; + auto m1 = lens.at(lens.size() - 2); + auto m2 = lens.at(lens.size() - 1); + s = shape{s.type(), {m1, m2}}; +} + +inline bool standard_batch(const shape& s) +{ + if(s.lens().size() < 3) + return true; + std::vector lens(s.lens().begin(), s.lens().end() - 2); + std::vector strides(s.strides().begin(), s.strides().end() - 2); + auto base = *(s.lens().end() - 2) * *(s.lens().end() - 1); + std::transform(strides.begin(), strides.end(), strides.begin(), [&](auto stride) { + return stride / base; + }); + return shape{s.type(), lens, strides}.standard(); +} + +inline bool can_fold_batch(const std::vector& inputs) +{ + const auto& b_shape = inputs[1]; + if(std::any_of(inputs.begin() + 2, inputs.end() - 1, [](auto input) { + return not standard_batch(input); + })) + return false; + const auto& b_strides = 
b_shape.strides(); + return std::all_of( + b_strides.begin(), b_strides.end() - 2, [](auto stride) { return stride == 0; }); +} + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_CK_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/code_object_op.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/code_object_op.hpp new file mode 100644 index 000000000..818676728 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/code_object_op.hpp @@ -0,0 +1,98 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CODE_OBJECT_OP_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_CODE_OBJECT_OP_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct code_object_op +{ + value::binary code_object{}; + std::string symbol_name = ""; + std::size_t global = 0; + std::size_t local = 0; + std::vector expected_inputs{}; + shape output{}; + std::int64_t output_arg = -1; + kernel k{}; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.code_object, "code_object"), + f(self.symbol_name, "symbol_name"), + f(self.global, "global"), + f(self.local, "local"), + f(self.expected_inputs, "expected_inputs"), + f(self.output, "output"), + f(self.output_arg, "output_arg")); + } + + value attributes() const { return {{"group", group()}}; } + + std::string group() const { return "gpu::code_object::" + symbol_name; } + + std::string name() const { return "gpu::code_object"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + void finalize(context&, const shape&, const std::vector&); + std::int64_t get_output_arg(std::size_t n) const + { + return output_arg < 0 ? 
n + output_arg : output_arg; + } + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return get_output_arg(shapes.size()); + } + + friend std::ostream& operator<<(std::ostream& os, const code_object_op& op) + { + os << op.name() << "["; + os << "code_object=" << op.code_object.size() << ","; + os << "symbol_name=" << op.symbol_name << ","; + os << "global=" << op.global << ","; + os << "local=" << op.local << ","; + if(op.output_arg != -1) + os << "output_arg=" << op.output_arg << ","; + os << "]"; + return os; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_gen.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_gen.hpp new file mode 100644 index 000000000..03dad2364 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_gen.hpp @@ -0,0 +1,121 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct shape; +struct operation; + +namespace gpu { + +struct context; + +namespace gen { + +struct vectorize +{ + std::size_t size = 1; + std::size_t axis = 0; + static vectorize elements(std::size_t axis, const std::vector& inputs); + static vectorize elements(context& ctx, std::size_t axis, const std::vector& inputs); + static vectorize elements(std::size_t axis, + const std::vector& inputs, + const std::vector& sizes); + std::string str() const; +}; +struct preload +{ + std::vector args = {}; + static preload broadcasts(std::size_t axis, const std::vector& inputs); + bool is_preloading() const; + std::string str() const; +}; +struct tile +{ + enum mode + { + store, + load, + none + }; + std::vector args = {}; + std::size_t axis = 0; + std::size_t ntiles = 0; + std::size_t block_size = 0; + std::vector inner{}; + std::vector outer{}; + static tile elements(const std::vector& inputs, std::size_t noutputs); + // bool is_preloading() const; + std::string str() const; +}; + +MIGRAPHX_GPU_EXPORT std::size_t find_fast_axis(const shape& input); +MIGRAPHX_GPU_EXPORT std::size_t find_fast_axis(const std::vector& inputs); + +std::string make_transformer_args(std::vector transformers); + +template +std::string make_transformer_args(Ts... xs) +{ + return make_transformer_args({xs.str()...}); +} + +std::string +generate_pointwise(const module& pm, const std::string& name, bool always_return_tuple = false); + +std::string generate_reduce(module m, const std::string& name); + +std::string generate_name_from_ops(const module& m, const std::string& postname = ""); + +struct reduce_op +{ + std::vector inputs = {}; + std::string reduction = ""; + std::string init = "0"; + std::string read = "op::id{}"; + std::string write = "op::id{}"; + + void set(instruction_ref ins, const operation& op); + void set(const std::string& name, const shape& input, const shape& output); + std::string str() const; + static std::string generate(instruction_ref ins, const std::vector& x); +}; + +} // namespace gen +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_GEN_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip.hpp new file mode 100644 index 000000000..d2fa4bcb6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip.hpp @@ -0,0 +1,75 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP +#define MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +#ifdef MIGRAPHX_USE_HIPRTC +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC); +#endif + +struct hiprtc_src_file +{ + hiprtc_src_file() = default; + hiprtc_src_file(const src_file& s) : path(s.path.string()), content(s.content) {} + std::string path; + std::string content; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.path, "path"), f(self.content, "content")); + } +}; + +MIGRAPHX_GPU_EXPORT bool hip_has_flags(const std::vector& flags); + +MIGRAPHX_GPU_EXPORT std::vector> +compile_hip_src_with_hiprtc(std::vector srcs, + const std::vector& params, + const std::string& arch); + +MIGRAPHX_GPU_EXPORT std::vector> +compile_hip_src(const std::vector& srcs, + const std::vector& params, + const std::string& arch); + +MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp new file mode 100644 index 000000000..60b8f20a8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp @@ -0,0 +1,94 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_compile_options +{ + std::size_t global; + std::size_t local; + std::vector inputs; + shape output; + std::string kernel_name = "kernel"; + std::vector params = {}; + std::vector virtual_inputs = {}; + std::vector additional_src_files = {}; + std::int64_t output_arg = -1; + + /** + * @brief Set the launch parameters but allow v to override the values + * + * @param v A value class which can have a "global" and/or "local" keys to override the default + * global and local + * @param compute_global A function used to compute the global based on the local + * @param default_local The defaul local to use if its missing from the v parameter + */ + void set_launch_params(const value& v, + const std::function& compute_global, + std::size_t default_local = 1024); + + void + set_launch_params(const value& v, std::size_t default_global, std::size_t default_local = 1024) + { + set_launch_params( + v, [=](auto) { return default_global; }, default_local); + } + + void emplace_param(std::string_view s) { params.emplace_back(s); } +}; + +/// Compute global for n elements, but max out on target-specific upper limit +MIGRAPHX_GPU_EXPORT std::function +compute_global_for(context& ctx, std::size_t n, std::size_t over = 1); + +MIGRAPHX_GPU_EXPORT operation compile_hip_code_object(context& ctx, + const std::string& content, + hip_compile_options options); + +MIGRAPHX_GPU_EXPORT std::size_t +compute_block_size(context& ctx, std::size_t n, std::size_t max_block_size = 1024); + +template +std::string generate_index_ints(const std::vector& v) +{ + return "index_ints<" + to_string_range(v) + ">{}"; +} + +MIGRAPHX_GPU_EXPORT std::string generate_make_shape(const shape& s); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_HIP_CODE_OBJECT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hipblaslt.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hipblaslt.hpp new file mode 100644 index 000000000..380fafa44 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_hipblaslt.hpp @@ -0,0 +1,77 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; +struct context; +struct operation; + +namespace gpu { + +struct hipblaslt_op +{ + operation op = op::identity{}; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op")); + } + + std::string name() const { return "gpu::hipblaslt_op"; } + + shape compute_shape(std::vector inputs) const + { + inputs.push_back(inputs.back()); + return op.compute_shape(inputs); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +MIGRAPHX_REGISTER_OP(hipblaslt_op); + +struct compile_hipblaslt +{ + context* ctx = nullptr; + std::string name() const { return "gpu::compile_hipblaslt"; } + void apply(module& m) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_HIPBLASLT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_miopen.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_miopen.hpp new file mode 100644 index 000000000..03dd669e5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_miopen.hpp @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_MIOPEN_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_MIOPEN_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; +struct context; +struct operation; + +namespace gpu { + +struct compile_miopen +{ + context* ctx = nullptr; + std::string name() const { return "gpu::compile_miopen"; } + void apply(module& m) const; + std::size_t compile(operation& op, instruction_ref ins) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_MIOPEN_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_ops.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_ops.hpp new file mode 100644 index 000000000..6986822a5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_ops.hpp @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace gpu { + +struct context; + +struct MIGRAPHX_GPU_EXPORT compile_ops +{ + context* ctx = nullptr; + bool exhaustive_tune = false; + std::string name() const { return "gpu::compile_ops"; } + void apply(module& m) const; +}; + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_OPS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_pointwise.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_pointwise.hpp new file mode 100644 index 000000000..8e6dc229a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compile_pointwise.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP +#define MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +operation +compile_pointwise(context& ctx, const std::vector& in_shapes, const_module_ref pm); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_COMPILE_POINTWISE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compiler.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compiler.hpp new file mode 100644 index 000000000..30f927051 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/compiler.hpp @@ -0,0 +1,201 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_COMPILER_HPP +#define MIGRAPHX_GUARD_GPU_COMPILER_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct compiler_replace +{ + compiler_replace() = default; + + compiler_replace(const operation& op) : code_objects{{op}} {} + + template + compiler_replace(const operation& op, F f) : code_objects{{op}}, replace_fn(make_replace(f)) + { + } + + template + compiler_replace(const operation& op, F f, Trace t) + : code_objects{{op}}, replace_fn(make_replace(f)), trace_fn(t) + { + } + + template + compiler_replace(const std::vector& op, F f) + : code_objects{op}, replace_fn(make_replace_all(f)) + { + } + + template + compiler_replace(const std::vector& op, F f, Trace t) + : code_objects{op}, replace_fn(make_replace_all(f)), trace_fn(t) + { + } + + std::vector code_objects = {}; + std::function replace_fn = + nullptr; + std::function trace_fn = nullptr; + + template + static auto make_replace(F f) + { + return [=](const compiler_replace& cr, module& m, instruction_ref ins) { + f(m, ins, cr.code_objects.front()); + }; + } + + template + static auto make_replace_all(F f) + { + return [=](const compiler_replace& cr, module& m, instruction_ref ins) { + f(m, ins, cr.code_objects); + }; + } + + void replace(module& m, instruction_ref ins) const + { + if(replace_fn) + replace_fn(*this, m, ins); + else + { + if(code_objects.size() != 1) + { + MIGRAPHX_THROW("Provide custom replace function to insert multiple code objects\n"); + } + m.replace_instruction(ins, code_objects.front(), ins->inputs()); + } + } + + void trace(std::ostream& os, instruction_ref ins) const + { + if(trace_fn) + trace_fn(os, ins); + } +}; + +using compiler_compile = + std::function; +using compiler_compile_op = + std::function& inputs, const value&)>; +using compiler_tuning_config = + std::function(context&, instruction_ref, const operation&, bool)>; + +MIGRAPHX_GPU_EXPORT void register_compiler(const std::string& name, + compiler_compile c, + compiler_compile_op cop, + compiler_tuning_config ctg); + +MIGRAPHX_GPU_EXPORT bool has_compiler_for(const std::string& name); +MIGRAPHX_GPU_EXPORT compiler_replace compile(context& ctx, + instruction_ref ins, + const operation& op, + const value& solution); +MIGRAPHX_GPU_EXPORT operation compile_op(const std::string& name, + context& ctx, + const std::vector& inputs, + const value& v); +MIGRAPHX_GPU_EXPORT optional +get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive); + +template +void register_compiler() +{ + T c; + for(auto&& name : c.names()) + { + register_compiler( + name, + [=](auto&&... xs) { + return c.invoke_compile(rank<1>{}, std::forward(xs)...); + }, + [=](auto&&... xs) { return c.compile_op(std::forward(xs)...); }, + [=](auto&&... 
xs) { return c.get_tuning_config(std::forward(xs)...); }); + } +} + +struct register_compiler_action +{ + template + static void apply() + { + register_compiler(); + } +}; + +template +using auto_register_compiler = auto_register; + +template +struct compiler : auto_register_compiler +{ + const Derived& derived() const { return static_cast(*this); } + optional + get_tuning_config(context&, instruction_ref, const operation&, bool) const + { + return nullopt; + } + operation compile_op(context&, const std::vector&, const value&) const { return {}; } + + template + auto invoke_compile( + rank<1>, context& ctx, instruction_ref ins, operation op, const value& solution) const + -> decltype(std::declval().compile(ctx, ins, std::move(op), solution)) + { + return derived().compile(ctx, ins, std::move(op), solution); + } + + template + auto invoke_compile( + rank<0>, context& ctx, instruction_ref ins, operation op, const value& solution) const + -> decltype(std::declval().compile(ctx, ins, std::move(op))) + { + assert(solution.empty()); + (void)solution; + return derived().compile(ctx, ins, std::move(op)); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_GPU_COMPILER_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp new file mode 100644 index 000000000..d5d2f1197 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/concat_gpu_opt.hpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP +#define MIGRAPHX_GUARD_RTGLIB_CONCAT_GPU_OPT_HPP + +#include +#include +#include + +namespace migraphx { +namespace gpu { + +struct concat_gpu_optimization +{ + std::string allocate() const { return "hip::allocate"; } + optional get_concat(const migraphx::operation& op) const + { + if(op.name() != "gpu::precompile_op") + return nullopt; + auto r = from_value(op.to_value().at("op")); + if(r.name() == "concat") + return any_cast(r); + return nullopt; + } +}; + +} // namespace gpu + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/config.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/config.hpp new file mode 100644 index 000000000..cd8c6702b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/config.hpp @@ -0,0 +1,31 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef MIGRAPHX_GUARD_GPU_CONFIG_HPP +#define MIGRAPHX_GUARD_GPU_CONFIG_HPP + +#include +#include + +#endif // MIGRAPHX_GUARD_GPU_CONFIG_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/context.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/context.hpp new file mode 100644 index 000000000..7a1a7d34b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/context.hpp @@ -0,0 +1,399 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP +#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP + +#include +#include +#include +#if !MIGRAPHX_USE_MIOPEN +#include +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NULL_STREAM) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_NSTREAMS) + +using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy); + +struct hip_device +{ + hip_device() : device_props{} { add_stream(); } + + hip_device(std::size_t id, std::size_t n) : device_id(id) + { + auto status = hipGetDeviceProperties(&device_props, device_id); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to allocate stream"); + + for(std::size_t i = 0; i < n; i++) + add_stream(); + } + + struct stream + { + using hip_stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy); + + stream() {} + + stream(std::size_t device_number) : id(device_number) {} + + void setup() const { set_device(id); } + + static hip_stream_ptr create_stream() + { + hipStream_t result = nullptr; + auto status = hipStreamCreateWithFlags(&result, hipStreamNonBlocking); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to allocate stream"); + return hip_stream_ptr{result}; + } + + hipStream_t get() + { + if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{})) + { + setup(); + if(s == nullptr) + s = create_stream(); + assert(s.get() != nullptr); + return s.get(); + } + return nullptr; + } + +#if MIGRAPHX_USE_MIOPEN + auto create_miopen_handle() + { + if(not enabled(MIGRAPHX_ENABLE_NULL_STREAM{})) + return make_obj(&miopenCreateWithStream, get()); + else + return make_obj(&miopenCreate); + } + + auto get_miopen() + { + setup(); + if(mihandle == nullptr) + mihandle = create_miopen_handle(); + assert(mihandle.get() != nullptr); + return mihandle.get(); + } +#endif + +#if MIGRAPHX_USE_ROCBLAS + auto get_rocblas() + { + setup(); + if(rbhandle == nullptr) + rbhandle = create_rocblas_handle_ptr(get()); + assert(rbhandle.get() != nullptr); + return rbhandle.get(); + } +#endif + +#if MIGRAPHX_USE_HIPBLASLT + auto get_hipblaslt() + { + setup(); + if(hblthandle == nullptr) + { + hblthandle = create_hipblaslt_handle_ptr(); + } + assert(hblthandle.get() != nullptr); + return hblthandle.get(); + } +#endif + + void wait() const + { + if(s == nullptr) + return; + setup(); + auto status = hipStreamSynchronize(s.get()); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to wait."); + } + + void wait(hipEvent_t event) + { + setup(); + auto status = hipStreamWaitEvent(get(), event, 0); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to wait."); + } + + void record(hipEvent_t event) + { + setup(); + auto status = hipEventRecord(event, get()); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to record."); + } + + private: + std::size_t id = 0; + shared s = nullptr; +#if MIGRAPHX_USE_MIOPEN + shared mihandle = nullptr; +#endif +#if MIGRAPHX_USE_ROCBLAS + shared rbhandle = nullptr; +#endif + +#if MIGRAPHX_USE_HIPBLASLT + shared hblthandle = nullptr; +#endif + }; + + void add_stream() { streams.emplace_back(device_id); } + + stream& get_stream() { return streams.at(current_stream); } + + 
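+    // Note: the indexed get_stream(n) overloads below fetch a specific stream created by
+    // add_stream(); set_stream()/current_stream control which stream the unindexed
+    // accessors return.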
stream& get_stream(std::size_t n) { return streams.at(n); } + + const stream& get_stream() const { return streams.at(current_stream); } + + const stream& get_stream(std::size_t n) const { return streams.at(n); } + + void set_stream(std::size_t n) { current_stream = n; } + + std::size_t nstreams() const { return streams.size(); } + + std::size_t stream_id() const { return current_stream; } + + std::string get_device_name() const { return device_props.gcnArchName; } + + std::string get_gfx_name() const { return trim(split_string(get_device_name(), ':').front()); } + + std::size_t get_device_major() const { return device_props.major; } + + std::size_t get_device_minor() const { return device_props.minor; } + + std::size_t get_cu_count() const { return device_props.multiProcessorCount; } + + std::size_t get_max_workitems_per_cu() const + { + return device_props.maxThreadsPerMultiProcessor; + } + + std::size_t get_max_workitems_per_block() const { return device_props.maxThreadsPerBlock; } + + std::size_t get_wavefront_size() const { return device_props.warpSize; } + + private: + std::size_t device_id = 0; + std::size_t current_stream = 0; + std::vector streams; + hipDeviceProp_t device_props; + + public: + std::unordered_map preallocations{}; +}; + +struct context +{ + struct auto_save_problem_cache : problem_cache + { + auto_save_problem_cache() : problem_cache{} {} + + bool auto_save = false; + + auto_save_problem_cache(const auto_save_problem_cache&) = delete; + auto_save_problem_cache& operator=(const auto_save_problem_cache&) = delete; + virtual ~auto_save_problem_cache() + { + if(auto_save) + this->save(); + } + }; + context(std::size_t device_id = 0, std::size_t n = value_of(MIGRAPHX_NSTREAMS{}, 1)) + : current_device(std::make_shared(device_id, n)), + begin_event(create_event()), + finish_event(create_event()), + pc(std::make_shared()) + { + } + + hip_device& get_current_device() + { + assert(current_device != nullptr); + return *current_device; + } + + const hip_device& get_current_device() const + { + assert(current_device != nullptr); + return *current_device; + } + + bool get_exhaustive_tune_flag() const { return exhaustive_tune; } + + void set_exhaustive_tune_flag(bool t) { exhaustive_tune = t; } + + hip_device::stream& get_stream() { return get_current_device().get_stream(); } + hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); } + + const hip_device::stream& get_stream() const { return get_current_device().get_stream(); } + const hip_device::stream& get_stream(std::size_t n) const + { + return get_current_device().get_stream(n); + } + + void set_stream(std::size_t n) { get_current_device().set_stream(n); } + + void create_events(std::size_t num_of_events) + { + for(std::size_t i = events.size(); i < num_of_events + 1; ++i) + events.emplace_back(create_event()); + } + + hipEvent_t get_event(std::size_t i) const { return events.at(i).get(); } + + std::vector literals{}; + void finish() const { get_stream().wait(); } + + static hip_event_ptr create_event() + { + hipEvent_t event; + auto status = hipEventCreateWithFlags(&event, hipEventDisableTiming); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to create event"); + return hip_event_ptr{event}; + } + + static hip_event_ptr create_event_for_timing() + { + hipEvent_t event; + auto status = hipEventCreate(&event); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to create event"); + return hip_event_ptr{event}; + } + + value to_value() const + { + value result; + result["events"] = 
events.size(); + result["streams"] = current_device->nstreams(); + + return result; + } + + void from_value(const value& v) + { + auto v_events = v.at("events"); + std::size_t n_events = v_events.without_key().to(); + this->create_events(n_events - 1); + + auto v_streams = v.at("streams"); + std::size_t n_streams = v_streams.without_key().to(); + + auto device = get_device_id(); + this->current_device = std::make_shared(device, n_streams); + } + + void wait_for(any_ptr queue) + { + auto status = hipEventRecord(begin_event.get(), queue.get()); + if(status != hipSuccess) + MIGRAPHX_THROW("failed to record " + hip_error(status)); + + get_stream().wait(begin_event.get()); + } + + void finish_on(any_ptr queue) + { + get_stream().record(finish_event.get()); + + auto status = hipStreamWaitEvent(queue.get(), finish_event.get(), 0); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to wait on event " + hip_error(status)); + } + + any_ptr get_queue() { return get_stream().get(); } + + std::pair get_perf_events() const + { + if(measure_perf) + return std::make_pair(start_event.get(), stop_event.get()); + return std::make_pair(nullptr, nullptr); + } + + static float get_elapsed_ms(hipEvent_t start, hipEvent_t stop) + { + float result = 0; + if(start != nullptr and stop != nullptr) + { + auto status = hipEventElapsedTime(&result, start, stop); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed hipEventElapsedTime: " + hip_error(status)); + } + return result; + } + + problem_cache& get_problem_cache() { return *pc; } + void load_problem_cache() + { + pc->load(); + pc->auto_save = true; + } + + private: + // TODO: Make this a vector to support multiple devices + std::shared_ptr current_device; + std::vector> events; + bool exhaustive_tune = false; + bool measure_perf = false; + // for event perf timing + shared start_event = nullptr; + shared stop_event = nullptr; + // for stream syncronization + shared begin_event = nullptr; + shared finish_event = nullptr; + std::shared_ptr pc = nullptr; +}; + +inline void migraphx_to_value(value& v, const context& ctx) { v = ctx.to_value(); } +inline void migraphx_from_value(const value& v, context& ctx) { ctx.from_value(v); } + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/contiguous.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/contiguous.hpp new file mode 100644 index 000000000..638f4571a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/contiguous.hpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP +#define MIGRAPHX_GUARD_RTGLIB_CONTIGUOUS_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct miopen_contiguous : unary_device +{ + std::string name() const { return "gpu::contiguous"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(2); + auto lens = inputs.at(0).lens(); + auto t = inputs.at(0).type(); + return {t, lens}; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/convolution.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/convolution.hpp new file mode 100644 index 000000000..1a6d1bc24 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/convolution.hpp @@ -0,0 +1,352 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +inline shape reshape_if_1d(const shape& input) +{ + shape new_shape{input}; + auto dims = new_shape.lens(); + + if(dims.size() == 3) + { + std::vector new_dims = dims; + new_dims.insert(new_dims.begin() + 2, 1); + new_shape = shape{input.type(), new_dims}; + } + return new_shape; +} +#if MIGRAPHX_USE_MIOPEN +template +struct miopen_convolution +{ + Op op; + shared cd = nullptr; + miopenConvFwdAlgorithm_t algo{}; +#ifdef MIGRAPHX_HAS_FIND_2_API + value::binary solution_object{}; + shared solution_ptr = nullptr; +#endif + uint64_t solution_id = 0; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op"), +#ifdef MIGRAPHX_HAS_FIND_2_API + f(self.solution_object, "solution_object"), +#endif + f(self.algo, "algo"), + f(self.solution_id, "solution_id")); + } + + std::string name() const { return "gpu::" + op.name(); } + + inline shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, op}.has(4); + std::vector conv_inputs(inputs.begin(), inputs.begin() + 2); + check_shapes{conv_inputs, *this} + .max_ndims(5) + .packed_layouts({{0, 1, 2}, {0, 1, 2, 3}, {0, 2, 3, 1}, {0, 1, 2, 3, 4}}) + .same_layout(); + return migraphx::compute_shape(op, conv_inputs); + } + + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape())); + auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape())); + auto y_desc = make_tensor(reshape_if_1d(output_shape)); + auto* miopen_stream_handle = ctx.get_stream().get_miopen(); + auto workspace_size = args[2].get_shape().bytes(); + +#ifdef MIGRAPHX_HAS_FIND_2_API + { + const miopenTensorArgument_t tensor_args[3] = { + {miopenTensorConvolutionX, nullptr, args[0].implicit()}, + {miopenTensorConvolutionW, nullptr, args[1].implicit()}, + {miopenTensorConvolutionY, nullptr, args[3].implicit()}, + }; + + if(solution_ptr.get() == nullptr) + MIGRAPHX_THROW("MIOpen " + op.name() + " : Load MIOpen Solution before running it"); + + auto status = miopenRunSolution(miopen_stream_handle, + solution_ptr.get(), + 3, + tensor_args, + args[2].implicit(), + workspace_size); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + + " : running convolution using find_2.0 failed"); + + return args[3]; + } +#else + // else use immediate mode + if(solution_id == 0) + MIGRAPHX_THROW("MIOpen " + op.name() + " : invalid solution ID"); + + auto status = miopenConvolutionForwardImmediate(miopen_stream_handle, + w_desc.get(), + args[1].implicit(), + x_desc.get(), + args[0].implicit(), + cd.get(), + y_desc.get(), + args[3].implicit(), + args[2].implicit(), + workspace_size, + solution_id); + + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + ": running convolution failed"); + return args[3]; +#endif + } + + void set_conv_descriptor() + { + cd = + (op.name() == "convolution_backwards") ? 
make_convolution_backwards(op) : make_conv(op); + } + + value compile(migraphx::context& ctx, const shape& output, const std::vector& input) + { + set_conv_descriptor(); + auto ws = find(any_cast(ctx), output, input); + return {{"workspace", ws.bytes()}}; + } + + shape find(context& ctx, const shape& output_shape, const std::vector& inputs) + { + shape workspace_shape{}; + auto x_desc = make_tensor(reshape_if_1d(inputs[0])); + auto w_desc = make_tensor(reshape_if_1d(inputs[1])); + auto y_desc = make_tensor(reshape_if_1d(output_shape)); + + auto* miopen_stream_handle = ctx.get_stream().get_miopen(); + std::size_t workspace_size = 0; + auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle, + w_desc.get(), + x_desc.get(), + cd.get(), + y_desc.get(), + &workspace_size); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size"); + + workspace_shape = shape{shape::int8_type, {workspace_size}}; + + const auto& x_shape = inputs[0]; + const auto& w_shape = inputs[1]; + + unsigned long seed = 0; +#ifdef MIGRAPHX_HAS_FIND_2_API + { + auto conv_problem = make_obj( + &miopenCreateConvProblem, cd.get(), miopenProblemDirectionForward); + + set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem); + set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem); + bool preallocate = false; +#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS + // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6 + preallocate = true; +#endif + auto x = preallocate ? to_gpu(generate_argument(x_shape, seed++, random_mode::random)) + : argument{inputs[0]}; + auto w = preallocate ? to_gpu(generate_argument(w_shape, seed++, random_mode::random)) + : argument{inputs[1]}; + auto y = preallocate ? allocate_gpu(output_shape) : argument{inputs[2]}; + auto workspace = + preallocate ? 
allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape); + + set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem); + + const miopenTensorArgument_t tensor_args[3] = { + {miopenTensorConvolutionX, nullptr, x.implicit()}, + {miopenTensorConvolutionW, nullptr, w.implicit()}, + {miopenTensorConvolutionY, nullptr, y.implicit()}, + }; + + solution_ptr = find_solution(miopen_stream_handle, + 3, + tensor_args, + workspace.implicit(), + workspace_size, + conv_problem.get(), + ctx.get_exhaustive_tune_flag()); + + status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size"); + + std::size_t solution_size; + status = miopenGetSolutionSize(solution_ptr.get(), &solution_size); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen" + op.name() + ": Failed to fetch solution size"); + + auto solution_binary = std::vector{}; + solution_binary.resize(solution_size); + + status = miopenSaveSolution(solution_ptr.get(), solution_binary.data()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen" + op.name() + ": Saving solution failed"); + solution_object = value::binary{solution_binary.data(), solution_size}; + return shape{shape::int8_type, {workspace_size}}; + } +#else + auto x = to_gpu(generate_argument(x_shape, seed++, random_mode::random)); + auto w = to_gpu(generate_argument(w_shape, seed++, random_mode::random)); + auto y = allocate_gpu(output_shape); + auto workspace = allocate_gpu(workspace_shape); + int algo_count = 1; + miopenConvAlgoPerf_t perf; + status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(), + x_desc.get(), + x.implicit(), + w_desc.get(), + w.implicit(), + cd.get(), + y_desc.get(), + y.implicit(), + 1, + &algo_count, + &perf, + workspace.implicit(), + workspace_size, + ctx.get_exhaustive_tune_flag()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + " : find convolution failed"); + algo = perf.fwd_algo; + size_t solution_count; + + status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(), + w_desc.get(), + x_desc.get(), + cd.get(), + y_desc.get(), + &solution_count); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution count failed"); + + std::vector solutions(solution_count); + + status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(), + w_desc.get(), + x_desc.get(), + cd.get(), + y_desc.get(), + solution_count, + &solution_count, + solutions.data()); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + ": get solution failed"); + + solution_id = solutions.front().solution_id; + + return shape{shape::int8_type, {perf.memory}}; +#endif + } + + void finalize(context& ctx, const shape& output_shape, const std::vector& inputs) + { +#ifdef MIGRAPHX_HAS_FIND_2_API + { + (void)(ctx); // avoid warnings + (void)(output_shape); + (void)(inputs); + // load solution + if(solution_ptr == nullptr) + { + miopenSolution_t ptr; + auto status = + miopenLoadSolution(&ptr, + reinterpret_cast(solution_object.data()), + solution_object.size()); + solution_ptr = miopen_solution{ptr}; + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen " + op.name() + ": loading convolution solution failed"); + } + } +#else + // Use immediate mode API + { + set_conv_descriptor(); + if(solution_id == 0) + { + // Check that workspace hasn't changed + auto size = inputs.at(2).bytes(); + 
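+                // find() is re-run here to pick a solution_id at finalize time; the workspace
+                // it reports must still fit in the buffer sized at compile time, otherwise the
+                // throw below flags the mismatch.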
auto ws = find(ctx, output_shape, inputs); + if(ws.bytes() > size) + MIGRAPHX_THROW("MIOpen " + op.name() + + ": workspace has changed during finalization."); + } + + auto x_desc = make_tensor(reshape_if_1d(inputs[0])); + auto w_desc = make_tensor(reshape_if_1d(inputs[1])); + auto y_desc = make_tensor(reshape_if_1d(output_shape)); + + auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(), + w_desc.get(), + x_desc.get(), + cd.get(), + y_desc.get(), + solution_id); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen Convolution: compile solution failed"); + } +#endif + } + + inline std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/arg_op.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/arg_op.hpp new file mode 100644 index 000000000..db8505b09 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/arg_op.hpp @@ -0,0 +1,172 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARG_OP_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARG_OP_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +template +struct val_index +{ + T val; + int64_t index; +}; + +template +MIGRAPHX_DEVICE_CONSTEXPR val_index make_val_index(T v) +{ + return {v, -1}; +} + +template +MIGRAPHX_DEVICE_CONSTEXPR val_index make_val_index(T v, int64_t i) +{ + return {v, i}; +} + +struct argmax_op_first_index +{ + template + MIGRAPHX_DEVICE_CONSTEXPR val_index operator()(val_index x, val_index y) const + { + if(x.val > y.val) + return x; + else if(x.val < y.val) + return y; + else + { + return (x.index < y.index) ? x : y; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest(); } +}; + +struct argmax_op_last_index +{ + template + MIGRAPHX_DEVICE_CONSTEXPR val_index operator()(val_index x, val_index y) const + { + if(x.val > y.val) + return x; + else if(x.val < y.val) + return y; + else + { + return (x.index > y.index) ? 
x : y; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest(); } +}; + +struct argmin_op_first_index +{ + template + MIGRAPHX_DEVICE_CONSTEXPR val_index operator()(val_index x, val_index y) const + { + if(x.val < y.val) + return x; + else if(x.val > y.val) + return y; + else + { + return (x.index < y.index) ? x : y; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return highest(); } +}; + +struct argmin_op_last_index +{ + template + MIGRAPHX_DEVICE_CONSTEXPR val_index operator()(val_index x, val_index y) const + { + if(x.val < y.val) + return x; + else if(x.val > y.val) + return y; + else + { + return (x.index > y.index) ? x : y; + } + } + + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return highest(); } +}; + +template +void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis) +{ + auto arg_shape = arg.get_shape(); + auto batch_lens = arg_shape.lens(); + size_t batch_item_num = batch_lens[axis]; + batch_lens[axis] = 1; + migraphx::shape batch_shape{arg_shape.type(), batch_lens}; + migraphx::shape std_arg_shape{arg_shape.type(), arg_shape.lens()}; + + hip_visit_all(arg, std_arg_shape, batch_shape)([&](auto input, auto arg_s, auto batch_s) { + auto* output = device_cast(result.get().data()); + using type = device_type>; + // use one block for items in one batch. + const size_t max_block_size = 256; + const std::size_t block_size = compute_block_size(batch_item_num, max_block_size); + gs_launch(stream, + batch_shape.elements() * block_size, + block_size)([=](auto i, auto idx) __device__ { + auto batch_idx = batch_s.multi(i / block_size); + auto data_idx = batch_idx; + auto init = make_val_index(op.init()); + + auto op_output = + block_reduce(idx, op, init, batch_item_num, [&](auto j) __device__ { + data_idx[axis] = j; + return make_val_index(input[arg_s.index(data_idx)], j); + }); + + if(idx.local == 0) + { + output[batch_s.index(batch_idx)] = op_output.index; + } + }); + }); +} + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmax.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmax.hpp new file mode 100644 index 000000000..be6023737 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmax.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMAX_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT argmax(hipStream_t stream, + const argument& result, + const argument& arg, + int64_t axis, + bool select_last_index); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmin.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmin.hpp new file mode 100644 index 000000000..c205fcf72 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/argmin.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARGMIN_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT argmin(hipStream_t stream, + const argument& result, + const argument& arg, + int64_t axis, + bool select_last_index); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/config.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/config.hpp new file mode 100644 index 000000000..014a5f3a3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/config.hpp @@ -0,0 +1,30 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_CONFIG_HPP + +#include +#include + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/contiguous.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/contiguous.hpp new file mode 100644 index 000000000..5012955de --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/contiguous.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_KERNELS_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_KERNELS_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT contiguous(hipStream_t stream, + const argument& result, + const argument& arg); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/fill.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/fill.hpp new file mode 100644 index 000000000..643b26b2e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/fill.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FILL_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT fill(hipStream_t stream, const argument& result, unsigned long val); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp new file mode 100644 index 000000000..0f08b84b5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/logsoftmax.hpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
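
As an illustrative aside, the device:: entry points declared in these headers (argmax, argmin, contiguous, fill, and those that follow) share one calling convention: the caller allocates the output argument and passes the HIP stream explicitly. A minimal sketch under those assumptions; `stream` and `input` are hypothetical, and `allocate_gpu` is the helper declared later in this patch (hip.hpp).

// Sketch only: allocate a device buffer, then drive two of the declared helpers.
migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}};
migraphx::argument result = migraphx::gpu::allocate_gpu(s);
// Repack a (possibly transposed or broadcast) input into the standard layout:
migraphx::gpu::device::contiguous(stream, result, input);
// Or initialize a buffer to a constant value:
migraphx::gpu::device::fill(stream, result, 0);
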
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LOGSOFTMAX_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT logsoftmax(hipStream_t stream, + const argument& result, + const argument& arg, + int64_t axis); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/multinomial.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/multinomial.hpp new file mode 100644 index 000000000..7998945b0 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/multinomial.hpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_MULTINOMIAL_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT multinomial(hipStream_t stream, + const argument& result, + const argument& arg0, + const argument& arg1); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/nonzero.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/nonzero.hpp new file mode 100644 index 000000000..a470a337a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/nonzero.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NONZERO_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +argument MIGRAPHX_DEVICE_EXPORT nonzero(hipStream_t stream, + const argument& result, + const argument& arg_data); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp new file mode 100644 index 000000000..a51815ec4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/prefix_scan_sum.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP +#define MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT prefix_scan_sum(hipStream_t stream, + const argument& result, + const argument& arg, + int32_t axis, + bool exclusive, + bool reverse); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_DEVICE_PREFIX_SCAN_SUM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/reverse.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/reverse.hpp new file mode 100644 index 000000000..1414314e6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/reverse.hpp @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REVERSE_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +argument MIGRAPHX_DEVICE_EXPORT reverse(hipStream_t stream, + argument result, + argument arg1, + const std::vector& axes); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/rnn_variable_seq_lens.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/rnn_variable_seq_lens.hpp new file mode 100644 index 000000000..950848057 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/rnn_variable_seq_lens.hpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_RNN_VARIABLE_SEQ_LENS_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_RNN_VARIABLE_SEQ_LENS_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_sequence(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl); + +void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_shift_output(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl, + bool is_reverse); + +void MIGRAPHX_DEVICE_EXPORT rnn_var_sl_last_output(hipStream_t stream, + const argument& result, + const argument& arg_hs, + const argument& arg_sl, + bool is_reverse); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/topk.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/topk.hpp new file mode 100644 index 000000000..b1fb4e8e2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device/topk.hpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP +#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TOPK_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +namespace device { + +argument MIGRAPHX_DEVICE_EXPORT topk_smallest(hipStream_t stream, + const argument& val_res, + const argument& ind_res, + const argument& arg, + int64_t k, + int64_t axis); + +argument MIGRAPHX_DEVICE_EXPORT topk_largest(hipStream_t stream, + const argument& val_res, + const argument& ind_res, + const argument& arg, + int64_t k, + int64_t axis); + +} // namespace device +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device_name.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device_name.hpp new file mode 100644 index 000000000..bdd9530aa --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/device_name.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP +#define MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP + +#include +#include + +struct hipDeviceProp_t; + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_GPU_EXPORT std::string get_device_name(); + +MIGRAPHX_GPU_EXPORT int get_device_id(); + +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8fnuz_intrinsics(); + +MIGRAPHX_GPU_EXPORT bool gfx_has_fp8ocp_intrinsics(); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_DEVICE_NAME_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ck.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ck.hpp new file mode 100644 index 000000000..ee726b5b7 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ck.hpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
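
The device_name.hpp declarations above are small runtime queries used to pick a code path per GPU architecture. A minimal usage sketch; the device string shown is hypothetical.

// Sketch only: select an fp8 code path based on the detected gfx architecture.
std::string dev = migraphx::gpu::get_device_name(); // e.g. "gfx90a" (hypothetical value)
if(migraphx::gpu::gfx_has_fp8fnuz_intrinsics())
{
    // use fp8-fnuz kernels
}
else if(migraphx::gpu::gfx_has_fp8ocp_intrinsics())
{
    // use fp8-ocp kernels
}
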
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_FUSE_CK_HPP +#define MIGRAPHX_GUARD_GPU_FUSE_CK_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module_pass_manager; + +namespace gpu { + +struct fuse_ck +{ + context* ctx = nullptr; + std::string name() const { return "gpu::fuse_ck"; } + void apply(module_pass_manager& mpm) const; +}; + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_FUSE_CK_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp new file mode 100644 index 000000000..e1cb8f0bb --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_mlir.hpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP +#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module_pass_manager; + +namespace gpu { + +MIGRAPHX_GPU_EXPORT bool mlir_enabled(); + +struct MIGRAPHX_GPU_EXPORT fuse_mlir +{ + context* ctx = nullptr; + bool enable_extra = false; + std::string name() const { return "gpu::fuse_mlir"; } + void apply(module_pass_manager& mpm) const; +}; + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ops.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ops.hpp new file mode 100644 index 000000000..fc8ef2256 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/fuse_ops.hpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_FUSE_OPS_HPP +#define MIGRAPHX_GUARD_RTGLIB_FUSE_OPS_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace gpu { + +struct MIGRAPHX_GPU_EXPORT fuse_ops +{ + context* ctx = nullptr; + bool fast_math = true; + std::string name() const { return "gpu::fuse_ops"; } + void apply(module& m) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm.hpp new file mode 100644 index 000000000..23f053dd5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm.hpp @@ -0,0 +1,163 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
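
fuse_ck, fuse_mlir, and fuse_ops above are compiler passes: each carries a pointer to the GPU context plus a flag or two and exposes name()/apply(). A rough sketch of how such a pass is scheduled, assuming migraphx::run_passes from the usual pass_manager.hpp; `ctx` and `mod` are hypothetical.

// Sketch only: run the operator-fusion pass over a module.
migraphx::gpu::fuse_ops fuse{&ctx, /*fast_math=*/true};
migraphx::run_passes(mod, {fuse});
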
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; +shape transpose_batch(const shape& s, unsigned trans_batch); +void blas_shape(const shape& s); + +template +struct rocblas_gemm +{ + Op op; + float alpha = 1; + float beta = 0; + bool compute_fp32 = false; + unsigned trans_batch = 0; + int32_t solution_idx = 0; + template + static auto reflect(Self& self, F f) + { + return pack_join(migraphx::reflect(self.op, f), + pack(f(self.alpha, "alpha"), + f(self.beta, "beta"), + f(self.compute_fp32, "compute_fp32"), + f(self.trans_batch, "trans_batch"), + f(self.solution_idx, "solution_idx"))); + } + + std::string name() const + { + if(contains(op.name(), "quant_")) + { + return "gpu::quant_gemm"; + } + return "gpu::gemm"; + } + + shape compute_shape(const std::vector& inputs) const + { + std::vector in_shapes(inputs); + in_shapes.pop_back(); + // When input shapes are A, B, C the GEMM equation is C  =  α AB+ β C where α, β are + // scalars + check_shapes{in_shapes, *this}.has(2, 3); + blas_shape(inputs[0]); + blas_shape(inputs[1]); + // if gemm and add are fused + if(in_shapes.size() > 2) + { + auto cmat_shape = in_shapes.back(); + check_shapes{{cmat_shape}, *this}.not_transposed().not_broadcasted(); + in_shapes.pop_back(); + blas_shape(cmat_shape); + auto op_out_shape = op.compute_shape(in_shapes); + if(cmat_shape.lens() != op_out_shape.lens()) + { + MIGRAPHX_THROW(this->name() + " : dimension mismatch, operand C: {" + + to_string_range(cmat_shape.lens()) + + "}, cannot add to operand A * B: {" + + to_string_range(op_out_shape.lens()) + "}"); + } + if(cmat_shape.type() != op_out_shape.type()) + { + MIGRAPHX_THROW(this->name() + " : operand C type mismatch, operand C is of type: " + + to_string(cmat_shape.type()) + + ", it must be: " + to_string(op_out_shape.type())); + } + return transpose_batch(op_out_shape, trans_batch); + } + + return transpose_batch(op.compute_shape(in_shapes), trans_batch); + } + + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + if(this->name() == "gpu::gemm" or output_shape.type() == migraphx::shape::float_type) + { + gemm_compute(ctx, output_shape, args, alpha, beta, compute_fp32, solution_idx); + } + else + { + 
gemm_compute( + ctx, output_shape, args, int32_t(alpha), int32_t(beta), compute_fp32, solution_idx); + } + return args.back(); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } + + void finalize(context& ctx, const shape& output_shape, const std::vector& input_shapes) + { +#ifdef MIGRAPHX_USE_ROCBLAS_TUNING_API + if(solution_idx == 0) + solution_idx = gemm_default_solution(ctx, output_shape, input_shapes); + if(enabled(MIGRAPHX_ENABLE_GEMM_TUNING{}) or ctx.get_exhaustive_tune_flag()) + { + if(this->name() == "gpu::gemm") + { + solution_idx = gemm_finalize( + ctx, output_shape, input_shapes, alpha, beta, compute_fp32, solution_idx); + } + else + { + solution_idx = gemm_finalize(ctx, + output_shape, + input_shapes, + int32_t(alpha), + int32_t(beta), + compute_fp32, + solution_idx); + } + } +#else + // suppress compiler warnings + (void)ctx, (void)output_shape, (void)input_shapes; +#endif // MIGRAPHX_USE_ROCBLAS_TUNING_API + } +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_RTGLIB_GPU_GEMM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_impl.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_impl.hpp new file mode 100644 index 000000000..76891cbb8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_impl.hpp @@ -0,0 +1,94 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP +#define MIGRAPHX_GUARD_RTGLIB_GEMM_IMPL_HPP + +#include +#include +#include +#include + +// Set this environment variable to "true" to perform GEMM tuning even when the +// --exhaustive-tune option isn't set. Can be used to skip slow convolution tuning. +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_GEMM_TUNING); + +using milliseconds = std::chrono::duration; +using microseconds = std::chrono::duration; + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +/** + * @brief Templated implementations of the compute() and finalize() methods of the Gemm operator. + * For each function there are overloads using either float or int32_t for the arguments + * alpha and beta. + * + * @param ctx . + * @param output_shape . + * @param args . + * @param alpha . + * @param beta . + * @param compute_fp32 . 
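
The rocblas_gemm operator above routes to one of two gemm_compute overloads this header declares just below: float alpha/beta for regular GEMM, int32_t alpha/beta for quantized (int8) GEMM. A minimal sketch of that dispatch outside the operator; the wrapper function and all values are hypothetical.

// Sketch only: C = alpha * A * B + beta * C through the two declared overloads.
void run_gemm(migraphx::gpu::context& ctx,
              const migraphx::shape& out,
              const std::vector<migraphx::argument>& args,
              bool quantized)
{
    float alpha = 1.0f;
    float beta  = 0.0f;
    if(not quantized)
        migraphx::gpu::gemm_compute(ctx, out, args, alpha, beta,
                                    /*compute_fp32=*/false, /*solution_idx=*/0);
    else
        migraphx::gpu::gemm_compute(ctx, out, args, int32_t(alpha), int32_t(beta),
                                    /*compute_fp32=*/false, /*solution_idx=*/0);
}
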
+ */ +void gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + float alpha, + float beta, + bool compute_fp32, + int32_t solution_idx); + +void gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + int32_t alpha, + int32_t beta, + bool compute_fp32, + int32_t solution_idx); + +int32_t gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + bool compute_fp32); + +int32_t gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + int32_t alpha, + int32_t beta, + bool compute_fp32, + int32_t solution_idx); + +int32_t gemm_default_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp new file mode 100644 index 000000000..6a63bde37 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/gemm_softmax_gemm.hpp @@ -0,0 +1,117 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_GEMM_SOFTMAX_GEMM_HPP +#define MIGRAPHX_GUARD_GPU_GEMM_SOFTMAX_GEMM_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct gemm_softmax_gemm +{ + operation op = make_op("dot"); + float scale = 1.0; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op"), f(self.scale, "scale")); + } + + std::string name() const { return "gpu::gemm_softmax_gemm"; } + + void check_gemm_shape(const shape& s) const + { + if(not contains(range(s.strides().rbegin(), s.strides().rbegin() + 3), 1) and + not s.scalar()) + MIGRAPHX_THROW("Invalid shape for " + name()); + } + + shape compute_shape(std::vector inputs, const std::vector&) const + { + check_shapes{inputs, *this}.same_ndims(); + if(inputs.size() < 3) + MIGRAPHX_THROW(name() + ": Expected 3 inputs but got " + to_string(inputs.size())); + + const bool is_bias_enabled = inputs.size() == 4; + const bool is_mul_where = inputs.size() == 5; + auto a = inputs[0]; + auto b = inputs[1]; + auto b1 = inputs.back(); + + for(const auto& input : inputs) + { + check_gemm_shape(input); + } + auto gemm0_shape = op.compute_shape({a, b}); + if(is_mul_where) + { + auto select_cond = inputs[2]; + auto select_const = inputs[3]; + if(select_cond.lens() != select_const.lens()) + { + std::stringstream err_msg; + err_msg << name() << ": has inconsistent where op condition and constant size: " + << select_cond << "!=" << select_const; + MIGRAPHX_THROW(err_msg.str()); + } + if(select_cond.lens() != gemm0_shape.lens()) + { + std::stringstream err_msg; + err_msg << name() << ": has inconsistent where op condition size" + << ". Expected: " << gemm0_shape << ". Given: " << select_cond; + MIGRAPHX_THROW(err_msg.str()); + } + } + if(is_bias_enabled) + { + auto bias_shape = inputs[2]; + if(bias_shape.lens() != gemm0_shape.lens()) + { + std::stringstream err_msg; + err_msg << name() << ": has inconsistent bias size" + << ". Expected: " << gemm0_shape << ". Given: " << bias_shape; + MIGRAPHX_THROW(err_msg.str()); + } + } + + return op.compute_shape({gemm0_shape, b1}); + } + + static bool is_ck_supported_type(shape::type_t t) { return contains({shape::half_type}, t); } + static bool is_mlir_supported_type(shape::type_t t) + { + return contains({shape::type_t::float_type, shape::half_type}, t); + } +}; + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_GEMM_SOFTMAX_GEMM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/group_query_attention.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/group_query_attention.hpp new file mode 100644 index 000000000..b7690c4bf --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/group_query_attention.hpp @@ -0,0 +1,135 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
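
gemm_softmax_gemm above fuses the attention pattern dot(softmax(scale * dot(A, B) [+ bias]), B1), and its compute_shape chains the two dot shapes after validating the optional bias/where inputs. A worked shape example with hypothetical attention sizes (batch 2, heads 8, sequence 64, head dim 32):

// Sketch only: Q*K^T is {2,8,64,64}; the softmax keeps that shape; dot with V gives {2,8,64,32}.
migraphx::shape q{migraphx::shape::half_type, {2, 8, 64, 32}};
migraphx::shape kt{migraphx::shape::half_type, {2, 8, 32, 64}};
migraphx::shape v{migraphx::shape::half_type, {2, 8, 64, 32}};
migraphx::gpu::gemm_softmax_gemm op;
op.scale = 0.17678f; // roughly 1/sqrt(32), hypothetical
auto out = op.compute_shape({q, kt, v}, {});
// out.lens() == {2, 8, 64, 32}
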
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_GROUP_QUERY_ATTENTION_HPP +#define MIGRAPHX_GUARD_GPU_GROUP_QUERY_ATTENTION_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct gqa_parameters +{ + float scale; + std::uint32_t batch_size; // Batch size used by input + std::uint32_t sequence_length; // Sequence length used by input + std::uint32_t hidden_size; // Hidden size used by input + std::uint32_t head_size; // Head size + std::uint32_t rotary_embedding_dim; // Rotary embedding dimension. + std::uint32_t num_heads; // num_heads = hidden_size / head_size + std::uint32_t max_sequence_length; // Sequence length used by cos/sin cache + std::uint32_t head_stride; // Head stride + std::uint32_t seq_stride; // Sequence stride + std::uint32_t batch_stride; // Batch stride + std::uint32_t position_ids_format; // Format of position ids - 0 is (1), 1 is (batch_size, + // sequence_length) + std::uint32_t seqlen_present_kv_cache; // Sequence length of present kv-cache (4096 when using + // shared buffer) + bool do_rotary; // Whether to use rotary position embedding. Default value is 0. + std::uint32_t kv_num_heads; // Number of attention heads for k and v + int local_window_size; // left_window_size for local attention. Default value is -1 meaning + // unused. + bool rotary_interleaved; // Rotate using interleaved pattern. Default value is 0 (False). 
+ bool past_present_share_buffer; // Whether to use same buffer for KV-cache inputs and outputs + + std::string make_init_str() const + { + return "MIGRAPHX_MAKE_CONSTANT(float{" + std::to_string(scale) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(batch_size) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(sequence_length) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(hidden_size) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(head_size) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(rotary_embedding_dim) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(num_heads) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(max_sequence_length) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(head_stride) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(seq_stride) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(batch_stride) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(position_ids_format) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(seqlen_present_kv_cache) + + "}), " + "MIGRAPHX_MAKE_CONSTANT(bool{" + + std::to_string(static_cast(do_rotary)) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(uint32_t{" + std::to_string(kv_num_heads) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(int32_t{" + std::to_string(local_window_size) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(bool{" + + std::to_string(static_cast(rotary_interleaved)) + "}), " + + "MIGRAPHX_MAKE_CONSTANT(bool{" + + std::to_string(static_cast(past_present_share_buffer)) + "})"; + } +}; + +static inline gqa_parameters init_params(const std::vector& inputs, const value& v) +{ + auto num_heads = v.at("num_heads").to(); + auto kv_num_heads = v.at("kv_num_heads").to(); + auto do_rotary = v.at("do_rotary").to(); + auto local_window_size = v.at("local_window_size").to(); + auto rotary_interleaved = v.at("rotary_interleaved").to(); + auto scale = v.at("scale").to(); + auto present_kv_seqlen = inputs[1].lens().size() == 4 ? inputs[1].lens()[2] : 0; + + const auto& q_shape = inputs[0]; + auto q_lens = q_shape.lens(); + const std::size_t batch_size = q_lens[0]; + const std::size_t sequence_length = q_lens[2]; + std::size_t head_size = q_lens[3]; + auto q_hidden_size = kv_num_heads * head_size; + + std::size_t rotary_dim = inputs[3].lens()[1] * 2; + auto seq_stride = head_size; + auto head_stride = sequence_length * seq_stride; + auto batch_stride = (num_heads + 2 * kv_num_heads) * head_stride; + auto position_ids_format = sequence_length == 1 ? 
1 : 0; + bool past_present_share_buffer = true; + gqa_parameters gqa_params; + gqa_params.batch_size = batch_size; + gqa_params.sequence_length = sequence_length; + gqa_params.hidden_size = q_hidden_size; + gqa_params.head_size = head_size; + gqa_params.rotary_embedding_dim = rotary_dim; + gqa_params.num_heads = num_heads; + gqa_params.max_sequence_length = sequence_length; + gqa_params.seq_stride = head_size; + gqa_params.head_stride = head_stride; + gqa_params.batch_stride = batch_stride; + gqa_params.position_ids_format = position_ids_format; + gqa_params.seqlen_present_kv_cache = present_kv_seqlen; + gqa_params.do_rotary = do_rotary; + gqa_params.kv_num_heads = kv_num_heads; + gqa_params.local_window_size = local_window_size; + gqa_params.rotary_interleaved = rotary_interleaved; + gqa_params.scale = scale; + gqa_params.past_present_share_buffer = past_present_share_buffer; + + return gqa_params; +} + +} // namespace gpu + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_GROUP_QUERY_ATTENTION_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip.hpp new file mode 100644 index 000000000..acd7525d6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip.hpp @@ -0,0 +1,288 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
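
To make the stride bookkeeping in init_params above concrete, here is a worked example with hypothetical sizes; it only restates the arithmetic the function performs for a packed QKV layout.

// num_heads = 32, kv_num_heads = 8, head_size = 128, sequence_length = 1
// seq_stride   = head_size                                    = 128
// head_stride  = sequence_length * seq_stride                 = 128
// batch_stride = (num_heads + 2 * kv_num_heads) * head_stride = (32 + 16) * 128 = 6144
// position_ids_format = (sequence_length == 1) ? 1 : 0        = 1   (decode step)
// rotary_embedding_dim = lens[1] * 2 of the cos/sin cache input (inputs[3])
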
+ */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_HIP_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +MIGRAPHX_GPU_EXPORT std::string hip_error(int error); + +MIGRAPHX_GPU_EXPORT argument allocate_gpu(const shape& s, bool host = false); + +MIGRAPHX_GPU_EXPORT argument register_on_gpu(const argument& arg); + +MIGRAPHX_GPU_EXPORT argument to_gpu(const argument& arg, bool host = false); + +MIGRAPHX_GPU_EXPORT argument from_gpu(const argument& arg); + +MIGRAPHX_GPU_EXPORT void set_device(std::size_t id); + +MIGRAPHX_GPU_EXPORT void gpu_sync(); +MIGRAPHX_GPU_EXPORT void gpu_sync(const context& ctx); + +MIGRAPHX_GPU_EXPORT void gpu_copy(context& ctx, const argument& src, const argument& dst); +MIGRAPHX_GPU_EXPORT void copy_to_gpu(context& ctx, const argument& src, const argument& dst); +MIGRAPHX_GPU_EXPORT void copy_from_gpu(context& ctx, const argument& src, const argument& dst); + +MIGRAPHX_GPU_EXPORT argument get_preallocation(context& ctx, const std::string& id); + +MIGRAPHX_GPU_EXPORT void gpu_fill(context& ctx, const argument& dst, int value = 0); + +struct hip_allocate +{ + shape s; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.s, "shape")); + } + + std::string name() const { return "hip::allocate"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(0); + return s; + } + argument compute(context&, const shape& output_shape, const std::vector&) const + { + return allocate_gpu(output_shape); + } +}; + +struct hip_fill +{ + int value = 0; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.value, "value")); + } + + std::string name() const { return "hip::fill"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(1); + return inputs.front(); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + gpu_fill(ctx, args.front(), value); + return args.front(); + } + std::ptrdiff_t output_alias(const std::vector&) const { return 0; } +}; + +struct hip_sync_stream +{ + + std::string name() const { return "hip::sync_stream"; } + shape compute_shape(const std::vector& inputs) const + { + if(inputs.empty()) + return {}; + return inputs.front(); + } + + argument compute(const context& ctx, const shape&, const std::vector& args) const + { + gpu_sync(ctx); + if(args.empty()) + return {}; + return args.front(); + } + + std::ptrdiff_t output_alias(const std::vector& args) const + { + if(args.empty()) + return -1; + return 0; + } +}; + +struct hip_copy_to_gpu +{ + std::string name() const { return "hip::copy_to_gpu"; } + shape compute_shape(std::vector inputs) const + { + check_shapes{inputs, *this, true}.has(1, 2).same_type(); + return inputs.at(0); + } + argument compute(context& ctx, const shape&, const std::vector& args) const + { + auto input = register_on_gpu(args[0]); + if(args.size() == 1) + return input; + argument result = args[1].share(); + if(result.get_shape().dynamic()) + { + result = result.reshape(args[0].get_shape()); + } + gpu_copy(ctx, input, result); + // Associate the input since it was registered with hip + return {result.get_shape(), [input, result]() mutable { return result.data(); }}; + } + std::ptrdiff_t output_alias(const std::vector& args) const + { + if(args.size() == 1) + return -1; + return 1; + } +}; + +struct hip_copy_from_gpu +{ + std::string 
name() const { return "hip::copy_from_gpu"; } + shape compute_shape(std::vector inputs) const + { + check_shapes{inputs, *this, true}.has(1, 2).same_type(); + return inputs.at(0); + } + argument + compute(context& ctx, const dyn_output& dyn_out, const std::vector& args) const + { + if(args.size() == 1) + { + argument result = allocate_gpu(dyn_out.computed_shape, true); + gpu_copy(ctx, args[0], result); + return result; + } + argument input = args[0].share(); + if(input.get_shape().dynamic()) + { + input = input.reshape(args[1].get_shape()); + } + copy_from_gpu(ctx, input, args[1]); + return args[1]; + } + std::ptrdiff_t output_alias(const std::vector& args) const + { + if(args.size() == 1) + return -1; + return 1; + } +}; + +struct hip_copy +{ + std::string name() const { return "hip::copy"; } + shape compute_shape(std::vector inputs) const + { + check_shapes{inputs, *this}.has(2).same_type(); + return inputs.at(1); + } + argument compute(context& ctx, const shape&, std::vector args) const + { + gpu_copy(ctx, args[0], args[1]); + return args[1]; + } + std::ptrdiff_t output_alias(const std::vector&) const { return 1; } +}; + +MIGRAPHX_GPU_EXPORT void +store_preallocated_param(context& ctx, const std::string& id, const argument& a); + +struct hip_allocate_memory +{ + shape s; + std::string id{}; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.s, "shape"), f(self.id, "id")); + } + + std::string name() const { return "hip::hip_allocate_memory"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(0); + return s; + } + + argument compute(context& ctx, const shape&, const std::vector&) const + { + return get_preallocation(ctx, id); + } + + void finalize(context& ctx, const shape&, const std::vector&) const + { + argument a = allocate_gpu(s); + store_preallocated_param(ctx, id, a); + } +}; + +struct hip_copy_literal +{ + literal l; + std::string id{}; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.l, "literal"), f(self.id, "id")); + } + + std::string name() const { return "hip::hip_copy_literal"; } + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(0); + return l.get_shape(); + } + + argument compute(context& ctx, const shape&, const std::vector&) const + { + return get_preallocation(ctx, id); + } + + void finalize(context& ctx, const shape&, const std::vector&) const + { + argument a = to_gpu(l.get_argument()); + store_preallocated_param(ctx, id, a); + } + friend std::ostream& operator<<(std::ostream& os, const hip_copy_literal& x) + { + os << x.name() << "[id=" << x.id << "]"; + return os; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm.hpp new file mode 100644 index 000000000..8c3d67bcd --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm.hpp @@ -0,0 +1,146 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
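
The free functions declared at the top of hip.hpp (allocate_gpu, to_gpu, from_gpu, gpu_sync, ...) are what the hip_copy_* and hip_allocate operators above wrap. A minimal host-to-device round trip as a sketch; the data is hypothetical and the argument(shape, pointer) constructor is assumed from the core library.

// Sketch only: copy host data to the GPU, leave room for a result, copy back.
migraphx::shape s{migraphx::shape::float_type, {4, 4}};
std::vector<float> host(s.elements(), 1.0f);
migraphx::argument host_arg{s, host.data()};

auto device_in  = migraphx::gpu::to_gpu(host_arg);      // host -> device copy
auto device_out = migraphx::gpu::allocate_gpu(s);       // uninitialized device buffer
// ... run GPU ops that read device_in and write device_out ...
migraphx::gpu::gpu_sync();                               // wait for completion
auto result = migraphx::gpu::from_gpu(device_out);       // device -> host copy
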
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_HIP_GEMM_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_HIP_GEMM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; +void blas_shape_hip(const shape& s); +shape transpose_batch_hip(const shape& s, unsigned trans_batch); + +template +struct hip_gemm +{ + Op op; + float alpha = 1; + float beta = 0; + unsigned trans_batch = 0; + int32_t solution_idx = 0; + + template + static auto reflect(Self& self, F f) + { + return pack_join(migraphx::reflect(self.op, f), + pack(f(self.alpha, "alpha"), + f(self.beta, "beta"), + f(self.trans_batch, "trans_batch"), + f(self.solution_idx, "solution_idx"))); + } + + std::string name() const + { + if(contains(op.name(), "quant_")) + { + return "gpu::hip_quant_gemm"; + } + return "gpu::hip_gemm"; + } + + shape compute_shape(const std::vector& inputs) const + { + std::vector in_shapes(inputs); + in_shapes.pop_back(); + in_shapes.pop_back(); + // When input shapes are A, B, C the GEMM equation is C  =  α AB+ β C where α, β are + // scalars + check_shapes{in_shapes, *this}.has(2, 3); + blas_shape_hip(inputs[0]); + blas_shape_hip(inputs[1]); + // if gemm and add are fused + if(in_shapes.size() > 2) + { + auto cmat_shape = in_shapes.back(); + check_shapes{{cmat_shape}, *this}.not_transposed().not_broadcasted(); + in_shapes.pop_back(); + blas_shape_hip(cmat_shape); + auto op_out_shape = op.compute_shape(in_shapes); + if(cmat_shape.lens() != op_out_shape.lens()) + { + MIGRAPHX_THROW(this->name() + " : dimension mismatch, operand C: {" + + to_string_range(cmat_shape.lens()) + + "}, cannot add to operand A * B: {" + + to_string_range(op_out_shape.lens()) + "}"); + } + if(cmat_shape.type() != op_out_shape.type()) + { + MIGRAPHX_THROW(this->name() + " : operand C type mismatch, operand C is of type: " + + to_string(cmat_shape.type()) + + ", it must be: " + to_string(op_out_shape.type())); + } + return transpose_batch_hip(op_out_shape, trans_batch); + } + + return transpose_batch_hip(op.compute_shape(in_shapes), trans_batch); + } + + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const + { + hip_gemm_compute(ctx, output_shape, args, alpha, beta, solution_idx); + return args.back(); + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + 
} + + void finalize(context& ctx, const shape& output_shape, const std::vector& input_shapes) + { + if(solution_idx == 0) + solution_idx = hip_gemm_default_solution(ctx, output_shape, input_shapes); + if(enabled(MIGRAPHX_ENABLE_HIP_GEMM_TUNING{}) or ctx.get_exhaustive_tune_flag()) + { + solution_idx = + hip_gemm_finalize(ctx, output_shape, input_shapes, alpha, beta, solution_idx); + } + } + + value + compile(migraphx::context& ctx, const shape& output, const std::vector& input_shapes) + { + finalize(any_cast(ctx), output, input_shapes); + size_t ws = hip_gemm_workspace_size( + any_cast(ctx), output, input_shapes, alpha, beta, solution_idx); + return {{"workspace", ws}}; + } +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_RTGLIB_GPU_HIP_GEMM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm_impl.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm_impl.hpp new file mode 100644 index 000000000..f26d594d8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hip_gemm_impl.hpp @@ -0,0 +1,82 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_HIP_GEMM_IMPL_HPP +#define MIGRAPHX_GUARD_RTGLIB_HIP_GEMM_IMPL_HPP + +#include +#include +#include + +// Set this environment variable to "true" to perform GEMM tuning even when the +// --exhaustive-tune option isn't set. Can be used to skip slow convolution tuning. +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIP_GEMM_TUNING); + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using milliseconds = std::chrono::duration; +using microseconds = std::chrono::duration; + +/** + * @brief Templated implementations of the compute() and finalize() methods of the Gemm operator. + * For each function there are overloads using either float or int32_t for the arguments + * alpha and beta. + * + * @param ctx . + * @param output_shape . + * @param args . + * @param alpha . + * @param beta . 
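
A compact restatement, as an illustrative sketch, of the tuning flow implied by hip_gemm::finalize and the declarations above:

// 1. With solution_idx == 0, hip_gemm_default_solution() supplies a baseline index.
// 2. If MIGRAPHX_ENABLE_HIP_GEMM_TUNING is set (or exhaustive tuning is enabled),
//    hip_gemm_finalize() benchmarks candidate solutions and returns the fastest index,
//    which is stored back into solution_idx.
// 3. compile() then asks hip_gemm_workspace_size() for the scratch space the chosen
//    solution needs and reports it as {"workspace", ws}.
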
+ */ +void hip_gemm_compute(context& ctx, + const shape& output_shape, + const std::vector& args, + float alpha, + float beta, + int32_t solution_idx); + +int32_t hip_gemm_finalize(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + int32_t solution_idx); + +int32_t hip_gemm_default_solution(context& ctx, + const shape& output_shape, + const std::vector& input_shapes); + +size_t hip_gemm_workspace_size(context& ctx, + const shape& output_shape, + const std::vector& input_shapes, + float alpha, + float beta, + int32_t solution_idx); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hipblaslt.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hipblaslt.hpp new file mode 100644 index 000000000..49d41bf4d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/hipblaslt.hpp @@ -0,0 +1,109 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_HIPBLASLT_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_HIPBLASLT_HPP +#include +#include +#include +#include +#if MIGRAPHX_USE_HIPBLASLT +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// TODO: Remove hipblas_status_to_string() function when hipblaslt +// provides an API for doing this in hipBLASLt. 
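Illustrative sketch (not part of the patch): the four hip_gemm_impl.hpp declarations above are meant to be driven in this order by the hip_gemm operator shown earlier; ctx, out_shape, in_shapes and args stand in for the values that finalize()/compute() already receive, and tuning_requested is a placeholder for the env-var / exhaustive-tune check.

    int32_t idx = hip_gemm_default_solution(ctx, out_shape, in_shapes); // heuristic pick
    if(tuning_requested)
        idx = hip_gemm_finalize(ctx, out_shape, in_shapes, 1.0f, 0.0f, idx); // benchmark candidates
    size_t ws = hip_gemm_workspace_size(ctx, out_shape, in_shapes, 1.0f, 0.0f, idx);
    // at run time, the selected solution is replayed:
    hip_gemm_compute(ctx, out_shape, args, 1.0f, 0.0f, idx);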
+ +// Convert hipblas_status to string +inline const char* hipblas_status_to_string(hipblasStatus_t status) +{ + switch(status) + { + case HIPBLAS_STATUS_SUCCESS: return "HIPBLAS_STATUS_SUCCESS"; + case HIPBLAS_STATUS_NOT_INITIALIZED: return "HIPBLAS_STATUS_NOT_INITIALIZED"; + case HIPBLAS_STATUS_ALLOC_FAILED: return "HIPBLAS_STATUS_ALLOC_FAILED"; + case HIPBLAS_STATUS_INVALID_VALUE: return "HIPBLAS_STATUS_INVALID_VALUE"; + case HIPBLAS_STATUS_MAPPING_ERROR: return "HIPBLAS_STATUS_MAPPING_ERROR"; + case HIPBLAS_STATUS_EXECUTION_FAILED: return "HIPBLAS_STATUS_EXECUTION_FAILED"; + case HIPBLAS_STATUS_INTERNAL_ERROR: return "HIPBLAS_STATUS_INTERNAL_ERROR"; + case HIPBLAS_STATUS_NOT_SUPPORTED: return "HIPBLAS_STATUS_NOT_SUPPORTED"; + case HIPBLAS_STATUS_ARCH_MISMATCH: return "HIPBLAS_STATUS_ARCH_MISMATCH"; + case HIPBLAS_STATUS_HANDLE_IS_NULLPTR: return "HIPBLAS_STATUS_HANDLE_IS_NULLPTR"; + case HIPBLAS_STATUS_INVALID_ENUM: return "HIPBLAS_STATUS_INVALID_ENUM"; + case HIPBLAS_STATUS_UNKNOWN: return "HIPBLAS_STATUS_UNKNOWN"; + } + return ""; +} + +template +inline auto hipblaslt_invoke(F f, Ts... xs) +{ + // Call the function `f` with `xs...` and capture the status + auto status = f(xs...); + + if(status != HIPBLAS_STATUS_SUCCESS) + { + std::string error_message = + "hipBLAS error: '" + std::string(hipblas_status_to_string(status)) + "'(" + + std::to_string(status) + ") at " + __FILE__ + ":" + std::to_string(__LINE__); + MIGRAPHX_THROW(EXIT_FAILURE, error_message); + } + return status; +} + +// Invoke a hipBLASLt call. If used to validate a call, set fatal_error = false to prevent +// throwing an exception on failure. +template +auto hipblaslt_invoke(F f, Pack p, Ts... xs, bool fatal_error = true) +{ + return p([=](auto... ws) { + auto status = f(ws..., xs...); + if(status != HIPBLAS_STATUS_SUCCESS) + { + if(fatal_error) + { + MIGRAPHX_THROW("hipblaslt_invoke: hipBlasLt call failed with status " + + std::to_string(status)); + } + } + return status; + }); +} + +using hipblaslt_handle_ptr = MIGRAPHX_MANAGE_PTR(hipblasLtHandle_t, hipblasLtDestroy); +using hipblaslt_preference_ptr = MIGRAPHX_MANAGE_PTR(hipblasLtMatmulPreference_t, + hipblasLtMatmulPreferenceDestroy); + +hipblaslt_handle_ptr create_hipblaslt_handle_ptr(); +hipblaslt_preference_ptr create_hipblaslt_preference_ptr(); +bool hipblaslt_supported(); +const size_t hipblaslt_workspace_size = 2 * 128 * 1024 * 1024; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_USE_HIPBLASLT +#endif // MIGRAPHX_GUARD_MIGRAPHLIB_HIPBLASLT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/kernel.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/kernel.hpp new file mode 100644 index 000000000..63accdea4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/kernel.hpp @@ -0,0 +1,80 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
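For illustration only, returning to hipblaslt_invoke above: the single-function overload converts any status other than HIPBLAS_STATUS_SUCCESS into a MIGraphX exception carrying the readable status string. A plausible call, assuming the standard hipBLASLt preference-attribute API:

    uint64_t max_ws = hipblaslt_workspace_size;
    auto pref       = create_hipblaslt_preference_ptr();
    hipblaslt_invoke(&hipblasLtMatmulPreferenceSetAttribute,
                     pref.get(),
                     HIPBLASLT_MATMUL_PREF_MAX_WORKSPACE_BYTES,
                     &max_ws,
                     sizeof(max_ws)); // throws on failure instead of returning an error code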
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP +#define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct kernel_impl; + +struct MIGRAPHX_GPU_EXPORT kernel +{ + kernel() = default; + kernel(const char* image, const std::string& name); + template + kernel(const std::vector& image, const std::string& name) + : kernel(reinterpret_cast(image.data()), name) + { + } + + void launch(hipStream_t stream, + std::size_t global, + std::size_t local, + const std::vector& args, + hipEvent_t start = nullptr, + hipEvent_t stop = nullptr) const; + + void launch(hipStream_t stream, + std::size_t global, + std::size_t local, + std::vector args, + hipEvent_t start = nullptr, + hipEvent_t stop = nullptr) const; + + template + auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const + { + return [=](auto&&... xs) { + launch(stream, global, local, std::vector{xs...}, zs...); + }; + } + + private: + std::shared_ptr impl; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/logsoftmax.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/logsoftmax.hpp new file mode 100644 index 000000000..5ea23ee27 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/logsoftmax.hpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_LOGSOFTMAX_HPP +#define MIGRAPHX_GUARD_RTGLIB_LOGSOFTMAX_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct hip_logsoftmax +{ + op::logsoftmax op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::logsoftmax"; } + shape compute_shape(const std::vector& inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/loop.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/loop.hpp new file mode 100644 index 000000000..792c84b74 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/loop.hpp @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
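A note on wrappers such as hip_logsoftmax above (an inference from the surrounding code, not stated in the patch): they all return shapes.size() - 1 from output_alias() because an allocation pass appends a preallocated destination buffer as the last argument.

    // Assumed convention, consistent across these headers: for N logical inputs the
    // instruction carries N + 1 arguments,
    //   args = {x0, ..., xN-1, output_buffer};
    // compute() writes into args.back() and returns it, and
    // output_alias(shapes) == shapes.size() - 1 reports that aliasing to the program.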
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_LOOP_HPP +#define MIGRAPHX_GUARD_RTGLIB_LOOP_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_loop +{ + op::loop op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::loop"; } + shape compute_shape(std::vector inputs, std::vector mods) const; + argument + compute(context& ctx, + const shape& output_shape, + const std::vector& args, + const std::vector& mods, + const std::function( + module_ref&, const std::unordered_map&)>& run) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lowering.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lowering.hpp new file mode 100644 index 000000000..6f4a3ca3e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lowering.hpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP +#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_LOWERING_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module_pass_manager; + +namespace gpu { + +/** + * Compiler pass that makes GPU-specific instruction changes. + * * Copies to and from the device if `offload_copy` is true. + * * Maps instructions to their GPU-specific counterparts. + * * Inserts `allocate` instructions before GPU operators. + */ +struct MIGRAPHX_GPU_EXPORT lowering +{ + context* ctx; + bool offload_copy; + std::string name() const { return "gpu::lowering"; } + void apply(module_pass_manager& mpm) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lrn.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lrn.hpp new file mode 100644 index 000000000..8ccda7bba --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/lrn.hpp @@ -0,0 +1,63 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
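A hedged sketch of how the gpu::lowering pass documented above is typically run; run_passes and dead_code_elimination are the generic MIGraphX pass utilities, and this particular pass list is an illustration, not the GPU target's actual get_passes() sequence.

    // Illustrative only -- the real order comes from the gpu target's pass list:
    gpu::context ctx;
    migraphx::run_passes(prog,
                         {gpu::lowering{&ctx, /*offload_copy=*/true},
                          migraphx::dead_code_elimination{}});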
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_LRN_HPP +#define MIGRAPHX_GUARD_RTGLIB_LRN_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; +#if MIGRAPHX_USE_MIOPEN +struct miopen_lrn +{ + op::lrn op; + shared ldesc; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::lrn"; } + shape compute_shape(const std::vector& inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + void finalize(context&, const shape&, const std::vector&); + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/miopen.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/miopen.hpp new file mode 100644 index 000000000..87a561ad6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/miopen.hpp @@ -0,0 +1,343 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
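The miopen_lrn wrapper above only declares its methods; a plausible reading (an assumption, since the corresponding .cpp is not in this hunk) is that finalize() builds the MIOpen descriptor once and caches it:

    // Hypothetical implementation sketch:
    //   void miopen_lrn::finalize(context&, const shape&, const std::vector<shape>&)
    //   {
    //       ldesc = make_lrn(op); // make_lrn() is defined later in miopen.hpp
    //   }
    // compute() can then reuse ldesc on every invocation instead of recreating it.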
+ */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_HPP + +#include +#include +#include +#if MIGRAPHX_USE_MIOPEN +#include +#include +#include +#include + +#include + +#ifdef MIGRAPHX_HAS_FIND_MODE_API +extern "C" miopenStatus_t +miopenHiddenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, // NOLINT + int findMode); // NOLINT +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +using miopen_handle = MIGRAPHX_MANAGE_PTR(miopenHandle_t, miopenDestroy); +using tensor_descriptor = MIGRAPHX_MANAGE_PTR(miopenTensorDescriptor_t, + miopenDestroyTensorDescriptor); +using convolution_descriptor = MIGRAPHX_MANAGE_PTR(miopenConvolutionDescriptor_t, + miopenDestroyConvolutionDescriptor); +using pooling_descriptor = MIGRAPHX_MANAGE_PTR(miopenPoolingDescriptor_t, + miopenDestroyPoolingDescriptor); +using activation_descriptor = MIGRAPHX_MANAGE_PTR(miopenActivationDescriptor_t, + miopenDestroyActivationDescriptor); +using fusion_plan_descriptor = MIGRAPHX_MANAGE_PTR(miopenFusionPlanDescriptor_t, + miopenDestroyFusionPlan); +using fused_operator_args = MIGRAPHX_MANAGE_PTR(miopenOperatorArgs_t, miopenDestroyOperatorArgs); + +using lrn_descriptor = MIGRAPHX_MANAGE_PTR(miopenLRNDescriptor_t, miopenDestroyLRNDescriptor); + +template +Result make_obj(F f, Ts... xs) +{ + typename Result::pointer x = nullptr; + auto status = f(&x, xs...); + Result r{x}; + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MAKE_OBJ: MIOpen call failed"); + return r; +} + +#ifdef MIGRAPHX_HAS_FIND_2_API +using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestroyFindOptions); +using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem); +using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution); + +inline miopen_solution find_solution(miopenHandle_t handle, + size_t num_inputs, + const miopenTensorArgument_t* tensor_args, + void* workspace, + size_t workspace_size, + miopenProblem_t problem, + bool tune = false) +{ + miopenSolution_t solution; + size_t found = 0; + miopen_find_options fo = make_obj(&miopenCreateFindOptions); + if(tune) + { + miopenSetFindOptionTuning(fo.get(), 1); + } +#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS + for(auto i : range(num_inputs)) + { + auto status = miopenSetFindOptionPreallocatedTensor( + fo.get(), tensor_args[i].id, tensor_args[i].buffer); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process"); + } + auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size); + if(status != miopenStatusSuccess) + MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process"); +#else + miopenStatus_t status; + (void)(num_inputs); + (void)(tensor_args); + (void)(workspace_size); + (void)(workspace); +#endif + status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1); + auto result = miopen_solution{solution}; + if(status != miopenStatusSuccess or found == 0) + MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed"); + return result; +} + +inline void set_tensor_descriptor(miopenTensorArgumentId_t name, + tensor_descriptor& desc, + miopen_problem& problem_ptr) +{ + auto status = miopenSetProblemTensorDescriptor(problem_ptr.get(), name, desc.get()); + if(status != miopenStatusSuccess) + { + MIGRAPHX_THROW("setting problem tensor description failed"); + } +} +#endif + +inline tensor_descriptor make_tensor(const migraphx::shape& os) +{ 
+ auto s = os.normalize_standard(); + auto t = make_obj(&miopenCreateTensorDescriptor); + // Convert to ints + std::vector lens(s.lens().begin(), s.lens().end()); + std::vector strides(s.strides().begin(), s.strides().end()); + miopenDataType_t d; + if(s.type() == shape::float_type) + d = miopenFloat; + else if(s.type() == shape::half_type) + d = miopenHalf; + else if(s.type() == shape::int32_type) + d = miopenInt32; + else if(s.type() == shape::int8_type) + d = miopenInt8; + else if(s.type() == shape::bf16_type) + d = miopenBFloat16; + else + MIGRAPHX_THROW("MAKE_TENSOR: unsupported type"); + miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data()); + + return t; +} + +template +inline convolution_descriptor make_conv(const T& op) +{ + auto c = make_obj(&miopenCreateConvolutionDescriptor); + miopenConvolutionMode_t c_mode = miopenConvolution; + if(op.group > 1) + c_mode = miopenGroupConv; + + int kdims = op.kdims(); + std::vector padding(std::max(2, kdims), 0); + std::vector stride(std::max(2, kdims), 1); + std::vector dilation(std::max(2, kdims), 1); + + std::copy_backward(op.padding.begin(), op.padding.begin() + kdims, padding.end()); + std::copy_backward(op.stride.begin(), op.stride.end(), stride.end()); + std::copy_backward(op.dilation.begin(), op.dilation.end(), dilation.end()); + + miopenInitConvolutionNdDescriptor( + c.get(), padding.size(), padding.data(), stride.data(), dilation.data(), c_mode); + if(op.group > 1) + miopenSetConvolutionGroupCount(c.get(), op.group); +#ifdef MIGRAPHX_HAS_FIND_MODE_API + miopenHiddenSetConvolutionFindMode(c.get(), 1); // Normal mode +#endif + return c; +} + +template +inline convolution_descriptor make_convolution_backwards(const T& op) +{ + auto c = make_obj(&miopenCreateConvolutionDescriptor); + miopenConvolutionMode_t c_mode = miopenTranspose; + int kdims = op.kdims(); + std::vector padding(std::max(2, kdims), 0); + std::vector stride(std::max(2, kdims), 1); + std::vector dilation(std::max(2, kdims), 1); + + std::copy_backward(op.padding.begin(), op.padding.end(), padding.end()); + std::copy_backward(op.stride.begin(), op.stride.end(), stride.end()); + std::copy_backward(op.dilation.begin(), op.dilation.end(), dilation.end()); + + miopenInitConvolutionNdDescriptor( + c.get(), padding.size(), padding.data(), stride.data(), dilation.data(), c_mode); + if(op.group > 1) + miopenSetConvolutionGroupCount(c.get(), op.group); + return c; +} + +inline pooling_descriptor make_pooling(const migraphx::op::pooling& op) +{ + miopenPoolingMode_t mode; + if(op.mode == op::pooling_mode::max) + mode = miopenPoolingMax; + else if(op.mode == op::pooling_mode::average) + mode = miopenPoolingAverage; + else + { + std::stringstream ss("Unknown mode for pooling: "); + ss << op.mode; + MIGRAPHX_THROW(ss.str()); + } + if(not std::all_of( + op.dilations.cbegin(), op.dilations.cend(), [](std::size_t d) { return d == 1; })) + { + MIGRAPHX_THROW("Unsupported dilations for pooling: [" + to_string_range(op.dilations) + + "]"); + } + auto p = make_obj(&miopenCreatePoolingDescriptor); + + int kdims = op.kdims(); + std::vector padding(std::max(2, kdims), 0); + std::vector stride(std::max(2, kdims), 1); + std::vector lengths(std::max(2, kdims), 1); + + std::copy_backward(op.padding.begin(), op.padding.begin() + kdims, padding.end()); + std::copy_backward(op.stride.begin(), op.stride.end(), stride.end()); + std::copy_backward(op.lengths.begin(), op.lengths.end(), lengths.end()); + + miopenSetNdPoolingDescriptor( + p.get(), mode, padding.size(), 
lengths.data(), padding.data(), stride.data()); + return p; +} + +inline lrn_descriptor make_lrn(const migraphx::op::lrn& op) +{ + auto ldesc = make_obj(&miopenCreateLRNDescriptor); + miopenSetLRNDescriptor(ldesc.get(), miopenLRNCrossChannel, op.size, op.alpha, op.beta, op.bias); + return ldesc; +} + +inline activation_descriptor make_relu() +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + miopenSetActivationDescriptor(ad.get(), miopenActivationRELU, 0, 0, 0); + return ad; +} + +inline activation_descriptor make_sigmoid() +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + miopenSetActivationDescriptor(ad.get(), miopenActivationLOGISTIC, 0, 0, 0); + return ad; +} + +inline activation_descriptor make_tanh() +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + // onnx operator does not apply additional scaling for tanh + // defaults for alpha and beta are therefore set to 1 + miopenSetActivationDescriptor(ad.get(), miopenActivationTANH, 1, 1, 0); + return ad; +} + +inline activation_descriptor make_abs() +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + miopenSetActivationDescriptor(ad.get(), miopenActivationABS, 0, 0, 0); + return ad; +} + +inline activation_descriptor make_leaky_relu(double alpha) +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + miopenSetActivationDescriptor(ad.get(), miopenActivationLEAKYRELU, alpha, 0, 0); + return ad; +} + +inline activation_descriptor make_elu(double alpha) +{ + auto ad = make_obj(&miopenCreateActivationDescriptor); + miopenSetActivationDescriptor(ad.get(), miopenActivationELU, alpha, 0, 0); + return ad; +} + +inline fusion_plan_descriptor make_fusion_plan(const shape& input) +{ + auto t = make_tensor(input); + return make_obj(&miopenCreateFusionPlan, miopenVerticalFusion, t.get()); +} + +// Temporary hack to workaround memory problems in miopen +inline fusion_plan_descriptor make_fusion_plan(const tensor_descriptor& input) +{ + return make_obj( + &miopenCreateFusionPlan, miopenVerticalFusion, input.get()); +} + +inline fused_operator_args make_fused_args() +{ + return make_obj(&miopenCreateOperatorArgs); +} + +template +auto reflect(miopenActivationDescriptor_t ad, F f) +{ + assert(ad != nullptr); + miopenActivationMode_t mode = miopenActivationPASTHRU; + double alpha = 0.0; + double beta = 0.0; + double gamma = 0.0; + miopenGetActivationDescriptor(ad, &mode, &alpha, &beta, &gamma); + return pack(f(std::move(mode), "mode"), // NOLINT + f(std::move(alpha), "alpha"), // NOLINT + f(std::move(beta), "beta"), // NOLINT + f(std::move(gamma), "gamma")); // NOLINT +} + +template +auto reflect(miopenLRNDescriptor_t lrnd, F f) +{ + assert(lrnd != nullptr); + miopenLRNMode_t mode = miopenLRNWithinChannel; + unsigned int n = 0; + double alpha = 0.0; + double beta = 0.0; + double k = 0.0; + miopenGetLRNDescriptor(lrnd, &mode, &n, &alpha, &beta, &k); + return pack(f(std::move(mode), "mode"), // NOLINT + f(std::move(n), "n"), // NOLINT + f(std::move(alpha), "alpha"), // NOLINT + f(std::move(beta), "beta"), // NOLINT + f(std::move(k), "k")); // NOLINT +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/mlir.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/mlir.hpp new file mode 100644 index 000000000..d1f19c1e8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/mlir.hpp @@ -0,0 +1,80 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro 
Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_MLIR_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct module; +namespace gpu { + +MIGRAPHX_GPU_EXPORT std::string dump_mlir(module m); +MIGRAPHX_GPU_EXPORT std::string dump_mlir(module m, const std::vector& inputs); +MIGRAPHX_GPU_EXPORT void +dump_mlir_to_file(module m, const std::vector& inputs, const fs::path& location); + +MIGRAPHX_GPU_EXPORT bool +is_module_fusible(const module& m, const context& migraphx_ctx, const value& solution); + +struct MIGRAPHX_GPU_EXPORT mlir_code_object +{ + code_object_op cop; + std::vector prefill_indices = {}; + std::vector prefill_values = {}; +}; + +MIGRAPHX_GPU_EXPORT bool is_reduce(const instruction& ins); + +MIGRAPHX_GPU_EXPORT mlir_code_object compile_mlir(const context& migraphx_ctx, + module m, + const std::vector& in_shapes, + const value& solution); + +MIGRAPHX_GPU_EXPORT instruction_ref insert_mlir(module& m, + instruction_ref ins, + code_object_op co, + const std::vector& inputs); + +MIGRAPHX_GPU_EXPORT tuning_config get_tuning_config_mlir(const context& migraphx_ctx, + module m, + const std::vector& inputs, + bool exhaustive); + +MIGRAPHX_GPU_EXPORT void +dump_mlir_to_mxr(module m, const std::vector& inputs, const fs::path& location); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/multinomial.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/multinomial.hpp new file mode 100644 index 000000000..c44d48082 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/multinomial.hpp @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
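Taken together, the mlir.hpp declarations above suggest the offload flow below; this summary is inferred from the signatures rather than stated in the patch.

    // Inferred flow:
    //   1. get_tuning_config_mlir() enumerates candidate solutions for a fused module;
    //   2. compile_mlir() lowers the module for one solution and returns an
    //      mlir_code_object: the code_object_op plus any output buffers to prefill
    //      (prefill_indices / prefill_values);
    //   3. insert_mlir() splices the compiled code object back into the graph in
    //      place of the original instruction;
    //   dump_mlir() / dump_mlir_to_mxr() are debugging aids for the emitted IR.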
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP +#define MIGRAPHX_GUARD_RTGLIB_MULTINOMIAL_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_multinomial +{ + op::multinomial op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::multinomial"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/name.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/name.hpp new file mode 100644 index 000000000..390d7ea0b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/name.hpp @@ -0,0 +1,67 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#ifndef MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
+#define MIGRAPHX_GUARD_RTGLIB_OP_NAME_HPP
+
+#include
+#include
+#include
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+
+template
+struct oper
+{
+    // Function to extract the name part of an operator. For example, if we have
+    // an operation "sin", then get_type_name() will return
+    // "migraphx::version_1::gpu::hip_sin", and this function will return the name
+    // "gpu::sin" as the operator name.
+    std::string name() const
+    {
+        const std::string& name = get_type_name();
+        // search the namespace gpu (::gpu::)
+        auto pos_ns = name.find("::gpu::");
+        if(pos_ns != std::string::npos)
+        {
+            auto pos_name = name.find("hip_", pos_ns + std::string("::gpu::").length());
+            if(pos_name != std::string::npos)
+            {
+                return std::string("gpu::") + name.substr(pos_name + 4);
+            }
+            else
+            {
+                return name.substr(pos_ns + 2);
+            }
+        }
+        return "unknown_operator_name";
+    }
+};
+
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/nonzero.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/nonzero.hpp
new file mode 100644
index 000000000..cfc7e78db
--- /dev/null
+++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/nonzero.hpp
@@ -0,0 +1,62 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
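A couple of concrete inputs make the string handling in oper::name() above easier to follow; the second type name is hypothetical.

    // get_type_name()                         -> name()
    // "migraphx::version_1::gpu::hip_sin"     -> "gpu::sin"        (strips "hip_")
    // "migraphx::version_1::gpu::miopen_lrn"  -> "gpu::miopen_lrn" (no "hip_", kept as-is)
    // a type with no "::gpu::" in its name    -> "unknown_operator_name"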
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP +#define MIGRAPHX_GUARD_RTGLIB_NONZERO_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_nonzero +{ + op::nonzero op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::nonzero"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/oper.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/oper.hpp new file mode 100644 index 000000000..13ac11a3d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/oper.hpp @@ -0,0 +1,168 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_UNARY_HPP +#define MIGRAPHX_GUARD_RTGLIB_UNARY_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +template +struct device_base : oper +{ + template + static auto reflect(Self&, F) + { + return pack(); + } + + std::vector reduce_shapes; + + void finalize(context&, const shape&, const std::vector& inputs) + { + reduce_shapes = reduce_dims(inputs); + } + + argument get_arg(const std::vector& args, std::size_t i) const + { + if(reduce_shapes.empty()) + return args[i]; + return args.at(i).reshape(reduce_shapes.at(i)); + } + + shape compute_shape(const std::vector& inputs) const + { + check_shapes{inputs, *this}.has(N + 1); + auto s0 = inputs.at(0); + if(std::all_of(inputs.begin(), inputs.end() - 1, [&](auto s) { return s == s0; }) and + s0.packed()) + return s0; + else + return {s0.type(), s0.lens()}; + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +template +struct unary_device : device_base +{ + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), this->get_arg(args, 1), this->get_arg(args, 0)); + return args[1]; + } +}; + +template +struct binary_device : device_base +{ + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), + this->get_arg(args, 2), + this->get_arg(args, 0), + this->get_arg(args, 1)); + return args[2]; + } +}; + +template +struct ternary_device : device_base +{ + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), + this->get_arg(args, 3), + this->get_arg(args, 0), + this->get_arg(args, 1), + this->get_arg(args, 2)); + return args[3]; + } +}; + +template +struct quaternary_device : device_base +{ + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), + this->get_arg(args, 4), + this->get_arg(args, 0), + this->get_arg(args, 1), + this->get_arg(args, 2), + this->get_arg(args, 3)); + return args[4]; + } +}; + +template +struct quinary_device : device_base +{ + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), + this->get_arg(args, 5), + this->get_arg(args, 0), + this->get_arg(args, 1), + this->get_arg(args, 2), + this->get_arg(args, 3), + this->get_arg(args, 4)); + return args[5]; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pack_args.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pack_args.hpp new file mode 100644 index 000000000..1896a3008 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pack_args.hpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
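The CRTP helpers in oper.hpp above reduce a pointwise GPU operator to a one-line definition. A hedged sketch follows; the device::sin kernel name and its exact signature are assumptions, not part of this hunk.

    // Assuming a device kernel  void sin(hipStream_t, const argument& result, const argument& x):
    //   struct hip_sin : unary_device<hip_sin, device::sin> {};
    // device_base then supplies the shape check, the reduce_dims() flattening done in
    // finalize(), and the last-argument output aliasing; oper<hip_sin> reports "gpu::sin".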
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP +#define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct kernel_argument +{ + template , + MIGRAPHX_REQUIRES(not std::is_base_of{})> + kernel_argument(T&& x) : size(sizeof(U)), align(alignof(U)), data(&x) // NOLINT + { + } + std::size_t size; + std::size_t align; + void* data; +}; + +MIGRAPHX_GPU_EXPORT std::vector pack_args(const std::vector& args); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/perfdb.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/perfdb.hpp new file mode 100644 index 000000000..21aed313c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/perfdb.hpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
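pack_args() above flattens heterogeneous kernel arguments into one contiguous, naturally aligned byte buffer for a HIP kernel launch. A minimal hypothetical use (the values are invented for illustration):

    float alpha      = 2.0f;
    void* device_ptr = nullptr; // in practice an argument's data pointer
    std::vector<char> blob = pack_args({alpha, device_ptr});
    // blob can now be handed to hipModuleLaunchKernel via HIP_LAUNCH_PARAM_BUFFER_POINTER.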
+ */ +#ifndef MIGRAPHX_GUARD_GPU_PERFDB_HPP +#define MIGRAPHX_GUARD_GPU_PERFDB_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct problem_params +{ + operation op; + std::vector inputs; + shape output; +}; + +std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_PERFDB_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pooling.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pooling.hpp new file mode 100644 index 000000000..7f6722b11 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/pooling.hpp @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_POOLING_HPP +#define MIGRAPHX_GUARD_RTGLIB_POOLING_HPP + +#include +#include +#include +#include +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; +#if MIGRAPHX_USE_MIOPEN +struct miopen_pooling +{ + op::pooling op; + shared pd; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::pooling"; } + shape compute_shape(const std::vector& inputs) const; + void finalize(context&, const shape&, const std::vector&); + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; +#endif + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp new file mode 100644 index 000000000..cca8efd60 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp @@ -0,0 +1,79 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP +#define MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_prefix_scan_sum : oper +{ + op::prefix_scan_sum op; + + template + static auto reflect(Self& self, T f) + { + return migraphx::reflect(self.op, f); + } + + shape compute_shape(const std::vector& inputs) const + { + std::vector in_shapes{inputs}; + in_shapes.pop_back(); + check_shapes{in_shapes, *this}.standard(); + return op.normalize_compute_shape(in_shapes); + } + + argument compute(context& ctx, const shape&, const std::vector& args) const + { + device::prefix_scan_sum( + ctx.get_stream().get(), args[1], args[0], op.axis, op.exclusive, op.reverse); + return args[1]; + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_PREFIX_SCAN_SUM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp new file mode 100644 index 000000000..bed640520 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prefuse_ops.hpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP +#define MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module_pass_manager; + +namespace gpu { + +struct MIGRAPHX_GPU_EXPORT prefuse_ops +{ + bool enable_attention = false; + std::string name() const { return "gpu::prefuse_ops"; } + void apply(module_pass_manager& mpm) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif // MIGRAPHX_GUARD_GPU_PREFUSE_OPS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prepare_reduce.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prepare_reduce.hpp new file mode 100644 index 000000000..3c6bfdd42 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/prepare_reduce.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_GPU_PREPARE_REDUCE_HPP +#define MIGRAPHX_GUARD_GPU_PREPARE_REDUCE_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; + +namespace gpu { + +struct prepare_reduce +{ + std::string name() const { return "gpu::prepare_reduce"; } + void apply(module& m) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_PREPARE_REDUCE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/problem_cache.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/problem_cache.hpp new file mode 100644 index 000000000..d70e0687b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/problem_cache.hpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_HPP +#define MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +struct MIGRAPHX_GPU_EXPORT problem_cache +{ + bool has(const std::string& name, const value& problem) const; + void insert(const std::string& name, const value& problem, const value& solution); + void mark(const std::string& name, const value& problem); + optional get(const std::string& name, const value& problem) const; + void load(); + void save() const; + std::unordered_map cache; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_PROBLEM_CACHE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reduce_op.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reduce_op.hpp new file mode 100644 index 000000000..10f3dcf84 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reduce_op.hpp @@ -0,0 +1,81 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
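A hedged usage sketch for problem_cache above; the pass name "mlir_tuning", the problem key, and the tune()/use_solution() helpers are placeholders, not taken from the patch.

    problem_cache pc;
    pc.load();                                   // read previously saved entries
    value problem = {{"op", "dot"}, {"m", 64}, {"n", 64}, {"k", 64}}; // hypothetical key
    if(auto sol = pc.get("mlir_tuning", problem))
        use_solution(*sol);                      // hypothetical consumer
    else
    {
        pc.mark("mlir_tuning", problem);         // record that tuning is pending
        pc.insert("mlir_tuning", problem, tune(problem)); // hypothetical tuner
    }
    pc.save();                                   // persist for the next compile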
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP +#define MIGRAPHX_GUARD_RTGLIB_REDUCE_OP_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +template +struct reduce_op : oper +{ + Op op; + + template + static auto reflect(Self& self, T f) + { + return migraphx::reflect(self.op, f); + } + + shape compute_shape(const std::vector& inputs) const + { + std::vector in_shapes{inputs}; + in_shapes.pop_back(); + check_shapes{in_shapes, *this}.standard(); + return op.normalize_compute_shape(in_shapes); + } + + argument compute(context& ctx, const shape&, const std::vector& args) const + { + F(ctx.get_stream().get(), args[1], args[0]); + return args[1]; + } + + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } + + reduce_op() {} + reduce_op(const Op& op_ref) : op(op_ref) {} +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reverse.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reverse.hpp new file mode 100644 index 000000000..8ef825235 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/reverse.hpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_REVERSE_HPP +#define MIGRAPHX_GUARD_RTGLIB_REVERSE_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_reverse +{ + op::reverse op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::reverse"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rnn_variable_seq_lens.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rnn_variable_seq_lens.hpp new file mode 100644 index 000000000..7d811192d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rnn_variable_seq_lens.hpp @@ -0,0 +1,101 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_RNN_VARIABLE_SEQ_LENS_HPP +#define MIGRAPHX_GUARD_RTGLIB_RNN_VARIABLE_SEQ_LENS_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct hip_rnn_var_sl_shift_sequence +{ + op::rnn_var_sl_shift_sequence op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::rnn_var_sl_shift_sequence"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +struct hip_rnn_var_sl_shift_output +{ + op::rnn_var_sl_shift_output op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::rnn_var_sl_shift_output"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +struct hip_rnn_var_sl_last_output +{ + op::rnn_var_sl_last_output op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::" + op.name(); } + shape compute_shape(std::vector inputs) const; + argument compute(context& ctx, const shape&, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rocblas.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rocblas.hpp new file mode 100644 index 000000000..d23c40f9d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/rocblas.hpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_ROCBLAS_HPP +#include +#include +#if MIGRAPHX_USE_ROCBLAS +#include +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_ROCBLAS + +using rocblas_handle_ptr = MIGRAPHX_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle); + +rocblas_handle_ptr create_rocblas_handle_ptr(); +rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s); +#endif +struct context; + +MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag(); + +MIGRAPHX_GPU_EXPORT bool rocblas_fp8_available(); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/schedule_model.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/schedule_model.hpp new file mode 100644 index 000000000..d9c692cb7 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/schedule_model.hpp @@ -0,0 +1,53 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_SCHEDULE_MODEL_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_SCHEDULE_MODEL_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +struct module; +struct operation; + +namespace gpu { + +struct schedule_model +{ + std::size_t streams = 0; + std::size_t concurrency() const; + void sched(module& m, instruction_ref ins, std::size_t n) const; + void wait(module& m, instruction_ref ins, std::size_t wait_id) const; + void record(module& m, instruction_ref ins, std::size_t wait_id) const; + std::size_t weight(const operation& op) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/sync_device.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/sync_device.hpp new file mode 100644 index 000000000..331152cbf --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/sync_device.hpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_SYNC_DEVICE_HPP +#define MIGRAPHX_GUARD_RTGLIB_GPU_SYNC_DEVICE_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct module; + +namespace gpu { + +struct sync_device +{ + std::string name() const { return "sync_device"; } + void apply(module& m) const; +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/target.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/target.hpp new file mode 100644 index 000000000..407c44fec --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/target.hpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_MIOPEN_TARGET_HPP + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct MIGRAPHX_GPU_EXPORT target +{ + std::string name() const; + std::vector get_passes(migraphx::context& gctx, const compile_options& options) const; + migraphx::context get_context() const; + argument copy_to(const argument& arg) const; + argument copy_from(const argument& arg) const; + argument allocate(const shape& s) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/time_op.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/time_op.hpp new file mode 100644 index 000000000..2c5893eed --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/time_op.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP +#define MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_GPU_EXPORT double +time_op(const context& ictx, operation op, const std::vector& inputs, int n = 100); + +MIGRAPHX_GPU_EXPORT double time_program(const context& ictx, program p, int n = 100); + +/* benchmark gpu::code_object with expected input shapes over n iterations */ +MIGRAPHX_GPU_EXPORT double time_op(const context& ictx, operation op, int n = 100); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_DRIVER_PERF_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/topk.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/topk.hpp new file mode 100644 index 000000000..f1df9d469 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/topk.hpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_TOPK_HPP +#define MIGRAPHX_GUARD_RTGLIB_TOPK_HPP + +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct context; + +struct hip_topk +{ + op::topk op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "gpu::topk"; } + shape compute_shape(std::vector inputs) const; + argument + compute(context& ctx, const shape& output_shape, const std::vector& args) const; + std::ptrdiff_t output_alias(const std::vector& shapes) const + { + return shapes.size() - 1; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/tuning_config.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/tuning_config.hpp new file mode 100644 index 000000000..23538c0d3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/tuning_config.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP +#define MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct tuning_config +{ + value problem; + std::vector solutions; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif // MIGRAPHX_GUARD_GPU_TUNING_CONFIG_HPP diff --git a/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/write_literals.hpp b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/write_literals.hpp new file mode 100644 index 000000000..85a2ce3a8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/include/migraphx/gpu/write_literals.hpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP +#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_WRITE_LITERALS_HPP + +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct module; + +namespace gpu { + +struct MIGRAPHX_GPU_EXPORT write_literals +{ + context* ctx = nullptr; + std::string name() const { return "gpu::write_literals"; } + + void apply(module& m) const; +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/jit/ck_gemm.cpp b/docker/rocm/migraphx/targets/gpu/jit/ck_gemm.cpp new file mode 100644 index 000000000..392eaa0c6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/ck_gemm.cpp @@ -0,0 +1,235 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const ck_gemm_kernel = R"__migraphx__( +#include +#include +#include +#include +#include <${include}> + +namespace migraphx { + +${preamble} + +extern "C" { + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + ck_gemm<${solution}, ${blocks_per_batch}>(xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct ck_gemm_compiler : compiler +{ + std::vector names() const { return {"ck_gemm", "gpu::ck_gemm"}; } + + ck::host::device_gemm_multiple_d::Problem create_problem(const std::vector& inputs, + const value& v) const + { + const auto& a_shape = inputs[0]; + const auto& b_shape = inputs[1]; + const auto& c_shape = inputs.back(); + + // cppcheck-suppress unreadVariable + auto rank = a_shape.ndim(); + auto batch_count = get_batch_count(c_shape); + auto m = c_shape.lens()[rank - 2]; + m = can_fold_batch(inputs) ? m * batch_count : m; + auto n = c_shape.lens().back(); + auto k = a_shape.lens().back(); + + const bool trans_a = transposed_matrix(a_shape); + const bool trans_b = transposed_matrix(b_shape); + const bool trans_e = transposed_matrix(c_shape); + const auto a_type = get_type(a_shape); + const auto b_type = get_type(b_shape); + const auto e_type = get_type(c_shape); + std::vector ds_layout; + std::transform(inputs.begin() + 2, + inputs.end() - 1, + std::back_inserter(ds_layout), + [](const auto& i) { return transposed_matrix(i); }); + std::vector ds_type; + std::transform(inputs.begin() + 2, + inputs.end() - 1, + std::back_inserter(ds_type), + [](const auto& i) { return get_type(i); }); + + std::string ck_passthrough = "ck_passthrough"; + std::string cde_op = ck_passthrough; + assert(inputs.size() < 4 or v.contains("post")); + if(v.contains("post")) + { + cde_op = v.at("post").to(); + } + + return ck::host::device_gemm_multiple_d::Problem{m, + n, + k, + trans_a, + trans_b, + trans_e, + ds_layout, + a_type, + b_type, + e_type, + ds_type, + ck_passthrough, + ck_passthrough, + cde_op}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + const auto& c_shape = inputs.back(); + auto tuning_value = v.get("tuning_value", 34); + auto batch_count = get_batch_count(c_shape); + auto problem = create_problem(inputs, v); + + const auto include_header = problem.GetIncludeHeader(); + const auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name()); + const auto& solution = solutions.at(tuning_value); + const auto template_str = solution.template_str; + const auto blocks_per_batch = solution.grid_size; + const auto block_size = solution.block_size; + + hip_compile_options options; + options.additional_src_files = ck_headers(); + auto grid_size = 
can_fold_batch(inputs) ? blocks_per_batch : batch_count * blocks_per_batch; + options.set_launch_params(v, grid_size * block_size, block_size); + options.inputs = inputs; + options.output = c_shape; + options.kernel_name = v.get("kernel", "ck_gemm_kernel"); + options.virtual_inputs = inputs; + if(can_fold_batch(inputs)) + { + auto vinputs = inputs; + fold_batch_dims(vinputs[0]); + remove_batch_dims(vinputs[1]); + std::for_each(vinputs.begin() + 2, vinputs.end(), fold_batch_dims); + options.virtual_inputs = vinputs; + } + + if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{})) + options.emplace_param("-DMIGRAPHX_CK_CHECK=1"); + + auto src = interpolate_string(ck_gemm_kernel, + {{"solution", template_str}, + {"include", include_header}, + {"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"blocks_per_batch", to_string(blocks_per_batch)}, + {"preamble", v.get("preamble", std::string{})}, + {"kernel", options.kernel_name}}); + + return compile_hip_code_object(ctx, src, options); + } + + value create_settings(instruction_ref ins, const operation& op) const + { + auto v = op.to_value(); + v["kernel"] = "ck_gemm_kernel"; + if(not ins->module_inputs().empty()) + { + auto* pm = ins->module_inputs().front(); + v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_function") + + "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm, post_ck_gemm_function);"; + v["post"] = "ck_function_adaptor"; + v["kernel"] = to_c_id("ck_gemm_" + generate_name_from_ops(*pm) + "_kernel"); + } + return v; + } + + compiler_replace + compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = create_settings(ins, op); + if(not solution.is_null()) + v["tuning_value"] = solution; + return {compile_op(ctx, shapes, v), + [=](module& m, instruction_ref ins2, const operation& code_object) { + if(enabled(MIGRAPHX_LOG_CK_GEMM{})) + { + std::vector gemm_shapes{ + shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())}; + std::cout << "gpu::ck_gemm: " << to_json_string(to_value(gemm_shapes)) + << std::endl; + } + m.replace_instruction(ins2, code_object, ins2->inputs()); + }}; + } + + optional + get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const + { + if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{})) + return nullopt; + tuning_config tc; + auto shapes = to_shapes(ins->inputs()); + auto problem = create_problem(shapes, create_settings(ins, op)); + auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name()); + tc.solutions.resize(solutions.size()); + std::iota(tc.solutions.begin(), tc.solutions.end(), 0); + std::vector gemm_shapes{shapes[0], shapes[1], shapes.back()}; + tc.problem = to_value(gemm_shapes); + return tc; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/ck_gemm_softmax_gemm.cpp b/docker/rocm/migraphx/targets/gpu/jit/ck_gemm_softmax_gemm.cpp new file mode 100644 index 000000000..693153d09 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/ck_gemm_softmax_gemm.cpp @@ -0,0 +1,236 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const ck_gemm_softmax_gemm_kernel = R"__migraphx__( +#include +#include +#include +#include +#include +#include +#include <${include}> + +namespace migraphx { + +${preamble} + +extern "C" { + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + auto settings = make_ck_gemm_softmax_gemm_settings(MIGRAPHX_MAKE_CONSTANT(float{SCALE})); + ck_gemm_softmax_gemm<${solution}, ${blocks_per_batch}>(settings, xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct ck_gemm_softmax_gemm_compiler : compiler +{ + std::vector names() const + { + return {"ck_gemm_softmax_gemm", "gpu::ck_gemm_softmax_gemm"}; + } + + ck::host::device_batched_gemm_softmax_gemm::Problem + create_problem(const std::vector& inputs, const value&) const + { + const auto& a_shape = inputs[0]; + const auto& b_shape = inputs[1]; + const auto& b1_shape = inputs[2]; + const auto& c_shape = inputs.back(); + + // cppcheck-suppress unreadVariable + auto rank = a_shape.ndim(); + auto batch_count = get_batch_count(c_shape); + auto m = c_shape.lens()[rank - 2]; + m = can_fold_batch(inputs) ? 
m * batch_count : m; + auto n = c_shape.lens().back(); + auto k = a_shape.lens().back(); + auto o = c_shape.lens().back(); + + const bool trans_a = transposed_matrix(a_shape); + const bool trans_b = transposed_matrix(b_shape); + const bool trans_b1 = transposed_matrix(b1_shape); + const bool trans_c = transposed_matrix(c_shape); + const auto a_type = get_type(a_shape); + const auto b_type = get_type(b_shape); + const auto b1_type = get_type(b1_shape); + const auto c_type = get_type(c_shape); + + std::string ck_passthrough = "ck_passthrough"; + return ck::host::device_batched_gemm_softmax_gemm::Problem{m, + n, + k, + o, + trans_a, + trans_b, + trans_b1, + trans_c, + a_type, + b_type, + b1_type, + c_type, + ck_passthrough, + ck_passthrough, + ck_passthrough, + ck_passthrough}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + const auto& c_shape = inputs.back(); + auto tuning_value = v.get("tuning_value", 5); + auto batch_count = get_batch_count(c_shape); + auto problem = create_problem(inputs, v); + + const auto include_header = problem.GetIncludeHeader(); + const auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name()); + const auto& solution = solutions.at(tuning_value); + const auto template_str = solution.template_str; + const auto blocks_per_batch = solution.grid_size; + const auto block_size = solution.block_size; + + hip_compile_options options; + options.additional_src_files = ck_headers(); + auto grid_size = can_fold_batch(inputs) ? blocks_per_batch : batch_count * blocks_per_batch; + options.set_launch_params(v, grid_size * block_size, block_size); + options.inputs = inputs; + options.output = c_shape; + options.kernel_name = v.get("kernel", "ck_gemm_softmax_gemm_kernel"); + options.virtual_inputs = inputs; + if(can_fold_batch(inputs)) + { + auto vinputs = inputs; + fold_batch_dims(vinputs[0]); + remove_batch_dims(vinputs[1]); + std::for_each(vinputs.begin() + 2, vinputs.end(), fold_batch_dims); + options.virtual_inputs = vinputs; + } + + if(v.get("check", false) or enabled(MIGRAPHX_CK_DEBUG{})) + options.emplace_param("-DMIGRAPHX_CK_CHECK=1"); + + // scale + assert(v.contains("scale")); + auto scale = v.at("scale").to(); + options.emplace_param("-DSCALE=" + std::to_string(scale)); + + auto src = interpolate_string(ck_gemm_softmax_gemm_kernel, + {{"solution", template_str}, + {"include", include_header}, + {"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"blocks_per_batch", to_string(blocks_per_batch)}, + {"preamble", v.get("preamble", std::string{})}, + {"kernel", options.kernel_name}}); + + return compile_hip_code_object(ctx, src, options); + } + + value create_settings(instruction_ref ins, const operation& op) const + { + auto v = op.to_value(); + v["kernel"] = "ck_gemm_softmax_gemm_kernel"; + if(not ins->module_inputs().empty()) + { + auto* pm = ins->module_inputs().front(); + v["preamble"] = generate_pointwise(*pm, "post_ck_gemm_softmax_gemm_function") + + "\nMIGRAPHX_LIFT_CLASS(post_ck_gemm_softmax_gemm, " + "post_ck_gemm_softmax_gemm_function);"; + v["post"] = "ck_function_adaptor"; + v["kernel"] = "ck_gemm_softmax_gemm_" + generate_name_from_ops(*pm) + "_kernel"; + } + return v; + } + + compiler_replace + compile(context& ctx, instruction_ref ins, const operation& op, const value& solution) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = create_settings(ins, op); + if(not solution.is_null()) + v["tuning_value"] = solution; 
+ return {compile_op(ctx, shapes, v), + [=](module& m, instruction_ref ins2, const operation& code_object) { + if(enabled(MIGRAPHX_LOG_CK_GEMM{})) + { + std::vector gemm_shapes{ + shapes[0], shapes[1], shapes.back().with_type(shapes[0].type())}; + std::cout << "gpu::ck_gemm_softmax_gemm: " + << to_json_string(to_value(gemm_shapes)) << std::endl; + } + m.replace_instruction(ins2, code_object, ins2->inputs()); + }}; + } + + optional + get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive) const + { + if(not exhaustive and not enabled(MIGRAPHX_TUNE_CK{})) + return nullopt; + tuning_config tc; + auto shapes = to_shapes(ins->inputs()); + auto problem = create_problem(shapes, create_settings(ins, op)); + auto solutions = problem.GetSolutions(ctx.get_current_device().get_gfx_name()); + tc.solutions.resize(solutions.size()); + std::iota(tc.solutions.begin(), tc.solutions.end(), 0); + std::vector gemm_shapes{shapes[0], shapes[1], shapes.back()}; + tc.problem = to_value(gemm_shapes); + return tc; + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/compute_attention_probabilities.cpp b/docker/rocm/migraphx/targets/gpu/jit/compute_attention_probabilities.cpp new file mode 100644 index 000000000..8a0c72207 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/compute_attention_probabilities.cpp @@ -0,0 +1,114 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const compute_attention_probabilities_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... 
xs) { + + compute_attention_probabilities(xs..., make_gqa_parameters(${gqa_params})); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct compute_attention_probabilities_compiler : compiler +{ + std::vector names() const + { + return {"compute_attention_probabilities", "gpu::compute_attention_probabilities"}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto params = init_params(inputs, v); + auto gqa_params_str = params.make_init_str(); + + hip_compile_options options; + options.set_launch_params( + v, + compute_global_for(ctx, + params.batch_size * params.num_heads * params.sequence_length * + params.seqlen_present_kv_cache)); + options.inputs = inputs; + options.output = inputs.back(); + options.kernel_name = v.get("kernel", "compute_attention_probabilities_kernel"); + + auto src = interpolate_string(compute_attention_probabilities_kernel, + {{"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"gqa_params", gqa_params_str}, + {"kernel", options.kernel_name}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = op.to_value(); + return compile_op(ctx, shapes, v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/compute_attention_scores.cpp b/docker/rocm/migraphx/targets/gpu/jit/compute_attention_scores.cpp new file mode 100644 index 000000000..a8834a24e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/compute_attention_scores.cpp @@ -0,0 +1,119 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const compute_attention_scores_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + + +namespace migraphx { + + + +extern "C" { + + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + + compute_attention_scores(xs..., make_gqa_parameters(${gqa_params})); + }); +} + + +} + +} // namespace migraphx + +)__migraphx__"; + +struct compute_attention_scores_compiler : compiler +{ + std::vector names() const + { + return {"compute_attention_scores", "gpu::compute_attention_scores"}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto params = init_params(inputs, v); + auto gqa_params_str = params.make_init_str(); + + hip_compile_options options; + options.set_launch_params( + v, + compute_global_for(ctx, + params.batch_size * params.num_heads * params.sequence_length * + params.head_size)); + options.inputs = inputs; + options.output = inputs.back(); + options.kernel_name = v.get("kernel", "compute_attention_scores_kernel"); + + auto src = interpolate_string(compute_attention_scores_kernel, + {{"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"gqa_params", gqa_params_str}, + {"kernel", options.kernel_name}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = op.to_value(); + return compile_op(ctx, shapes, v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/concat.cpp b/docker/rocm/migraphx/targets/gpu/jit/concat.cpp new file mode 100644 index 000000000..322f863e3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/concat.cpp @@ -0,0 +1,202 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const concat_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +${preamble} + +extern "C" { + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, ${concat_params}, auto... xs) { + concat<${axis}>(${concat_args})(${post}, y, xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct concat_compiler : compiler +{ + std::vector names() const { return {"fused_concat", "concat"}; } + + static std::vector normalize(std::vector inputs, std::size_t& axis) + { + auto s = inputs.back(); + std::vector strides(s.lens().size()); + strides[axis] = 1; + + inputs.push_back(shape{s.type(), s.lens(), strides}); + + auto result = reduce_dims(normalize_permutation(inputs)); + auto rstrides = result.back().strides(); + auto it = std::find_if(rstrides.begin(), rstrides.end(), [](auto x) { return x == 1; }); + axis = it - rstrides.begin(); + result.pop_back(); + return result; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + options.inputs = inputs; + options.output = inputs.back(); + auto concat_axis = v.at("axis").to(); + options.virtual_inputs = normalize(inputs, concat_axis); + options.kernel_name = v.get("kernel", "concat_kernel"); + auto axis = find_fast_axis(options.virtual_inputs); + auto op_names = v.at("ops").to_vector(); + auto args = v.at("args"); + vectorize vec{}; + if(axis != concat_axis) + vec = vectorize::elements(ctx, axis, options.virtual_inputs); + auto nelements_per_op = options.virtual_inputs.back().elements() / op_names.size(); + options.set_launch_params(v, compute_global_for(ctx, nelements_per_op / vec.size, 256)); + options.emplace_param("-Wno-float-equal"); + std::vector concat_params; + std::vector concat_args; + for(auto i : range(op_names.size())) + { + const auto& name = op_names[i]; + auto n = args.at(name).to(); + auto prefix = to_c_id(name + std::to_string(i) + "_concat_x"); + transform(range(n), std::back_inserter(concat_params), [&](auto j) { + return "auto " + prefix + std::to_string(j); + }); + std::vector pack_args = {"MIGRAPHX_LIFT(" + name + ")"}; + transform(range(n), std::back_inserter(pack_args), [&](auto j) { + return prefix + std::to_string(j); + }); + concat_args.push_back("pack(" + join_strings(pack_args, ", ") + ")"); + } + auto src = interpolate_string(concat_kernel, + {{"kernel", options.kernel_name}, + {"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"concat_params", join_strings(concat_params, ", ")}, + {"concat_args", join_strings(concat_args, ", ")}, + {"post", v.get("post", std::string{"op::id{}"})}, + {"transformers", make_transformer_args(vec)}, + {"preamble", v.get("preamble", std::string{})}, + {"axis", std::to_string(concat_axis)}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto v = op.to_value(); + if(op.name() == "fused_concat") + { + std::unordered_map mod_names_lookup; + transform(range(ins->module_inputs().size()), + std::inserter(mod_names_lookup, mod_names_lookup.end()), + [&](auto i) { + return 
std::make_pair(ins->module_inputs()[i]->name(), + "pointwise" + std::to_string(i)); + }); + v["preamble"] = transform_accumulate( + ins->module_inputs().begin(), + ins->module_inputs().end(), + std::string{}, + std::plus<>{}, + [&](module_ref mod) { + return generate_pointwise(*mod, mod_names_lookup.at(mod->name())) + "\n"; + }); + std::vector mod_names; + std::transform(ins->module_inputs().begin(), + ins->module_inputs().end() - 1, + std::back_inserter(mod_names), + [&](module_ref mod) { return mod_names_lookup.at(mod->name()); }); + v["ops"] = mod_names; + module_ref last_mod = ins->module_inputs().back(); + v["post"] = "MIGRAPHX_LIFT(" + mod_names_lookup.at(last_mod->name()) + ")"; + std::unordered_map mod_args; + std::transform(ins->module_inputs().begin(), + ins->module_inputs().end() - 1, + std::inserter(mod_args, mod_args.end()), + [&](module_ref mod) { + const auto& name = mod_names_lookup.at(mod->name()); + return std::make_pair(name, mod->get_parameter_names().size()); + }); + v["args"] = mod_args; + auto prefix_name = transform_accumulate(ins->module_inputs().begin(), + ins->module_inputs().end() - 1, + std::string{}, + std::plus<>{}, + [&](module_ref mod) -> std::string { + auto name = generate_name_from_ops(*mod); + if(name.empty()) + return ""; + return name + "_"; + }); + v["kernel"] = prefix_name + "concat_" + + generate_name_from_ops(*(ins->module_inputs().back())) + "_kernel"; + } + else if(op.name() == "concat") + { + auto concat_inputs = ins->inputs().size() - 1; + if(not ins->module_inputs().empty()) + { + auto* pm = ins->module_inputs().front(); + concat_inputs = ins->inputs().size() - pm->get_parameter_names().size(); + v["preamble"] = generate_pointwise(*pm, "post_concat"); + v["post"] = "MIGRAPHX_LIFT(post_concat)"; + v["kernel"] = "concat_" + generate_name_from_ops(*pm) + "_kernel"; + } + std::vector mod_names(concat_inputs, "op::id{}"); + v["ops"] = mod_names; + std::unordered_map mod_args = {{"op::id{}", 1}}; + v["args"] = mod_args; + } + return compile_op(ctx, to_shapes(ins->inputs()), v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/concat_past_present.cpp b/docker/rocm/migraphx/targets/gpu/jit/concat_past_present.cpp new file mode 100644 index 000000000..b18d70108 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/concat_past_present.cpp @@ -0,0 +1,119 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const concat_past_present_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + + + +extern "C" { + + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors())(${args})([](auto... xs) { + + concat_past_present(xs..., make_gqa_parameters(${gqa_params})); + }); +} + + +} + +} // namespace migraphx + +)__migraphx__"; + +struct concat_past_present_compiler : compiler +{ + std::vector names() const + { + return {"concat_past_present", "gpu::concat_past_present"}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto params = init_params(inputs, v); + auto gqa_params_str = params.make_init_str(); + + hip_compile_options options; + options.set_launch_params(v, + compute_global_for(ctx, + 2 * params.batch_size * params.kv_num_heads * + params.sequence_length * + params.head_size)); + options.inputs = inputs; + options.output = inputs.front(); + options.kernel_name = v.get("kernel", "concat_past_present_kernel"); + options.output_arg = 0; + + auto src = interpolate_string(concat_past_present_kernel, + {{"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"gqa_params", gqa_params_str}, + {"kernel", options.kernel_name}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = op.to_value(); + return compile_op(ctx, shapes, v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/gather.cpp b/docker/rocm/migraphx/targets/gpu/jit/gather.cpp new file mode 100644 index 000000000..9dc17db09 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/gather.cpp @@ -0,0 +1,89 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const gather_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void gather_kernel(void* in_data, void* in_indices, void* output) +{ + make_tensors()(in_data, in_indices, output)([](auto&&... xs) { + gather<${axis}>(xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct gather_compiler : compiler +{ + std::vector names() const { return {"gather"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + const auto& out_s = inputs.back(); + options.set_launch_params(v, compute_global_for(ctx, out_s.elements())); + options.inputs = inputs; + options.output = out_s; + options.kernel_name = "gather_kernel"; + options.virtual_inputs = inputs; + + auto axis = v.at("axis").to(); + + auto src = interpolate_string(gather_kernel, {{"axis", axis}}); + + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/gathernd.cpp b/docker/rocm/migraphx/targets/gpu/jit/gathernd.cpp new file mode 100644 index 000000000..05a48f4e9 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/gathernd.cpp @@ -0,0 +1,91 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const gathernd_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void gathernd_kernel(void* in_data, void* in_indices, void* output) +{ + make_tensors()(in_data, in_indices, output)([](auto&&... xs) { + auto settings = make_gathernd_settings(MIGRAPHX_MAKE_CONSTANT(int64_t{BATCH_DIMS})); + gathernd(xs..., settings); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct gathernd_compiler : compiler +{ + std::vector names() const { return {"gathernd"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + const auto& out_s = inputs.back(); + options.set_launch_params(v, compute_global_for(ctx, out_s.elements())); + options.inputs = inputs; + options.output = out_s; + options.kernel_name = "gathernd_kernel"; + options.virtual_inputs = inputs; + + // batch_dims + assert(v.contains("batch_dims")); + auto batch_dims = v.at("batch_dims").to(); + options.emplace_param("-DBATCH_DIMS=" + std::to_string(batch_dims)); + + return compile_hip_code_object(ctx, gathernd_kernel, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/gqa_rotary_embedding.cpp b/docker/rocm/migraphx/targets/gpu/jit/gqa_rotary_embedding.cpp new file mode 100644 index 000000000..340616352 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/gqa_rotary_embedding.cpp @@ -0,0 +1,114 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const gqa_rotary_embedding_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + + + +extern "C" { + + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + + gqa_rotary_embedding(xs..., make_gqa_parameters(${gqa_params})); + }); +} + + +} + +} // namespace migraphx + +)__migraphx__"; + +struct gqa_rotary_embedding_compiler : compiler +{ + std::vector names() const + { + return {"gqa_rotary_embedding", "gpu::gqa_rotary_embedding"}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto params = init_params(inputs, v); + auto gqa_params_str = params.make_init_str(); + + hip_compile_options options; + options.set_launch_params(v, compute_global_for(ctx, inputs.back().elements())); + options.inputs = inputs; + options.output = inputs.back(); + options.kernel_name = v.get("kernel", "gqa_rotary_embedding_kernel"); + + auto src = interpolate_string(gqa_rotary_embedding_kernel, + {{"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"gqa_params", gqa_params_str}, + {"kernel", options.kernel_name}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = op.to_value(); + return compile_op(ctx, shapes, v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/gqa_softmax.cpp b/docker/rocm/migraphx/targets/gpu/jit/gqa_softmax.cpp new file mode 100644 index 000000000..c1ff241dd --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/gqa_softmax.cpp @@ -0,0 +1,113 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +// NOLINTNEXTLINE +static const char* const gqa_softmax_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + + + +extern "C" { + + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + + gqa_softmax(xs..., make_gqa_parameters(${gqa_params})); + }); +} + + +} + +} // namespace migraphx + +)__migraphx__"; + +struct gqa_softmax_compiler : compiler +{ + std::vector names() const { return {"gqa_softmax", "gpu::gqa_softmax"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto params = init_params(inputs, v); + auto gqa_params_str = params.make_init_str(); + + hip_compile_options options; + options.set_launch_params( + v, + compute_global_for(ctx, params.batch_size * params.num_heads * params.sequence_length)); + options.inputs = inputs; + options.output = inputs.back(); + options.kernel_name = v.get("kernel", "gqa_softmax_kernel"); + + auto src = interpolate_string(gqa_softmax_kernel, + {{"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"gqa_params", gqa_params_str}, + {"kernel", options.kernel_name}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto shapes = to_shapes(ins->inputs()); + auto v = op.to_value(); + return compile_op(ctx, shapes, v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/layernorm.cpp b/docker/rocm/migraphx/targets/gpu/jit/layernorm.cpp new file mode 100644 index 000000000..09736031b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/layernorm.cpp @@ -0,0 +1,131 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const layernorm_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +${preamble} + +extern "C" { +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto... xs) { + ${layernorm}<${axis}>(${post}, ${eps}, xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct layernorm_compiler : compiler +{ + std::vector names() const + { + return {"layernorm", "gpu::prelayernorm", "gpu::preadd_layernorm"}; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + // TODO: Use reduce_dims + auto axis = inputs.front().lens().size() - 1; + auto faxis = find_fast_axis({inputs.front()}); + vectorize vec{}; + // Vectorize if the axis is a reduction axis + if(axis == faxis) + { + vec = vectorize::elements(ctx, faxis, inputs); + } + auto relements = inputs[0].lens()[axis] / vec.size; + auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]); + auto block_size = compute_block_size(ctx, relements, 256); + hip_compile_options options; + options.set_launch_params( + v, compute_global_for(ctx, nelements * block_size, 256), block_size); + options.output = inputs.back(); + options.inputs = inputs; + options.kernel_name = v.get("kernel", "layernorm_kernel"); + auto eps = v.get("epsilon", 1e-12f); + + auto src = interpolate_string(layernorm_kernel, + {{"kernel", options.kernel_name}, + {"params", enum_params(inputs.size(), "void * private_p")}, + {"args", enum_params(inputs.size(), "private_p")}, + {"transformers", make_transformer_args(vec)}, + {"post", v.get("post", std::string{"op::id{}"})}, + {"preamble", v.get("preamble", std::string{})}, + {"layernorm", v.get("layernorm", std::string{"layernorm"})}, + {"axis", to_string(axis)}, + {"eps", to_string(eps)}}); + + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + auto v = op.to_value(); + v["layernorm"] = "layernorm"; + v["kernel"] = "layernorm_kernel"; + if(op.name() == "gpu::preadd_layernorm") + { + v["layernorm"] = "add_layernorm"; + v["kernel"] = "add_layernorm_kernel"; + } + if(not ins->module_inputs().empty()) + { + auto* pm = ins->module_inputs().front(); + v["preamble"] = generate_pointwise(*pm, "post_layernorm"); + v["post"] = "MIGRAPHX_LIFT(post_layernorm)"; + v["kernel"] = + v["layernorm"].to() + "_" + generate_name_from_ops(*pm) + "_kernel"; + } + return compile_op(ctx, to_shapes(ins->inputs()), v); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/mlir.cpp b/docker/rocm/migraphx/targets/gpu/jit/mlir.cpp new file mode 100644 index 000000000..4893743c2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/mlir.cpp @@ -0,0 +1,283 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_DUMP_TO_MXR); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_DUMP); + +static module create_pointwise_module(module_ref in_mod) +{ + module pw_mod; + std::unordered_map map_ins; + for(auto param : in_mod->get_parameters()) + { + map_ins[param] = + pw_mod.add_parameter(any_cast(param->get_operator()).parameter, + shape{param->get_shape().type()}); + } + auto return_args = pw_mod.add_instructions( + in_mod, + &map_ins, + [](module& m, + instruction_ref ins, + const operation& op, + const std::vector& inputs, + const std::vector& mod_args) -> instruction_ref { + if(op.name() == "multibroadcast" and inputs.front()->name() == "@literal") + return inputs.front(); + else + return m.insert_instruction(ins, op, inputs, mod_args); + }); + pw_mod.add_return(return_args); + return pw_mod; +} + +struct mlir_compiler : compiler +{ + std::vector names() const { return {"gpu::mlir_op"}; } + + operation compile_op(context&, const std::vector&, const value&) const { return {}; } + + compiler_replace + compile(context& ctx, instruction_ref ins, const operation&, const value& solution) const + { + auto* smod = ins->module_inputs().front(); + assert(smod->get_parameter_names().size() == ins->inputs().size() - 1); + auto gemm_like_ins = std::find_if(smod->begin(), smod->end(), [&](const auto& i) { + return contains({"dot", "quant_dot", "convolution", "quant_convolution"}, i.name()); + }); + auto pointwise_ins = std::find_if(gemm_like_ins, smod->end(), [&](const auto& i) { + return i.get_operator().attributes().get("pointwise", false) == true; + }); + + // check if (a) module is fused (b) contains a "gemm/conv" instruction and (c) + // perfConfig can not allow fused module + if(gemm_like_ins != smod->end() and pointwise_ins != smod->end() and + not is_module_fusible(*smod, ctx, solution)) + { + auto input_args = ins->inputs(); + // remove alloc buffer + input_args.pop_back(); + auto split_ins = std::prev(pointwise_ins); + std::array mod_splits; + mod_splits = smod->split(input_args, {split_ins}); + auto dot_mlir_inputs = to_shapes(mod_splits[0].inputs); + // add alloc for the gemm output + dot_mlir_inputs.push_back(mod_splits[0].mod.get_output_shapes().front()); + mlir_code_object cop1 = compile_mlir(ctx, 
mod_splits[0].mod, dot_mlir_inputs, solution); + auto pw_shapes = to_shapes(mod_splits[1].inputs); + if(mod_splits[1].mod.get_output_shapes().size() == 1) + { + pw_shapes.push_back(mod_splits[1].mod.get_output_shapes().front()); + } + else + { + pw_shapes.push_back(shape{mod_splits[1].mod.get_output_shapes()}); + } + assert(pw_shapes.back() == ins->get_shape()); + auto pw_mod = create_pointwise_module(&mod_splits[1].mod); + auto cop2 = compile_pointwise(ctx, pw_shapes, &pw_mod); + std::vector cops = {cop1, + mlir_code_object{any_cast(cop2)}}; + return insert(cops, mod_splits, ins, split_ins); + } + return insert(compile_mlir(ctx, *smod, to_shapes(ins->inputs()), solution)); + } + + compiler_replace insert(const mlir_code_object& mco) const + { + return {std::vector{mco.cop}, + [=](module& m, instruction_ref ins, const std::vector& ops) { + std::vector inputs = ins->inputs(); + + // Tuple inputs not supported + assert(std::all_of(inputs.begin(), inputs.end() - 1, [](auto i) { + return i->get_shape().sub_shapes().empty(); + })); + + // Multiple output case (allocate ins will give a tuple) + std::vector flat_inputs(inputs); + bool multi_out = not flat_inputs.back()->get_shape().sub_shapes().empty(); + if(multi_out) + { + auto allocs = flat_inputs.back(); + flat_inputs.pop_back(); + auto sub_shape_idx = range(allocs->get_shape().sub_shapes().size()); + std::transform(sub_shape_idx.begin(), + sub_shape_idx.end(), + std::back_inserter(flat_inputs), + [&](int i) { + return m.insert_instruction( + ins, + migraphx::make_op("get_tuple_elem", {{"index", i}}), + allocs); + }); + } + std::vector tuple_replacements; + + for(const auto i : range(mco.prefill_indices.size())) + { + auto prefilled_ins = m.insert_instruction( + ins, + migraphx::make_op("hip::fill", {{"value", mco.prefill_values[i]}}), + flat_inputs[mco.prefill_indices[i]]); + if(not multi_out or mco.prefill_indices[i] < inputs.size() - 1) + { + replace(inputs, inputs[mco.prefill_indices[i]], prefilled_ins); + } + else + { + tuple_replacements.push_back(prefilled_ins); + } + } + + if(multi_out and not tuple_replacements.empty()) + { + // Add identity to make sure fill operations happen before kernel call + tuple_replacements.insert(tuple_replacements.begin(), inputs.back()); + inputs.back() = m.insert_instruction( + ins, migraphx::make_op("identity"), tuple_replacements); + } + + auto mlir = insert_mlir(m, ins, any_cast(ops.front()), inputs); + return m.replace_instruction(ins, mlir); + }, + &trace}; + } + + compiler_replace insert(const std::vector& mcos, + const std::array& mods, + instruction_ref precompile_ins, + instruction_ref split_ins) const + { + std::vector cobjs(mcos.size()); + std::transform( + mcos.begin(), mcos.end(), cobjs.begin(), [](const auto& mco) { return mco.cop; }); + auto precompiled_inputs = precompile_ins->inputs(); + return { + cobjs, [=](module& m, instruction_ref ins, const std::vector& ops) { + auto compiled_inputs = ins->inputs(); + std::unordered_map inputs_rep_map; + for(const auto i : range(precompiled_inputs.size())) + { + inputs_rep_map[precompiled_inputs[i]] = compiled_inputs[i]; + } + auto dot_inputs = mods[0].inputs; + auto dot_mod_out_shape = mods[0].mod.get_output_shapes().front(); + auto dot_alloc = m.insert_instruction( + ins, + migraphx::make_op("hip::allocate", {{"shape", to_value(dot_mod_out_shape)}})); + dot_inputs.push_back(dot_alloc); + for(const auto i : range(mcos[0].prefill_indices.size())) + { + auto prefilled_ins = m.insert_instruction( + ins, + migraphx::make_op("hip::fill", {{"value", 
mcos[0].prefill_values[i]}}), + dot_inputs[mcos[0].prefill_indices[i]]); + replace(dot_inputs, dot_inputs[mcos[0].prefill_indices[i]], prefilled_ins); + } + + std::vector dot_inputs_updated; + std::transform(dot_inputs.begin(), + dot_inputs.end(), + std::back_inserter(dot_inputs_updated), + [&](const auto& i) { + if(inputs_rep_map.find(i) != inputs_rep_map.end()) + { + assert(inputs_rep_map.at(i)->get_shape() == i->get_shape()); + return inputs_rep_map.at(i); + } + return i; + }); + auto mlir_ins = + insert_mlir(m, ins, any_cast(ops[0]), dot_inputs_updated); + auto pwm = mods[1]; + pwm.replace(split_ins, mlir_ins); + auto pw_inputs = pwm.inputs; + pw_inputs.push_back(ins->inputs().back()); + std::vector pw_inputs_updated; + std::transform(pw_inputs.begin(), + pw_inputs.end(), + std::back_inserter(pw_inputs_updated), + [&](const auto& i) { + if(inputs_rep_map.find(i) != inputs_rep_map.end()) + { + assert(inputs_rep_map.at(i)->get_shape() == i->get_shape()); + return inputs_rep_map.at(i); + } + return i; + }); + auto pw_ins = + insert_mlir(m, ins, any_cast(ops[1]), pw_inputs_updated); + return m.replace_instruction(ins, pw_ins); + }}; + } + + optional get_tuning_config(const context& ctx, + instruction_ref ins, + const operation&, + bool exhaustive) const + { + static const auto mxr_loc = string_value_of(MIGRAPHX_MLIR_DUMP_TO_MXR{}); + static const auto mlir_loc = string_value_of(MIGRAPHX_MLIR_DUMP{}); + + auto shapes = to_shapes(ins->inputs()); + auto* smod = ins->module_inputs().front(); + if(not mxr_loc.empty()) + { + dump_mlir_to_mxr(*smod, ins->inputs(), mxr_loc); + } + if(not mlir_loc.empty()) + { + dump_mlir_to_file(*smod, shapes, mlir_loc); + } + return get_tuning_config_mlir(ctx, *smod, shapes, exhaustive); + } + + static void trace(std::ostream& os, instruction_ref ins) + { + auto shapes = to_shapes(ins->inputs()); + auto* smod = ins->module_inputs().front(); + os << dump_mlir(*smod, shapes); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/pad.cpp b/docker/rocm/migraphx/targets/gpu/jit/pad.cpp new file mode 100644 index 000000000..9ea77ee99 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/pad.cpp @@ -0,0 +1,121 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const pointwise_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { +MIGRAPHX_GLOBAL void pad_kernel(void* input_p, void* output_p) +{ + auto offsets = index_ints<${offsets}>{}; + auto idx = make_index(); + make_tensors()(input_p, output_p)([&](auto input, auto output) { + pad(idx, offsets, input, output, ${pad_val}); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct pad_compiler : compiler +{ + std::vector names() const { return {"pad"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto padding = v.at("pads").to_vector(); + auto input_lens = inputs.front().lens(); + std::vector offsets(input_lens.size()); + std::copy(padding.begin(), padding.begin() + offsets.size(), offsets.begin()); + + auto offset_lens = input_lens; + std::transform(input_lens.begin(), + input_lens.end(), + offsets.begin(), + offset_lens.begin(), + [&](auto input, auto offset) { return input + offset; }); + + auto vinputs = inputs; + vinputs.push_back(inputs.front().with_lens(offset_lens)); + auto rinputs = reduce_dims(normalize_permutation(vinputs)); + + auto rinput_lens = rinputs.front().lens(); + auto roffset_lens = rinputs.back().lens(); + std::vector roffsets(roffset_lens.size()); + std::transform(rinput_lens.begin(), + rinput_lens.end(), + roffset_lens.begin(), + roffsets.begin(), + [](auto input, auto offset_dim) { return offset_dim - input; }); + rinputs.pop_back(); + + hip_compile_options options; + options.inputs = inputs; + options.output = inputs.back(); + options.virtual_inputs = rinputs; + options.kernel_name = "pad_kernel"; + options.set_launch_params(v, compute_global_for(ctx, inputs.at(1).elements())); + + auto pad_val = v.get("value", 0.f); + auto pad_val_string = to_string(pad_val); + if(float_equal(pad_val, std::numeric_limits::lowest())) + pad_val_string = "lowest{}"; + if(float_equal(pad_val, std::numeric_limits::max())) + pad_val_string = "highest{}"; + + auto src = interpolate_string( + pointwise_kernel, + {{"pad_val", to_string(pad_val_string)}, {"offsets", to_string_range(roffsets)}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/pointwise.cpp b/docker/rocm/migraphx/targets/gpu/jit/pointwise.cpp new file mode 100644 index 000000000..9e352888e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/pointwise.cpp @@ -0,0 +1,122 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const pointwise_kernel = R"__migraphx__( +#include +#include +#include + +namespace migraphx { + +${preamble} + +extern "C" { +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + auto idx = make_index(); + pointwise<${noutputs}, ${tiled}>(idx, ${transformers})(${lambda}, ${args}); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct pointwise_compiler : compiler +{ + std::vector names() const { return {"pointwise", "contiguous", "layout"}; } + + static std::size_t oversubscribe_if(bool b) + { + if(b) + return 256; + else + return 1; + } + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + options.inputs = flatten(inputs); + options.output = inputs.back(); + options.virtual_inputs = reduce_dims(normalize_permutation(options.inputs)); + options.emplace_param("-Wno-float-equal"); + auto axis = find_fast_axis(options.virtual_inputs); + auto vec = vectorize::elements(ctx, axis, options.virtual_inputs); + options.kernel_name = v.get("kernel", "kernel"); + auto noutputs = options.inputs.size() - inputs.size() + 1; + auto t = tile::elements(options.virtual_inputs, noutputs); + // auto t = tile{}; + if(t.ntiles == 0) + options.set_launch_params( + v, compute_global_for(ctx, options.inputs.front().elements() / vec.size, 256)); + else + options.set_launch_params( + v, compute_global_for(ctx, t.ntiles * t.block_size, 256), t.block_size); + auto src = + interpolate_string(pointwise_kernel, + {{"kernel", options.kernel_name}, + {"params", enum_params(options.inputs.size(), "void * private_p")}, + {"args", enum_params(options.inputs.size(), "private_p")}, + {"lambda", v.at("lambda").to()}, + {"transformers", make_transformer_args(t, vec)}, + {"tiled", t.ntiles > 0 ? 
"true" : "false"}, + {"noutputs", std::to_string(noutputs)}, + {"preamble", v.get("preamble", std::string{})}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + if(contains({"layout", "contiguous"}, op.name())) + { + return compile_op(ctx, + to_shapes(ins->inputs()), + {{"lambda", "[](auto x) { return make_tuple(x); }"}, + {"kernel", op.name() + "_kernel"}}); + } + else + { + assert(not ins->module_inputs().empty()); + const_module_ref pm = ins->module_inputs().front(); + return compile_pointwise(ctx, to_shapes(ins->inputs()), pm); + } + } +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/pooling.cpp b/docker/rocm/migraphx/targets/gpu/jit/pooling.cpp new file mode 100644 index 000000000..f245a2269 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/pooling.cpp @@ -0,0 +1,193 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const pooling_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void pooling_kernel(void* in_data, void* output) +{ + transform_args(make_tensors(), rotate_last())(in_data, output)([](auto&&... 
xs) { + pooling<${algo}, ${group_size}>(${op}, make_window(index_ints<${window}>{}, index_ints<${stride}>{}, index_ints<${padding}>{}), xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct pooling_compiler : compiler +{ + + static std::size_t compute_subwave_size(context& ctx, std::size_t n) + { + std::size_t max_wavefront_size = ctx.get_current_device().get_wavefront_size(); + std::size_t wavefront_size = 1; + while(wavefront_size <= n and wavefront_size < max_wavefront_size) + wavefront_size *= 2; + return wavefront_size / 2; + } + + struct algorithm + { + std::string name = "reduce::lane"; + std::size_t reduce_size = 1; + std::size_t block_size = 256; + std::size_t group_size = 1; + + static std::size_t compute_group_size(const shape& output) + { + auto n = output.lens().back(); + const std::size_t max_group_size = 32; + std::size_t group_size = 1; + while((n % (group_size * 2) == 0) and group_size <= max_group_size) + group_size *= 2; + return group_size; + } + + algorithm() {} + + algorithm(context& ctx, const shape& input, const std::vector& window) + { + if(input.strides().back() != 1) + return; + std::size_t max_wavefront_size = ctx.get_current_device().get_wavefront_size(); + auto wsize = window.back(); + if(wsize > max_wavefront_size) + { + block_size = compute_block_size(ctx, wsize, 256); + reduce_size = block_size; + name = "reduce::block"; + } + else + { + block_size = max_wavefront_size; + reduce_size = compute_subwave_size(ctx, wsize); + name = "reduce::subwave<" + to_string(reduce_size) + ">"; + } + } + }; + + template + static void normalize(std::vector& inputs, Ts&... xs) + { + auto perm = find_permutation(inputs); + std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto s) { + return reorder_shape(s, perm); + }); + each_args([&](auto& dims) { dims = reorder_dims(dims, perm); }, xs...); + } + + std::vector names() const { return {"pooling"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + const auto& out_s = inputs.back(); + options.inputs = inputs; + options.output = out_s; + options.kernel_name = "pooling_kernel"; + options.virtual_inputs = inputs; + + auto ndim = out_s.ndim(); + auto pool_ndim = ndim - 2; + + auto read_value = [&](const std::string& name, std::size_t def) { + if(v.contains(name)) + { + std::vector result(2, def); + auto x = v.at(name).to_vector(); + if(x.size() >= pool_ndim) + result.insert(result.end(), x.begin(), x.begin() + pool_ndim); + return result; + } + else + { + std::vector result(ndim, def); + return result; + } + }; + + auto padding = read_value("padding", 0); + auto stride = read_value("stride", 1); + auto window = read_value("lengths", 1); + + const auto& mode_v = v.at("mode"); + std::string mode = + mode_v.is_string() ? 
mode_v.get_string() : to_string(mode_v.to()); + bool count_include_pad = v.get("count_include_pad", false); + if(count_include_pad and mode == "average") + mode = "average_include_pad"; + + std::string op = mode + "_pool"; + if(mode == "lpnorm") + op += "<" + v.at("lp_order").to() + ">"; + + algorithm algo{}; + options.set_launch_params( + v, + compute_global_for(ctx, (out_s.elements() / algo.group_size) * algo.reduce_size, 256), + algo.block_size); + normalize(options.virtual_inputs, padding, stride, window); + auto src = interpolate_string(pooling_kernel, + {{"op", op + "{}"}, + {"algo", algo.name}, + {"group_size", to_string(algo.group_size)}, + {"window", to_string_range(window)}, + {"stride", to_string_range(stride)}, + {"padding", to_string_range(padding)}}); + + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/reduce.cpp b/docker/rocm/migraphx/targets/gpu/jit/reduce.cpp new file mode 100644 index 000000000..bdf7313f5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/reduce.cpp @@ -0,0 +1,408 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const simple_reduce_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +${preamble} + +extern "C" { +MIGRAPHX_GLOBAL void reduce_kernel(void* input_p, void* output_p) +{ + + transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) { + + simple_reduce(${reduction}, ${init}, input, output, ${read}, ${write}); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +static std::vector get_reduce_lens(const std::vector& input_lens, + const std::vector& output_lens) +{ + std::vector reduce_lens; + std::transform(output_lens.begin(), + output_lens.end(), + input_lens.begin(), + std::back_inserter(reduce_lens), + [](auto x, auto y) -> std::size_t { + if(x == y) + return 1; + else + return y; + }); + return reduce_lens; +} + +template +static shape get_reduced_shape(const shape& s, const std::vector& axes) +{ + auto lens = s.lens(); + std::fill(lens.begin(), lens.end(), 1); + for(const auto& axis : axes) + lens[axis] = s.lens()[axis]; + return s.with_lens(lens); +} + +template +static shape get_output_shape(const shape& s, const std::vector& axes) +{ + auto lens = s.lens(); + for(const auto& axis : axes) + lens[axis] = 1; + return s.with_lens(lens); +} + +template +static std::string get_reduce_algo(context& ctx, const std::vector& inputs, ReduceLens rlens) +{ + const auto init = std::numeric_limits::max(); + auto relements = std::accumulate(rlens.begin(), rlens.end(), 1, std::multiplies<>{}); + // The minimum stride + auto min_stride = std::inner_product( + rlens.begin(), + rlens.end(), + inputs.front().strides().begin(), + init, + [](auto x, auto y) { return std::min(x, y); }, + [](auto len, auto stride) { return len == 1 ? init : stride; }); + if(min_stride > 2) + return "lane"; + if(relements <= ctx.get_current_device().get_wavefront_size()) + return "wave"; + return "block"; +} + +static std::string get_reduce_algo(context& ctx, const std::vector& inputs) +{ + auto rlens = get_reduce_lens(inputs.front().lens(), inputs.back().lens()); + return get_reduce_algo(ctx, inputs, rlens); +} + +static std::size_t compute_subwave_size(context& ctx, std::size_t n) +{ + std::size_t max_wavefront_size = ctx.get_current_device().get_wavefront_size(); + std::size_t wavefront_size = 1; + while(wavefront_size <= n and wavefront_size < max_wavefront_size) + wavefront_size *= 2; + return wavefront_size; +} + +/// This will adjust the input shapes so a partial reduction is done per workgroup. +/// This is done by splitting the reduction axis so each split group becomes +/// part of the batch. So if we want to do a split redution of a tensor +/// {K}, then this will create a tensor of {K/N, N} where N is the number of +/// split groups. To compute the number of split groups it finds the largest +/// divisor that can divide K to make it less than min_size. 
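+/// As a concrete illustration (assuming the default min_size of 1024 and the
+/// factor set {2, 3, 5, 7, 11} used in the loop below): a reduction of length
+/// 8192 is halved three times, giving N = 8 and a reshaped tensor of
+/// {1024, 8}, so each workgroup reduces a 1024-element slice and the 8
+/// partial results can be combined in a follow-up step.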
+static std::vector split_reduce(const std::vector& inputs, + std::size_t min_size = 1024) +{ + std::vector result; + auto input_shape = inputs.front(); + const auto& reduce_shape = inputs[inputs.size() - 2]; + const auto& output_shape = inputs[inputs.size() - 1]; + + auto is = range(reduce_shape.lens().size()); + using array_type = std::array; + auto initial = array_type{std::numeric_limits::max(), + std::numeric_limits::max()}; + auto faxis = transform_accumulate( + is.begin(), is.end(), initial, MIGRAPHX_LIFT(std::min), [&](auto i) -> array_type { + if(input_shape.lens()[i] == output_shape.lens()[i]) + return initial; + return {input_shape.strides()[i], std::size_t(i)}; + })[1]; + + assert(faxis < reduce_shape.lens().size()); + + std::size_t n = 1; + auto r = input_shape.lens()[faxis]; + auto factors = make_array(2, 3, 5, 7, 11); + while(r > min_size) + { + // NOLINTNEXTLINE(readability-qualified-auto) + auto it = std::find_if(factors.begin(), factors.end(), [&](auto d) { return r % d == 0; }); + if(it == factors.end()) + break; + r /= *it; + n *= *it; + } + assert(n != 1); + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(result), [&](const shape& s) -> shape { + auto lens = s.lens(); + auto strides = s.strides(); + + lens.push_back(n); + if(lens[faxis] == 1) + { + strides.push_back(0); + } + else + { + lens[faxis] /= n; + strides.push_back(strides[faxis] * lens[faxis]); + } + + return {s.type(), lens, strides}; + }); + return reduce_dims(normalize_permutation(result)); +} + +struct simple_reduce_compiler : compiler +{ + std::vector names() const + { + return {"simple_reduce", + "reduce_sum", + "reduce_mean", + "reduce_max", + "reduce_min", + "reduce_prod", + "reduce_any", + "reduce_all"}; + } + + static std::size_t get_reduce_elements(const std::vector& inputs) + { + return inputs.front().elements() / inputs.back().elements(); + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + options.inputs = inputs; + options.output = inputs.back(); + options.virtual_inputs = reduce_dims(inputs); + auto faxis = find_fast_axis({options.virtual_inputs.front()}); + vectorize vec{}; + auto nelements = options.virtual_inputs.back().elements(); + auto algo = v.get("algo", get_reduce_algo(ctx, options.virtual_inputs)); + if(algo == "block" or algo == "wave") + { + // Vectorize if the axis is a reduction axis + if(options.virtual_inputs.back().lens()[faxis] == 1) + vec = vectorize::elements(ctx, faxis, options.virtual_inputs); + auto relements = get_reduce_elements(options.virtual_inputs) / vec.size; + if(algo == "block") + { + auto block_size = compute_block_size(ctx, relements, 256); + if(relements >= block_size * 256) + algo = "block_large"; + options.set_launch_params( + v, compute_global_for(ctx, nelements * block_size, 256), block_size); + } + else + { + auto subwave_size = compute_subwave_size(ctx, relements); + algo = "subwave<" + std::to_string(subwave_size) + ">"; + options.set_launch_params(v, + compute_global_for(ctx, nelements * subwave_size, 256), + ctx.get_current_device().get_wavefront_size()); + } + } + else if(algo == "lane") + { + options.set_launch_params(v, compute_global_for(ctx, nelements, 256)); + } + else + { + MIGRAPHX_THROW("Unknown reduce algo: " + algo); + } + options.kernel_name = "reduce_kernel"; + std::string identity = "[](auto x) { return x; }"; + auto src = interpolate_string(simple_reduce_kernel, + {{"reduction", v.at("reduction").to()}, + {"init", v.get("init", 
std::string{"0"})}, + {"read", v.get("read", identity)}, + {"write", v.get("write", identity)}, + {"algo", algo}, + {"transformers", make_transformer_args(vec)}, + {"preamble", v.get("preamble", std::string{})}}); + options.emplace_param("-Wno-float-equal"); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + value v = value::object{}; + reduce_op r{}; + r.set(ins, op); + v["reduction"] = r.reduction; + v["read"] = r.read; + v["write"] = r.write; + v["init"] = r.init; + return compile_op(ctx, to_shapes(ins->inputs()), v); + } +}; + +static const char* const fused_reduce_kernel = R"__migraphx__( +#include +#include +#include +#include +#include + +namespace migraphx { + +${preamble} + +extern "C" { +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), ${transformers}, rotate_and_pack_last<${noutputs}>())(${args})([](auto y, auto... xs) { + fused_reduce(y, ${assign}{}, partial(${lambda})(xs...)); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct fused_reduce_compiler : compiler +{ + std::vector names() const { return {"fused_reduce", "split_fused_reduce"}; } + + static shape get_input_shape(const std::vector& inputs) + { + auto it = std::max_element(inputs.begin(), + inputs.end(), + by(std::less<>{}, [](const shape& s) { return s.elements(); })); + return *it; + } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + auto assign = v.get("assign", "assign_none"); + auto axes = v.at("axes").to_vector(); + auto finputs = flatten(inputs); + auto noutputs = finputs.size() - inputs.size() + 1; + auto virtual_inputs = finputs; + virtual_inputs.push_back(get_reduced_shape(get_input_shape(finputs), axes)); + virtual_inputs.push_back(get_output_shape(get_input_shape(finputs), axes)); + virtual_inputs = reduce_dims(normalize_permutation(virtual_inputs)); + if(assign != "assign_none") + virtual_inputs = split_reduce(virtual_inputs); + auto reduce_output_shape = virtual_inputs.back(); + virtual_inputs.pop_back(); + auto reduction_shape = virtual_inputs.back(); + virtual_inputs.pop_back(); + + hip_compile_options options; + options.inputs = finputs; + options.output = inputs.back(); + options.virtual_inputs = virtual_inputs; + auto faxis = find_fast_axis({options.virtual_inputs.front()}); + vectorize vec{}; + auto nelements = reduce_output_shape.elements(); + auto algo = + v.get("algo", get_reduce_algo(ctx, options.virtual_inputs, reduction_shape.lens())); + if(algo == "block" or algo == "wave") + { + // Vectorize if the axis is a reduction axis + if(reduce_output_shape.lens()[faxis] == 1) + vec = vectorize::elements(ctx, faxis, options.virtual_inputs); + auto relements = reduction_shape.elements() / vec.size; + if(algo == "block") + { + auto block_size = compute_block_size(ctx, relements, 256); + if(relements >= block_size * 256) + algo = "block_large"; + options.set_launch_params( + v, compute_global_for(ctx, nelements * block_size, 256), block_size); + } + else + { + auto subwave_size = compute_subwave_size(ctx, relements); + algo = "subwave<" + std::to_string(subwave_size) + ">"; + options.set_launch_params(v, + compute_global_for(ctx, nelements * subwave_size, 256), + ctx.get_current_device().get_wavefront_size()); + } + } + else if(algo == "lane") + { + options.set_launch_params(v, compute_global_for(ctx, nelements, 256)); + } + else + { + MIGRAPHX_THROW("Unknown reduce algo: " + algo); + } + options.kernel_name = 
v.get("kernel", "reduce_kernel"); + auto src = interpolate_string( + fused_reduce_kernel, + {{"kernel", options.kernel_name}, + {"params", enum_params(finputs.size(), "void * private_p")}, + {"args", enum_params(finputs.size(), "private_p")}, + {"assign", assign}, + {"algo", algo}, + {"reduced", "decltype(" + generate_make_shape(reduce_output_shape) + ")"}, + {"lambda", v.at("lambda").to()}, + {"transformers", make_transformer_args(vec)}, + {"noutputs", std::to_string(noutputs)}, + {"preamble", v.get("preamble", std::string{})}}); + options.emplace_param("-Wno-float-equal"); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + assert(not ins->module_inputs().empty()); + auto v = op.to_value(); + auto* rm = ins->module_inputs().front(); + v["preamble"] = generate_reduce(*rm, "fused_reduce_op"); + v["lambda"] = "MIGRAPHX_LIFT(fused_reduce_op)"; + v["kernel"] = generate_name_from_ops(*rm) + "_kernel"; + return compile_op(ctx, to_shapes(ins->inputs()), v); + } +}; +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/roialign.cpp b/docker/rocm/migraphx/targets/gpu/jit/roialign.cpp new file mode 100644 index 000000000..aeaf7a858 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/roialign.cpp @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#if !MIGRAPHX_USE_MIOPEN +#include +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const roialign_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y) +{ + make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... 
xs) { + auto settings = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(float{ROIS_OFFSET}), + _c, + _c, + MIGRAPHX_MAKE_CONSTANT(float{SPATIAL_SCALE})); + roialign(xs..., settings); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct roialign_compiler : compiler +{ + std::vector names() const { return {"roialign"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + options.set_launch_params(v, compute_global_for(ctx, inputs.back().elements()), 128); + options.output = inputs.back(); + options.inputs = inputs; + options.kernel_name = "roialign_kernel"; + + // sampling_ratio + options.emplace_param("-DSAMPLING_RATIO=" + v.at("sampling_ratio").to()); + + // pooling_mode + auto mode = v.at("mode").to(); + std::string is_avg_pooling = + (mode == migraphx::op::pooling_mode::average) ? "true" : "false"; + options.emplace_param("-DIS_AVG_POOLING=" + is_avg_pooling); + + // coord_trans_mode + auto ctm = v.at("coordinate_transformation_mode").to(); + float rois_offset = (ctm == "half_pixel") ? -0.5f : 0.0f; + options.emplace_param("-DROIS_OFFSET=" + std::to_string(rois_offset)); + + // spatial_scale + options.emplace_param("-DSPATIAL_SCALE=" + v.at("spatial_scale").to()); + + return compile_hip_code_object(ctx, roialign_kernel, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/scatter.cpp b/docker/rocm/migraphx/targets/gpu/jit/scatter.cpp new file mode 100644 index 000000000..1a1264f23 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/scatter.cpp @@ -0,0 +1,78 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "scatter.hpp" + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const scatter_elements_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void scatter_elements_kernel(void* in_indices, void* in_updates, void* output) +{ + make_tensors()(in_indices, in_updates, output)([](auto&&... 
xs) { + scatter<${axis}, ${skip_out_of_bounds}>(xs..., ${reduction}{}); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct scatter_elements_compiler : scatter_compiler +{ + std::vector names() const + { + return {"scatter_none", "scatter_add", "scatter_mul", "scatter_min", "scatter_max"}; + } + + std::string make_interpolated_string(const operation& op) const + { + const auto reduction = op.name().substr(std::char_traits::length("scatter_")); + auto axis = std::to_string(op.to_value().get("axis", 0)); + auto skip_out_of_bounds = std::to_string(op.to_value().get("skip_out_of_bounds", 0)); + + return interpolate_string(scatter_elements_kernel, + {{"reduction", "assign_" + reduction}, + {"axis", axis}, + {"skip_out_of_bounds", skip_out_of_bounds}}); + } + + std::string get_kernel_name(const operation&) const { return "scatter_elements_kernel"; } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/scatter.hpp b/docker/rocm/migraphx/targets/gpu/jit/scatter.hpp new file mode 100644 index 000000000..6fb955647 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/scatter.hpp @@ -0,0 +1,81 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_JIT_SCATTER_HPP +#define MIGRAPHX_GUARD_JIT_SCATTER_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +template +struct scatter_compiler : compiler +{ + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + const auto inputs = + to_shapes(std::vector{ins->inputs().begin() + 1, ins->inputs().end()}); + + hip_compile_options options; + options.set_launch_params(op.to_value(), compute_global_for(ctx, inputs.at(1).elements())); + options.inputs = inputs; + options.output = inputs.back(); + options.kernel_name = derived().get_kernel_name(op); + options.virtual_inputs = inputs; + options.emplace_param("-DMIGRAPHX_ALLOW_ATOMIC_CAS=1"); + + const auto src = derived().make_interpolated_string(op); + return prepend_copy_data_to_output(compile_hip_code_object(ctx, src, options)); + } + + // ONNX spec states the following for ScatterElements and ScatterND: + // "The output of the operation is produced by creating a copy of the input data, ..." 
+ // The sole responsibility of the MIGraphX Scatter operator implementations being to perform the + // update operations as specified by ONNX, it is necessary to place the copying of the input + // data before the MIGraphX operator in the graph. + compiler_replace prepend_copy_data_to_output(const operation& co) const + { + return {co, [](module& m, instruction_ref ins, const operation& op) { + auto args = ins->inputs(); + args.back() = + m.insert_instruction(ins, make_op("hip::copy"), args.front(), args.back()); + args.erase(args.begin()); + return m.replace_instruction(ins, op, args); + }}; + } + + std::string get_kernel_name(const operation& op) const { return op.name() + "_kernel"; } + + const Derived& derived() const { return static_cast(*this); } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/jit/scatternd.cpp b/docker/rocm/migraphx/targets/gpu/jit/scatternd.cpp new file mode 100644 index 000000000..cf3ab00ed --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/scatternd.cpp @@ -0,0 +1,73 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "scatter.hpp" + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +// NOLINTNEXTLINE +static const char* const scatternd_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void scatternd_kernel(void* in_indices, void* in_updates, void* output) +{ + make_tensors()(in_indices, in_updates, output)([](auto&&... 
xs) { + scatternd(xs..., ${reduction}{}); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct scatternd_compiler : scatter_compiler +{ + std::vector names() const + { + return { + "scatternd_none", "scatternd_add", "scatternd_mul", "scatternd_min", "scatternd_max"}; + } + + std::string make_interpolated_string(const operation& op) const + { + const auto reduction = op.name().substr(std::char_traits::length("scatternd_")); + return interpolate_string(scatternd_kernel, {{"reduction", "assign_" + reduction}}); + } + + std::string get_kernel_name(const operation&) const { return "scatternd_kernel"; } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/softmax.cpp b/docker/rocm/migraphx/targets/gpu/jit/softmax.cpp new file mode 100644 index 000000000..d2e24a233 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/softmax.cpp @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_USE_FAST_SOFTMAX) + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const softmax_kernel = R"__migraphx__( +#include +#include +#include +#include + +namespace migraphx { + +extern "C" { +MIGRAPHX_GLOBAL void softmax_kernel(void* input_p, void* output_p) +{ + transform_args(make_tensors(), ${transformers})(input_p, output_p)([](auto input, auto output) { + softmax<${axis}>(input, output); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct softmax_compiler : compiler +{ + std::vector names() const { return {"softmax"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + // TODO: Use reduce_dims + auto axis = v.at("axis").to(); + auto faxis = find_fast_axis({inputs.front()}); + vectorize vec{}; + // Vectorize if the axis is a reduction axis + if(faxis == axis) + { + vec = vectorize::elements(ctx, faxis, inputs); + } + auto relements = inputs[0].lens()[axis] / vec.size; + auto nelements = (inputs.back().elements() / inputs[0].lens()[axis]); + auto block_size = compute_block_size(ctx, relements, 256); + hip_compile_options options; + options.set_launch_params( + v, compute_global_for(ctx, nelements * block_size, 256), block_size); + options.output = inputs.back(); + options.inputs = inputs; + options.kernel_name = "softmax_kernel"; + + if(enabled(MIGRAPHX_USE_FAST_SOFTMAX{})) + options.emplace_param("-DMIGRAPHX_USE_FAST_SOFTMAX"); + + auto src = interpolate_string( + softmax_kernel, + {{"transformers", make_transformer_args(vec)}, {"axis", to_string(axis)}}); + + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/jit/unpack_int4.cpp b/docker/rocm/migraphx/targets/gpu/jit/unpack_int4.cpp new file mode 100644 index 000000000..68f2038b8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/jit/unpack_int4.cpp @@ -0,0 +1,90 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "migraphx/instruction.hpp" +#include "migraphx/instruction_ref.hpp" +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +using namespace migraphx::gpu::gen; // NOLINT + +static const char* const unpack_int4_kernel = R"__migraphx__( +#include +#include + +namespace migraphx { + +extern "C" { + +MIGRAPHX_GLOBAL void ${kernel}(${params}) +{ + transform_args(make_tensors(), rotate_last())(${args})([](auto... xs) { + unpack_int4<${axis}>(xs...); + }); +} + +} + +} // namespace migraphx + +)__migraphx__"; + +struct unpack_int4_compiler : compiler +{ + std::vector names() const { return {"unpack_int4"}; } + + operation compile_op(context& ctx, const std::vector& inputs, const value& v) const + { + hip_compile_options options; + options.inputs = inputs; + options.output = inputs.back(); + options.virtual_inputs = reduce_dims(normalize_permutation(options.inputs)); + options.kernel_name = "unpack_int4_kernel"; + options.set_launch_params(v, compute_global_for(ctx, inputs.front().elements())); + + auto src = + interpolate_string(unpack_int4_kernel, + {{"kernel", options.kernel_name}, + {"params", enum_params(options.inputs.size(), "void * private_p")}, + {"args", enum_params(options.inputs.size(), "private_p")}, + {"axis", std::to_string(v.at("axis").to())}}); + return compile_hip_code_object(ctx, src, options); + } + + compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const + { + return compile_op(ctx, to_shapes(ins->inputs()), op.to_value()); + } +}; + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/kernel.cpp b/docker/rocm/migraphx/targets/gpu/kernel.cpp new file mode 100644 index 000000000..a7c79bded --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernel.cpp @@ -0,0 +1,159 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#else +// extern declare the function since hip/hip_ext.h header is broken +extern hipError_t hipExtModuleLaunchKernel(hipFunction_t, // NOLINT + uint32_t, + uint32_t, + uint32_t, + uint32_t, + uint32_t, + uint32_t, + size_t, + hipStream_t, + void**, + void**, + hipEvent_t = nullptr, + hipEvent_t = nullptr, + uint32_t = 0); +#endif + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +extern std::string hip_error(int error); + +using hip_module_ptr = MIGRAPHX_MANAGE_PTR(hipModule_t, hipModuleUnload); + +struct kernel_impl +{ + hip_module_ptr module = nullptr; + hipFunction_t fun = nullptr; +}; + +hip_module_ptr load_module(const char* image) +{ + hipModule_t raw_m; + auto status = hipModuleLoadData(&raw_m, image); + hip_module_ptr m{raw_m}; + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to load module: " + hip_error(status)); + return m; +} + +kernel::kernel(const char* image, const std::string& name) : impl(std::make_shared()) +{ + impl->module = load_module(image); + auto status = hipModuleGetFunction(&impl->fun, impl->module.get(), name.c_str()); + if(hipSuccess != status) + MIGRAPHX_THROW("Failed to get function: " + name + ": " + hip_error(status)); +} + +void launch_kernel(hipFunction_t fun, + hipStream_t stream, + std::size_t global, + std::size_t local, + void* kernargs, + std::size_t size, + hipEvent_t start, + hipEvent_t stop) +{ + assert(global > 0); + assert(local > 0); + void* config[] = { +// HIP_LAUNCH_PARAM_* are macros that do horrible things +#ifdef MIGRAPHX_USE_CLANG_TIDY + nullptr, kernargs, nullptr, &size, nullptr +#else + HIP_LAUNCH_PARAM_BUFFER_POINTER, + kernargs, + HIP_LAUNCH_PARAM_BUFFER_SIZE, + &size, + HIP_LAUNCH_PARAM_END +#endif + }; + + auto status = hipExtModuleLaunchKernel(fun, + global, + 1, + 1, + local, + 1, + 1, + 0, + stream, + nullptr, + reinterpret_cast(&config), + start, + stop); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status)); + if(stop != nullptr) + { + status = hipEventSynchronize(stop); + if(status != hipSuccess) + MIGRAPHX_THROW("Failed to sync event: " + hip_error(status)); + } +} + +void kernel::launch(hipStream_t stream, + std::size_t global, + std::size_t local, + std::vector args, + hipEvent_t start, + hipEvent_t stop) const +{ + assert(impl != nullptr); + void* kernargs = reinterpret_cast(args.data()); + std::size_t size = args.size() * sizeof(void*); + + launch_kernel(impl->fun, stream, global, local, kernargs, size, start, stop); +} + +void kernel::launch(hipStream_t stream, + std::size_t global, + std::size_t local, + const std::vector& args, + hipEvent_t start, + hipEvent_t stop) const +{ + assert(impl != nullptr); + std::vector kernargs = pack_args(args); + std::size_t size = kernargs.size(); + + launch_kernel(impl->fun, stream, global, local, kernargs.data(), size, start, stop); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp new file mode 100644 index 000000000..2e5b376c2 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp @@ -0,0 +1,334 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_ALGORITHM_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_ALGORITHM_HPP + +#include + +namespace migraphx { + +template +constexpr void swap(T& a, T& b) noexcept +{ + T old = a; + a = b; + b = old; +} + +template +constexpr void iter_swap(Iterator1 a, Iterator2 b) +{ + if(a == b) + return; + swap(*a, *b); +} + +struct less +{ + template + constexpr auto operator()(T x, U y) const + { + return x < y; + } +}; + +struct greater +{ + template + constexpr auto operator()(T x, U y) const + { + return x > y; + } +}; + +template +constexpr T accumulate(InputIt first, InputIt last, T init, BinaryOperation op) +{ + for(; first != last; ++first) + { + init = op(static_cast(init), *first); + } + return init; +} + +template +constexpr OutputIt copy(InputIt first, InputIt last, OutputIt d_first) +{ + while(first != last) + { + *d_first++ = *first++; + } + return d_first; +} + +template +constexpr OutputIt copy_if(InputIt first, InputIt last, OutputIt d_first, UnaryPredicate pred) +{ + for(; first != last; ++first) + { + if(pred(*first)) + { + *d_first = *first; + ++d_first; + } + } + return d_first; +} + +template +constexpr Iterator is_sorted_until(Iterator first, Iterator last, Compare comp) +{ + if(first != last) + { + Iterator next = first; + while(++next != last) + { + if(comp(*next, *first)) + return next; + first = next; + } + } + return last; +} + +template +constexpr bool is_sorted(Iterator first, Iterator last, Compare comp) +{ + return is_sorted_until(first, last, comp) == last; +} + +template +constexpr F for_each(Iterator first, Iterator last, F f) +{ + for(; first != last; ++first) + { + f(*first); + } + return f; +} + +template +constexpr Iterator find_if(Iterator first, Iterator last, Predicate p) +{ + for(; first != last; ++first) + { + if(p(*first)) + { + return first; + } + } + return last; +} + +template +constexpr Iterator find(Iterator first, Iterator last, const T& value) +{ + return find_if(first, last, [&](const auto& x) { return x == value; }); +} + +template +constexpr bool any_of(InputIt first, InputIt last, UnaryPredicate p) +{ + return find_if(first, last, p) != last; +} + +template +constexpr bool none_of(InputIt first, InputIt last, UnaryPredicate p) +{ + return find_if(first, last, p) == last; +} + +template +constexpr bool all_of(InputIt first, InputIt last, UnaryPredicate p) +{ + return none_of(first, last, [=](auto&& x) { return not p(x); }); +} + 
+template +constexpr Iterator1 search(Iterator1 first, Iterator1 last, Iterator2 s_first, Iterator2 s_last) +{ + for(;; ++first) + { + Iterator1 it = first; + for(Iterator2 s_it = s_first;; ++it, ++s_it) + { + if(s_it == s_last) + { + return first; + } + if(it == last) + { + return last; + } + if(not(*it == *s_it)) + { + break; + } + } + } +} + +template +constexpr T inner_product(InputIt1 first1, + InputIt1 last1, + InputIt2 first2, + T init, + BinaryOperation1 op1, + BinaryOperation2 op2) +{ + while(first1 != last1) + { + init = op1(init, op2(*first1, *first2)); + ++first1; + ++first2; + } + return init; +} + +template +constexpr T inner_product(InputIt1 first1, InputIt1 last1, InputIt2 first2, T init) +{ + return inner_product( + first1, + last1, + first2, + init, + [](auto x, auto y) { return x + y; }, + [](auto x, auto y) { return x * y; }); +} + +template +constexpr bool equal(Iterator1 first1, Iterator1 last1, Iterator2 first2, BinaryPred p) +{ + for(; first1 != last1; ++first1, ++first2) + if(not p(*first1, *first2)) + { + return false; + } + return true; +} + +template +constexpr void iota(Iterator first, Iterator last, T value) +{ + for(; first != last; ++first, ++value) + *first = value; +} + +template +constexpr Iterator min_element(Iterator first, Iterator last, Compare comp) +{ + if(first == last) + return last; + + Iterator smallest = first; + + while(++first != last) + if(comp(*first, *smallest)) + smallest = first; + + return smallest; +} + +template +constexpr Iterator rotate(Iterator first, Iterator middle, Iterator last) +{ + if(first == middle) + return last; + + if(middle == last) + return first; + + Iterator write = first; + Iterator next_read = first; + + for(Iterator read = middle; read != last; ++write, ++read) + { + if(write == next_read) + next_read = read; + iter_swap(write, read); + } + + rotate(write, next_read, last); + return write; +} + +template +constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value, Compare comp) +{ + auto count = last - first; + + while(count > 0) + { + auto it = first; + auto step = count / 2; + it += step; + + if(not comp(value, *it)) + { + first = ++it; + count -= step + 1; + } + else + count = step; + } + + return first; +} + +template +constexpr void sort(Iterator first, Iterator last, Compare comp) +{ + if(first == last) + return; + for(auto i = first; i != last - 1; ++i) + iter_swap(i, min_element(i, last, comp)); + MIGRAPHX_ASSERT(is_sorted(first, last, comp)); +} + +template +constexpr void sort(Iterator first, Iterator last) +{ + sort(first, last, less{}); +} + +template +constexpr void stable_sort(Iterator first, Iterator last, Compare comp) +{ + if(first == last) + return; + for(auto i = first; i != last; ++i) + rotate(upper_bound(first, i, *i, comp), i, i + 1); + MIGRAPHX_ASSERT(is_sorted(first, last, comp)); +} + +template +constexpr void stable_sort(Iterator first, Iterator last) +{ + stable_sort(first, last, less{}); +} + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/args.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/args.hpp new file mode 100644 index 000000000..2706e14a4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/args.hpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_ARGS_HPP +#define MIGRAPHX_GUARD_KERNELS_ARGS_HPP + +#include +#include + +namespace migraphx { + +// Use template specialization since ADL is broken on hcc +template +struct make_tensor; + +template +__device__ auto make_tensors_impl(F f, detail::seq, Ts*... xs) +{ + return f(make_tensor::apply(xs)...); +} + +inline __device__ auto make_tensors() +{ + return [](auto*... xs) { + return [=](auto f) { return make_tensors_impl(f, detail::gens{}, xs...); }; + }; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_ARGS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/array.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/array.hpp new file mode 100644 index 000000000..623d4d8dd --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/array.hpp @@ -0,0 +1,388 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_ARRAY_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_ARRAY_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op) \ + template \ + constexpr array& operator op(const array& x) \ + { \ + array_detail::array_for_each(*this, x)([](auto& sy, auto sx) { sy op sx; }); \ + return *this; \ + } \ + template {})> \ + constexpr array& operator op(const U& x) \ + { \ + array_detail::array_for_each (*this)([&](auto& sy) { sy op x; }); \ + return *this; \ + } \ + template \ + friend constexpr auto operator binary_op(const array& x, const array& y) \ + { \ + array z{}; \ + array_detail::array_for_each(z, x, y)( \ + [&](auto& sz, auto sx, auto sy) { sz = sx binary_op sy; }); \ + return z; \ + } \ + template {})> \ + friend constexpr auto operator binary_op(const array& x, const U& y) \ + { \ + array z{}; \ + array_detail::array_for_each(z, x)([&](auto& sz, auto sx) { sz = sx binary_op y; }); \ + return z; \ + } \ + template {})> \ + friend constexpr auto operator binary_op(const U& x, const array& y) \ + { \ + array z{}; \ + array_detail::array_for_each(z, y)([&](auto& sz, auto sy) { sz = x binary_op sy; }); \ + return z; \ + } + +namespace array_detail { +template +constexpr auto is_vectorizable() +{ + return not is_same{} and (is_fundamental{} or is_same{}); +} + +template +__device__ auto& array2vec(T& x) +{ + using value_type = typename T::value_type; + constexpr auto size = decltype(x.size()){}; + using type = vec; + if constexpr(is_const{}) + return reinterpret_cast(x); + else + return reinterpret_cast(x); +} + +template +constexpr auto array_for_each(T& x, Ts&... xs) +{ + MIGRAPHX_ASSERT(((x.size() == xs.size()) and ...)); + return [&](auto f) { + constexpr auto size = decltype(x.size()){}; + if constexpr((is_vectorizable() or + (is_vectorizable() or ...)) and + size <= 8 and size > 1 and (size % 2 == 0)) + { + if(__builtin_is_constant_evaluated()) + { + for(index_int i = 0; i < size; i++) + f(x[i], xs[i]...); + } + else + { + using vec_type = remove_reference_t; + f(array2vec(x), __builtin_convertvector(array2vec(xs), vec_type)...); + } + } + else + { + for(index_int i = 0; i < size; i++) + f(x[i], xs[i]...); + } + }; +} +} // namespace array_detail + +template +struct array +{ + using value_type = T; + T d[N]; + + constexpr array() = default; + + template {} and ...))> + constexpr array(Ts... 
xs) : d{xs...} + { + } + + template {} and (N > 1))> + constexpr explicit array(U x) + { + for(index_int i = 0; i < N; i++) + d[i] = x; + } + + constexpr T& operator[](index_int i) + { + MIGRAPHX_ASSERT(i < N); + return d[i]; + } + constexpr const T& operator[](index_int i) const + { + MIGRAPHX_ASSERT(i < N); + return d[i]; + } + + constexpr T& front() { return d[0]; } + constexpr const T& front() const { return d[0]; } + + constexpr T& back() { return d[N - 1]; } + constexpr const T& back() const { return d[N - 1]; } + + constexpr T* data() { return d; } + constexpr const T* data() const { return d; } + + constexpr index_constant size() const { return {}; } + constexpr auto empty() const { return size() == _c<0>; } + + constexpr T* begin() { return d; } + constexpr const T* begin() const { return d; } + + constexpr T* end() { return d + size(); } + constexpr const T* end() const { return d + size(); } + + constexpr T dot(const array& x) const + { + auto r = x * (*this); + return r.reduce([](auto a, auto b) { return a + b; }, 0); + } + + constexpr T product() const + { + return reduce([](auto x, auto y) { return x * y; }, 1); + } + + constexpr T single(index_int width = 100) const + { + T result = 0; + T a = 1; + for(index_int i = 0; i < N; i++) + { + result += d[N - i - 1] * a; + a *= width; + } + return result; + } + + template + constexpr auto apply(F f) const + { + array result; + for(index_int i = 0; i < N; i++) + result[i] = f(d[i]); + return result; + } + + template + constexpr auto reduce(F f, T init) const + { + T result = init; + for(index_int i = 0; i < N; i++) + result = f(result, d[i]); + return result; + } + + MIGRAPHX_DEVICE_ARRAY_OP(+=, +) + MIGRAPHX_DEVICE_ARRAY_OP(-=, -) + MIGRAPHX_DEVICE_ARRAY_OP(*=, *) + MIGRAPHX_DEVICE_ARRAY_OP(/=, /) + MIGRAPHX_DEVICE_ARRAY_OP(%=, %) + MIGRAPHX_DEVICE_ARRAY_OP(&=, &) + MIGRAPHX_DEVICE_ARRAY_OP(|=, |) + MIGRAPHX_DEVICE_ARRAY_OP(^=, ^) + + friend constexpr bool operator==(const array& x, const array& y) + { + for(index_int i = 0; i < N; i++) + { + if(x[i] != y[i]) + return false; + } + return true; + } + + template {})> + friend constexpr bool operator==(const array& x, const U& y) + { + for(index_int i = 0; i < N; i++) + { + if(x[i] != y) + return false; + } + return true; + } + + template {})> + friend constexpr bool operator==(const U& x, const array& y) + { + return y == x; + } + + template + friend constexpr bool operator!=(const U& x, const array& y) + { + return not(x == y); + } + template + friend constexpr bool operator!=(const array& x, const U& y) + { + return not(x == y); + } + // This uses the product order rather than lexical order + friend constexpr bool operator<(const array& x, const array& y) + { + for(index_int i = 0; i < N; i++) + { + if(not(x[i] < y[i])) + return false; + } + return true; + } + friend constexpr bool operator>(const array& x, const array& y) { return y < x; } + friend constexpr bool operator<=(const array& x, const array& y) { return (x < y) or (x == y); } + friend constexpr bool operator>=(const array& x, const array& y) { return (y < x) or (x == y); } + + constexpr array carry(array result) const + { + index_int overflow = 0; + for(diff_int i = result.size() - 1; i > 0; i--) + { + auto z = result[i] + overflow; + // Reset overflow + overflow = 0; + // Compute overflow using while loop instead of mod + while(z >= d[i]) + { + z -= d[i]; + overflow += 1; + } + result[i] = z; + } + result[0] += overflow; + return result; + } + + /// Get the multi-dimensional index from the given 1D index. 
+ constexpr array multi(T idx) const + { + array result; + index_int tidx = idx; + for(diff_int is = result.size() - 1; is > 0; is--) + { + result[is] = tidx % d[is]; + tidx = tidx / d[is]; + } + result[0] = tidx; + return result; + } + + template + friend constexpr const Stream& operator<<(const Stream& ss, const array& a) + { + for(index_int i = 0; i < N; i++) + { + if(i > 0) + ss << ", "; + ss << a[i]; + } + return ss; + } +}; + +template +constexpr auto array_apply(F f) +{ + return [=](auto&& x) { return x.apply(f); }; +} + +template +constexpr array make_array(T x, Ts... xs) +{ + return {x, static_cast(xs)...}; +} +template +struct integral_const_array : array +{ + using base_array = array; + MIGRAPHX_DEVICE_CONSTEXPR integral_const_array() : base_array({Xs...}) {} + + constexpr const base_array& base() const { return *this; } +}; + +template +constexpr auto make_const_array(T x, Ts... xs) +{ + return integral_const_array{}; +} + +template +constexpr auto generate_array(N n, F f) +{ + return sequence_c([=](auto... is) { return array{f(is)...}; }); +} + +template +constexpr auto unpack(integral_const_array, F f) +{ + return f(_c...); +} + +template +constexpr auto transform(integral_const_array, F f) +{ + return integral_const_array{}; +} + +template +constexpr auto transform_i(integral_const_array, F f) +{ + return sequence_c( + [=](auto... is) { return integral_const_array{}; }); +} + +template +constexpr auto transform(integral_const_array, integral_const_array, F f) +{ + return integral_const_array{}; +} + +template +constexpr auto return_array_c(F f) +{ + constexpr auto r = f(); + return sequence(r.size(), [&](auto... is) { return make_const_array(_c...); }); +} + +template +using index_ints = integral_const_array; + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/atomic.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/atomic.hpp new file mode 100644 index 000000000..76e0409cc --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/atomic.hpp @@ -0,0 +1,142 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_ATOMIC_HPP +#define MIGRAPHX_GUARD_KERNELS_ATOMIC_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef MIGRAPHX_ALLOW_ATOMIC_CAS +// NOLINTNEXTLINE +#define MIGRAPHX_ALLOW_ATOMIC_CAS 0 +#endif + +// NOLINTNEXTLINE +#define MIGRAPHX_ATOMIC_CAS_WARNING() \ + MIGRAPHX_ASSERT(MIGRAPHX_ALLOW_ATOMIC_CAS and "Using atomicCAS is slow") + +namespace migraphx { +namespace atomic { + +using cas_rank = rank<1>; + +template +MIGRAPHX_DEVICE_CONSTEXPR void cas(rank<1>, T& x, T y, Op op) +{ + MIGRAPHX_ATOMIC_CAS_WARNING(); + using storage = conditional_t; + storage* address = reinterpret_cast(&x); + storage expected = __hip_atomic_load(address, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); + while(not __hip_atomic_compare_exchange_strong(address, + &expected, + bit_cast(op(bit_cast(expected), y)), + __ATOMIC_RELAXED, + __ATOMIC_RELAXED, + __HIP_MEMORY_SCOPE_AGENT)) + { + } +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto cas(rank<0>, vec& x, vec y, Op op) + -> decltype(cas(cas_rank{}, x[0], y[0], op), void()) +{ + for(index_int i = 0; i < N; i++) + { + cas(cas_rank{}, x[i], y[i], op); + } +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto builtin_assign(T& x, T y, op::sum) + MIGRAPHX_RETURNS(unsafeAtomicAdd(&x, y)); + +__device__ inline void builtin_assign(half2& x, half2 y, op::sum) +{ + __builtin_amdgcn_global_atomic_fadd_v2f16(&x, y); +} + +template +constexpr bool is_aligned(const void* ptr) +{ + auto iptr = bit_cast(ptr); + return (iptr % alignof(T)) == 0; +} + +__device__ inline void builtin_assign(half& x, half y, op::sum) +{ + half* address = &x; + if(is_aligned(address)) + { + __builtin_amdgcn_global_atomic_fadd_v2f16(address, half2{half(y), half(0)}); + } + else + { + __builtin_amdgcn_global_atomic_fadd_v2f16(address - 1, half2{half(0), half(y)}); + } +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto builtin_assign(T& x, T y, op::min) + MIGRAPHX_RETURNS(unsafeAtomicMin(&x, y)); + +template +MIGRAPHX_DEVICE_CONSTEXPR auto builtin_assign(T& x, T y, op::max) + MIGRAPHX_RETURNS(unsafeAtomicMax(&x, y)); + +template +MIGRAPHX_DEVICE_CONSTEXPR auto builtin_assign(vec& x, vec y, Op op) + -> decltype(builtin_assign(x[0], y[0], op), void()) +{ + for(index_int i = 0; i < N; i++) + { + builtin_assign(x[i], y[i], op); + } +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto assign(rank<0>, T& x, T y, Op op) + MIGRAPHX_RETURNS(cas(cas_rank{}, x, y, op)); + +template +MIGRAPHX_DEVICE_CONSTEXPR auto assign(rank<1>, T& x, T y, Op op) + MIGRAPHX_RETURNS(builtin_assign(x, y, op)); + +} // namespace atomic + +template +MIGRAPHX_DEVICE_CONSTEXPR void atomic_assign(T& x, U y, Op op) +{ + atomic::assign(rank<1>{}, x, T(y), op); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_ATOMIC_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp new file mode 100644 index 000000000..e559658a0 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp @@ -0,0 +1,42 @@ +/* ************************************************************************ + * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ +#ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP +#define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP + +#include +#include + +namespace migraphx { + +template {} and is_trivially_copyable{})> +inline constexpr auto bit_cast(From fr) noexcept +{ + return vec_transform(fr)([](auto x) -> To { + static_assert(sizeof(To) == sizeof(decltype(x))); + return __builtin_bit_cast(To, x); + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_BITCAST_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck.hpp new file mode 100644 index 000000000..de22e7b07 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck.hpp @@ -0,0 +1,175 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_CK_HPP +#define MIGRAPHX_GUARD_KERNELS_CK_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +namespace detail { +template +struct to_ck_type_impl +{ + using type = T; +}; +template <> +struct to_ck_type_impl +{ + using type = ck::half_t; +}; + +template +struct to_ck_type_impl +{ + using type = const typename to_ck_type_impl::type; +}; + +template +constexpr bool is_row_major() +{ + constexpr auto strides = Shape{}.strides; + MIGRAPHX_ASSERT(strides.size() >= 2); + if(strides.back() == 1) + { + MIGRAPHX_ASSERT(not Shape{}.is_transposed()); + return true; + } + MIGRAPHX_ASSERT(strides[strides.size() - 2] == 1); + + return false; +} + +} // namespace detail + +template +using to_ck_type = typename detail::to_ck_type_impl::type; + +template +constexpr auto to_ck_pointer(T* x) +{ + return static_cast*>(x); +} + +template +constexpr auto to_ck_const_pointer(const T* x) +{ + return static_cast*>(x); +} + +template +using to_ck_gemm_layout = conditional_t>(), + ck::tensor_layout::gemm::RowMajor, + ck::tensor_layout::gemm::ColumnMajor>; + +template +constexpr auto to_ck_tensor() +{ + constexpr auto s = get_shape_c{}; + return sequence(s.lens.size(), [&](auto... is) { + return ck::make_naive_tensor_descriptor(ck::make_tuple(s.lens[is]...), + ck::make_tuple(s.strides[is]...)); + }); +} + +template +struct ck_function_adaptor : F +{ + template + constexpr ck_function_adaptor(Ts&&... xs) : F(static_cast(xs)...) + { + } + + template + constexpr void operator()(T& out, Ts&&... xs) const + { + out = static_cast(*this)(static_cast(xs)...); + } +}; + +struct ck_nop +{ + template + constexpr void operator()(T&) const + { + } +}; + +struct ck_passthrough +{ + template + constexpr void operator()(T& y, U x) const + { + y = x; + } +}; + +struct ck_scale +{ + constexpr ck_scale(float s) : scale(s) {} + + template + constexpr void operator()(T& y, U x) const + { + y = x * static_cast(scale); + } + + float scale; +}; + +struct ck_add +{ + template + constexpr void operator()(T& y, U x) const + { + y += x; + } +}; + +// In CK, the B matrix is ordered as N,K instead of K,N +template +constexpr auto ck_transposeb_dims(Dims dims) +{ + return unpack(dims, [](auto k, auto n) { return make_const_array(n, k); }); +} + +template +using ck_transposeb = decltype(make_shape(ck_transposeb_dims(get_shape_c{}.lens), + ck_transposeb_dims(get_shape_c{}.strides))); + +#ifdef MIGRAPHX_CK_CHECK +#define MIGRAPHX_CK_STATIC_ASSERT static_assert +#else +#define MIGRAPHX_CK_STATIC_ASSERT(...) +#endif + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_CK_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp new file mode 100644 index 000000000..ccef52132 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm.hpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_CK_GEMM_HPP +#define MIGRAPHX_GUARD_KERNELS_CK_GEMM_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +template +__device__ void ck_gemm_matrix(E e, A a, B b, Ds... ds) +{ + constexpr auto desc = G::make_descriptor(to_ck_tensor(), + to_ck_tensor>(), + ck::make_tuple(to_ck_tensor()...), + to_ck_tensor()); + + MIGRAPHX_STATIC_ASSERT_FOR(desc.IsValid()) + { + G::Run(desc, + to_ck_const_pointer(a.data()), + to_ck_const_pointer(b.data()), + ck::make_tuple(to_ck_const_pointer(ds.data())...), + to_ck_pointer(e.data())); + } +} + +template +__device__ void ck_gemm(Ts... xs) +{ + gemm_batch_args(make_index(), _c, xs...)( + [](auto... ys) { ck_gemm_matrix(ys...); }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp new file mode 100644 index 000000000..8e381f375 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp @@ -0,0 +1,75 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_CK_GEMM_SOFTMAX_GEMM_HPP +#define MIGRAPHX_GUARD_KERNELS_CK_GEMM_SOFTMAX_GEMM_HPP + +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +template +struct ck_gemm_softmax_gemm_settings +{ + T scale{}; +}; + +template +constexpr ck_gemm_softmax_gemm_settings make_ck_gemm_softmax_gemm_settings(Ts... xs) +{ + return {xs...}; +} + +template +__device__ void ck_gemm_softmax_gemm_matrix(C c, A a, B b, B1 b1, Settings s) +{ + constexpr auto desc = G::make_descriptor(to_ck_tensor(), + to_ck_tensor>(), + to_ck_tensor>(), + to_ck_tensor()); + + MIGRAPHX_STATIC_ASSERT_FOR(desc.IsValid()) + { + G::Run(desc, + s.scale, + to_ck_const_pointer(a.data()), + to_ck_const_pointer(b.data()), + to_ck_const_pointer(b1.data()), + to_ck_pointer(c.data())); + } +} + +template +__device__ void ck_gemm_softmax_gemm(Settings s, Ts... xs) +{ + gemm_batch_args(make_index(), _c, xs...)( + [&](auto... ys) { ck_gemm_softmax_gemm_matrix(ys..., s); }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_probabilities.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_probabilities.hpp new file mode 100644 index 000000000..5838d9874 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_probabilities.hpp @@ -0,0 +1,111 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_COMPUTE_ATTENTION_PROBABILITIES_HPP +#define MIGRAPHX_GUARD_KERNELS_COMPUTE_ATTENTION_PROBABILITIES_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void +calculate_attention_probs(AttnProbs attention_probs, // output buffer with size BxNxSxT + Query query, // Q data. 
Its size is BxNxSxH + SeqLensK seqlens_k, // past sequence lengths tensor + PresentKey present_key, // present key only + Params params, + index_int idx) +{ + const index_int batch_size = params.batch_size; + const index_int sequence_length = params.sequence_length; + const index_int head_size = params.head_size; + const index_int present_buffer_sequence_length = params.seqlen_present_kv_cache; + const index_int num_heads = params.num_heads; + const index_int kv_num_heads = params.kv_num_heads; + const index_int packed_batch_stride = + (num_heads + 2 * kv_num_heads) * sequence_length * head_size; + const index_int kv_num_heads_factor = num_heads / kv_num_heads; + const index_int q_input_chunk_length = sequence_length * head_size; // S x H + const index_int present_buff_chunk_length = present_buffer_sequence_length * head_size; // T x H + + const index_int loop_len = batch_size * num_heads; + const float alpha = + params.scale == 0.0f ? 1.0f / sqrt(static_cast(head_size)) : params.scale; + + const index_int i = idx / (sequence_length * present_buffer_sequence_length); + const index_int inner_i = idx % (sequence_length * present_buffer_sequence_length); + if(i < loop_len) + { + const auto batch_index = i / num_heads; + const auto head_index = i % num_heads; + const index_int total_seqlen = seqlens_k[batch_index] + 1; + const index_int output_offset = i * sequence_length * present_buffer_sequence_length; + auto output = attention_probs + output_offset; + auto pk = present_key + ((i / kv_num_heads_factor) * present_buff_chunk_length); + auto q = query + packed_batch_stride * batch_index + q_input_chunk_length * head_index; + + naive_gemm gemm{sequence_length, + total_seqlen, + head_size, + head_size, + head_size, + present_buffer_sequence_length, + true, + alpha, + 0.0f}; + gemm.compute(output, q, pk, inner_i); + } +} + +template +__device__ void compute_attention_probabilities(Output output, + Query query, + PresentKey present_key, + PresentValue, + SeqLensK seqlens_k, + Params params) +{ + auto ind = make_index(); + ind.global_stride( + params.batch_size * params.num_heads * params.sequence_length * + params.seqlen_present_kv_cache, + [&](auto idx) { + calculate_attention_probs( + output.begin(), query.begin(), seqlens_k.begin(), present_key.begin(), params, idx); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_scores.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_scores.hpp new file mode 100644 index 000000000..f51cfacd5 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/compute_attention_scores.hpp @@ -0,0 +1,112 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_COMPUTE_ATTENTION_SCORES_HPP +#define MIGRAPHX_GUARD_KERNELS_COMPUTE_ATTENTION_SCORES_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void +calculate_attention_score(Output output, // buffer for the result with size BxSxNxH + const AttnProbs attention_probs, // Attention probs with size BxNxSxT + const SeqLensK seqlens_k, // past sequence lengths tensor + PresentValue present_value, // present value only + Params params, + index_int idx) +{ + const index_int batch_size = params.batch_size; + const index_int num_heads = params.num_heads; + const index_int sequence_length = params.sequence_length; + const index_int head_size = params.head_size; + const index_int hidden_size = params.hidden_size; + const index_int present_buffer_sequence_length = params.seqlen_present_kv_cache; + const index_int kv_num_heads = params.kv_num_heads; + const index_int kv_num_heads_factor = num_heads / kv_num_heads; + const index_int present_buff_chunk_length = present_buffer_sequence_length * head_size; // T x H + + auto loop_len = batch_size * num_heads; + const index_int i = idx / (sequence_length * head_size); + const index_int inner_i = idx % (sequence_length * head_size); + if(i < loop_len) + { + const index_int batch_index = i / num_heads; + const index_int head_index = i % num_heads; + const index_int total_seqlen = seqlens_k[batch_index] + 1; + + auto pv = present_value + ((i / kv_num_heads_factor) * present_buff_chunk_length); + Output output_current = + output + (batch_index * sequence_length * num_heads + head_index) * head_size; + ptrdiff_t attention_probs_offset = sequence_length * present_buffer_sequence_length * i; + + naive_gemm gemm{sequence_length, + head_size, + total_seqlen, + present_buffer_sequence_length, + head_size, + hidden_size, + false, + 1.0f, + 0.0f}; + gemm.compute(output_current, attention_probs + attention_probs_offset, pv, inner_i); + } +} + +template +__device__ void compute_attention_scores(Output output, + Query, + PresentKey, + PresentValue present_value, + SeqLensK seqlens_k, + AttnProbs attn_probs, + Params params) +{ + const index_int elements = + params.batch_size * params.num_heads * params.sequence_length * params.head_size; + auto ind = make_index(); + ind.global_stride(elements, [&](auto idx) { + calculate_attention_score(output.begin(), + attn_probs.begin(), + seqlens_k.begin(), + present_value.begin(), + params, + idx); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat.hpp new file mode 100644 index 000000000..9dd2ec6b6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat.hpp @@ -0,0 +1,87 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#ifndef MIGRAPHX_GUARD_KERNELS_CONCAT_HPP +#define MIGRAPHX_GUARD_KERNELS_CONCAT_HPP + +namespace migraphx { + +template +constexpr auto concat_slice(Output out, Input, Start) +{ + constexpr auto lens = get_shape_c{}.lens; + constexpr auto strides = get_shape_c{}.strides; + constexpr auto offset = return_c([] { + constexpr auto output_shape = get_shape_c{}; + return Start{} * output_shape.strides[Axis]; + }); + constexpr auto s = make_shape(lens, strides); + MIGRAPHX_ASSERT(offset < out.get_shape().element_space()); + MIGRAPHX_ASSERT((s.element_space() + offset) <= out.get_shape().element_space()); + return make_tensor_view(out.data() + offset, s); +} + +template +constexpr auto concat_slices(Input input, Start start, Ts... xs) +{ + return [=](auto f) { return f(concat_slice(xs, input, start)...); }; +} + +template +constexpr auto concat_ends(Input) +{ + constexpr auto lens = get_shape_c{}.lens; + return _c; +} + +template +__device__ auto concat_each(index idx, Start start, InputPack input_pack, F f, Ts... ts) +{ + return input_pack([&](auto g, auto x, auto... xs) { + return concat_slices(x, start, ts...)([&](auto z, auto... ys) { + idx.global_stride(x.get_shape().elements(), + [&](auto i) { z[i] = f(g(x[i], xs[i]...), ys[i]...); }); + + return start + concat_ends(x); + }); + }); +} + +template +__device__ auto concat(InputPacks... input_packs) +{ + return [=](auto f, auto... ts) { + auto idx = make_index(); + fold([&](auto start, auto input_pack) { + return concat_each(idx, start, input_pack, f, ts...); + })(_c<0>, input_packs...); + }; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_CONCAT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat_past_present.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat_past_present.hpp new file mode 100644 index 000000000..dcfebbbe0 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/concat_past_present.hpp @@ -0,0 +1,141 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_CONCAT_PAST_PRESENT_HPP +#define MIGRAPHX_GUARD_KERNELS_CONCAT_PAST_PRESENT_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void copy_data(Dest destination, const Src source, index_int n, index_int idx) +{ + if(idx < n) + { + destination[idx] = source[idx]; + } +} + +struct concat_state_chunk +{ + index_int present_buff_chunk_length; + index_int past_buff_chunk_length; + index_int past_chunk_length; + index_int new_chunk_length; + bool is_prompt; + bool past_present_share_buffer; + std::ptrdiff_t i; + + template + __device__ Present compute(Past past, const Chunk chunk, Present present, index_int idx) + { + auto start = present + i * present_buff_chunk_length; + + auto p = start; + if(not is_prompt) + { + if(not past_present_share_buffer) + { + const auto src_past = past + i * past_buff_chunk_length; + copy_data(p, src_past, past_chunk_length, idx); + } + p += past_chunk_length; + } + copy_data(p, chunk, new_chunk_length, idx); + return start; + } +}; + +template +__device__ void +update_cache(const Present present, SeqLensK seqlens_k, Cache cache, Params params, index_int idx) +{ + const index_int batch_size = params.batch_size; + const index_int sequence_length = params.sequence_length; + const index_int head_size = params.head_size; + const index_int past_buffer_sequence_length = params.seqlen_present_kv_cache; + const index_int present_buffer_sequence_length = past_buffer_sequence_length; + const index_int num_heads = params.num_heads; + const index_int kv_num_heads = params.kv_num_heads; + const bool is_prompt = sequence_length != 1; + const index_int packed_batch_stride = + (num_heads + 2 * kv_num_heads) * sequence_length * head_size; + const index_int kv_num_heads_factor = num_heads / kv_num_heads; + const index_int kv_input_chunk_length = sequence_length * head_size; // L x H + const index_int past_buff_chunk_length = past_buffer_sequence_length * head_size; // L x H + const index_int present_buff_chunk_length = present_buffer_sequence_length * head_size; // T x H + + const index_int loop_len = batch_size * num_heads; + const index_int i = idx / (sequence_length * head_size); + const index_int inner_i = idx % (sequence_length * head_size); + if(i < loop_len) + { + const index_int batch_index = i / num_heads; + const index_int head_index = i % num_heads; + const index_int past_seqlen = sequence_length == 1 + ? 
static_cast(seqlens_k[batch_index]) + : past_buffer_sequence_length; + const index_int past_chunk_length = past_seqlen * head_size; + + auto current = present + packed_batch_stride * batch_index + + kv_input_chunk_length * (head_index / kv_num_heads_factor); + + concat_state_chunk concat{present_buff_chunk_length, + past_buff_chunk_length, + past_chunk_length, + kv_input_chunk_length, + is_prompt, + params.past_present_share_buffer, + i / kv_num_heads_factor}; + concat.compute(cache, current, cache, inner_i); + } +} + +template +__device__ void concat_past_present( + const Query query, PastKey past_key, PastValue past_value, SeqLensK seqlens_k, Params params) +{ + auto ind = make_index(); + auto elements = + 2 * params.batch_size * params.kv_num_heads * params.sequence_length * params.head_size; + ind.global_stride(elements, [&](auto idx) { + auto q = query.begin(); + auto k = q + params.num_heads * params.sequence_length * params.head_size; + auto v = q + (params.num_heads + params.kv_num_heads) * params.sequence_length * + params.head_size; + if(idx < elements / 2) + { + update_cache(k, seqlens_k, past_key.begin(), params, idx); + } + else if(idx < elements) + { + update_cache(v, seqlens_k, past_value.begin(), params, idx - (elements / 2)); + } + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/copy.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/copy.hpp new file mode 100644 index 000000000..972988992 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/copy.hpp @@ -0,0 +1,65 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_COPY_HPP +#define MIGRAPHX_GUARD_KERNELS_COPY_HPP + +#include +#include + +namespace migraphx { + +template +__device__ void local_vector_copy(Index idx, T* src, U* dst, Size size) +{ + constexpr auto n = find_vectorize_size([&](auto i) { return (size % i) == 0; }); + auto vsrc = as_vec(remove_bool(src)); + auto vdst = as_vec(remove_bool(dst)); + index_int vsize = size / n; + idx.local_stride(vsize, [&](auto i) { vdst[i] = vsrc[i]; }); +} + +template +__device__ void local_tensor_copy(Index idx, T src, U dst) +{ + constexpr auto src_shape = get_shape_c{}; + constexpr auto dst_shape = get_shape_c{}; + if constexpr(src_shape == dst_shape and (src_shape.packed() or src_shape.broadcasted())) + { + local_vector_copy(idx, src.data(), dst.data(), src_shape.element_space()); + } + else + { + constexpr auto perm = find_permutation(src_shape, dst_shape); + auto new_src = reorder_tensor_view(src, perm); + auto new_dst = reorder_tensor_view(dst, perm); + auto_vectorize()(new_src, new_dst)([&](auto vsrc, auto vdst) { + index_int size = vsrc.get_shape().elements(); + idx.local_stride(size, [&](auto i) { vdst[i] = vsrc[i]; }); + }); + } +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_COPY_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/debug.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/debug.hpp new file mode 100644 index 000000000..5e5e16b13 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/debug.hpp @@ -0,0 +1,230 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_DEBUG_HPP +#define MIGRAPHX_GUARD_KERNELS_DEBUG_HPP + +#include + +namespace migraphx { + +#define MIGRAPHX_STRINGIZE_1(...) #__VA_ARGS__ +#define MIGRAPHX_STRINGIZE(...) MIGRAPHX_STRINGIZE_1(__VA_ARGS__) + +// Workaround hip's broken abort on device code +#ifdef __HIP_DEVICE_COMPILE__ +// NOLINTNEXTLINE +#define MIGRAPHX_HIP_NORETURN +#else +// NOLINTNEXTLINE +#define MIGRAPHX_HIP_NORETURN [[noreturn]] +#endif + +namespace debug { +struct swallow +{ + template + constexpr swallow(Ts&&...) 
+ { + } +}; + +template +struct print_buffer +{ + char buffer[N + 1] = {0}; + char* pos = buffer; + + constexpr void append(char c) + { + if(c == 0) + return; + if(pos < buffer + N) + { + *pos = c; + pos++; + } + } + static constexpr void reverse(char* first, char* last) + { + if(first == last) + return; + last--; + while(first < last) + { + char tmp = *first; + *first = *last; + *last = tmp; + first++; + last--; + } + } + + template + constexpr void append(T i) + { + if(i < 0) + { + append('-'); + i = -i; + } + if(i == 0) + { + append('0'); + return; + } + char* start = pos; + while(i != 0) + { + char c = (i % 10) + '0'; + append(c); + i = i / 10; + } + reverse(start, pos); + } + + constexpr void append(const char* str) + { + if(str == nullptr) + return; + int i = 512; + while(*str != 0 and i > 0) + { + append(*str); + str++; + i--; + } + } + + template + constexpr void append(const char (&array)[M]) + { + for(int i = 0; i < M; i++) + append(array[i]); + } +}; + +template +__host__ __device__ void print(const Ts&... xs) +{ + print_buffer<1024> buffer; + swallow{(buffer.append(xs), 0)...}; + printf("%s", buffer.buffer); +} + +} // namespace debug + +struct source_location +{ + int line = __builtin_LINE(); + const char* file = __builtin_FILE(); + const char* function = __builtin_FUNCTION(); +}; + +template +struct source_location_capture +{ + T x; + source_location loc; + // declval is a workaround since default constructor for "U" is not working with rocm-5.6 + template + static U&& declval(); + template ()))> + constexpr source_location_capture(U px, source_location ploc = source_location{}) + : x(px), loc(ploc) + { + } + template ()))> + constexpr source_location_capture(source_location_capture slc) : x(slc.x), loc(slc.loc) + { + } + + constexpr operator source_location() const { return loc; } + + constexpr operator T() const { return x; } +}; + +template +constexpr auto capture_transform(source_location_capture slc, F f) +{ + auto r = f(slc.x); + return source_location_capture(r, slc.loc); +} + +template +constexpr auto capture_transform(T x, F f) +{ + return f(x); +} + +// noreturn cannot be used on this function because abort in hip is broken +template +MIGRAPHX_HIP_NORETURN inline __host__ __device__ void +assert_fail(const T1& assertion, const T2& file, const T3& line, const T4& function) +{ + // printf is broken on hip with more than one argument, so use a simple print functions instead + debug::print(file, ":", line, ": ", function, ": assertion '", assertion, "' failed.\n"); + // printf("%s:%s: %s: assertion '%s' failed.\n", file, line, function, assertion); + abort(); +} + +template +MIGRAPHX_HIP_NORETURN inline __host__ __device__ void assert_fail(const source_location& loc, + Ts... xs) +{ + debug::print(loc.file, ":", loc.line, ": ", loc.function, ": error: ", xs..., "\n"); + abort(); +} + +// NOLINTNEXTLINE +#define MIGRAPHX_ASSERT_FAIL(cond, ...) \ + ((cond) ? void(0) : [](auto&&... private_migraphx_xs) { \ + assert_fail(private_migraphx_xs...); \ + }(__VA_ARGS__)) + +// NOLINTNEXTLINE +#define MIGRAPHX_CHECK(cond) \ + MIGRAPHX_ASSERT_FAIL(cond, #cond, __FILE__, __LINE__, __PRETTY_FUNCTION__) + +#ifdef MIGRAPHX_DEBUG +// NOLINTNEXTLINE +#define MIGRAPHX_CAPTURE_SOURCE_LOCATION(T) source_location_capture +#define MIGRAPHX_WARN(cond, loc, ...) 
MIGRAPHX_ASSERT_FAIL(cond, loc, __VA_ARGS__) +#define MIGRAPHX_ASSERT MIGRAPHX_CHECK +#define MIGRAPHX_ASSUME MIGRAPHX_CHECK +#define MIGRAPHX_UNREACHABLE() MIGRAPHX_ASSERT(false) +#else +// NOLINTNEXTLINE +#define MIGRAPHX_CAPTURE_SOURCE_LOCATION(T) T +#define MIGRAPHX_ASSUME __builtin_assume +#define MIGRAPHX_UNREACHABLE __builtin_unreachable +#define MIGRAPHX_ASSERT(cond) +#define MIGRAPHX_WARN(...) +#endif + +#define MIGRAPHX_STATIC_ASSERT_FOR(...) \ + static_assert(__VA_ARGS__); \ + if constexpr(__VA_ARGS__) + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_DEBUG_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dfor.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dfor.hpp new file mode 100644 index 000000000..d8255d4b9 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dfor.hpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_DFOR_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_DFOR_HPP + +namespace migraphx { + +// Multidimensional for loop +inline constexpr auto dfor() +{ + return [](auto f) { f(); }; +} + +template +constexpr auto dfor(T x, Ts... xs) +{ + return [=](auto f) { + for(T i = 0; i < x; i++) + { + dfor(xs...)([&](Ts... is) { f(i, is...); }); + } + }; +} + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp new file mode 100644 index 000000000..5ae4c6866 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/dpp.hpp @@ -0,0 +1,101 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_DPP_HPP +#define MIGRAPHX_GUARD_KERNELS_DPP_HPP + +#include +#include +#include + +namespace migraphx { + +constexpr bool is_power_of_2(unsigned int x) { return x > 0 and (x & (x - 1)) == 0u; } + +#ifndef MIGRAPHX_HAS_DPP +#define MIGRAPHX_HAS_DPP 1 +#endif + +#if MIGRAPHX_HAS_DPP +constexpr unsigned int dpp_row_shr(unsigned int x) { return 0x110u | x; } + +constexpr unsigned int dpp_row_bcast(unsigned int x) +{ + unsigned int y = 0; + switch(x) + { + case 15: y = 0x142; break; + case 31: y = 0x143; break; + default: MIGRAPHX_UNREACHABLE(); + } + return y; +} + +template +__device__ T dpp_op(T& x, F f) +{ + static const index_int n = sizeof(T) < 4 ? 1 : sizeof(T) / 4; + union type + { + uint32_t reg[n]; + T data; + }; + type output{}; + type input{}; + // cppcheck-suppress unreadVariable + input.data = x; + for(index_int i = 0; i < n; i++) + { + output.reg[i] = f(input.reg[i]); + } + return output.data; +} + +template +__device__ T dpp_mov(T& x) +{ + return dpp_op(x, + [](auto i) { return __hip_move_dpp(i, DppCtrl, RowMask, BankMask, BoundCtrl); }); +} + +template +__device__ T dpp_swizzle(T& x) +{ + return dpp_op(x, [](auto i) { return __hip_ds_swizzle(i, Mask); }); +} + +template +__device__ T readlane(T& x) +{ + static_assert(is_power_of_2(Width), "Width must be a power of 2"); + return dpp_op(x, [](auto i) { return __shfl(i, SrcLane, Width); }); +} + +#endif // MIGRAPHX_HAS_DPP + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_DPP_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8.hpp new file mode 100644 index 000000000..8227ae220 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8.hpp @@ -0,0 +1,567 @@ +/* ************************************************************************ + * + * The MIT License (MIT) + * + * Copyright (C) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#ifndef MIGRAPHX_GUARD_KERNELS_FLOAT8_HPP +#define MIGRAPHX_GUARD_KERNELS_FLOAT8_HPP +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#pragma clang diagnostic ignored "-Wc++20-extensions" // required for "asm" inside constexpr +#endif // __clang__ + +// We are clipping in down conversion by default +#define MIGRAPHX_F8_DOWNCAST_CLIPPING 1 // NOLINT + +#include +#include +#include + +namespace migraphx { +namespace fp8 { + +enum class rounding_mode +{ + standard, // standard rounding is doing RNE -- round to nearest even + stochastic +}; + +enum class f8_type +{ + bf8 = 0, // s1e5m2 + fp8 = 1 // s1e4m3 +}; + +template +class numeric_limits; + +template +struct float8 +{ + uint8_t data; + // default constructor + __device__ constexpr float8() = default; + // default copy constructor + __device__ constexpr float8(const float8& y) = default; + struct from_bits_t + { + }; + static constexpr __device__ from_bits_t from_bits() { return from_bits_t(); } + + __device__ explicit constexpr float8(uint8_t bits, from_bits_t) : data(bits) {} + +#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) + // device specific optimized F8 down-conversion code + + template + static __device__ uint8_t cast_to_f8fnuz_from_f32(float v, uint32_t rng = 0) + { + uint8_t i8data = 0x00; + union + { + float fval; + uint32_t i32val; + uint8_t i8val[4]; // NOTE: not endian independent + } val; + + uint32_t ival = 0; + val.fval = v; + +#ifdef MIGRAPHX_F8_DOWNCAST_CLIPPING + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + if((val.i32val & 0x7F800000) != 0x7F800000) /// propagate NAN/INF, no clipping + val.fval = __builtin_amdgcn_fmed3f(val.fval, 240.0, -240.0); + } + else + { + if((val.i32val & 0x7F800000) != 0x7F800000) // propagate NAN/INF, no clipping + val.fval = __builtin_amdgcn_fmed3f(val.fval, 57344.0, -57344.0); + } +#endif + if(stochastic_rounding) + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + ival = __builtin_amdgcn_cvt_sr_fp8_f32(val.fval, rng, ival, 0); // 0 pos + } + else + { + ival = __builtin_amdgcn_cvt_sr_bf8_f32(val.fval, rng, ival, 0); // 0 pos + } + } + else // RNE CVT + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + ival = __builtin_amdgcn_cvt_pk_fp8_f32( + val.fval, val.fval, ival, false); // false -> WORD0 + } + else + { + ival = 
__builtin_amdgcn_cvt_pk_bf8_f32( + val.fval, val.fval, ival, false); // false -> WORD0} + } + } + val.i32val = ival; + i8data = val.i8val[0]; // little endian + + return i8data; + } +#endif // __gfx940__ + + // constructor from float +#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) + + // NOTE: ON-DEVICE... always optimal bias + explicit constexpr __device__ + float8(const float v, + migraphx::fp8::rounding_mode rm = migraphx::fp8::rounding_mode::standard, + uint32_t rng = 0) + { + if(__builtin_is_constant_evaluated() or !FNUZ) + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { +#ifdef MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<3, 4, float, FNUZ /*negative_zero_nan*/, true /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#else // MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<3, 4, float, FNUZ /*negative_zero_nan*/, false /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#endif // MIGRAPHX_F8_DOWNCAST_CLIPPING + } + else + { +#ifdef MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<2, 5, float, FNUZ /*negative_zero_nan*/, true /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#else // MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<2, 5, float, FNUZ /*negative_zero_nan*/, false /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#endif // MIGRAPHX_FP8_DOWNCAST_CLIPPING} + } + } + else + { + // runtime branch, use cast_to_f8fnuz_from_f32 if want to avoid it + if(rm == migraphx::fp8::rounding_mode::stochastic) + data = cast_to_f8fnuz_from_f32(v, rng); + else + data = cast_to_f8fnuz_from_f32(v); + } + } +#else + // DEVICE for non-gfx940 using s/w simulation + explicit constexpr __device__ + float8(const float v, + migraphx::fp8::rounding_mode rm = migraphx::fp8::rounding_mode::standard, + uint32_t rng = 0) + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { +#ifdef MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<3, 4, float, FNUZ /*negative_zero_nan*/, true /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#else // MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<3, 4, float, FNUZ /*negative_zero_nan*/, false /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#endif // MIGRAPHX_F8_DOWNCAST_CLIPPING + } + else + { +#ifdef MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<2, 5, float, FNUZ /*negative_zero_nan*/, true /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#else // MIGRAPHX_F8_DOWNCAST_CLIPPING + data = migraphx::fp8::impl:: + cast_to_f8<2, 5, float, FNUZ /*negative_zero_nan*/, false /*clip*/>( + v, (rm == migraphx::fp8::rounding_mode::stochastic), rng); +#endif // MIGRAPHX_FP8_DOWNCAST_CLIPPING} + } + } +#endif // __gfx940___ + + // Constructor from half + explicit constexpr __device__ + float8(const _Float16 v, rounding_mode rm = rounding_mode::standard, uint32_t rng = 0) + : float8(static_cast(v), rm, rng) + { + } + + // constructor from int + explicit constexpr __device__ + float8(const int v, rounding_mode rm = rounding_mode::standard, uint32_t rng = 0) + : float8(static_cast(v), rm, rng) + { + } + + // constructor from uint + explicit constexpr __device__ + float8(const uint32_t v, rounding_mode rm = rounding_mode::standard, uint32_t rng = 0) + : float8(static_cast(v), rm, rng) + { + } + + // constructor from 
double + explicit constexpr __device__ + float8(const double v, rounding_mode rm = rounding_mode::standard, uint32_t rng = 0) + : float8(static_cast(v), rm, rng) + { + } + + // constructor from bool + explicit constexpr __device__ + float8(const bool v, rounding_mode rm = rounding_mode::standard, uint32_t rng = 0) + : float8(static_cast(v), rm, rng) + { + } + // convert to float +#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) // NOLINT + // upcast using device specific intrinsic + inline constexpr __device__ operator float() const + { + if(__builtin_is_constant_evaluated() or !FNUZ) + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + return migraphx::fp8::impl::cast_from_f8<3, 4, float, FNUZ /*negative_zero_nan*/>( + data); + } // else + return migraphx::fp8::impl::cast_from_f8<2, 5, float, FNUZ /*negative_zero_nan*/>(data); + } + else + { + float fval = 0; + uint32_t i32val = static_cast(data); + + // upcast + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + __asm__ volatile("v_cvt_f32_fp8 %0, %1 src0_sel:BYTE_0" : "=v"(fval) : "v"(i32val)); + } + else + { + __asm__ volatile("v_cvt_f32_bf8 %0, %1 src0_sel:BYTE_0" : "=v"(fval) : "v"(i32val)); + } + + return fval; + } + } + +#else // non gfx940 + inline constexpr __device__ operator float() const + { + if constexpr(T == migraphx::fp8::f8_type::fp8) + { + return migraphx::fp8::impl::cast_from_f8<3, 4, float, FNUZ /*negative_zero_nan*/>(data); + } // else + return migraphx::fp8::impl::cast_from_f8<2, 5, float, FNUZ /*negative_zero_nan*/>(data); + } +#endif + + inline constexpr explicit __device__ operator bool() const { return not is_zero(); } + + // check for zero + inline __device__ constexpr bool is_zero() const + { + if constexpr(FNUZ) + { + return data == 0x00; + } + else + { + return (data == 0x00) or (data == 0x80); + } + } + + // check for nan + inline __device__ constexpr bool is_nan() const + { + if constexpr(FNUZ) + { + return data == 0x80; + } + else + { + if(T == migraphx::fp8::f8_type::bf8) + { + return (data == 0x7D) or (data == 0x7E) or (data == 0x7F) or (data == 0xFD) or + (data == 0xFE) or (data == 0xFF); + } + else + { + return (data == 0x7F) or (data == 0xFF); + } + } + } + + // check for inf + inline __device__ constexpr bool is_inf() const + { + if constexpr(FNUZ) + { + return data == 0x80; + } + else + { + if(T == migraphx::fp8::f8_type::bf8) + { + return (data == 0x7C) or (data == 0xFC); + } + else + { + // no infinities in e4m3fn, represent them as NaNs + return (data == 0x7F) or (data == 0xFF); + } + } + } + +// NOLINTNEXTLINE +#define MIGRAPHX_FP8_SHORT_UNARY_OP(unary_op, binary_op) \ + constexpr float8& __device__ operator unary_op(const float8& rhs) \ + { \ + const auto tmp = static_cast(*this) binary_op static_cast(rhs); \ + *this = static_cast(tmp); \ + return *this; \ + } \ + constexpr float8& __device__ operator unary_op(const float& rhs) \ + { \ + const auto tmp = static_cast(*this) binary_op static_cast(rhs); \ + *this = static_cast(tmp); \ + return *this; \ + } + + MIGRAPHX_FP8_SHORT_UNARY_OP(*=, *) + MIGRAPHX_FP8_SHORT_UNARY_OP(-=, -) + MIGRAPHX_FP8_SHORT_UNARY_OP(+=, +) + MIGRAPHX_FP8_SHORT_UNARY_OP(/=, /) + + inline __device__ constexpr float8& operator=(const float8& rhs) = default; + inline __device__ constexpr float8& operator=(float8&& rhs) noexcept = default; + + inline __device__ constexpr bool operator<(const float8& rhs) const + { + const auto we = static_cast(*this); + const auto them = static_cast(rhs); + return we < them; + } + + inline __device__ constexpr 
bool operator>(const float8& rhs) const + { + const auto we = static_cast(*this); + const auto them = static_cast(rhs); + return we > them; + } +}; + +// https://onnx.ai/onnx/technical/float8.html +using fp8e4m3fn = float8; +using fp8e5m2 = float8; +using fp8e4m3fnuz = float8; +using fp8e5m2fnuz = float8; + +// NOLINTNEXTLINE +#define MIGRAPHX_FP8_BINARY_OP(binary_op, T, U) \ + inline constexpr U __device__ operator binary_op(const T& lhs, const T& rhs) \ + { \ + return U(static_cast(lhs) binary_op static_cast(rhs)); \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_FP8_OTHER_OPS(T) \ + inline constexpr __device__ T fabs(T v) \ + { \ + /*NOLINTNEXTLINE*/ \ + v.data = v.data & 0x7f; \ + return v; \ + } \ + inline __device__ constexpr bool operator==(const T& lhs, const T& rhs) \ + { \ + if(rhs.is_nan() or rhs.is_inf() or lhs.is_nan() or lhs.is_inf()) \ + return false; \ + else if((rhs.is_zero() and lhs.is_zero()) or (lhs.data == rhs.data)) \ + return true; \ + return false; \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_FP8_GEN_OP_OVERLOADS(T) \ + MIGRAPHX_FP8_BINARY_OP(*, T, T) \ + MIGRAPHX_FP8_BINARY_OP(-, T, T) \ + MIGRAPHX_FP8_BINARY_OP(/, T, T) \ + MIGRAPHX_FP8_BINARY_OP(+, T, T) \ + MIGRAPHX_FP8_BINARY_OP(>=, T, bool) \ + MIGRAPHX_FP8_BINARY_OP(<=, T, bool) \ + MIGRAPHX_FP8_BINARY_OP(!=, T, bool) \ + MIGRAPHX_FP8_OTHER_OPS(T) + +MIGRAPHX_FP8_GEN_OP_OVERLOADS(fp8e5m2) +MIGRAPHX_FP8_GEN_OP_OVERLOADS(fp8e5m2fnuz) +MIGRAPHX_FP8_GEN_OP_OVERLOADS(fp8e4m3fn) +MIGRAPHX_FP8_GEN_OP_OVERLOADS(fp8e4m3fnuz) + +template <> +class numeric_limits +{ + public: + static constexpr bool has_infinity = false; + static constexpr __device__ fp8e4m3fnuz epsilon() + { + return fp8e4m3fnuz(0x28, fp8e4m3fnuz::from_bits()); + } + // NOLINTNEXTLINE + static constexpr __device__ fp8e4m3fnuz quiet_NaN() + { + return fp8e4m3fnuz(0x80, fp8e4m3fnuz::from_bits()); + } + + static constexpr __device__ fp8e4m3fnuz max() + { + return fp8e4m3fnuz(0x7F, fp8e4m3fnuz::from_bits()); + } + // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01 + static constexpr __device__ fp8e4m3fnuz min() + { + return fp8e4m3fnuz(0x08, fp8e4m3fnuz::from_bits()); + } + + static constexpr __device__ fp8e4m3fnuz lowest() + { + return fp8e4m3fnuz(0xFF, fp8e4m3fnuz::from_bits()); + } +}; + +template <> +class numeric_limits +{ + public: + static constexpr bool has_infinity = false; + static constexpr __device__ fp8e4m3fn epsilon() + { + return fp8e4m3fn(0x20, fp8e4m3fn::from_bits()); + } + // NOLINTNEXTLINE + static constexpr __device__ fp8e4m3fn quiet_NaN() + { + return fp8e4m3fn(0x7F, fp8e4m3fn::from_bits()); + } + + static constexpr __device__ fp8e4m3fn max() { return fp8e4m3fn(0x7E, fp8e4m3fn::from_bits()); } + // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01 + static constexpr __device__ fp8e4m3fn min() { return fp8e4m3fn(0x08, fp8e4m3fn::from_bits()); } + + static constexpr __device__ fp8e4m3fn lowest() + { + return fp8e4m3fn(0xFE, fp8e4m3fn::from_bits()); + } +}; + +template <> +class numeric_limits +{ + public: + static constexpr bool has_infinity = false; + static constexpr __device__ fp8e5m2fnuz epsilon() + { + return fp8e5m2fnuz(0x34, fp8e5m2fnuz::from_bits()); + } + + static constexpr __device__ fp8e5m2fnuz quiet_NaN() // NOLINT + { + return fp8e5m2fnuz(0x80, fp8e5m2fnuz::from_bits()); + } + + static constexpr __device__ fp8e5m2fnuz max() + { + return fp8e5m2fnuz(0x7F, fp8e5m2fnuz::from_bits()); + } + // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. 
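+    // Worked decoding, assuming the usual e5m2fnuz layout (1 sign, 5 exponent and 2 mantissa
+    // bits with an FNUZ exponent bias of 16): 0x04 is 0b0'00001'00, i.e. exponent field 1 and
+    // zero mantissa, giving 2^(1-16) = 2^-15 (~3.05e-5) as the smallest normal value, while
+    // the denormal minimum 0x01 would decode to 2^-15 * 2^-2 = 2^-17.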
+ static constexpr __device__ fp8e5m2fnuz min() + { + return fp8e5m2fnuz(0x4, fp8e5m2fnuz::from_bits()); + } + + static constexpr __device__ fp8e5m2fnuz lowest() + { + return fp8e5m2fnuz(0xFF, fp8e5m2fnuz::from_bits()); + } +}; + +template <> +class numeric_limits +{ + public: + static constexpr bool has_infinity = true; + static constexpr __device__ fp8e5m2 epsilon() { return fp8e5m2(0x34, fp8e5m2::from_bits()); } + // 7D, 7E, 7F are positive NaNs and FD, FE, FF are negative NaNs + static constexpr __device__ fp8e5m2 quiet_NaN() // NOLINT + { + return fp8e5m2(0xFF, fp8e5m2::from_bits()); + } + + static constexpr __device__ fp8e5m2 max() { return fp8e5m2(0x7B, fp8e5m2::from_bits()); } + // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. + static constexpr __device__ fp8e5m2 min() { return fp8e5m2(0x4, fp8e5m2::from_bits()); } + + static constexpr __device__ fp8e5m2 lowest() { return fp8e5m2(0xFB, fp8e5m2::from_bits()); } + // 7C and FC both are infinity + static constexpr __device__ fp8e5m2 infinity() { return fp8e5m2(0x7C, fp8e5m2::from_bits()); } +}; + +} // namespace fp8 +template {} or is_same{} or + is_same{} or is_same{})> +constexpr T numeric_max(migraphx::fp8::f8_type unused = migraphx::fp8::f8_type::fp8) +{ + // unused parameter is added to make this numeric_max different overload definition + // compared to numeric_max defined in type_traits.hpp + (void)(unused); + return fp8::numeric_limits::max(); +} +template {} or is_same{} or + is_same{} or is_same{})> +constexpr T numeric_lowest(migraphx::fp8::f8_type unused = migraphx::fp8::f8_type::fp8) +{ + // unused parameter is added to make this numeric_lowest different overload definition + // compared to numeric_lowest defined in type_traits.hpp + (void)(unused); + return fp8::numeric_limits::lowest(); +} +} // namespace migraphx +// ================================================================================================= +#if defined(__clang__) +#pragma clang diagnostic pop +#endif // __clang__ + +#endif // MIGRAPHX_GUARD_KERNELS_FLOAT8_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp new file mode 100644 index 000000000..2eca5ed4a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/float8_impl.hpp @@ -0,0 +1,331 @@ +/* ************************************************************************ + * Copyright (C) 2016-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell cop- + * ies of the Software, and to permit persons to whom the Software is furnished + * to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IM- + * PLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNE- + * CTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ************************************************************************ */ + +#ifndef MIGRAPHX_GUARD_KERNELS_FP8_IMPL_HPP +#define MIGRAPHX_GUARD_KERNELS_FP8_IMPL_HPP +#include +#include +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + +namespace migraphx { + +namespace fp8 { +namespace impl { + +// NOLINTBEGIN +template +__device__ constexpr uint8_t cast_to_f8(T f_x, bool stoch = false, uint32_t rng = 0) +{ + constexpr bool is_float = true; + // half is not supported for now + constexpr bool is_half = false; + static_assert(Wm + We == 7, "Wm+We==7"); + static_assert(is_float or is_half, "Only float can be cast to f8"); + + const uint32_t mfmt = (sizeof(T) == 4) ? 23 : 10; + typename migraphx::conditional_t x; + + if constexpr(sizeof(T) == 4) + x = migraphx::bit_cast(f_x); + else + x = migraphx::bit_cast(f_x); + + uint32_t head = 0; + uint32_t mantissa = 0; + int exponent = 0; + uint32_t bias = 0; + uint32_t sign = 0; + if constexpr(sizeof(T) == 4) + { + head = x & 0xFF800000; + mantissa = x & 0x7FFFFF; + exponent = (head >> 23) & 0xFF; + sign = head >> 31; + bias = 127; + } + else + { + head = x & 0xFC00; + mantissa = x & 0x3FF; + exponent = (head >> 10) & 0x1F; + sign = head >> 15; + bias = 15; + } + + uint32_t signed_inf = (sign << 7) + (((1 << We) - 1) << Wm); + uint32_t signed_all_ones = (sign << 7) + ((((1 << We) - 1) << Wm) + ((1 << Wm) - 1)); + + // Calcualte maximum singed value FLT_MAX, FLT_MIN + uint32_t signed_max = signed_all_ones; + if(not NegativeZeroNan) + signed_max = (Wm == 2) ? (signed_max - 4) : (signed_max - 1); + + // Deal with inf and NaNs + if(NegativeZeroNan) // For the FNUZ cases, it is simple just return NaNs + { + if((sizeof(T) == 4 and ((x & 0x7F800000) == 0x7F800000)) or + (sizeof(T) == 2 and ((x & 0x7C00) == 0x7C00))) + return 0x80; + } + else + { + // calculate most common NaN mantissa for FP8, which is all Ones in binary + uint32_t nan_mantissa = 1; + for(auto i = 1; i < Wm; ++i) + { + nan_mantissa |= (nan_mantissa << 1); + } + if((sizeof(T) == 4 and ((x & 0x7F800000) == 0x7F800000)) or + (sizeof(T) == 2 and ((x & 0x7C00) == 0x7C00))) + { + // infinity + if(mantissa == 0) + { + if(sign == 0) + return (Wm == 2) ? 0x7B : 0x7E; + else + return (Wm == 2) ? 0xFB : 0xFE; + } + else // NaNs + return signed_inf + nan_mantissa; + } + } + // handle positive zero + if(x == 0) + return 0; + // handle negative zero + else if((sizeof(T) == 4 and x == 0x80000000) or (sizeof(T) == 2 and x == 0x8000)) + { + return NegativeZeroNan ? 0 : 0x80; // For FNUZ types neg zero is just positive zero + } + + /* First need to check if it is normal or denorm as there is a difference of implict 1 + Then need to adjust the exponent to align with the F8 exponent, in the meanwhile, shift + The mantissa. Then for stochastic rounding, add rng to mantissa and truncate. And for + RNE, no need to add rng. Then probably need to check whether there is carry and adjust + exponent and mantissa again*/ + + // For IEEE bias mode, the bias is 2^(k-1) -1 where k is the width of exponent bits + const int f8_bias = (1 << (We - 1u)) - 1 + (NegativeZeroNan ? 
1 : 0); + const int f8_denormal_act_exponent = 1 - f8_bias; // actual exponent of f8 denormal + /* act_exponent is the actual exponent of fp32/fp16 (after subtracting bias) + f8_exponent is the converted f8 exponent with bias encoding + exponent_diff is the diff between fp32/fp16 exponent and f8 exponent, + the difference needs to be adjusted and mantissa shifted*/ + int act_exponent = 0; + int f8_exponent = 0; + int exponent_diff = 0; + + if(exponent == 0 and mantissa != 0) + { // fp32/fp16 is in denormal. + /* fp32 denormal is below 2^-127 so it is usually not a concern here, we mostly concern fp16 + here. In this case, f8 is usually in denormal. But there could be exceptions. fp16 denormal + has exponent bias 15 while bf8 with FNUZ has exponent bias 16. It means that there are some + numbers in fp16 denormal but they are bf8 (FNUZ) normals - smallest bf8 (FNUZ) normal is + 2^-15. fp16 numbers where exponent==0 (actual exponent -14) and highest bit of mantissa is 1 + are bf8 (FNUZ) normal. In this case, the fp16 mantissa should be shift left by 1 */ + act_exponent = 1 - bias; + exponent_diff = f8_denormal_act_exponent - + act_exponent; // actual exponent is exponent-bias+1 as it is denormal + } + else + { // fp32/fp16 is normal with implicit 1 + act_exponent = exponent - bias; + if(act_exponent <= f8_denormal_act_exponent) + { + /* This is the case where fp32/fp16 is normal but it is in f8 denormal range. + For example fp8 FNUZ mode, denormal exponent is -7, but if the fp32/fp16 + actual exponent is -7, it is actually larger due to the implict 1, + Therefore it needs to be adjust to -6 and mantissa shift right by 1. + So for fp32/fp16, exponent -8 is the cut point to convert to fp8 FNUZ */ + exponent_diff = f8_denormal_act_exponent - act_exponent; + } + else + { // both fp32/fp16 and f8 are in normal range + exponent_diff = + 0; // exponent_diff=0 does not mean there is no difference for this case, + // act_exponent could be larger. Just that it does not need shift mantissa + } + mantissa += (1 << mfmt); // Add the implicit 1 into mantissa + } + + // need to know whether the number is right in the middle of two adjacent fp8 numbers. use max + // value of 31 to avoid undefined behaviour + bool midpoint = (mantissa & ((1u << (mfmt - Wm + exponent_diff)) - 1)) == + (1u << (mfmt - Wm + exponent_diff - 1)); + /* This part is a bit tricky. The judgment of whether it is a tie needs to be done before we + shift right as shift right could rip off some residual part and make something not midpoint look + like midpoint. For example, the fp16 number 0x1002 (0 00100 0000000010), it is larger than + midpoint, but after shift right by 4 bits, it would look like midpoint. + */ + + if(exponent_diff > 0) + mantissa >>= exponent_diff; + else if(exponent_diff == -1) + mantissa <<= -exponent_diff; + bool implicit_one = mantissa & (1 << mfmt); + // if there is no implict 1, it means the f8 is denormal and need to adjust to denorm exponent + f8_exponent = + (act_exponent + exponent_diff) /*actual f8 exponent*/ + f8_bias - (implicit_one ? 0 : 1); + + // Now we have the exponent and mantissa adjusted + uint32_t drop_mask = (1 << (mfmt - Wm)) - 1; + bool odd = + mantissa & (1 << (mfmt - Wm)); // if the least significant bit that is not truncated is 1 + /* + This part is doing rounding by adding mantissa part that is going to get dropped. + e.g. if the dropped part for less than 0.5 than it would round down. 
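+    (Concretely, in the float-to-e4m3 case here, Wm=3 and mfmt=23, so the dropped part is the
+    low 20 mantissa bits selected by drop_mask, and "0.5" corresponds to those bits equalling
+    0x80000.)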
+ if the dropped part is more than 0.5 then it would round up by rolling carry to LSB of retained + mantissa. + For the mid point when bit pattern is like this for Odd: `xy1:10000000` for Odd and + `xy0:10000000` for the Even. where `:` is delimiter for dropped v/s retained part. + For the odd case : + this will add xy1:10000000 + 000:10000000 which would roll over carry to LSB of retained + part making it RNE. + For the even case : this will add xy0:10000000 + 000:01111111 which would + round down and keep number Even + */ + mantissa += (stoch ? rng : (midpoint ? (odd ? mantissa : mantissa - 1) : mantissa)) & drop_mask; + + // Now we deal with overflow + if(f8_exponent == 0 and ((1 << mfmt) & mantissa)) + { + f8_exponent = 1; // denormal overflow to become normal, promote exponent + } + else if((1 << (mfmt + 1)) & mantissa) + { + mantissa >>= 1; + f8_exponent++; + } + + mantissa >>= (mfmt - Wm); + + // above range: quantize to maximum possible float of the same sign + // for e5m2 case, max_exp is 14, since exp = 15 is reserved for Infs and Nans + const int max_exp = (1 << We) - ((NegativeZeroNan or Wm == 3) ? 1 : 2); + if(f8_exponent > max_exp) + { + if(Clip) + return signed_max; + else + { + // https://onnx.ai/onnx/technical/float8.html#cast + if(NegativeZeroNan) + return 0x80; + else + return (Wm == 2) ? signed_inf : signed_all_ones; + } + } + + if(f8_exponent == 0 and mantissa == 0) + return NegativeZeroNan ? 0 : (sign << 7); + mantissa &= (1 << Wm) - 1; + return (sign << 7) | (f8_exponent << Wm) | mantissa; +} +// NOLINTEND + +template +__device__ constexpr T cast_from_f8(uint8_t x) +{ + // half is not supported for now + constexpr bool is_half = false; + constexpr bool is_float = true; + static_assert(is_float or is_half, "Only float are supported"); + + constexpr int weo = is_half ? 5 : 8; + constexpr int wmo = is_half ? 10 : (is_float ? 23 : 7); + // NOLINTNEXTLINE + T f_inf, f_neg_inf, f_nan, f_neg0; + + if constexpr(is_float) + { + const uint32_t if_inf = 0x7F800000; + const uint32_t if_neg_inf = 0xFF800000; + const uint32_t if_nan = 0x7F800001; + const uint32_t if_neg0 = 0x80000000; + f_inf = migraphx::bit_cast(if_inf); + f_neg_inf = migraphx::bit_cast(if_neg_inf); + f_nan = migraphx::bit_cast(if_nan); + f_neg0 = migraphx::bit_cast(if_neg0); + } + + if(x == 0) + return 0; + + uint32_t sign = x >> 7; // NOLINT + uint32_t mantissa = x & ((1 << Wm) - 1); // NOLINT + int exponent = (x & 0x7F) >> Wm; // NOLINT + if(NegativeZeroNan) + { + if(x == 0x80) + return f_nan; + } + else + { + if(x == 0x80) + return f_neg0; + if(exponent == ((1 << We) - 1) and Wm == 2) // NOLINT + return (mantissa == 0) ? (sign ? f_neg_inf : f_inf) : f_nan; + else if(Wm == 3 and (x == 0x7F or x == 0xFF)) + return f_nan; + } + typename migraphx::conditional_t retval; + + const int exp_low_cutoff = + (1 << (weo - 1)) - (1 << (We - 1)) + 1 - (NegativeZeroNan ? 
1 : 0); // NOLINT + + // subnormal input + if(exponent == 0) + { + // guaranteed mantissa!=0 since cases 0x0 and 0x80 are handled above + int sh = 1 + __builtin_clz(mantissa) - (32 - Wm); + mantissa <<= sh; // NOLINT + exponent += 1 - sh; + mantissa &= ((1 << Wm) - 1); // NOLINT + } + exponent += exp_low_cutoff - 1; + mantissa <<= wmo - Wm; // NOLINT + + // subnormal output (occurs when T=half, We=5, negative_zero_nan=true) + if(exponent <= 0) + { + mantissa |= 1 << wmo; // NOLINT + mantissa >>= 1 - exponent; // NOLINT + exponent = 0; + } + + if(sizeof(T) == 2) + retval = (sign << 15) | (exponent << 10) | mantissa; // NOLINT + else + retval = (sign << 31) | (exponent << 23) | mantissa; // NOLINT + return migraphx::bit_cast(retval); +} +} // namespace impl +} // namespace fp8 +} // namespace migraphx +#if defined(__clang__) +#pragma clang diagnostic pop +#endif +#endif // MIGRAPHX_GUARD_KERNELS_FP8_IMPL_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/functional.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/functional.hpp new file mode 100644 index 000000000..3e9d80261 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/functional.hpp @@ -0,0 +1,389 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_FUNCTIONAL_HPP +#define MIGRAPHX_GUARD_KERNELS_FUNCTIONAL_HPP + +#include + +// Similiar to decltype(auto) except it will propagate any substitution failures +// NOLINTNEXTLINE +#define MIGRAPHX_RETURNS(...) \ + ->decltype(__VA_ARGS__) { return __VA_ARGS__; } + +// Lifts an expression into a function object so it can be passed to a higher-order function +// NOLINTNEXTLINE +#define MIGRAPHX_LIFT(...) \ + [](auto&&... private_lifts_xs) MIGRAPHX_RETURNS( \ + (__VA_ARGS__)(static_cast(private_lifts_xs)...)) + +// NOLINTNEXTLINE +#define MIGRAPHX_LIFT_CLASS(name, ...) \ + struct name \ + { \ + template \ + constexpr auto operator()(PrivateLiftTs&&... private_lifts_xs) const MIGRAPHX_RETURNS( \ + (__VA_ARGS__)(static_cast(private_lifts_xs)...)) \ + } + +namespace migraphx { + +struct swallow +{ + template + constexpr swallow(Ts&&...) + { + } +}; + +template +using ignore = swallow; + +template +struct overloaded : Fs... +{ + using Fs::operator()...; + constexpr overloaded(Fs... fs) : Fs(fs)... 
{} +}; + +template +constexpr overloaded overload(Fs... fs) +{ + return {fs...}; +} + +namespace detail { + +template +struct eval_helper +{ + R result; + + template + constexpr eval_helper(const F& f, Ts&&... xs) : result(f(static_cast(xs)...)) + { + } +}; + +template <> +struct eval_helper +{ + int result; + template + constexpr eval_helper(const F& f, Ts&&... xs) : result((f(static_cast(xs)...), 0)) + { + } +}; + +template +struct seq +{ + using type = seq; +}; + +template +struct merge_seq; + +template +struct merge_seq, seq> : seq +{ +}; + +template +struct gens : merge_seq::type, typename gens::type> +{ +}; + +template <> +struct gens<0> : seq<> +{ +}; +template <> +struct gens<1> : seq<0> +{ +}; + +template +constexpr auto sequence_c_impl(F&& f, seq) +{ + return f(index_constant{}...); +} + +template +constexpr auto args_at(seq) +{ + return [](ignore..., auto x, auto...) { return x; }; +} + +} // namespace detail + +template +constexpr auto always(T x) +{ + return [=](auto&&...) { return x; }; +} + +template +constexpr auto sequence_c(F&& f) +{ + return detail::sequence_c_impl(f, detail::gens{}); +} + +template +constexpr auto sequence(IntegerConstant ic, F&& f) +{ + return sequence_c(f); +} + +template +constexpr auto by(F f, G g) +{ + return [=](auto... xs) { + return detail::eval_helper{g, f(xs)...}.result; + }; +} + +template +constexpr auto by(F f) +{ + return by([=](auto x) { return (f(x), 0); }, always(0)); +} + +template +constexpr void each_args(F f, Ts&&... xs) +{ + swallow{(f(static_cast(xs)), 0)...}; +} + +template +constexpr void each_args(F) +{ +} + +template +constexpr void unpack_each(F f) +{ + f(); +} + +template +constexpr void unpack_each(F f, Pack p) +{ + p([&](auto&&... xs) { each_args(f, static_cast(xs)...); }); +} + +template +constexpr void unpack_each(F f, Pack1 p1, Pack2 p2) +{ + p1([&](auto&&... xs) { + p2([&](auto&&... ys) { + each_args( + [&](auto&& p) { p(f); }, + pack_forward(static_cast(xs), static_cast(ys))...); + }); + }); +} + +template +constexpr void unpack_each(F f, Pack1 p1, Pack2 p2, Packs... packs) +{ + unpack_each( + [&](auto&& x, auto&& y) { + unpack_each( + [&](auto&&... zs) { + f(static_cast(x), + static_cast(y), + static_cast(zs)...); + }, + packs...); + }, + p1, + p2); +} + +template +constexpr void repeat_c(F&& f) +{ + sequence_c([&](auto... xs) { each_args(f, xs...); }); +} + +template +constexpr auto repeat(IntegerConstant ic, F&& f) +{ + return repeat_c(f); +} + +template +constexpr auto fold_impl(F&&, T&& x) +{ + return static_cast(x); +} + +template +constexpr auto fold_impl(F&& f, T&& x, U&& y, Ts&&... xs) +{ + return fold_impl(f, f(static_cast(x), static_cast(y)), static_cast(xs)...); +} + +template +constexpr auto fold(F f) +{ + return [=](auto&&... xs) { return fold_impl(f, static_cast(xs)...); }; +} + +template +constexpr auto compose(Fs... fs) +{ + return fold([](auto f, auto g) { + return [=](auto&&... xs) { return f(g(static_cast(xs)...)); }; + })(fs...); +} + +template +constexpr auto partial(F f) +{ + return [=](auto... xs) { + return [=](auto&&... ys) { return f(xs..., static_cast(ys)...); }; + }; +} + +template +constexpr auto pack(Ts... xs) +{ + return [=](auto f) { return f(xs...); }; +} + +template +constexpr auto pack_forward(Ts&&... xs) +{ + return [&](auto f) { return f(static_cast(xs)...); }; +} + +template +constexpr auto join(G g, F f) +{ + return f([=](auto... xs) { return g(xs...); }); +} + +template +constexpr auto join(G g, F f, Fs... 
fs) +{ + // return f1([=](auto x) { return f2([=](auto y) { return g(x, y); }); }); + return f([=](auto... xs) { return join([=](auto... ys) { return g(xs..., ys...); }, fs...); }); +} + +template +constexpr auto pack_compare(Compare compare, P1 p1, P2 p2) +{ + return p1([&](auto... xs) { + return p2([&](auto... ys) { + auto c = [&](auto x, auto y) -> int { + if(compare(x, y)) + return 1; + else if(compare(y, x)) + return -1; + else + return 0; + }; + return fold([](auto x, auto y) { return x ? x : y; })(c(xs, ys)..., 0); + }); + }); +} + +template +constexpr auto arg_c() +{ + return [](auto... xs) { return detail::args_at(detail::gens{})(xs...); }; +} + +template +constexpr auto arg(IntegralConstant ic) +{ + return arg_c(); +} + +template +constexpr auto make_transform(F f) +{ + return [=](auto... xs) { return [=](auto g) { return f(g, xs...); }; }; +} + +// An arg transformation takes the arguments and then a function to take the new arguments: +// transform(xs...)([](auto... ys) { ... }) +// The transform_args function takes a list of transformations and continually applies them +template +constexpr auto transform_args(F f) +{ + return f; +} + +template +constexpr auto transform_args(F f, Fs... fs) +{ + return make_transform([=](auto g, auto... xs) { + return f(xs...)([=](auto... ys) { return transform_args(fs...)(ys...)(g); }); + }); +} + +// identity transform +inline constexpr auto transform_args() +{ + return make_transform([](auto f, auto... xs) { return f(xs...); }); +} + +// Rotate the last N arguments to the first N arguments +template +constexpr auto rotate_last() +{ + return make_transform([](auto f, auto... xs) { + return sequence_c([&](auto... is) { + constexpr auto size = sizeof...(is); + return f(arg_c<(is + size - N) % size>()(xs...)...); + }); + }); +} + +inline constexpr auto rotate_last() { return rotate_last<1>(); } + +// Pack the first N arguments +template +constexpr auto pack_first() +{ + return make_transform([](auto f, auto... xs) { + return sequence_c([&](auto... is) { + return sequence_c([&](auto... js) { + return f(pack(arg_c()(xs...)...), arg_c()(xs...)...); + }); + }); + }); +} + +// Rotate the last N arguments as the first argument packed +template +constexpr auto rotate_and_pack_last() +{ + return transform_args(rotate_last(), pack_first()); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_FUNCTIONAL_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gather.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gather.hpp new file mode 100644 index 000000000..45f4ffcde --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gather.hpp @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GATHER_HPP +#define MIGRAPHX_GUARD_KERNELS_GATHER_HPP + +#include +#include +#include +#include + +namespace migraphx { + +template +constexpr auto gather_shape(Input input, Indices indices) +{ + auto lengths = input.lens; + + lengths[Axis] = indices.elements(); + return make_shape(lengths, input.strides); +} + +template +__device__ void gather(Input input, Indices indices, Output output) +{ + auto ind = make_index(); + auto axis_dim_size = input.get_shape().lens[Axis]; + + constexpr auto out_comp = gather_shape(get_shape_c{}, get_shape_c{}); + + ind.global_stride(output.get_shape().elements(), [&](auto i) { + auto idx = out_comp.multi(i); + auto in_index = indices[idx[Axis]]; + + auto new_in_index = (in_index < 0) ? in_index + axis_dim_size : in_index; + + idx[Axis] = new_in_index; + + output[i] = input[idx]; + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp new file mode 100644 index 000000000..325b7d34f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gathernd.hpp @@ -0,0 +1,98 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GATHERND_HPP +#define MIGRAPHX_GUARD_KERNELS_GATHERND_HPP + +#include +#include +#include +namespace migraphx { + +template +struct gathernd_settings +{ + T batch_dims{}; +}; + +template +constexpr gathernd_settings make_gathernd_settings(Ts... 
xs) +{ + return {xs...}; +} + +template +__device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t, Settings s) +{ + auto ind = make_index(); + auto batch_dims = s.batch_dims; + auto output_shape = output_t.get_shape(); + auto indices_shape = indices_t.get_shape(); + auto data_shape = data_t.get_shape(); + + auto indices_shape_lens = indices_shape.lens; + auto data_shape_lens = data_shape.lens; + auto num_slice_dims = indices_shape_lens.back(); + size_t num_slices = + accumulate(indices_shape_lens.begin(), indices_shape_lens.end() - 1, 1, op::product{}); + size_t slice_size = accumulate(data_shape_lens.begin() + num_slice_dims + batch_dims, + data_shape_lens.end(), + 1, + op::product{}); + const size_t num_batches = + accumulate(data_shape_lens.begin(), data_shape_lens.begin() + batch_dims, 1, op::product{}); + const size_t data_batch_stride = + accumulate(data_shape_lens.begin() + batch_dims, data_shape_lens.end(), 1, op::product{}); + const auto num_slices_per_batch = num_slices / num_batches; + + ind.global_stride(output_shape.elements(), [&](auto i) { + const auto* indices_ptr = indices_t.data(); + const size_t j = i / slice_size; + const size_t batch_idx = j / num_slices_per_batch; + + auto* slice_indices = indices_ptr + (j * num_slice_dims); + size_t relative_slice_offset = 0; + for(size_t idx = 0; idx < num_slice_dims; ++idx) + { + int64_t index = slice_indices[idx]; + const size_t input_dim_idx = batch_dims + idx; + const auto input_dim = data_shape_lens[input_dim_idx]; + MIGRAPHX_ASSERT(index >= -static_cast(input_dim) and + index < static_cast(input_dim)); + if(index < 0) + index += input_dim; + size_t size_from_slice_dims = + accumulate(data_shape_lens.begin() + batch_dims + idx + 1, + data_shape_lens.begin() + batch_dims + num_slice_dims, + slice_size, + op::product{}); + relative_slice_offset += index * size_from_slice_dims; + } + + auto slice_offset = (batch_idx * data_batch_stride) + relative_slice_offset; + output_t[i] = data_t[slice_offset + i % slice_size]; + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gemm_batcher.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gemm_batcher.hpp new file mode 100644 index 000000000..d219786c6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gemm_batcher.hpp @@ -0,0 +1,92 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GEMM_BATCHER_HPP +#define MIGRAPHX_GUARD_KERNELS_GEMM_BATCHER_HPP + +#include +#include +#include + +namespace migraphx { + +template +constexpr auto gemm_get_batches() +{ + constexpr auto lens = get_shape_c{}.lens; + constexpr auto strides = get_shape_c{}.strides; + constexpr auto new_lens = sequence( + lens.size() - _c<2>, [&](auto... is) { return make_const_array(_c...); }); + constexpr auto new_strides = sequence( + strides.size() - _c<2>, [&](auto... is) { return make_const_array(_c...); }); + return make_shape(new_lens, new_strides); +} + +template +constexpr auto gemm_get_matrix() +{ + constexpr auto lens = get_shape_c{}.lens; + constexpr auto strides = get_shape_c{}.strides; + constexpr auto m = lens.size() - _c<2>; + constexpr auto n = lens.size() - _c<1>; + constexpr auto new_lens = make_const_array(_c, _c); + constexpr auto new_strides = make_const_array(_c, _c); + return make_shape(new_lens, new_strides); +} + +template +constexpr auto gemm_batch_slice(Tensor t, T i) +{ + constexpr auto batch = gemm_get_batches(); + constexpr auto matrix = gemm_get_matrix(); + MIGRAPHX_ASSERT((batch.index(i) + matrix.element_space()) <= t.get_shape().element_space()); + return make_tensor_view(t.data() + batch.index(i), matrix); +} + +template +constexpr auto gemm_batch_args(index idx, BlocksPerBatch bpb, T x, Ts... xs) +{ + return [=](auto f) { + // All tensors should have the same rank + static_assert( + (true and ... and (get_shape_c{}.lens.size() == get_shape_c{}.lens.size()))); + if constexpr(get_shape_c{}.lens.size() > 2) + { + // Get the first batch since all batches should have the same number of elements + constexpr auto batch = gemm_get_batches(); + static_assert( + (true and ... and (batch.elements() == gemm_get_batches().elements()))); + idx.group_stride(bpb * batch.elements(), [&](auto gidx) { + const auto batch_idx = gidx / bpb; + f(gemm_batch_slice(x, batch_idx), gemm_batch_slice(xs, batch_idx)...); + }); + } + else + { + f(x, xs...); + } + }; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_GEMM_BATCHER_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/generic_constant.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/generic_constant.hpp new file mode 100644 index 000000000..a1c2c9f82 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/generic_constant.hpp @@ -0,0 +1,56 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP +#define MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP + +namespace migraphx { + +template +struct generic_constant +{ + static constexpr auto value = F{}(); + using value_type = decltype(value); + using type = generic_constant; + constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } +}; + +template +constexpr generic_constant make_generic_constant(F) +{ + return {}; +} + +// NOLINTNEXTLINE +#define MIGRAPHX_MAKE_CONSTANT(x) \ + make_generic_constant([] { \ + struct fun \ + { \ + constexpr auto operator()() const { return x; } \ + }; \ + return fun{}; \ + }()) + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_rotary_embedding.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_rotary_embedding.hpp new file mode 100644 index 000000000..6fd1b15f3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_rotary_embedding.hpp @@ -0,0 +1,178 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_ROTARY_EMBEDDING_HPP +#define MIGRAPHX_GUARD_KERNELS_ROTARY_EMBEDDING_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void run_rotary_embedding(Input input, + CosCache cos_cache, + SinCache sin_cache, + Output output, + PosIDs pos_ids, + Params params, + index_int idx, + bool is_query = false) +{ + const index_int batch_size = params.batch_size; + const index_int sequence_length = params.sequence_length; + const index_int n_heads = is_query ? 
params.num_heads : params.kv_num_heads; + const index_int head_size = params.head_size; + const index_int head_stride = params.head_stride; + const index_int seq_stride = params.seq_stride; + const index_int batch_stride = params.batch_stride; + const int position_ids_format = params.position_ids_format; + const index_int rotary_emb_dim = params.rotary_embedding_dim; + const index_int half_rotary_emb_dim = rotary_emb_dim / 2; + + const index_int loop_len = batch_size * sequence_length * n_heads; + const index_int i = idx / head_size; + const index_int ii = idx % head_size; + if(i < loop_len) + { + const index_int b = (i / n_heads) / sequence_length; + const index_int s = (i / n_heads) % sequence_length; + const index_int n = i % n_heads; + const index_int block_offset = b * batch_stride + s * seq_stride + n * head_stride; + auto input_data = input + block_offset; + auto output_data = output + block_offset; + + // Cache is (M, H/2) or (M, rotary_embedding_dim/2) + int position_id = (position_ids_format == 0) + ? static_cast(pos_ids[0]) + s + : static_cast(pos_ids[b * sequence_length + s]); + position_id = (sequence_length == 1) ? position_id : s; + + const index_int cache_offset = position_id * half_rotary_emb_dim; + auto cos_data = cos_cache + cache_offset; + auto sin_data = sin_cache + cache_offset; + + int cache_idx = 0; + double sign = 0.0; + int j = 0; + if(ii < rotary_emb_dim) + { + if(params.rotary_interleaved) + { + cache_idx = (ii / 2) % half_rotary_emb_dim; + sign = (ii % 2 == 0) ? -1.0 : 1.0; + j = (ii % 2 == 0) ? ii + 1 : ii - 1; // i - sign + } + else + { + cache_idx = ii % half_rotary_emb_dim; + sign = (ii < half_rotary_emb_dim) ? -1.0 : 1.0; + j = (ii + half_rotary_emb_dim) % rotary_emb_dim; + } + double out_data = + static_cast(input_data[ii]) * static_cast(cos_data[cache_idx]) + + sign * static_cast(input_data[j]) * + static_cast(sin_data[cache_idx]); + output_data[ii] = out_data; + } + else if(ii < head_size) + { + output_data[ii] = input_data[ii]; + } + } +} + +template +__device__ void +pack_v_into_rotary_qkv(Params params, const Input input, Output output, index_int idx) +{ + const index_int loop_len = params.batch_size * params.sequence_length * params.kv_num_heads; + auto i = idx / params.head_size; + auto ii = idx % params.head_size; + if(i < loop_len) + { + const index_int b = (i / params.kv_num_heads) / params.sequence_length; + const index_int s = (i / params.kv_num_heads) % params.sequence_length; + const index_int n = i % params.kv_num_heads; + const index_int block_offset = + b * params.batch_stride + s * params.seq_stride + n * params.head_stride; + const Input input_data = input + block_offset; + Output output_data = output + block_offset; + if(ii < params.head_size) + { + output_data[ii] = input_data[ii]; + } + } +} + +template +__device__ void gqa_rotary_embedding(Output output, + Query query, + SeqLensK seqlens_k, + CosCache cos_cache, + SinCache sin_cache, + Params params) +{ + auto ind = make_index(); + ind.global_stride(output.get_shape().elements(), [&](auto idx) { + auto q_input = query.begin(); + auto q_rotary = output.begin(); + auto k_input = q_input + params.num_heads * params.sequence_length * params.head_size; + auto k_rotary = q_rotary + params.num_heads * params.sequence_length * params.head_size; + auto v_input = k_input + params.kv_num_heads * params.sequence_length * params.head_size; + auto v_rotary = k_rotary + params.kv_num_heads * params.sequence_length * params.head_size; + auto q_chunk_size = + params.batch_size * params.num_heads * 
params.sequence_length * params.head_size; + auto kv_chunk_size = + params.batch_size * params.kv_num_heads * params.sequence_length * params.head_size; + if(idx < q_chunk_size) + { + run_rotary_embedding(q_input, + cos_cache.begin(), + sin_cache.begin(), + q_rotary, + seqlens_k.begin(), + params, + idx, + true); + } + else if(idx < q_chunk_size + kv_chunk_size) + { + run_rotary_embedding(k_input, + cos_cache.begin(), + sin_cache.begin(), + k_rotary, + seqlens_k.begin(), + params, + idx - q_chunk_size); + } + else if(idx < output.get_shape().elements()) + { + pack_v_into_rotary_qkv(params, v_input, v_rotary, idx - (q_chunk_size + kv_chunk_size)); + } + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_softmax.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_softmax.hpp new file mode 100644 index 000000000..27e2154b6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/gqa_softmax.hpp @@ -0,0 +1,138 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GQA_SOFTMAX_HPP +#define MIGRAPHX_GUARD_KERNELS_GQA_SOFTMAX_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void softmax_inplace(T score, int n, int d) +{ + for(int j = 0; j < n; ++j) + { + auto x = score + j * d; + auto y = x; + + // e^x is represented as infinity if x is large enough, like 100.f. + // Infinity divided by Infinity is a NAN. Thus, softmax gets a NAN if + // one or more item are large enough. a math transform as below is + // leveraged to get a stable softmax: e^xi/(e^x1 + ...e^xn) = e^(xi - + // max) / (e^(x1 - max) + ... 
+ e^(xn - max)) + float max = -numeric_max(); + for(int i = 0; i < d; i++) + { + if(max < x[i]) + max = x[i]; + } + for(int i = 0; i < d; i++) + { + y[i] = expf(x[i] - max); + } + + float sum = 0.0; + for(int i = 0; i < d; i++) + { + sum += x[i]; + } + + for(int i = 0; i < d; i++) + { + y[i] = x[i] / static_cast(sum); + } + } +} + +template +__device__ void calculate_softmax(AttnProbs attention_probs, // output buffer with size BxNxSxT + SeqLensK seqlens_k, // past sequence lengths tensor + Params params, + index_int idx) +{ + const index_int batch_size = params.batch_size; + const index_int sequence_length = params.sequence_length; + const index_int num_heads = params.num_heads; + const index_int present_buffer_sequence_length = params.seqlen_present_kv_cache; + + const index_int loop_len = batch_size * num_heads; + const index_int i = idx / sequence_length; + const index_int inner_i = idx % sequence_length; + if(i < loop_len) + { + const index_int batch_index = i / num_heads; + const index_int total_seqlen = seqlens_k[batch_index] + 1; + const index_int output_offset = i * sequence_length * present_buffer_sequence_length; + auto output = attention_probs + output_offset; + + const int local_window_size = params.local_window_size; + auto output_softmax = output; + index_int seq = inner_i; + if(seq < sequence_length) + { + output_softmax += seq * present_buffer_sequence_length; + auto consume = total_seqlen + local_window_size; + seq += consume; + seq -= consume; + int seq_causal_length = sequence_length == 1 ? total_seqlen : seq + 1; + if(local_window_size > 0 and seq_causal_length > local_window_size + 1) + { + for(int total_seq_id = 0; total_seq_id < seq_causal_length - local_window_size - 1; + total_seq_id++) + { + output_softmax[total_seq_id] = 0.f; + } + softmax_inplace(output_softmax + seq_causal_length - local_window_size - 1, + 1, + local_window_size + 1); + } + else + { + softmax_inplace(output_softmax, 1, seq_causal_length); + } + for(int total_seq_id = seq_causal_length; total_seq_id < total_seqlen; total_seq_id++) + { + output_softmax[total_seq_id] = 0.f; + } + } + } +} + +template +__device__ void +gqa_softmax(Output output, Input, PresentKey, Probs, SeqLensK seqlens_k, Params params) +{ + const index_int elements = params.batch_size * params.num_heads * params.sequence_length; + auto ind = make_index(); + ind.global_stride(elements, [&](auto idx) { + calculate_softmax(output.begin(), seqlens_k.begin(), params, idx); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/group_query_attention.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/group_query_attention.hpp new file mode 100644 index 000000000..dbb60e7bd --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/group_query_attention.hpp @@ -0,0 +1,122 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_GROUP_QUERY_ATTENTION_HPP +#define MIGRAPHX_GUARD_KERNELS_GROUP_QUERY_ATTENTION_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { + +template +struct gqa_parameters +{ + T1 scale; + T2 batch_size; // Batch size used by input + T3 sequence_length; // Sequence length used by input + T4 hidden_size; // Hidden size used by input + T5 head_size; // Head size + T6 rotary_embedding_dim; // Rotary embedding dimension. + T7 num_heads; // num_heads = hidden_size / head_size + T8 max_sequence_length; // Sequence length used by cos/sin cache + T9 head_stride; // Head stride + T10 seq_stride; // Sequence stride + T11 batch_stride; // Batch stride + T12 position_ids_format; // Format of position ids - 0 is (1), 1 is (batch_size, + // sequence_length) + T13 seqlen_present_kv_cache; // Sequence length of present kv-cache (4096 when using + // shared buffer) + T14 do_rotary; // Whether to use rotary position embedding. Default value is 0. + T15 kv_num_heads; // Number of attention heads for k and v + T16 local_window_size; // left_window_size for local attention. Default value is -1 meaning + // unused. + T17 rotary_interleaved; // Rotate using interleaved pattern. Default value is 0 (False). + T18 past_present_share_buffer; // Whether to use same buffer for KV-cache inputs and outputs +}; + +template +__device__ gqa_parameters make_gqa_parameters(Ts... ts) +{ + return {ts...}; +} + +struct naive_gemm +{ + index_int max_m; + index_int max_n; + index_int max_k; + index_int lda; + index_int ldb; + index_int ldc; + bool b_transpose; + float alpha; + float beta; + + template + __device__ void compute(C cmat, const A amat, const B bmat, const index_int idx) + { + auto m = idx / max_n; + auto n = idx % max_n; + auto index = [&](auto x, auto y, auto z) { return y + (x * z); }; + + if(m < max_m) + { + if(n < max_n) + { + double s = 0.0; + for(int k = 0; k < max_k; ++k) + { + auto a_i = index(m, k, lda); + auto b_i = b_transpose ? 
index(n, k, ldb) : index(k, n, ldb); + s += static_cast(amat[a_i]) * static_cast(bmat[b_i]); + } + auto c_i = index(m, n, ldc); + cmat[c_i] = static_cast(alpha) * s + cmat[c_i] * static_cast(beta); + } + } + } +}; + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/hip.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/hip.hpp new file mode 100644 index 000000000..8ddc7ad0e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/hip.hpp @@ -0,0 +1,33 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP +#define MIGRAPHX_GUARD_KERNELS_HIP_HPP + +#ifndef MIGRAPHX_USE_HIPRTC +#include +#include +#include +#endif + +#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/index.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/index.hpp new file mode 100644 index 000000000..9c43f5d3b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/index.hpp @@ -0,0 +1,309 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP
+#define MIGRAPHX_GUARD_KERNELS_INDEX_HPP
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wreserved-identifier"
+extern "C" __device__ size_t __ockl_get_enqueued_local_size(uint); // NOLINT
+extern "C" __device__ size_t __ockl_get_local_size(uint); // NOLINT
+#pragma clang diagnostic pop
+#endif
+
+namespace migraphx {
+
+#if defined(MIGRAPHX_NGLOBAL) && defined(MIGRAPHX_NLOCAL)
+#define MIGRAPHX_NGROUP ((MIGRAPHX_NGLOBAL + MIGRAPHX_NLOCAL - 1) / MIGRAPHX_NLOCAL)
+#endif
+
+inline __device__ __attribute__((const)) index_int compute_global_size()
+{
+#ifdef MIGRAPHX_NGLOBAL
+    return MIGRAPHX_NGLOBAL;
+#else
+    // This actually works even when global is not divisible by local size.
+    // This doesn't actually do a multiplication. Instead it calls a device
+    // function to get the global size, which is why it works.
+    return blockDim.x * gridDim.x; // NOLINT
+#endif
+}
+
+#ifdef MIGRAPHX_NGROUP
+// If global is divisible by local then local can be a const
+#if(MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL == 0) || (MIGRAPHX_NGROUP == 1)
+#define MIGRAPHX_HAS_CONST_LOCAL 1
+#endif
+#endif
+
+inline __device__ __attribute__((const)) index_int compute_local_size()
+{
+#ifdef MIGRAPHX_HAS_CONST_LOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns the block size. For a non-uniform block it returns the size of that non-uniform block.
+    return __ockl_get_local_size(0); // NOLINT
+#endif
+}
+
+inline __device__ __attribute__((const)) index_int compute_max_local_size()
+{
+#ifdef MIGRAPHX_NLOCAL
+    return MIGRAPHX_NLOCAL;
+#else
+    // Returns the block size. When the workgroup has a non-uniform block, this returns the
+    // size of the uniform block.
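+    // Added commentary (not in the original patch): __ockl_get_enqueued_local_size reports the
+    // work-group size that was enqueued, which is always the uniform size, while
+    // __ockl_get_local_size above can report a smaller value for the trailing, partially filled
+    // work-group. That difference is why this function is the "max" local size.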
+ return __ockl_get_enqueued_local_size(0); // NOLINT +#endif +} + +struct index +{ + index_int global = 0; + index_int local = 0; + index_int group = 0; + +#ifdef MIGRAPHX_NGLOBAL + constexpr index_constant nglobal() const + { + static_assert(MIGRAPHX_NGLOBAL > 0, "Global size must be greater than 0"); + return {}; + } +#else + __device__ index_int nglobal() const + { + MIGRAPHX_ASSERT(compute_global_size() > 0); + return compute_global_size(); // NOLINT + } +#endif + +#ifdef MIGRAPHX_HAS_CONST_LOCAL + constexpr index_constant nlocal() const + { + static_assert(MIGRAPHX_NLOCAL > 0, "Local size must be greater than 0"); + return {}; + } +#else + __device__ index_int nlocal() const + { +#ifdef MIGRAPHX_NGROUP + static_assert((MIGRAPHX_NGLOBAL % MIGRAPHX_NLOCAL != 0) and (MIGRAPHX_NGROUP > 1), + "Local size should be const"); +#endif + MIGRAPHX_ASSERT(compute_local_size() > 0); + return compute_local_size(); // NOLINT + } +#endif + +#ifdef MIGRAPHX_NLOCAL + constexpr index_constant max_nlocal() const { return {}; } +#else + __device__ index_int max_nlocal() const + { + MIGRAPHX_ASSERT(compute_max_local_size() > 0); + return compute_max_local_size(); + } +#endif + + constexpr auto ngroup() const { return nglobal() / max_nlocal(); } + + template + constexpr index_constant nlocal_subwave() const + { + return {}; + } + template + constexpr auto local_subwave() const + { +#ifdef MIGRAPHX_HAS_CONST_LOCAL + if constexpr(decltype(nlocal()){} == SubWaveSize) + return local; +#endif + return local % nlocal_subwave(); + } + template + constexpr auto nwave() const + { + return max_nlocal() / nlocal_subwave(); + } + + constexpr index_constant nlocal_wave() const { return {}; } + constexpr auto local_wave() const { return local % nlocal_wave(); } + constexpr auto nwave() const { return max_nlocal() / nlocal_wave(); } + constexpr auto wave() const { return local / nlocal_wave(); } + + template + static constexpr auto max_stride_iterations(N n, Stride stride) + { + return (n - _c<1>) / stride + _c<1>; + } + + template + constexpr auto max_global_stride_iterations(N n) const + { + return max_stride_iterations(n, nglobal()); + } + + template + constexpr auto max_local_stride_iterations(N n) const + { + return max_stride_iterations(n, nlocal()); + } + + template + constexpr auto max_local_wave_stride_iterations(N n) const + { + return max_stride_iterations(n, nlocal_wave()); + } + + template + constexpr auto max_local_subwave_stride_iterations(N n) const + { + return max_stride_iterations(n, nlocal_subwave()); + } + + template + static constexpr auto invoke_loop(F f, I i, D d) -> decltype(f(i, d)) + { + return f(i, d); + } + + template + static constexpr auto invoke_loop(F f, I i, D) -> decltype(f(i)) + { + return f(i); + } + + template + static constexpr void for_stride_loop_unroll(index_int start, N n, Stride stride, F f) + { + sequence(max_stride_iterations(n, stride), [&](auto... 
ks) { + fold([&](auto d, auto k) { + auto i = start + stride * k; + if(i < n) + invoke_loop(f, i, d); + return d + _c<1>; + })(_c<0>, ks...); + }); + } + + template + static constexpr void for_stride_loop(index_int start, N n, Stride stride, F f) + { + index_int k = 0; + for(index_int i = start; i < n; i += stride) + { + invoke_loop(f, i, k); + k++; + } + } + + template + static constexpr void for_stride(index_int start, N n, Stride stride, F f) + { + MIGRAPHX_ASSERT(start < stride); + if constexpr(not is_integral{} and not is_integral{}) + { + if constexpr(max_stride_iterations(n, stride) == 1) + { + if constexpr(stride > n) + { + if(start < n) + invoke_loop(f, start, _c<0>); + } + else + { + invoke_loop(f, start, _c<0>); + } + } + else if constexpr(Unroll) + { + MIGRAPHX_STATIC_ASSERT_FOR(max_stride_iterations(n, stride) < 256) + { + for_stride_loop_unroll(start, n, stride, f); + } + } + else + { + for_stride_loop(start, n, stride, f); + } + } + else + { + for_stride_loop(start, n, stride, f); + } + } + + template + __device__ void global_stride(N n, F f) const + { + for_stride(global, n, nglobal(), f); + } + + template + __device__ void local_stride(N n, F f) const + { + for_stride(local, n, nlocal(), f); + } + + template + __device__ void group_stride(N n, F f) const + { + for_stride(group, n, ngroup(), f); + } + + template + __device__ void local_subwave_stride(N n, F f) const + { + for_stride(local_subwave(), n, nlocal_subwave(), f); + } + + template + __device__ void local_wave_stride(N n, F f) const + { + for_stride(local_wave(), n, nlocal_wave(), f); + } +}; + +#ifdef MIGRAPHX_NLOCAL +#define MIGRAPHX_GLOBAL \ + __global__ __attribute__((amdgpu_flat_work_group_size(MIGRAPHX_NLOCAL, MIGRAPHX_NLOCAL))) +#else +#define MIGRAPHX_GLOBAL __global__ +#endif +inline __device__ __attribute__((const)) index make_index() +{ + return index{ + blockIdx.x * compute_max_local_size() + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_INDEX_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp new file mode 100644 index 000000000..63807ff78 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp @@ -0,0 +1,103 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP +#define MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP + +#include + +namespace migraphx { + +template +struct integral_constant +{ + static constexpr T value = V; + using value_type = T; + using type = integral_constant; + constexpr operator value_type() const noexcept { return value; } + constexpr value_type operator()() const noexcept { return value; } + static constexpr type to() { return {}; } +}; + +// NOLINTNEXTLINE +#define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \ + template \ + constexpr inline integral_constant operator op( \ + integral_constant, integral_constant) noexcept \ + { \ + return {}; \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \ + template \ + constexpr inline integral_constant operator op( \ + integral_constant) noexcept \ + { \ + return {}; \ + } + +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(+) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(-) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(*) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(/) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(%) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(>>) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(<<) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(>) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(<) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(<=) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(>=) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(==) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(!=) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(&) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(^) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(|) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(and) +MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(or) + +MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(not ) +MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(~) +MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(+) +MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(-) + +template +using bool_constant = integral_constant; + +using true_type = bool_constant; +using false_type = bool_constant; + +template +using index_constant = integral_constant; + +template +static constexpr auto _c = integral_constant{}; // NOLINT + +template +constexpr auto return_c(F f) +{ + return _c; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp new file mode 100644 index 000000000..c04522778 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp @@ -0,0 +1,168 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP +#define MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP + +#include +#include +#include + +namespace migraphx { + +template +struct basic_iota_iterator +{ + Iterator index; + F f; + + using difference_type = diff_int; + using reference = decltype(f(declval())); + using value_type = remove_reference_t; + using pointer = add_pointer_t; + + constexpr basic_iota_iterator& operator+=(diff_int n) + { + index += n; + return *this; + } + + constexpr basic_iota_iterator& operator-=(diff_int n) + { + index -= n; + return *this; + } + + constexpr basic_iota_iterator& operator++() + { + index++; + return *this; + } + + constexpr basic_iota_iterator& operator--() + { + index--; + return *this; + } + + constexpr basic_iota_iterator operator++(int) // NOLINT + { + basic_iota_iterator it = *this; + index++; + return it; + } + + constexpr basic_iota_iterator operator--(int) // NOLINT + { + basic_iota_iterator it = *this; + index--; + return it; + } + // TODO: operator-> + constexpr reference operator*() const { return f(index); } + + constexpr reference operator[](MIGRAPHX_CAPTURE_SOURCE_LOCATION(index_int) x) const + { + return f(capture_transform(x, [&](auto y) { return index + y; })); + } +}; + +template +constexpr basic_iota_iterator make_basic_iota_iterator(T x, F f) +{ + return basic_iota_iterator{x, f}; +} + +template +constexpr basic_iota_iterator operator+(basic_iota_iterator x, diff_int y) +{ + return x += y; +} + +template +constexpr basic_iota_iterator operator+(diff_int x, basic_iota_iterator y) +{ + return y + x; +} + +template +constexpr diff_int operator-(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index - y.index; +} + +template +constexpr basic_iota_iterator operator-(basic_iota_iterator x, diff_int y) +{ + return x -= y; +} + +template +constexpr bool operator==(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index == y.index; +} + +template +constexpr bool operator!=(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index != y.index; +} + +template +constexpr bool operator<(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index < y.index; +} + +template +constexpr bool operator>(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index > y.index; +} + +template +constexpr bool operator>=(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index >= y.index; +} + +template +constexpr bool operator<=(basic_iota_iterator x, basic_iota_iterator y) +{ + return x.index <= y.index; +} + +struct defaul_iota_iterator +{ + template + constexpr auto operator()(T x) const + { + return x; + } +}; + +using iota_iterator = basic_iota_iterator; + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp new file mode 100644 index 000000000..c64ab5531 --- /dev/null +++ 
b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp @@ -0,0 +1,112 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP +#define MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP +#include +#include +#include +#include + +namespace migraphx { + +template +struct acc_type +{ + using type = float; +}; + +template <> +struct acc_type +{ + using type = double; +}; + +template +constexpr auto vec_reduce(const array& a, Op op) +{ + return a.apply([&](auto x) { return vec_reduce(x, op); }); +} + +template +__device__ void generic_binary_layernorm( + F compute, BinOp op, float eps, Output output, Input1 input1, Input2 input2, Inputs... inputs) +{ + using block = reduce::auto_block()>; + using reduce_output = reduce::with_axis; + + block::template run([&](auto, auto r) { + using value_type = typename Input1::type; + using vec_value_type = typename acc_type>::type; + + auto input = r.inner([&](auto x1, auto x2) { + return migraphx::convert(op(x1, x2)); + })(input1, input2); + + constexpr auto relements = r.template elements(); + constexpr auto relements_r = vec_value_type{1.0 / relements}; + auto relements_rsqrt = sqrt(relements_r); + + auto means = r.reduce(op::sum{}, make_array(0, 0), [&](auto x) { + auto x_out = x * relements_r; + // dividing x by sqrt(relements) before squaring allows computing + // higher values before overflow in low precision + auto x2_sqrt = x * relements_rsqrt; + return make_array(x_out, x2_sqrt * x2_sqrt); + })(input); + + auto mean_x = means[0]; + auto mean_x2 = means[1]; + auto variance = mean_x2 - (mean_x * mean_x); + vec_value_type eps_val = implicit_conversion(eps); + auto rsqrt_val = rsqrt(variance + eps_val); + + r.inner([&](auto& y, auto x, auto... xs) { + y = compute(migraphx::convert>((x - mean_x) * rsqrt_val), xs...); + })(output, input, inputs...); + }); +} + +template +__device__ void layernorm(F compute, float eps, Output output, Input input, Inputs... inputs) +{ + generic_binary_layernorm( + compute, [](auto x, auto) { return x; }, eps, output, input, input, inputs...); +} + +template +__device__ void +add_layernorm(F compute, float eps, Output output, Input1 input1, Input2 input2, Inputs... 
inputs) +{ + generic_binary_layernorm( + compute, [](auto x1, auto x2) { return x1 + x2; }, eps, output, input1, input2, inputs...); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_LAYERNORM_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/math.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/math.hpp new file mode 100644 index 000000000..5052d6611 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/math.hpp @@ -0,0 +1,300 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_MATH_HPP +#define MIGRAPHX_GUARD_KERNELS_MATH_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +namespace math { + +template +constexpr auto as_float(T x) +{ + if constexpr(is_integral{}) + return x; + else + return float(x); +} + +template +constexpr auto to_native(T x) +{ + return x; +} + +constexpr migraphx::half to_native(__half x) { return bit_cast(x); } + +template ())> +__device__ auto wrap(F f, T x, Ts... xs) +{ + if constexpr(is_integral{}) + { + return wrap(f, double(x), double(xs)...); + } + else if constexpr(is_callable{}) + { + return to_native(f(x, xs...)); + } + else + { + T result = f(as_float(x), as_float(xs)...); + return result; + } +} + +} // namespace math + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_LIFT_IMPL(type, ...) \ + [](type x, auto... xs) MIGRAPHX_RETURNS((__VA_ARGS__)(x, xs...)) + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_LIFT(...) MIGRAPHX_DEVICE_MATH_LIFT_IMPL(__VA_ARGS__) + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_PARSE(x) x, + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_EACH(f) MIGRAPHX_DEVICE_MATH_LIFT(MIGRAPHX_DEVICE_MATH_PARSE f) + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_WRAP(name, ...) \ + namespace math { \ + inline static constexpr auto wrap_##name = \ + overload(MIGRAPHX_PP_TRANSFORM_ARGS(MIGRAPHX_DEVICE_MATH_EACH, __VA_ARGS__)); \ + } \ + template \ + auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(math::wrap(math::wrap_##name, xs...)) + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH(name, fname) \ + template ())> \ + auto __device__ name(Ts... xs) MIGRAPHX_RETURNS(fname(xs...)) + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_VEC(name) \ + template ())> \ + auto __device__ name(Ts... 
xs) \ + { \ + return vec_transform(xs...)([](auto... ys) { return name(ys...); }); \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_FOR(type, name, fname) \ + template ())> \ + auto __device__ name(type x, Ts... xs) -> type \ + { \ + return fname(x, xs...); \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_BINARY_FOR(type, name, fname) \ + inline auto __device__ name(type x, type y) -> type { return fname(x, y); } + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_HALF2(name, fname) \ + template \ + auto __device__ name(migraphx::vec x, Ts... xs) \ + MIGRAPHX_RETURNS(migraphx::vec{fname(x, xs...)}); \ + template 2))> \ + auto __device__ name(migraphx::vec x, Ts... xs) \ + { \ + return vec_packed_transform<2>(x, xs...)( \ + [](auto... ys) -> migraphx::vec { return fname(ys...); }); \ + } + +// Template with two overloads for math functions, one for half2 type and one for more generic +// vectorization where N is 4 or another even number. +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_MATH_VEC2(type, name, fname) \ + template \ + auto __device__ name(migraphx::vec x, Ts... xs) \ + MIGRAPHX_RETURNS(migraphx::vec{fname(x, xs...)}); \ + template 2))> \ + auto __device__ name(migraphx::vec x, Ts... xs) \ + { \ + return vec_packed_transform<2>(x, xs...)( \ + [](auto... ys) -> migraphx::vec { return fname(ys...); }); \ + } + +MIGRAPHX_DEVICE_MATH_WRAP(acos, (double)::acos, (float)::acosf); +MIGRAPHX_DEVICE_MATH_WRAP(acosh, (double)::acosh, (float)::acoshf); +MIGRAPHX_DEVICE_MATH_WRAP(asin, (double)::asin, (float)::asinf); +MIGRAPHX_DEVICE_MATH_WRAP(asinh, (double)::asinh, (float)::asinh); +MIGRAPHX_DEVICE_MATH_WRAP(atan, (double)::atan, (float)::atan); +MIGRAPHX_DEVICE_MATH_WRAP(atanh, (double)::atanh, (float)::atanh); +MIGRAPHX_DEVICE_MATH_WRAP(ceil, (double)::ceil, (float)::ceilf, (half)::hceil); +MIGRAPHX_DEVICE_MATH_WRAP(cos, (double)::cos, (float)::cosf, (half)::hcos); +MIGRAPHX_DEVICE_MATH_WRAP(cosh, (double)::cosh, (float)::coshf); +MIGRAPHX_DEVICE_MATH_WRAP(erf, (double)::erf, (float)::erff); +MIGRAPHX_DEVICE_MATH_WRAP(exp, (double)::exp, (float)::expf, (half)::hexp); +MIGRAPHX_DEVICE_MATH_WRAP(floor, (double)::floor, (float)::floorf, (half)::hfloor); +MIGRAPHX_DEVICE_MATH_WRAP(isnan, (double)::isnan, (float)::isnan, (half)::__hisnan); +MIGRAPHX_DEVICE_MATH_WRAP(isinf, (double)::isinf, (float)::isinf, (half)::__hisinf); +MIGRAPHX_DEVICE_MATH_WRAP(log, (double)::log, (float)::logf, (half)::hlog); +MIGRAPHX_DEVICE_MATH_WRAP(log2, (double)::log2, (float)::log2f, (half)::hlog2); +MIGRAPHX_DEVICE_MATH_WRAP(nearbyint, (double)::nearbyint, (float)::nearbyintf); +MIGRAPHX_DEVICE_MATH_WRAP(pow, (double)::pow, (float)::powf); +MIGRAPHX_DEVICE_MATH_WRAP(remainder, (double)::remainder, (float)::remainderf); +MIGRAPHX_DEVICE_MATH_WRAP(round, (double)::round, (float)::roundf); +MIGRAPHX_DEVICE_MATH_WRAP(rsqrt, (double)::rsqrt, (float)::rsqrtf, (half)::hrsqrt); +MIGRAPHX_DEVICE_MATH_WRAP(sin, (double)::sin, (float)::sinf, (half)::hsin); +MIGRAPHX_DEVICE_MATH_WRAP(sinh, (double)::sinh, (float)::sinhf); +MIGRAPHX_DEVICE_MATH_WRAP(sqrt, (double)::sqrt, (float)::sqrtf, (half)::hsqrt); +MIGRAPHX_DEVICE_MATH_WRAP(tan, (double)::tan, (float)::tanf); +MIGRAPHX_DEVICE_MATH_WRAP(tanh, (double)::tanh, (float)::tanhf); +MIGRAPHX_DEVICE_MATH_WRAP(fmod, (double)::fmod, (float)::fmodf); + +template +constexpr auto where(bool cond, const T& a, const U& b) +{ + return cond ? 
a : b; +} + +MIGRAPHX_DEVICE_MATH_FOR(float, abs, ::abs) +MIGRAPHX_DEVICE_MATH_FOR(double, abs, ::abs) +MIGRAPHX_DEVICE_MATH_FOR(migraphx::half, abs, ::__habs) +MIGRAPHX_DEVICE_MATH_FOR(migraphx::bf16, abs, ::fabsf) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, max, ::fmaxf) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(float, min, ::fminf) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, max, ::max) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(double, min, ::min) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, max, ::__hmax) +MIGRAPHX_DEVICE_MATH_BINARY_FOR(migraphx::half, min, ::__hmin) + +template () and is_integral{})> +constexpr auto abs(const T& a) +{ + return where(a < 0, -a, a); +} + +template ())> +constexpr auto max(const T& a, const T& b) +{ + return where(a < b, b, a); +} + +template ())> +constexpr auto min(const T& a, const T& b) +{ + return where(a < b, a, b); +} + +template {} and not is_any_vec())> +constexpr auto max(const T& a, const U& b) +{ + return max>(a, b); +} + +template {} and not is_any_vec())> +constexpr auto min(const T& a, const U& b) +{ + return min>(a, b); +} + +template ())> +constexpr T mod(const T& a, const T& b) +{ + if constexpr(is_integral{}) + // onnx mod operator requires numpy style modulus + return ((a % b) + b) % b; + return static_cast(fmod(remainder(a, b) + b, b)); +} + +template {} and not is_any_vec())> +constexpr auto mod(const T& a, const U& b) +{ + return mod>(a, b); +} + +MIGRAPHX_DEVICE_MATH_VEC(abs) +MIGRAPHX_DEVICE_MATH_VEC(acos) +MIGRAPHX_DEVICE_MATH_VEC(acosh) +MIGRAPHX_DEVICE_MATH_VEC(asin) +MIGRAPHX_DEVICE_MATH_VEC(asinh) +MIGRAPHX_DEVICE_MATH_VEC(atan) +MIGRAPHX_DEVICE_MATH_VEC(atanh) +MIGRAPHX_DEVICE_MATH_VEC(ceil) +MIGRAPHX_DEVICE_MATH_VEC(cos) +MIGRAPHX_DEVICE_MATH_VEC(cosh) +MIGRAPHX_DEVICE_MATH_VEC(erf) +MIGRAPHX_DEVICE_MATH_VEC(exp) +MIGRAPHX_DEVICE_MATH_VEC(floor) +MIGRAPHX_DEVICE_MATH_VEC(fmod) +MIGRAPHX_DEVICE_MATH_VEC(isinf) +MIGRAPHX_DEVICE_MATH_VEC(isnan) +MIGRAPHX_DEVICE_MATH_VEC(log) +MIGRAPHX_DEVICE_MATH_VEC(log2) +MIGRAPHX_DEVICE_MATH_VEC(max) +MIGRAPHX_DEVICE_MATH_VEC(min) +MIGRAPHX_DEVICE_MATH_VEC(mod) +MIGRAPHX_DEVICE_MATH_VEC(nearbyint) +MIGRAPHX_DEVICE_MATH_VEC(pow) +MIGRAPHX_DEVICE_MATH_VEC(remainder) +MIGRAPHX_DEVICE_MATH_VEC(round) +MIGRAPHX_DEVICE_MATH_VEC(rsqrt) +MIGRAPHX_DEVICE_MATH_VEC(sin) +MIGRAPHX_DEVICE_MATH_VEC(sinh) +MIGRAPHX_DEVICE_MATH_VEC(sqrt) +MIGRAPHX_DEVICE_MATH_VEC(tan) +MIGRAPHX_DEVICE_MATH_VEC(tanh) +MIGRAPHX_DEVICE_MATH_VEC(where) + +// Map math functions to hip half2 functions +// The half2 type is defined in include/hip/amd_detail/hip_fp16_gcc.h and is 2 16-bit floats +// packed into a 32-bit number. See include/hip/amd_detail/hip_fp16_math_fwd.h for the HIP names +// Most but not all of these math ops have operators of the same names. 
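// A minimal host-side analogue of the MIGRAPHX_DEVICE_MATH_VEC overloads above:
// the generated functions apply the scalar math function to every lane of a
// fixed-width vector. The 4-wide std::array is only a stand-in for
// migraphx::vec<float, 4>; this is an illustration, not the device implementation.
#include <array>
#include <cmath>
#include <cstddef>

template <class F>
std::array<float, 4> vec_transform4(const std::array<float, 4>& x, F f)
{
    std::array<float, 4> out{};
    for(std::size_t i = 0; i < 4; ++i)
        out[i] = f(x[i]); // one scalar call per lane, as vec_transform does
    return out;
}

// Usage: vec_transform4({1.f, 4.f, 9.f, 16.f}, [](float v) { return std::sqrt(v); })
// yields {1, 2, 3, 4}; the half2 specializations below do the same two lanes at a time.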
+MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, abs, ::__habs2) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, ceil, ::h2ceil) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, cos, ::h2cos) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, exp, ::h2exp) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, exp10, ::h2exp10) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, exp2, ::h2exp2) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, floor, ::h2floor) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, isinf, ::__hisinf2) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, isnan, ::__hisnan2) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, log, ::h2log) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, log10, ::h2log10) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, log2, ::h2log2) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, rsqrt, ::h2rsqrt) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, sin, ::h2sin) +MIGRAPHX_DEVICE_MATH_VEC2(migraphx::half, sqrt, ::h2sqrt) + +template +constexpr auto convert(U v) +{ + return vec_transform(v)([](auto x) -> T { return static_cast(x); }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_MATH_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/operators.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/operators.hpp new file mode 100644 index 000000000..35f9c920e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/operators.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_OPERATORS_HPP +#define MIGRAPHX_GUARD_KERNELS_OPERATORS_HPP + +#include +#include + +namespace migraphx { + +template +struct equality_comparable +{ + template + friend constexpr auto operator!=(const T& x, const U& y) MIGRAPHX_RETURNS(not(x == y)); + template {} and is_same{})> + friend constexpr auto operator!=(const U& x, const V& y) MIGRAPHX_RETURNS(not(x == y)); +}; + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_OPERATORS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ops.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ops.hpp new file mode 100644 index 000000000..be1ece0f9 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ops.hpp @@ -0,0 +1,164 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_OPS_HPP +#define MIGRAPHX_GUARD_KERNELS_OPS_HPP + +#include + +namespace migraphx { +namespace op { + +struct sum +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return x + y; + } +}; + +struct product +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return x * y; + } +}; + +struct id +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const + { + return x; + } +}; + +template +struct convert_to +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(U x) const + { + return convert(x); + } +}; + +template +struct mean +{ + template + MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x) const + { + using type = vec_type; + if constexpr(is_floating_point{}) + { + constexpr type d = 1.0 / N; + return x * d; + } + else + { + return x / static_cast(N); + } + } +}; + +struct max +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return migraphx::max(x, y); + } +}; + +struct min +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + return migraphx::min(x, y); + } +}; + +struct logical_and +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + if(static_cast(x) and static_cast(y)) + return static_cast(1); + return static_cast(0); + } +}; + +struct logical_or +{ + template + MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const + { + if(static_cast(x) or static_cast(y)) + return static_cast(1); + return static_cast(0); + } +}; +} // namespace op + +// NOLINTNEXTLINE +#define MIGRAPHX_OPS_DEFINE_COMMON_TYPE(T) \ + template \ + struct common_type \ + { \ + using type = U; \ + }; \ + template \ + struct common_type \ + { \ + using type = U; \ + }; + +struct lowest +{ + template + constexpr operator T() const + { + return numeric_lowest>(); + } +}; +MIGRAPHX_OPS_DEFINE_COMMON_TYPE(lowest) + +struct highest +{ + template + constexpr operator T() const + { + return numeric_max>(); + } +}; + +MIGRAPHX_OPS_DEFINE_COMMON_TYPE(highest) + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_OPS_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pad.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pad.hpp new file mode 100644 index 000000000..38d8be2de --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pad.hpp @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * 
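// Quick illustration of the lowest/highest helpers defined in ops.hpp above: they
// convert to the numeric limit of whatever arithmetic type they are assigned to,
// which is how reductions seed their accumulators. Host-side sketch; names and the
// value type are illustrative only.
//
//   float acc = migraphx::lowest{};        // == numeric_lowest<float>()
//   for(float v : values)
//       acc = migraphx::op::max{}(acc, v); // max-reduction starting from lowest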
+ * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_PAD_HPP +#define MIGRAPHX_GUARD_KERNELS_PAD_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { + +template +__device__ void pad(const index& idx, + const Offsets& offsets, + const Input& input, + Output& output, + const PadVal& pad_val) +{ + auto output_shape = output.get_shape(); + idx.global_stride(output_shape.elements(), [&](auto i) { + // 1. get current multi-index for output + // 2. get the size of the input to determine input boundaries + // 3. compute the corresponding multi-index for input by accounting for offsets + // 4. if current multi-index is within offsets or input's new multi-index is out of bounds, + // use pad value instead of input's value + auto multi = output_shape.multi(i); + auto input_bounds = input.get_shape().lens; + auto input_idx = multi - offsets; + auto range_multi = range(multi.size()); + + if(any_of(range_multi.begin(), range_multi.end(), [&](auto j) { + return multi[j] < offsets[j] or input_idx[j] >= input_bounds[j]; + })) + output[multi] = implicit_conversion(pad_val); + else + output[multi] = implicit_conversion(input[input_idx]); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/permutation.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/permutation.hpp new file mode 100644 index 000000000..970484d6f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/permutation.hpp @@ -0,0 +1,108 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
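// Host-side 1-D sketch of the index logic used by pad() above: output position i
// maps back to input position (i - offset), and anything outside [0, input size)
// keeps the pad value. The function name pad_1d is illustrative only.
#include <cstddef>
#include <vector>

std::vector<float> pad_1d(const std::vector<float>& input, std::size_t offset,
                          std::size_t out_size, float pad_val)
{
    std::vector<float> output(out_size, pad_val);
    for(std::size_t i = 0; i < out_size; ++i)
    {
        if(i < offset or i - offset >= input.size())
            continue; // inside the left padding or past the input: keep pad_val
        output[i] = input[i - offset];
    }
    return output;
}
// pad_1d({1, 2, 3}, 2, 7, 0) == {0, 0, 1, 2, 3, 0, 0}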
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_PERMUTATION_HPP +#define MIGRAPHX_GUARD_KERNELS_PERMUTATION_HPP + +#include +#include +#include + +namespace migraphx { + +template +constexpr auto reorder_dims(const Array1& dims, const Array2& permutation) +{ + return generate_array( + dims.size(), [&](auto i) { return dims[permutation[i]]; }); +} + +template +constexpr auto reorder_dims(integral_const_array, integral_const_array) +{ + return return_array_c([] { + constexpr integral_const_array dims{}; + constexpr integral_const_array permutation{}; + return reorder_dims(dims.base(), permutation.base()); + }); +} + +template +constexpr auto invert_permutation(const Array& permutation) +{ + return reorder_dims(transform_i(permutation, [](auto, auto i) { return i; }), permutation); +} + +template +struct find_permutation_impl +{ + static constexpr auto compute() + { + return return_array_c([] { + constexpr Shape s{}; + typename Shape::index_array perm; + iota(perm.begin(), perm.end(), 0); + if constexpr(s.transposed() or s.broadcasted()) + { + stable_sort( + perm.begin(), + perm.end(), + by([&](auto x) { return make_tuple(s.strides[x], s.lens[x]); }, greater{})); + } + return perm; + }); + } + using type = decltype(compute()); +}; + +template +constexpr auto find_permutation(Shape) +{ + return typename find_permutation_impl::type{}; +} + +template +constexpr auto find_permutation(Shape1, Shape2) +{ + return return_array_c([] { + constexpr Shape1 s1{}; + constexpr Shape2 s2{}; + auto perm1 = find_permutation(s1).base(); + auto perm2 = find_permutation(s2).base(); + if(perm1 == perm2) + return perm1; + if(s1.standard()) + return perm1; + if(s2.standard()) + return perm2; + if(s1.packed()) + return perm1; + if(s2.packed()) + return perm2; + return perm1; + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_PERMUTATION_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp new file mode 100644 index 000000000..d97355a1d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
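// Host-side sketch of the stride-descending sort performed by find_permutation
// above: axes are ordered by (stride, length) from largest to smallest so that
// transposed or broadcast layouts are traversed in memory order. For lens {2, 3, 4}
// with strides {12, 1, 4} the result is {0, 2, 1}. Names are illustrative only.
#include <algorithm>
#include <array>
#include <cstddef>
#include <numeric>
#include <utility>

std::array<std::size_t, 3> find_permutation3(std::array<std::size_t, 3> strides,
                                             std::array<std::size_t, 3> lens)
{
    std::array<std::size_t, 3> perm{};
    std::iota(perm.begin(), perm.end(), 0);
    std::stable_sort(perm.begin(), perm.end(), [&](auto a, auto b) {
        // larger stride first; break ties with the larger length
        return std::make_pair(strides[a], lens[a]) > std::make_pair(strides[b], lens[b]);
    });
    return perm;
}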
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_POINTWISE_HPP +#define MIGRAPHX_GUARD_KERNELS_POINTWISE_HPP + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +template +__device__ void pointwise_tensor(Stride stride, F f, Output out, T x, Ts... xs) +{ + stride(x.get_shape().elements(), [&](auto i) { + auto r = f(x[i], xs[i]...); + out([&](auto... outs) { + r([&](auto... rs) { + static_assert(sizeof...(outs) == sizeof...(rs)); + swallow{(outs[i] = implicit_conversion(rs))...}; + }); + }); + }); +} + +template +__device__ auto pointwise(index idx, Transforms... transforms) +{ + return [=](auto f, auto*... ps) { + auto t = transform_args(make_tensors(), transforms..., rotate_and_pack_last()); + t(ps...)([&](auto... xs) { pointwise_tensor(tile_stride(idx), f, xs...); }); + }; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_POINTWISE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pooling.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pooling.hpp new file mode 100644 index 000000000..76bb7c3cb --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pooling.hpp @@ -0,0 +1,233 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
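// Conceptual host-side analogue of the pointwise kernel above: every output
// element is f applied to the corresponding element of each input. The real
// device version threads this through tensor views and tile_stride; this loop
// only mirrors the data flow, and the names are illustrative.
#include <cstddef>

void pointwise_add(const float* a, const float* b, float* out, std::size_t n)
{
    for(std::size_t i = 0; i < n; ++i)
        out[i] = a[i] + b[i]; // f(x, y) == x + y in this sketch
}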
+ * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_POOLING_HPP +#define MIGRAPHX_GUARD_KERNELS_POOLING_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +template +struct pool_op +{ + template + MIGRAPHX_DEVICE_CONSTEXPR T apply(T x) const + { + return x; + } + + MIGRAPHX_DEVICE_CONSTEXPR auto pad() const + { + const auto& self = static_cast(*this); + return self.init(); + } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(T x, U) const + { + return x; + } +}; + +struct max_pool : pool_op +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest{}; } + + MIGRAPHX_DEVICE_CONSTEXPR auto reduce() const { return op::max{}; } +}; + +struct average_pool : pool_op +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return make_tuple(0.0, 0); } + + template + MIGRAPHX_DEVICE_CONSTEXPR tuple apply(T x) const + { + return {x, 1}; + } + + MIGRAPHX_DEVICE_CONSTEXPR auto reduce() const { return op::sum{}; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(tuple t, U) const + { + T x = t[_c<0>]; + index_int y = t[_c<1>]; + return (y == 0) ? T{0.0} : T{x / y}; + } +}; + +struct average_include_pad_pool : pool_op +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return 0.0; } + + MIGRAPHX_DEVICE_CONSTEXPR auto reduce() const { return op::sum{}; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(T x, U y) const + { + if constexpr(y == 0) + return T{0.0}; + constexpr auto scale = T{1.0} / y; + return T{x * scale}; + } +}; + +struct lpnorm_pool_base +{ +}; + +template +struct lpnorm_pool : lpnorm_pool_base, pool_op> +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return 0.0; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T apply(T x) const + { + if constexpr(P == 0) + return 1; + else if constexpr(P == 1) + return migraphx::abs(x); + else if constexpr(P == 2) + return x * x; + else + return migraphx::pow(migraphx::abs(x), T(P)); + } + + MIGRAPHX_DEVICE_CONSTEXPR auto pad() const { return apply(init()); } + + MIGRAPHX_DEVICE_CONSTEXPR auto reduce() const { return op::sum{}; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(T x, U) const + { + if constexpr(P == 0) + return 1; + else if constexpr(P == 1) + return x; + else if constexpr(P == 2) + return migraphx::sqrt(x); + else + return migraphx::pow(x, 1. 
/ P); + } +}; + +template +struct window +{ + Window win = {}; + Stride stride = {}; + Padding padding = {}; + + using rank = decltype(Window{}.size()); + + constexpr auto size() const + { + return return_c([] { return Window{}.product(); }); + } + + constexpr auto has_padding() const + { + return return_c([] { return Padding{} == 0; }); + } + + template + constexpr auto apply(OutputIndex i, F f) const + { + auto win_start = generate_array(rank{}, [&](auto j) { + diff_int dim = i[j]; + MIGRAPHX_ASSERT(win[j] >= 1); + diff_int s = stride[j]; + diff_int p = padding[j]; + return (dim * s) - p; + }); + return [=](auto j) { return f(win_start + win.multi(j)); }; + } + + template + constexpr void visit(Index i, F f) const + { + repeat(size(), apply(i, f)); + } +}; + +template +constexpr window make_window(Window w, Stride s, Padding p) +{ + return {w, s, p}; +} + +template +__device__ void pooling_reduce(Output output, F f) +{ + if constexpr(GroupSize < 2) + { + Algo::template run( + [&](auto out_idx, auto r) { r.outer([&] { output[out_idx] = f(out_idx, r); }); }); + } + else + { + auto goutput = as_vec(output, output.get_shape().lens.size() - _c<1>); + Algo::template run([&](auto out_idx, auto r) { + auto i = out_idx; + i.back() *= GroupSize; + auto result = vec_generate([&](auto) { + i.back()++; + return f(i, r); + }); + r.outer([&] { goutput[out_idx] = result; }); + }); + } +} + +template +__device__ void pooling(Op op, Window w, Output output, Input input) +{ + pooling_reduce(output, [&](auto out_idx, auto r) { + auto x = r.reduce(op.reduce(), op.init(), w.apply(out_idx, [&](auto j) { + using itype = decltype(op.apply(input[j])); + + if(j < input.get_shape().lens) + { + return op.apply(input[j]); + } + else + { + return itype(op.pad()); + } + }))(reduce::make_indices(w.size())); + return op.final(x, w.size()); + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_POOLING_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pp.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pp.hpp new file mode 100644 index 000000000..89b38ac24 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/pp.hpp @@ -0,0 +1,129 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
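// Host-side sketch of the window-corner arithmetic used by window::apply above:
// start = out_idx * stride - padding, which can be negative, hence the signed type.
// For kernel 3, stride 2, pad 1: out 0 -> start -1, out 1 -> start 1, out 2 -> start 3.
// Positions that fall outside the input contribute the pool's pad value (e.g. lowest
// for max pooling), matching the op.pad() branch in pooling() above.
#include <cstdint>

std::int64_t window_start(std::int64_t out_idx, std::int64_t stride, std::int64_t padding)
{
    return out_idx * stride - padding;
}

// A position p inside the window reads input[start + p] only when
// 0 <= start + p < input_len; otherwise the pad value is used.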
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_PP_HPP +#define MIGRAPHX_GUARD_KERNELS_PP_HPP + +// NOLINTBEGIN(*-macro-to-enum) + +#define MIGRAPHX_PP_PRIMITIVE_CAT(x, y) x##y +#define MIGRAPHX_PP_CAT(x, y) MIGRAPHX_PP_PRIMITIVE_CAT(x, y) + +#define MIGRAPHX_PP_EAT(...) +#define MIGRAPHX_PP_EXPAND(...) __VA_ARGS__ +#define MIGRAPHX_PP_COMMA(...) , + +#define MIGRAPHX_PP_IIF(c) MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_IIF_, c) +#define MIGRAPHX_PP_IIF_0(t, ...) __VA_ARGS__ +#define MIGRAPHX_PP_IIF_1(t, ...) t + +#define MIGRAPHX_PP_COMPL(b) MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_COMPL_, b) +#define MIGRAPHX_PP_COMPL_0 1 +#define MIGRAPHX_PP_COMPL_1 0 + +#define MIGRAPHX_PP_BITAND(x) MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_BITAND_, x) +#define MIGRAPHX_PP_BITAND_0(y) 0 +#define MIGRAPHX_PP_BITAND_1(y) y + +#define MIGRAPHX_PP_CHECK(...) MIGRAPHX_PP_CHECK_N(__VA_ARGS__, 0, ) +#define MIGRAPHX_PP_CHECK_N(x, n, ...) n +#define MIGRAPHX_PP_PROBE(x) x, 1, + +#define MIGRAPHX_PP_IS_PAREN(x) MIGRAPHX_PP_CHECK(MIGRAPHX_PP_IS_PAREN_PROBE x) +#define MIGRAPHX_PP_IS_PAREN_PROBE(...) MIGRAPHX_PP_PROBE(~) + +#define MIGRAPHX_PP_PRIMITIVE_IS_EMPTY(x) \ + MIGRAPHX_PP_CHECK(MIGRAPHX_PP_PRIMITIVE_IS_EMPTY_PROBE x()) +#define MIGRAPHX_PP_PRIMITIVE_IS_EMPTY_PROBE(...) MIGRAPHX_PP_PROBE(~) + +#define MIGRAPHX_PP_IS_EMPTY_ARG(x) \ + MIGRAPHX_PP_BITAND(MIGRAPHX_PP_COMPL(MIGRAPHX_PP_IS_PAREN(x))) \ + (MIGRAPHX_PP_PRIMITIVE_IS_EMPTY(x)) + +#define MIGRAPHX_PP_REPEAT0(m, ...) m(0, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT1(m, ...) MIGRAPHX_PP_REPEAT0(m, __VA_ARGS__) m(1, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT2(m, ...) MIGRAPHX_PP_REPEAT1(m, __VA_ARGS__) m(2, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT3(m, ...) MIGRAPHX_PP_REPEAT2(m, __VA_ARGS__) m(3, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT4(m, ...) MIGRAPHX_PP_REPEAT3(m, __VA_ARGS__) m(4, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT5(m, ...) MIGRAPHX_PP_REPEAT4(m, __VA_ARGS__) m(5, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT6(m, ...) MIGRAPHX_PP_REPEAT5(m, __VA_ARGS__) m(6, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT7(m, ...) MIGRAPHX_PP_REPEAT6(m, __VA_ARGS__) m(7, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT8(m, ...) MIGRAPHX_PP_REPEAT7(m, __VA_ARGS__) m(8, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT9(m, ...) MIGRAPHX_PP_REPEAT8(m, __VA_ARGS__) m(9, __VA_ARGS__) +#define MIGRAPHX_PP_REPEAT10(m, ...) MIGRAPHX_PP_REPEAT9(m, __VA_ARGS__) m(10, __VA_ARGS__) + +#define MIGRAPHX_PP_REPEAT(n, m, ...) \ + MIGRAPHX_PP_PRIMITIVE_CAT(MIGRAPHX_PP_REPEAT, n)(m, __VA_ARGS__) + +#define MIGRAPHX_PP_RES_ARGS() , , , , , , , , , , , , , , , + +#define MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARGS(...) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARGS_IMPL(__VA_ARGS__) + +#define MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARGS_IMPL( \ + m, delim, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ...) 
\ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x0) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x1) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x1) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x2) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x2) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x3) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x3) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x4) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x4) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x5) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x5) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x6) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x6) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x7) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x7) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x8) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x8) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x9) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x9) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x10) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x10) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x11) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x11) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x12) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x12) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x13) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x13) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x14) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x14) \ + MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(delim, x15) MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x15) + +#define MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARG(m, x) \ + MIGRAPHX_PP_IIF(MIGRAPHX_PP_IS_EMPTY_ARG(x))(MIGRAPHX_PP_EAT, m)(x) + +#define MIGRAPHX_PP_EACH_ARGS(m, ...) \ + MIGRAPHX_PP_EXPAND(MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARGS( \ + m, MIGRAPHX_PP_EAT, __VA_ARGS__, MIGRAPHX_PP_RES_ARGS())) + +#define MIGRAPHX_PP_TRANSFORM_ARGS(m, ...) \ + MIGRAPHX_PP_EXPAND(MIGRAPHX_PP_PRIMITIVE_TRANSFORM_ARGS( \ + m, MIGRAPHX_PP_COMMA, __VA_ARGS__, MIGRAPHX_PP_RES_ARGS())) + +// NOLINTEND(*-macro-to-enum) + +#endif // MIGRAPHX_GUARD_KERNELS_PP_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/preload.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/preload.hpp new file mode 100644 index 000000000..3978d0af3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/preload.hpp @@ -0,0 +1,198 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
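// Intended expansions of the variadic helpers defined above, assuming a
// user-supplied macro F(x) and up to 16 arguments:
//
//   MIGRAPHX_PP_TRANSFORM_ARGS(F, a, b, c)  ->  F(a), F(b), F(c)
//   MIGRAPHX_PP_EACH_ARGS(F, a, b, c)       ->  F(a) F(b) F(c)
//
// Unused slots expand to nothing because MIGRAPHX_PP_IS_EMPTY_ARG routes them to
// MIGRAPHX_PP_EAT. This is how math.hpp builds its overload sets: the arguments to
// MIGRAPHX_DEVICE_MATH_WRAP are transformed one by one into lifted lambdas with
// commas inserted between them.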
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_PRELOAD_HPP +#define MIGRAPHX_GUARD_KERNELS_PRELOAD_HPP + +#include +#include +#include +#include + +namespace migraphx { + +template +struct remove_vec_impl +{ + using type = T; +}; + +template +struct remove_vec_impl> +{ + using type = T; +}; + +template +using remove_vec = typename remove_vec_impl::type; + +template +constexpr auto traverse_preload(Shapes... ss) +{ + return [=](auto f, auto... g) { + index_int offset = 0; + auto each = [&](auto x) { + using type = remove_vec; + constexpr auto s = decltype(x.get_shape()){}; + constexpr auto size = s.element_space(); + if constexpr(not s.broadcasted() or (s.elements() - size) < 64 or + not is_same{}) + return f(x, offset, false_type{}); + else + { + auto pre_offset = offset; + offset += size; + offset += offset % 4; + return f(x, pre_offset, true_type{}); + } + }; + return by(each, g...)(ss...); + }; +} + +template +constexpr index_int compute_preload_size_c(Shapes...) +{ + index_int size = 0; + traverse_preload(Shapes{}...)( + [&](auto s, auto offset, auto) { size = offset + s.element_space(); }); + return size; +} + +template +constexpr auto compute_preload_size(Shapes...) +{ + return _c(Shapes{}...)>; +} + +template +__device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs) +{ + auto invoke = [&](auto... ys) { + __syncthreads(); + f(ys...); + }; + traverse_preload(xs...)( + [&](auto x, auto offset, auto copy) { + if constexpr(copy) + { + if constexpr(decltype(tensor_vec_size(x)){} == 0) + { + auto v = auto_vectorize(x); + auto b = as_vec(tensor_vec_size(v), buffer + offset); + idx.local_stride(v.get_shape().element_space(), + [&](auto i) { b[i] = v.data()[i]; }); + return x.with(buffer + offset); + } + else + { + auto b = as_vec(tensor_vec_size(x), buffer + offset); + idx.local_stride(x.get_shape().element_space(), + [&](auto i) { b[i] = x.data()[i]; }); + return x.with(b); + } + } + else + { + return x; + } + }, + invoke); +} + +template +struct shape_type : Shape +{ + using type = T; +}; + +template +constexpr auto make_shape_type(T) +{ + return shape_type{}; +} + +template +__device__ auto preload(index idx, Ts... xs) +{ + using type = remove_vec; + constexpr auto size = decltype(compute_preload_size(make_shape_type(xs)...)){}; + const index_int max_size = 512 * sizeof(type); + return [=](auto f) { + if constexpr(size > 0 and size < max_size) + { + __shared__ type buffer[size]; + preload_copy(idx, f, buffer, xs...); + } + else + { + f(xs...); + } + }; +} + +inline __device__ auto auto_preload(index idx) +{ + return make_transform([=](auto f, auto out, auto... xs) { + preload(idx, xs...)([&](auto... 
ys) { f(out, ys...); }); + }); +} + +template +__device__ auto preload_copy(index idx, T x) +{ + return [=](auto f) { + if constexpr(B) + { + using type = typename T::type; + constexpr auto size = get_shape_c{}.element_space(); + __shared__ type buffer[size]; + // TODO: Always vecotrize when size > 4, and then use a second loop for remainder + constexpr auto n = find_vectorize_size([&](auto i) { return (size % i) == 0; }); + auto input = as_vec(remove_bool(x.data())); + auto b = as_vec(remove_bool(buffer)); + idx.local_stride(size / n, [&](auto i) { b[i] = input[i]; }); + return f(x.with(buffer)); + } + else + { + return f(x); + } + }; +} + +template +__device__ auto auto_preload(index idx) +{ + return make_transform([=](auto f, auto... xs) { + auto invoke = [=](auto... ys) { + if constexpr((Bs or ...)) + __syncthreads(); + f(ys...); + }; + join(invoke, preload_copy(idx, xs)...); + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_PRELOAD_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/print.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/print.hpp new file mode 100644 index 000000000..a12424535 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/print.hpp @@ -0,0 +1,270 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
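// Host-side sketch of the per-input staging decision made by traverse_preload and
// preload above: only broadcasted inputs whose element_space is much smaller than
// the number of reads are copied into __shared__ memory, and preload() further
// caps the combined buffer at 512 elements' worth of the value type. The function
// name should_preload is illustrative only.
#include <cstddef>

bool should_preload(bool broadcasted, std::size_t element_space, std::size_t elements)
{
    // Mirrors the traverse_preload condition: the broadcast must repeat enough
    // elements (at least 64 more reads than stored values) to be worth staging.
    return broadcasted and (elements - element_space) >= 64;
}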
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP +#define MIGRAPHX_GUARD_KERNELS_PRINT_HPP + +#include +#include +#include +#include + +namespace migraphx { + +template +struct on_exit +{ + F f; + G g; + template + __host__ __device__ auto operator()(T x) const + { + return f(x); + } + + __host__ __device__ ~on_exit() { f(g); } +}; + +template +constexpr auto print_type_name_probe() +{ + constexpr auto name = __PRETTY_FUNCTION__; + constexpr auto size = sizeof(__PRETTY_FUNCTION__); + constexpr auto parameter_name = "PrivateMIGraphXTypeNameProbe = "; + constexpr auto parameter_name_size = sizeof("PrivateMIGraphXTypeNameProbe = ") - 1; + constexpr auto begin = + search(name, name + size, parameter_name, parameter_name + parameter_name_size); + static_assert(begin < name + size, "Type probe not found."); + constexpr auto start = begin + parameter_name_size; + constexpr auto last = find_if(start, name + size, [](auto c) { return c == ']' or c == ';'; }); + return [=](const auto& s) { s.print_string(start, last - start); }; +} + +template +struct type_printer +{ + template + friend constexpr const Stream& operator<<(const Stream& s, type_printer) + { + print_type_name_probe()(s); + return s; + } +}; + +template +constexpr type_printer type_of() +{ + return {}; +} + +template +constexpr type_printer type_of(T) +{ + return {}; +} + +template +constexpr type_printer sub_type_of() +{ + return {}; +} + +template +constexpr type_printer sub_type_of(T) +{ + return {}; +} + +template +struct basic_printer +{ + F f; + __host__ __device__ const basic_printer& print_long(long value) const + { + f([&] { printf("%li", value); }); + return *this; + } + __host__ __device__ const basic_printer& print_ulong(unsigned long value) const + { + f([&] { printf("%lu", value); }); + return *this; + } + __host__ __device__ const basic_printer& print_char(char value) const + { + f([&] { printf("%c", value); }); + return *this; + } + __host__ __device__ const basic_printer& print_string(const char* value) const + { + f([&] { printf("%s", value); }); + return *this; + } + __host__ __device__ const basic_printer& print_string(const char* value, int size) const + { + f([&] { printf("%.*s", size, value); }); + return *this; + } + __host__ __device__ const basic_printer& print_double(double value) const + { + f([&] { printf("%f", value); }); + return *this; + } + __host__ __device__ const basic_printer& print_bool(bool value) const + { + f([&] { + if(value) + printf("true"); + else + printf("false"); + }); + return *this; + } + __host__ __device__ const basic_printer& operator<<(short value) const + { + return print_long(value); + } + __host__ __device__ const basic_printer& operator<<(unsigned short value) const + { + return print_ulong(value); + } + __host__ __device__ const basic_printer& operator<<(int value) const + { + return print_long(value); + } + __host__ __device__ const basic_printer& operator<<(unsigned int value) const + { + return print_ulong(value); + } + __host__ __device__ const basic_printer& operator<<(long value) const + { + return print_long(value); + } + __host__ __device__ const basic_printer& operator<<(unsigned long value) const + { + return print_ulong(value); + } + __host__ __device__ const basic_printer& operator<<(migraphx::half value) const + { + return print_double(value); + } + __host__ __device__ const basic_printer& operator<<(float value) const + { + return print_double(value); + } + __host__ __device__ const basic_printer& operator<<(double value) const + { + return print_double(value); + } + 
__host__ __device__ const basic_printer& operator<<(bool value) const + { + return print_bool(value); + } + __host__ __device__ const basic_printer& operator<<(char value) const + { + return print_char(value); + } + __host__ __device__ const basic_printer& operator<<(unsigned char value) const + { + return print_char(value); + } + __host__ __device__ const basic_printer& operator<<(const char* value) const + { + return print_string(value); + } +}; + +template +constexpr basic_printer make_printer(F f) +{ + return {f}; +} + +template +constexpr basic_printer> make_printer(F f, G g) +{ + return {{f, g}}; +} + +inline __device__ auto cout() +{ + return make_printer([](auto f) { f(); }); +} + +inline __device__ auto coutln() +{ + return make_printer([](auto f) { f(); }, [] { printf("\n"); }); +} + +template +__device__ void unsafe_print_each(Stream s, T x, Ts... xs) +{ + s << x; + each_args([&](auto xx) { s << ' ' << xx; }, xs...); +} + +template +__device__ void print_each(Stream s, Ts... xs) +{ + auto idx = make_index(); + for(auto i = 0; i < idx.nglobal(); i++) + { + if(i == idx.global) + unsafe_print_each(s, xs...); + __syncthreads(); + } +} + +template +__device__ void print_each_once(Stream s, Ts... xs) +{ + auto idx = make_index(); + if(idx.global == 0) + unsafe_print_each(s, xs...); +} + +template +__device__ void print(Ts... xs) +{ + print_each(cout(), xs...); +} + +template +__device__ void print_once(Ts... xs) +{ + print_each_once(cout(), xs...); +} + +template +__device__ void println(Ts... xs) +{ + print_each(cout(), xs..., '\n'); +} + +template +__device__ void println_once(Ts... xs) +{ + print_each_once(cout(), xs..., '\n'); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_PRINT_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp new file mode 100644 index 000000000..af32a723b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/ranges.hpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
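// Typical debugging usage of the helpers above inside a kernel body (sketch only):
//
//   migraphx::println_once("launch: ", idx.nglobal(), " threads"); // printed once, from global id 0
//   migraphx::println("thread ", idx.global, " got ", x);          // one line per work-item,
//                                                                  // serialized by print_each
//   migraphx::println_once(migraphx::type_of(x));                  // prints the deduced type name
//
// println_once avoids the O(nglobal) serialization loop that println pays for, so
// it is the cheaper choice when a single line is enough.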
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_RANGES_HPP +#define MIGRAPHX_GUARD_KERNELS_RANGES_HPP + +#include + +namespace migraphx { + +template +struct iterator_range +{ + Iterator start; + Iterator last; + + constexpr Iterator begin() const { return start; } + + constexpr Iterator end() const { return last; } +}; + +constexpr iterator_range range(diff_int start, diff_int last) +{ + return {{start, {}}, {last, {}}}; +} +constexpr iterator_range range(diff_int last) { return range(0, last); } + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_RANGES_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/rank.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/rank.hpp new file mode 100644 index 000000000..5765b4f3e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/rank.hpp @@ -0,0 +1,41 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_RANK_HPP +#define MIGRAPHX_GUARD_KERNELS_RANK_HPP + +namespace migraphx { + +template +struct rank : rank +{ +}; + +template <> +struct rank<0> +{ +}; + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_RANK_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp new file mode 100644 index 000000000..76150cbfc --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp @@ -0,0 +1,785 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
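// Minimal illustration of the rank<N> overload-ordering idiom defined above:
// rank<2> derives from rank<1>, which derives from rank<0>, so a call made with
// the highest rank picks the most specialized viable overload and falls back to
// lower ranks otherwise. The helper names size_impl/size_of are illustrative and
// assume the rank template above is visible.
template <class T>
auto size_impl(migraphx::rank<1>, const T& x) -> decltype(x.size())
{
    return x.size(); // preferred when T has a .size() member
}

template <class T>
int size_impl(migraphx::rank<0>, const T&)
{
    return -1; // fallback for everything else
}

template <class T>
auto size_of(const T& x)
{
    return size_impl(migraphx::rank<1>{}, x);
}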
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_REDUCE_HPP +#define MIGRAPHX_GUARD_KERNELS_REDUCE_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { + +#if MIGRAPHX_HAS_DPP + +template +__device__ void dpp_reduce(T& in, Op op) +{ + static_assert(SubWaveSize <= MIGRAPHX_WAVEFRONTSIZE, "Too large subwave size"); + static_assert(is_power_of_2(SubWaveSize), "SubWaveSize is not a power of 2"); + if constexpr(SubWaveSize > 1) + { + auto out = dpp_mov(in); + in = op(in, out); + } + if constexpr(SubWaveSize > 2) + { + auto out = dpp_mov(in); + in = op(in, out); + } + if constexpr(SubWaveSize > 4) + { + auto out = dpp_mov(in); + in = op(in, out); + } + if constexpr(SubWaveSize > 8) + { + auto out = dpp_mov(in); + in = op(in, out); + } +#if MIGRAPHX_WAVEFRONTSIZE == 32 + if constexpr(SubWaveSize > 16) + { + auto out = dpp_swizzle<0x1e0>(in); + in = op(in, out); + } +#else + if constexpr(SubWaveSize > 16) + { + auto out = dpp_mov(in); + in = op(in, out); + } + if constexpr(SubWaveSize > 32) + { + auto out = dpp_mov(in); + in = op(in, out); + } +#endif +} + +#if defined(MIGRAPHX_USE_CLANG_TIDY) || defined(CPPCHECK) +// NOLINTNEXTLINE +#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \ + template \ + __device__ inline void dpp_reduce(type& x, op f) \ + { \ + (void)f; \ + x = 1; \ + } +#else +#define MIGRAPHX_DPP_IIF64(then, ...) then +#define MIGRAPHX_DPP_IIF32(then, ...) __VA_ARGS__ +#define MIGRAPHX_DPP_IF_64(x) MIGRAPHX_PP_CAT(MIGRAPHX_DPP_IIF, x) +#define MIGRAPHX_DPP_WHEN_64(x) MIGRAPHX_DPP_IF_64(x)(MIGRAPHX_PP_EXPAND, MIGRAPHX_PP_EAT) + +#define MIGRAPHX_DPP_REDUCE_ASM0(ins) #ins " %0 %0 %0 row_shr:1\n" +#define MIGRAPHX_DPP_REDUCE_ASM1(ins) #ins " %0 %0 %0 row_shr:2\n" +#define MIGRAPHX_DPP_REDUCE_ASM2(ins) #ins " %0 %0 %0 row_shr:4 bank_mask:0xe\n" +#define MIGRAPHX_DPP_REDUCE_ASM3(ins) #ins " %0 %0 %0 row_shr:8 bank_mask:0xc\n" +#define MIGRAPHX_DPP_REDUCE_ASM4(ins) #ins " %0 %0 %0 row_bcast:15 row_mask:0xa\n" +#define MIGRAPHX_DPP_REDUCE_ASM5(ins) #ins " %0 %0 %0 row_bcast:31 row_mask:0xc\n" + +#define MIGRAPHX_DPP_REDUCE_ASM_REPEAT(i, ins) \ + MIGRAPHX_PP_CAT(MIGRAPHX_DPP_REDUCE_ASM, i)(ins) "s_nop 1\n" +#define MIGRAPHX_DPP_REDUCE_ASM(n, x, ins, ...) 
\ + { \ + __asm__ volatile("s_nop 4\n" MIGRAPHX_PP_REPEAT(n, MIGRAPHX_DPP_REDUCE_ASM_REPEAT, ins) \ + : "=v"(x) \ + : "0"(x)); \ + __VA_ARGS__ \ + } + +#if MIGRAPHX_WAVEFRONTSIZE == 64 +#define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) (void)f; +#else +#define MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f) \ + auto y = dpp_swizzle<0x1e0>(x); \ + x = f(x, y); +#endif + +#define MIGRAPHX_DPP_REDUCE_ASM_FUN(type, op, ins) \ + template \ + __device__ inline void dpp_reduce(type& x, op f) \ + { \ + if constexpr(SubWaveSize == 2) \ + MIGRAPHX_DPP_REDUCE_ASM(0, x, ins, ); \ + if constexpr(SubWaveSize == 4) \ + MIGRAPHX_DPP_REDUCE_ASM(1, x, ins, ); \ + if constexpr(SubWaveSize == 8) \ + MIGRAPHX_DPP_REDUCE_ASM(2, x, ins, ); \ + if constexpr(SubWaveSize == 16) \ + MIGRAPHX_DPP_REDUCE_ASM(3, x, ins, ); \ + if constexpr(SubWaveSize == 32) \ + MIGRAPHX_DPP_REDUCE_ASM(MIGRAPHX_DPP_IF_64(MIGRAPHX_WAVEFRONTSIZE)(4, 3), \ + x, \ + ins, \ + MIGRAPHX_DPP_REDUCE_SWIZZLE(x, f)); \ + MIGRAPHX_DPP_WHEN_64(MIGRAPHX_WAVEFRONTSIZE) \ + (if constexpr(SubWaveSize == 64) MIGRAPHX_DPP_REDUCE_ASM(5, x, ins, )); \ + } +#endif + +// Navi21 doesn't support int32 dpp +#if defined(__gfx1030__) +// NOLINTNEXTLINE +#define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(double, op, prefix##_f64); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(float, op, prefix##_f32); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(half, op, prefix##_f16); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32); +#else +// NOLINTNEXTLINE +#define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(double, op, prefix##_f64); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(float, op, prefix##_f32); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(half, op, prefix##_f16); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(int32_t, op, prefix##sign##32); \ + MIGRAPHX_DPP_REDUCE_ASM_FUN(uint32_t, op, prefix##_u32); +#endif + +// Note: when max and min are in int32_t, signed version of instruction needs to be used. 
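// Host-side analogue of the in-wavefront tree reduction that the DPP assembly above
// performs: at step s, lane i combines its value with the value s lanes below it
// (the row_shr shifts), so after log2(width) steps the last lane holds the reduction
// of all lanes, which the caller then reads back with readlane. Illustrative only.
#include <array>
#include <cstddef>

template <std::size_t Width, class Op>
std::array<float, Width> tree_reduce(std::array<float, Width> lanes, Op op)
{
    for(std::size_t s = 1; s < Width; s *= 2)
        for(std::size_t i = Width; i-- > s;) // descending so reads see pre-step values
            lanes[i] = op(lanes[i], lanes[i - s]);
    return lanes; // lanes[Width - 1] is the full reduction
}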
+MIGRAPHX_DPP_REDUCE(op::sum, v_add, _u) +MIGRAPHX_DPP_REDUCE(op::product, v_mul, _u) +MIGRAPHX_DPP_REDUCE(op::max, v_max, _i) +MIGRAPHX_DPP_REDUCE(op::min, v_min, _i) + +template +__device__ void dpp_reduce(T& in, Op op) +{ + dpp_reduce(in, op); +} + +template +__device__ auto subwave_reduce(index idx, Op op, T init, Index n, F f) +{ + MIGRAPHX_ASSERT(idx.max_nlocal() == idx.nlocal() or (idx.nlocal() % SubWaveSize) == 0); + using type = decltype(index::invoke_loop(f, 0, _c<0>)); + auto x = type(init); + idx.local_subwave_stride( + n, [&](auto i, auto d) { x = op(x, index::invoke_loop(f, i, d)); }); + dpp_reduce(x, op); + return readlane(x); +} + +template +__device__ auto wave_reduce(index idx, Op op, T init, Index n, F f) +{ + return subwave_reduce(idx, op, init, n, f); +} + +template +__device__ auto block_reduce(index idx, Op op, T init, Index n, F f) +{ + MIGRAPHX_ASSERT(idx.max_nlocal() == idx.nlocal()); +#ifdef MIGRAPHX_HAS_CONST_LOCAL + if constexpr(decltype(idx.nlocal()){} == MIGRAPHX_WAVEFRONTSIZE) + return wave_reduce(idx, op, init, n, f); +#endif + constexpr index_int lanes_per_thread = MIGRAPHX_WAVEFRONTSIZE; + using type = decltype(index::invoke_loop(f, 0, _c<0>)); + __shared__ type buffer[idx.max_nlocal() / lanes_per_thread]; + auto x = type(init); + idx.local_stride(n, [&](auto i, auto d) { x = op(x, index::invoke_loop(f, i, d)); }); + dpp_reduce(x, op); + + const auto ldsidx = idx.local / lanes_per_thread; + if((idx.local % lanes_per_thread) == lanes_per_thread - 1) + { + buffer[ldsidx] = x; + } + __syncthreads(); + + type y = type(init); + for(index_int i = 0; i < idx.nlocal() / lanes_per_thread; i++) + { + y = op(y, buffer[i]); + } + return y; +} +#else +template +__device__ auto block_reduce(index idx, Op op, T init, Index n, F f) +{ + MIGRAPHX_ASSERT(idx.max_nlocal() == idx.nlocal()); + using type = decltype(index::invoke_loop(f, 0, _c<0>)); + __shared__ type buffer[idx.max_nlocal()]; + auto x = type(init); + idx.local_stride(n, [&](auto i, auto d) { x = op(x, index::invoke_loop(f, i, d)); }); + buffer[idx.local] = x; + __syncthreads(); + + for(index_int s = 1; s < idx.nlocal(); s *= 2) + { + const index_int index = 2 * s * idx.local; + if(index + s < idx.nlocal()) + { + buffer[index] = op(buffer[index], buffer[index + s]); + } + __syncthreads(); + } + return buffer[0]; +} +#endif + +template +constexpr auto reduce_slice(Input input, T i) +{ + constexpr auto lens = transform(get_shape_c{}.lens, + get_shape_c{}.lens, + [](index_int x, index_int y) -> index_int { + if(x == y) + return 1; + return x; + }); + ; + constexpr auto s = make_shape(lens, get_shape_c{}.strides); + MIGRAPHX_ASSERT((input.get_shape().index(i) + s.element_space()) <= + input.get_shape().element_space()); + return make_tensor_view(&input[i], s); +} + +namespace reduce { + +struct inner_storage_tag +{ +}; + +template +using is_inner_storage = is_base_of>>; + +template +struct lazy_inner_storage : inner_storage_tag +{ + using type = remove_reference_t()(0, _c<0>))>; + F f; + constexpr Size rsize() const { return {}; } + template + constexpr auto operator()(U j, V d) const + { + return f(j, d); + } +}; + +template +constexpr lazy_inner_storage make_lazy_inner_storage(Size, F f) +{ + return {{}, f}; +} + +template +constexpr auto make_indices(Size size) +{ + return make_lazy_inner_storage(size, [](auto j, auto) { return j; }); +} + +template +struct storage_access : F +{ + using type = R; +}; + +template +constexpr storage_access make_storage_access(F f) +{ + return {{f}}; +} + +template +constexpr auto 
sliced(Slicer slicer, F f) +{ + return [=](auto x, auto... xs) { + // TODO: assert all elements are the same + return f(slicer(x), slicer(xs)...); + }; +} + +template +constexpr auto compute_reduce_axis() +{ + constexpr auto lens = + transform_i(get_shape_c{}.lens, [](index_int x, index_int i) -> index_int { + if(i == Axis) + return 1; + return x; + }); + return make_shape(lens, get_shape_c{}.strides); +} + +template +constexpr auto final_reduce(T x, F f) +{ + return vec_reduce(x, f); +} + +template +constexpr auto final_reduce(array a, F f) +{ + return a.apply([&](auto x) { return final_reduce(x, f); }); +} + +template +using with_axis = decltype(compute_reduce_axis()); + +template +struct reducer_base +{ + template + __device__ decltype(auto) make_inner_slice(T&& x) const + { + if constexpr(is_inner_storage{}) + { + return x; + } + else + { + auto&& derived = static_cast(*this); + auto t = derived.slice(x); + return make_storage_access( + [=](auto i, auto...) -> auto& { return t[i]; }); + } + } + + template + constexpr auto get_size(T&& x, [[maybe_unused]] Ts&&... xs) const + { + MIGRAPHX_ASSERT(get_size(x) == get_size(xs...)); + return get_size(x); + } + + template + constexpr auto get_size(T&& x) const + { + if constexpr(is_inner_storage{}) + { + return x.rsize(); + } + else + { + auto&& derived = static_cast(*this); + auto t = derived.slice(x); + return t.size(); + } + } + + template + __device__ auto inner_sliced(F f) const + { + return [=](auto&&... xs) { return f(get_size(xs...), make_inner_slice(xs)...); }; + } + + template + static __device__ typename T::type& decl_inner_storage(const T&); + + template + __device__ auto inner(F f) const + { + return this->inner_sliced([=](auto n, auto&&... xs) { + using result_type = decltype(f(decl_inner_storage(xs)...)); + auto&& derived = static_cast(*this); + if constexpr(is_void{}) + { + derived.inner_void_impl(f, n, xs...); + } + else + { + return derived.template inner_impl(f, n, xs...); + } + }); + } + + template + __device__ auto lazy_inner(F f) const + { + return this->inner_sliced([=](auto n, auto&&... xs) { + return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); }); + }); + } + + template + __device__ auto reduce(Op op, T init, Read read) const + { + return this->inner_sliced([=](auto n, auto&&... xs) { + auto&& derived = static_cast(*this); + return derived.reduce_impl(op, init, read, n, xs...); + }); + } + + template + __device__ auto reduce(Op op, T init) const + { + return this->reduce(op, init, op::id{}); + } + + template + __device__ void outer(F f) const + { + f(); + } + + template + constexpr auto elements() const + { + auto&& derived = static_cast(*this); + using reduce_type = decltype(derived.slice(Input{})); + using value_type = typename Input::type; + constexpr auto relements = get_shape_c{}.elements(); + if constexpr(vec_size() > 1) + return relements * vec_size(); + else + return relements; + } +}; + +struct block +{ + template + struct reducer : reducer_base> + { + index idx; + Slicer slice; + + template + struct inner_storage : inner_storage_tag + { + using type = T; + array arr; + constexpr Size rsize() const { return {}; } + template + constexpr auto& operator()(U, V d) const + { + return arr[d]; + } + template + constexpr auto& operator()(U, V d) + { + return arr[d]; + } + }; + + template + __device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... 
xs) const + { + return block_reduce(idx, op, init, n, [&](auto j, auto d) { + return final_reduce(read(xs(j, d)...), op); + }); + } + + template + __device__ void outer(F f) const + { + if(idx.local == 0) + f(); + } + + template + __device__ void inner_void_impl(F f, N n, Ts&&... xs) const + { + idx.local_stride(n, [&](auto j, auto d) { f(xs(j, d)...); }); + } + + template + __device__ auto inner_impl(F f, N n, Ts&&... xs) const + { + using max_iterations = decltype(idx.max_local_stride_iterations(n)); + inner_storage storage; + idx.local_stride(n, [&](auto j, auto d) { storage(j, d) = R{f(xs(j, d)...)}; }); + return storage; + } + }; + + template + static __device__ auto make(index idx, Slicer slicer) + { + return reducer{{}, idx, slicer}; + } + + template + static __device__ void run(F f) + { + auto idx = make_index(); + constexpr auto nelements = get_shape_c{}.elements(); + idx.global_stride(nelements * idx.nlocal(), [&](auto i) { + const auto out_idx = get_shape_c{}.multi(i / idx.nlocal()); + f(out_idx, make(idx, [&](auto input) { return reduce_slice(input, out_idx); })); + }); + } +}; + +struct block_large +{ + template + struct reducer : reducer_base> + { + index idx; + Slicer slice; + + template + __device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... xs) const + { + return block_reduce(idx, op, init, index_int{n}, [&](auto j, auto d) { + return final_reduce(read(xs(j, d)...), op); + }); + } + + template + __device__ void outer(F f) const + { + if(idx.local == 0) + f(); + } + + template + __device__ void inner_void_impl(F f, N n, Ts&&... xs) const + { + idx.local_stride(index_int{n}, [&](auto j, auto d) { f(xs(j, d)...); }); + } + + template + __device__ auto inner_impl(F f, N n, Ts&&... xs) const + { + return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); }); + } + }; + + template + static __device__ auto make(index idx, Slicer slicer) + { + return reducer{{}, idx, slicer}; + } + + template + static __device__ void run(F f) + { + auto idx = make_index(); + constexpr auto nelements = get_shape_c{}.elements(); + idx.global_stride(nelements * idx.nlocal(), [&](auto i) { + const auto out_idx = get_shape_c{}.multi(i / idx.nlocal()); + f(out_idx, make(idx, [&](auto input) { return reduce_slice(input, out_idx); })); + }); + } +}; + +template +struct subwave +{ + template + struct reducer : reducer_base> + { + index idx; + Slicer slice; + + template + struct inner_storage : inner_storage_tag + { + using type = T; + array arr; + constexpr Size rsize() const { return {}; } + template + constexpr auto& operator()(U, V d) const + { + return arr[d]; + } + template + constexpr auto& operator()(U, V d) + { + return arr[d]; + } + }; + + template + __device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... xs) const + { + return subwave_reduce(idx, op, init, n, [&](auto j, auto d) { + return final_reduce(read(xs(j, d)...), op); + }); + } + + template + __device__ void outer(F f) const + { + if(idx.local_subwave() == 0) + f(); + } + + template + __device__ void inner_void_impl(F f, N n, Ts&&... xs) const + { + idx.local_subwave_stride(n, [&](auto j, auto d) { f(xs(j, d)...); }); + } + + template + __device__ auto inner_impl(F f, N n, Ts&&... 
xs) const + { + using max_iterations = + decltype(idx.max_local_subwave_stride_iterations(n)); + inner_storage storage; + idx.local_subwave_stride( + n, [&](auto j, auto d) { storage(j, d) = f(xs(j, d)...); }); + return storage; + } + }; + + template + static __device__ auto make(index idx, Slicer slicer) + { + return reducer{{}, idx, slicer}; + } + + template + static __device__ void run(F f) + { + auto idx = make_index(); + constexpr auto nelements = get_shape_c{}.elements(); + idx.global_stride(nelements * idx.nlocal_subwave(), [&](auto i) { + const auto out_idx = get_shape_c{}.multi(i / idx.nlocal_subwave()); + f(out_idx, make(idx, [&](auto input) { return reduce_slice(input, out_idx); })); + }); + } +}; + +using wave = subwave; + +struct lane +{ + template + struct reducer : reducer_base> + { + index idx; + Slicer slice; + + template + __device__ auto reduce_impl(Op op, T init, Read read, N n, U&& x, Us&&... xs) const + { + using type = remove_reference_t), xs(0, _c<0>)...))>; + type r = type(init); + for(index_int j = 0; j < n; j++) + { + r = op(r, read(x(j, _c<0>), xs(j, _c<0>)...)); + } + return r; + } + + template + __device__ void outer(F f) const + { + f(); + } + + template + __device__ void inner_void_impl(F f, N n, Ts&&... xs) const + { + for(index_int j = 0; j < n; j++) + { + f(xs(j, _c<0>)...); + } + } + + template + __device__ auto inner_impl(F f, N n, Ts&&... xs) const + { + return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); }); + } + }; + template + static __device__ auto make(index idx, Slicer slicer) + { + return reducer{{}, idx, slicer}; + } + + template + static __device__ void run(F f) + { + auto idx = make_index(); + constexpr auto nelements = get_shape_c{}.elements(); + idx.global_stride(nelements, [&](auto i) { + const auto out_idx = get_shape_c{}.multi(i); + f(out_idx, make(idx, [&](auto input) { return reduce_slice(input, out_idx); })); + }); + } +}; + +// TODO: Remove these in the future when they can be selected in the compiler class +template +constexpr auto pick_block() +{ + using nlocal = decltype(index{}.max_nlocal()); + if constexpr(RElements < nlocal{} * 256) + return block{}; + else + return block_large{}; +} +template +using auto_block = decltype(pick_block()); + +template +constexpr auto reduce_elements_with_axis() +{ + constexpr auto s = get_shape_c{}; + return s.lens[Axis]; +} + +} // namespace reduce + +template +__device__ void +simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOuput write) +{ + Algo::template run([&](auto out_idx, auto r) { + auto x = r.reduce(op, init, read)(input); + r.outer([&] { output[out_idx] = write(x); }); + }); +} + +template +__device__ void fused_reduce(Output output_pack, Assign assign, F f) +{ + Algo::template run([&](auto out_idx, auto r) { + auto result_tuple = f(r, out_idx); + unpack_each( + [&](auto output, auto result) { + if constexpr(reduce::is_inner_storage{}) + { + r.inner([&](auto& y, auto x) { assign(y, x); })(output, result); + } + else + { + r.outer([&] { assign(output[out_idx], implicit_conversion(result)); }); + } + }, + output_pack, + result_tuple); + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp new file mode 100644 index 000000000..b7d7216c6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp @@ 
-0,0 +1,229 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP +#define MIGRAPHX_GUARD_KERNELS_ROIALIGN_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { + +struct max_pool +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() { return lowest{}; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y) + { + return max(x, y); + } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(T x, index_int) + { + return (x); + } +}; + +struct avg_pool +{ + MIGRAPHX_DEVICE_CONSTEXPR auto init() { return 0.0; } + + template + MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y) + { + return x + y; + } + + template + MIGRAPHX_DEVICE_CONSTEXPR T final(T x, index_int y) + { + return (y == 0) ? 
T{0.0} : T{x / y}; + } +}; + +template +MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( + const Iterator data, const array& dims, array xy, Op pooling) +{ + array low{}; + array high{}; + for(index_int ii = 0; ii < xy.size(); ++ii) + { + if(xy[ii] < -1.0f or xy[ii] > dims[ii]) + { + return implicit_conversion(0); + } + + xy[ii] = migraphx::max(xy[ii], 0.0f); + low[ii] = xy[ii]; + high[ii] = low[ii] + 1; + if(low[ii] >= dims[ii] - 1) + { + xy[ii] = high[ii] = low[ii] = dims[ii] - 1; + } + } + array locs = {low[0] * dims[1] + low[1], + low[0] * dims[1] + high[1], + high[0] * dims[1] + low[1], + high[0] * dims[1] + high[1]}; + + float ly = xy[0] - low[0]; + float lx = xy[1] - low[1]; + float hy = 1.0f - ly; + float hx = 1.0f - lx; + // do calculations in floating point and convert final result to required type + array ws = {hy * hx, hy * lx, ly * hx, ly * lx}; + + auto v01 = pooling(data[locs[0]] * ws[0], data[locs[1]] * ws[1]); + auto v23 = pooling(data[locs[2]] * ws[2], data[locs[3]] * ws[3]); + return implicit_conversion(pooling(v01, v23)); +} + +template +MIGRAPHX_DEVICE_CONSTEXPR auto calc_pooling(const Iterator& data, + const array& roi_starts, + const array& bin_size, + const array& idx, + const array& bin_grid_size, + const array& dims, + float roi_offset, + Op op) +{ + using in_dtype = typename Iterator::value_type; + in_dtype output_val = in_dtype{op.init()}; + const int64_t count = bin_grid_size[0] * bin_grid_size[1]; + dfor(bin_grid_size[0], bin_grid_size[1])([&](auto iy, auto ix) { + array id = {iy, ix}; + array locs = + roi_starts + idx * bin_size + bin_size * (id + 0.5f) / bin_grid_size + roi_offset; + + auto val = bilinear_interpolate(data, dims, locs, op); + output_val = op(output_val, val); + }); + return op.final(output_val, count); +} + +template +struct roalign_settings +{ + T1 roi_offset{}; + T2 is_avg_pooling{}; + T3 sampling_ratio{}; + T4 spatial_scale{}; +}; + +template +constexpr roalign_settings make_roalign_settings(Ts... 
xs) +{ + return {xs...}; +} + +template +__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, Settings s) +{ + auto index = make_index(); + const auto x = x_t.begin(); + const auto rois = rois_t.begin(); + const auto ind = ind_t.begin(); + // input shape + auto x_lens = x_t.get_shape().lens; + auto channel_num = x_lens[1]; + // input dims of height and width, in all 2-dim arrays, the first dim + // is for height and second dim is for width + array in_dims = {x_lens[2], x_lens[3]}; + + const auto stride = index.nglobal(); + auto out_s = y_t.get_shape(); + auto roi_column_num = rois_t.get_shape().lens[1]; + + // output dims of height and width, in all 2-dim arrays, the first dim + // is for height and second dim is for width + const auto& out_lens = out_s.lens; + array out_dims = {out_lens[2], out_lens[3]}; + + for(index_int i = index.global; i < out_s.elements(); i += stride) + { + auto idx = out_s.multi(i); + int n = idx[0]; + int c = idx[1]; + int ph = idx[2]; + int pw = idx[3]; + + const auto offset_rois = rois + (n * roi_column_num); + const int batch_ind = ind[n]; + + array roi_starts = { + static_cast(offset_rois[1]) * static_cast(s.spatial_scale), + static_cast(offset_rois[0]) * static_cast(s.spatial_scale)}; + array roi_ends = { + static_cast(offset_rois[3]) * static_cast(s.spatial_scale), + static_cast(offset_rois[2]) * static_cast(s.spatial_scale)}; + + array roi_size{}; + array bin_size{}; + array bin_grid_size{}; + + for(index_int ii = 0; ii < roi_size.size(); ++ii) + { + roi_size[ii] = roi_ends[ii] - roi_starts[ii]; + roi_size[ii] = migraphx::max(roi_size[ii], 1.0f); + + bin_size[ii] = roi_size[ii] / out_dims[ii]; + bin_grid_size[ii] = (s.sampling_ratio > 0) + ? s.sampling_ratio + : migraphx::ceil(roi_size[ii] / out_dims[ii]); + } + + const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); + if constexpr(s.is_avg_pooling) + { + y_t[i] = calc_pooling(offset_x, + roi_starts, + bin_size, + {ph, pw}, + bin_grid_size, + in_dims, + s.roi_offset, + avg_pool{}); + } + else + { + y_t[i] = calc_pooling(offset_x, + roi_starts, + bin_size, + {ph, pw}, + bin_grid_size, + in_dims, + s.roi_offset, + max_pool{}); + } + } +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter.hpp new file mode 100644 index 000000000..efe5fe347 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter.hpp @@ -0,0 +1,70 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_SCATTER_ELEMENTS_HPP +#define MIGRAPHX_GUARD_KERNELS_SCATTER_ELEMENTS_HPP + +#include +#include +#include + +namespace migraphx { + +// Checks and skips out of bounds indices if SkipOutOfBounds is true. +// Otherwise does not check and underfined behavior if out of bounds. +template +__device__ void scatter(const T& indices_t, const U& updates_t, const V& output_t, F f) +{ + auto gpu_index = make_index(); + auto indices_shape = indices_t.get_shape(); + auto output_shape = output_t.get_shape(); + auto axis_dim_size = output_shape.lens[Axis]; + + gpu_index.global_stride(indices_shape.elements(), [&](auto i) { + auto out_idx = indices_shape.multi(i); + auto index = indices_t[i]; + index = index < 0 ? index + axis_dim_size : index; + if constexpr(SkipOutOfBounds) + { + if(index < 0) + { + return; + } + } + out_idx[Axis] = index; + if constexpr(SkipOutOfBounds) + { + if(not equal( + out_idx.begin(), out_idx.end(), output_shape.lens.begin(), [](auto x, auto y) { + return x < y; + })) + { + return; + } + } + f(output_t[out_idx], updates_t[i]); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter_reduction_modes.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter_reduction_modes.hpp new file mode 100644 index 000000000..166552a84 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatter_reduction_modes.hpp @@ -0,0 +1,79 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
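// [Illustrative sketch, not part of this patch.] Host-side mirror of the per-element logic in
// the scatter kernel above (scatter.hpp): a possibly negative index along Axis is wrapped by the
// axis length, and when SkipOutOfBounds is true, updates whose final coordinate falls outside the
// output are dropped instead of written. All names below are local to this sketch.
#include <cstdint>
#include <vector>

// Simplest case: scatter 'updates' into a 1-D output along its only axis.
template <bool SkipOutOfBounds>
void scatter_axis0_1d(const std::vector<std::int64_t>& indices,
                      const std::vector<float>& updates,
                      std::vector<float>& output)
{
    const std::int64_t axis_len = static_cast<std::int64_t>(output.size());
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        std::int64_t idx = indices[i];
        if(idx < 0)
            idx += axis_len; // negative indices count from the end, as in the kernel
        if constexpr(SkipOutOfBounds)
        {
            if(idx < 0 or idx >= axis_len)
                continue; // skipped, matching the SkipOutOfBounds=true branches above
        }
        output[static_cast<std::size_t>(idx)] = updates[i]; // plain write; see assign_none below
    }
}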
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_SCATTER_REDUCTION_MODES_HPP +#define MIGRAPHX_GUARD_KERNELS_SCATTER_REDUCTION_MODES_HPP + +#include +#include +#include + +namespace migraphx { + +struct assign_none +{ + template + MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const + { + x = y; + } +}; + +struct assign_add +{ + template + MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const + { + atomic_assign(x, y, op::sum{}); + } +}; + +struct assign_mul +{ + template + MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const + { + atomic_assign(x, y, op::product{}); + } +}; + +struct assign_max +{ + template + MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const + { + atomic_assign(x, y, op::max{}); + } +}; + +struct assign_min +{ + template + MIGRAPHX_DEVICE_CONSTEXPR void operator()(T& x, U y) const + { + atomic_assign(x, y, op::min{}); + } +}; + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp new file mode 100644 index 000000000..dee649e8c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
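// [Illustrative sketch, not part of this patch.] The assign_* functors above are the 'f' callback
// handed to scatter()/scatternd(): they decide how an update combines with the existing output
// element (plain store vs. atomic sum/product/max/min). A host analogue showing why duplicate
// indices need a reduction mode; names are local to this sketch.
#include <vector>

struct host_assign_none
{
    void operator()(float& x, float y) const { x = y; } // last writer wins
};
struct host_assign_add
{
    void operator()(float& x, float y) const { x += y; } // duplicates accumulate
};

template <class Assign>
void scatter_1d(const std::vector<int>& indices,
                const std::vector<float>& updates,
                std::vector<float>& output,
                Assign assign)
{
    for(std::size_t i = 0; i < indices.size(); ++i)
        assign(output[static_cast<std::size_t>(indices[i])], updates[i]);
}
// With indices {0, 0} and updates {1, 2}: host_assign_none leaves output[0] == 2, while
// host_assign_add leaves output[0] == 3 (the device versions use atomic_assign for this).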
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_SCATTERND_HPP +#define MIGRAPHX_GUARD_KERNELS_SCATTERND_HPP + +#include +#include +#include + +namespace migraphx { + +template +__device__ void scatternd(const T& indices_t, const U& updates_t, const V& output_t, F f) +{ + auto index = make_index(); + auto updates_shape = updates_t.get_shape(); + + index.global_stride(updates_shape.elements(), [&](auto i) { + auto output_shape = output_t.get_shape(); + + auto indices_shape = indices_t.get_shape(); + auto k = indices_shape.lens.back(); + auto q = indices_shape.lens.size(); + + auto updates_idx = updates_shape.multi(i); + auto indices_idx = indices_shape.multi(0); + copy(updates_idx.begin(), updates_idx.begin() + q - 1, indices_idx.begin()); + + auto index_start = indices_t.begin() + indices_shape.index(indices_idx); + auto index_end = index_start + k; + auto out_idx = output_shape.multi(0); + copy(index_start, index_end, out_idx.begin()); + copy(updates_idx.begin() + q - 1, updates_idx.end(), out_idx.begin() + k); + + f(output_t[out_idx], updates_t[i]); + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/shape.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/shape.hpp new file mode 100644 index 000000000..0828328d7 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/shape.hpp @@ -0,0 +1,201 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
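// [Illustrative sketch, not part of this patch.] ScatterND index mapping from the kernel above
// (scatternd.hpp), mirrored on the host: the last dimension of 'indices' (length k) supplies the
// first k output coordinates, and the remaining update coordinates pass through unchanged. Here
// k == 2 and the output is a dense row-major matrix; names are local to this sketch.
#include <array>
#include <cstddef>
#include <vector>

void scatternd_k2(const std::vector<std::array<std::size_t, 2>>& indices, // one (row, col) per update
                  const std::vector<float>& updates,
                  std::vector<float>& output,
                  std::size_t cols)
{
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        const auto [row, col] = indices[i];
        output[row * cols + col] = updates[i]; // assign_none; other modes reduce duplicates instead
    }
}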
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_SHAPE_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_SHAPE_HPP + +#include +#include +#include +#include + +namespace migraphx { + +template +struct shape : equality_comparable> +{ + using shape_type = shape; + using index_array = typename Lens::base_array; + Lens lens = {}; + Strides strides = {}; + + constexpr shape() = default; + + constexpr shape(Lens l, Strides s) : lens(l), strides(s) {} + + constexpr auto elements() const { return _c; } + + constexpr auto element_space() const { return _c; } + + constexpr auto packed() const { return not skips() and elements() == element_space(); } + constexpr auto broadcasted() const { return _c; } + constexpr auto transposed() const + { + return return_c([] { + auto lstrides = Strides{}; + if(shape{}.broadcasted()) + { + index_array s{}; + auto out = copy_if( + lstrides.begin(), lstrides.end(), s.begin(), [](auto x) { return x != 0; }); + return not is_sorted(s.begin(), out, greater{}); + } + else + { + return not is_sorted(lstrides.begin(), lstrides.end(), greater{}); + } + }); + } + constexpr auto skips() const + { + return return_c([] { + auto lstrides = Strides{}; + return none_of(lstrides.begin(), lstrides.end(), [](auto x) { return x == 1; }); + }); + } + + constexpr auto standard() const { return packed() and not transposed(); } + + constexpr index_int index(index_array x) const { return x.dot(strides); } + + constexpr index_int index(index_int i) const + { + if(this->standard()) + { + MIGRAPHX_ASSERT(i == compute_index(i)); + return i; + } + else + { + return compute_index(i); + } + } + + constexpr index_int compute_index(index_int i) const + { + const auto rank = this->lens.size(); + index_int s = 1; + index_int result = 0; + for(index_int j = 0; j < rank; j++) + { + const index_int k = rank - j - 1; + const index_int stride = this->strides[k]; + const index_int len = this->lens[k]; + const index_int slen = s * len; + const index_int idx = (i % slen) / s; + result += stride * idx; + s = slen; + } + return result; + } + + /// Convert single index into a multi-index + constexpr index_array multi(index_int idx) const { return lens.multi(idx); } + + /// Convert multi-index into a single index + constexpr index_int single(index_array idx) const + { + if(idx.empty()) + return 0; + return inner_product(lens.begin() + 1, lens.end(), idx.begin(), idx.back()); + } + + constexpr shape get_shape() const { return *this; } + + template + friend constexpr bool operator==(const shape& x, const shape& y) + { + return x.lens == y.lens and x.strides == y.strides; + } + + template + friend constexpr const Stream& operator<<(const Stream& ss, const shape& s) + { + ss << "{" << s.lens << "}, {" << s.strides << "}"; + return ss; + } +}; + +template +constexpr auto calculate_strides(Lens) +{ + return return_array_c([] { + Lens lens{}; + array strides{1}; + const auto n = lens.size() - 1; + index_int stride = 1; + for(index_int i = 0; i < n; i++) + { + auto ri = n - i; + stride *= lens[ri]; + strides[ri - 1] = stride; + } + return strides; + }); +} + +template +constexpr shape make_shape(Lens lens, Strides strides) +{ + return {lens, strides}; +} + +template +constexpr auto make_shape(Lens lens) +{ + return make_shape(lens, calculate_strides(lens)); +} + +template +constexpr auto reorder_shape(Shape, Permutation) +{ + constexpr auto lens = return_array_c([] { return reorder_dims(Shape{}.lens, Permutation{}); }); + constexpr auto strides = + return_array_c([] { return reorder_dims(Shape{}.strides, Permutation{}); 
}); + return make_shape(lens, strides); +} + +template +constexpr auto make_shape_from_permutation(Lens, Permutation) +{ + constexpr auto new_lens = reorder_dims(Lens{}, Permutation{}); + return reorder_shape(make_shape(new_lens), invert_permutation(Permutation{})); +} + +template +constexpr auto make_packed_shape(Shape) +{ + constexpr auto s = Shape{}; + if constexpr(s.packed()) + { + return s; + } + else + { + return make_shape_from_permutation(s.lens, find_permutation(s)); + } +} + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp new file mode 100644 index 000000000..e9c2ac36f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_SOFTMAX_HPP +#define MIGRAPHX_GUARD_KERNELS_SOFTMAX_HPP + +#include +#include + +namespace migraphx { + +template +__device__ void softmax(Input input1, Output output) +{ + using block = reduce::auto_block()>; + block::template run>([&](auto, auto r) { + auto x = r.inner(op::id{})(input1); +#ifdef MIGRAPHX_USE_FAST_SOFTMAX + const auto c = vec_at(r.slice(input1)[0], 0); +#else + const auto c = r.reduce(op::max{}, lowest{}, op::id{})(x); +#endif + r.inner([&](auto& x1) { x1 = migraphx::exp(x1 - c); })(x); + auto batch_sum = + r.reduce(op::sum{}, 0, [](auto x1) { return migraphx::convert(x1); })(x); + r.inner([&](auto& y, auto x1) { y = implicit_conversion(x1 / batch_sum); })(output, x); + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_SOFTMAX_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp new file mode 100644 index 000000000..e959ed6be --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp @@ -0,0 +1,113 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
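// [Illustrative sketch, not part of this patch.] The softmax kernel above follows the usual
// numerically stable recipe per reduction slice: subtract the slice max (the
// MIGRAPHX_USE_FAST_SOFTMAX path reads the first slice element instead), exponentiate, then divide
// by the slice sum. Host-side version of the same three steps:
#include <algorithm>
#include <cmath>
#include <vector>

void softmax_1d(std::vector<float>& x)
{
    if(x.empty())
        return;
    const float c = *std::max_element(x.begin(), x.end()); // r.reduce(op::max{}, lowest{}, ...)
    float sum = 0.0f;
    for(float& v : x)
    {
        v = std::exp(v - c); // r.inner([&](auto& x1) { x1 = exp(x1 - c); })
        sum += v;            // r.reduce(op::sum{}, 0, ...)
    }
    for(float& v : x)
        v /= sum; // final normalisation written to the output tensor
}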
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_TENSOR_VIEW_HPP +#define MIGRAPHX_GUARD_KERNELS_TENSOR_VIEW_HPP + +#include +#include +#include +#include + +namespace migraphx { + +template +struct tensor_view_iterator_read +{ + T* view; + constexpr auto& operator()(MIGRAPHX_CAPTURE_SOURCE_LOCATION(index_int) n) const + { + MIGRAPHX_ASSERT(view != nullptr); + return (*view)[n]; + } +}; + +template +struct tensor_view +{ + using type = T; + using shape_type = Shape; + using index_array = typename Shape::index_array; + using iterator = basic_iota_iterator, index_int>; + + constexpr Shape get_shape() const { return Shape{}; } + constexpr auto size() const { return get_shape().elements(); } + + struct index_to_offset + { + index_int offset; + template + constexpr index_to_offset(U i) : offset(Shape{}.index(i)) + { + } + }; + + constexpr T& operator[](MIGRAPHX_CAPTURE_SOURCE_LOCATION(index_to_offset) i) const + { + index_to_offset ito = i; + MIGRAPHX_WARN(ito.offset < get_shape().element_space(), + i, + "Out of bounds access at offset: ", + ito.offset); + return x[ito.offset]; + } + + constexpr T* data() const { return x; } + + constexpr auto begin() const { return iterator{0, {this}}; } + constexpr auto end() const { return iterator{this->size(), {this}}; } + + constexpr auto begin_at(index_array i) const + { + MIGRAPHX_ASSERT(get_shape().single(i) < get_shape().elements()); + MIGRAPHX_ASSERT(get_shape().index(i) < get_shape().element_space()); + return iterator{get_shape().single(i), {this}}; + } + + template + constexpr tensor_view with(U* y) const + { + static_assert(sizeof(T) == sizeof(U), "Not the same size"); + return {y}; + } + + T* x; +}; + +template +using get_shape_c = typename T::shape_type; + +template +constexpr tensor_view make_tensor_view(T* x, Shape) +{ + return {x}; +} + +template +constexpr auto reorder_tensor_view(T x, Permutation perm) +{ + return make_tensor_view(x.data(), reorder_shape(x.get_shape(), perm)); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_TENSOR_VIEW_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tile.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tile.hpp new file mode 100644 index 000000000..1f11b214f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tile.hpp @@ -0,0 +1,168 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_TILE_HPP +#define MIGRAPHX_GUARD_KERNELS_TILE_HPP + +#include +#include +#include +#include + +namespace migraphx { + +struct tile +{ + template + static constexpr auto pad_shape(Shape) + { + constexpr Shape s{}; + constexpr auto axis = s.strides.size() - _c<1>; + constexpr auto strides = transform_i(s.strides, [](auto stride, auto i) { + if constexpr(i == decltype(axis){}) + { + // Pad by 1 element extra to avoid memory bank conflicts + return stride + 1; + } + else + { + return stride; + } + }); + return make_shape(s.lens, strides); + } + struct load + { + template + static __device__ auto copy(index idx, T x) + { + return [=](auto f) { + using type = typename T::type; + constexpr auto s = pad_shape(make_packed_shape(get_shape_c{})); + constexpr auto size = s.element_space(); + __shared__ type buffer[size]; + auto b = make_tensor_view(buffer, s); + local_tensor_copy(idx, x, b); + f(b); + }; + } + }; + struct store + { + template + static __device__ auto copy(index idx, T x) + { + return [=](auto f) { + using type = typename T::type; + constexpr auto s = pad_shape(make_packed_shape(get_shape_c{})); + constexpr auto size = s.element_space(); + __shared__ type buffer[size]; + auto b = make_tensor_view(buffer, s); + f(b); + local_tensor_copy(idx, b, x); + }; + } + }; + struct none + { + template + static __device__ auto copy(index, T x) + { + return [=](auto f) { f(x); }; + } + }; + + template + static constexpr auto slice(T x, index_int group, InnerLens, OuterLens) + { + constexpr auto outer_strides = + transform(x.get_shape().strides, InnerLens{}, [](auto stride, auto inner_len) { + return stride * inner_len; + }); + constexpr auto is = make_shape(InnerLens{}, x.get_shape().strides); + constexpr auto os = make_shape(OuterLens{}, outer_strides); + auto offset = os.index(group); + MIGRAPHX_ASSERT((os.element_space() + is.element_space()) == + (x.get_shape().element_space() + _c<1>)); + MIGRAPHX_ASSERT((is.elements() + group) <= x.get_shape().elements()); + MIGRAPHX_ASSERT((is.element_space() + offset) <= x.get_shape().element_space()); + return make_tensor_view(x.data() + offset, is); + } + + template + static __device__ auto auto_slice(index idx) + { + return make_transform([=](auto f, auto... 
xs) { + idx.group_stride(OuterLens{}.product(), + [=](auto group) { f(slice(xs, group, InnerLens{}, OuterLens{})...); }); + }); + } + + template + static __device__ auto auto_copy(index idx) + { + return make_transform([=](auto f, auto... xs) { + static_assert(sizeof...(Modes) == sizeof...(xs)); + auto invoke = [=](auto... ys) { + if constexpr((is_same{} or ...)) + __syncthreads(); + f(ys...); + if constexpr((is_same{} or ...)) + __syncthreads(); + }; + join(invoke, Modes::copy(idx, xs)...); + }); + } +}; + +template +__device__ auto tile_stride(index idx) +{ + if constexpr(Tiled) + { + return [=](auto... xs) { return idx.local_stride(xs...); }; + } + else + { + return [=](auto... xs) { return idx.global_stride(xs...); }; + } +} + +template +__device__ auto auto_tile(InnerLens, OuterLens) +{ + if constexpr((is_same{} and ...)) + { + return transform_args(); + } + else + { + auto idx = make_index(); + return transform_args(tile::auto_slice(idx), + tile::auto_copy(idx)); + } +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_TILE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tuple.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tuple.hpp new file mode 100644 index 000000000..c54a9f4d3 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/tuple.hpp @@ -0,0 +1,184 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#ifndef MIGRAPHX_GUARD_KERNELS_TUPLE_HPP +#define MIGRAPHX_GUARD_KERNELS_TUPLE_HPP + +#include + +namespace migraphx { + +namespace tuple_detail { + +template +struct element_storage +{ + [[no_unique_address]] T element; +}; + +template +constexpr const auto& get_element(const element_storage& x) +{ + return x.element; +} + +template +constexpr auto& get_element(element_storage& x) +{ + return x.element; +} + +struct unpack_t +{ +}; + +template +struct tuple_storage; + +template +struct tuple_storage, Ts...> : element_storage... +{ + template + constexpr tuple_storage(Us... ys) : element_storage{static_cast(ys)}... + { + } + + template + constexpr tuple_storage(unpack_t, U y) : element_storage{static_cast(y[_c])}... 
+ { + } + + template + constexpr auto operator()(F f) const + { + return f(static_cast&>(*this).element...); + } + + template + constexpr auto operator()(F f) + { + return f(static_cast&>(*this).element...); + } + + template + constexpr auto& operator[](IntegralConstant i) + { + static_assert(i < sizeof...(Ts), "Out of bounds tuple access"); + return get_element(*this); + } + + template + constexpr auto& operator[](IntegralConstant i) const + { + static_assert(i < sizeof...(Ts), "Out of bounds tuple access"); + return get_element(*this); + } + + constexpr index_constant size() const { return {}; } + constexpr auto empty() const { return size() == _c<0>; } +}; + +template +using tuple_base = tuple_detail::tuple_storage::type, Ts...>; + +} // namespace tuple_detail + +// NOLINTNEXTLINE +#define MIGRAPHX_DEVICE_TUPLE_OP(op, binary_op) \ + template \ + constexpr tuple& operator op(const tuple& rhs) \ + { \ + (*this)( \ + [&](auto&... xs) { rhs([&](const auto&... ys) { swallow{((xs op ys), 0)...}; }); }); \ + return *this; \ + } \ + template \ + friend constexpr auto operator binary_op(const tuple& lhs, const tuple& rhs) \ + { \ + using result = tuple() binary_op declval())...>; \ + return lhs([&](auto&... xs) { \ + return rhs([&](const auto&... ys) { return result{xs binary_op ys...}; }); \ + }); \ + } + +template +struct tuple : tuple_detail::tuple_base +{ + using base = tuple_detail::tuple_base; + + constexpr tuple() : base(Ts{}...) {} + + template {} and ...))> + constexpr tuple(Us... ys) : base(ys...) + { + } + + template {} and ...))> + constexpr tuple(tuple y) : base(tuple_detail::unpack_t{}, y) + { + } + + MIGRAPHX_DEVICE_TUPLE_OP(+=, +) + MIGRAPHX_DEVICE_TUPLE_OP(-=, -) + MIGRAPHX_DEVICE_TUPLE_OP(*=, *) + MIGRAPHX_DEVICE_TUPLE_OP(/=, /) + MIGRAPHX_DEVICE_TUPLE_OP(%=, %) + MIGRAPHX_DEVICE_TUPLE_OP(&=, &) + MIGRAPHX_DEVICE_TUPLE_OP(|=, |) + MIGRAPHX_DEVICE_TUPLE_OP(^=, ^) + + friend constexpr bool operator==(const tuple& x, const tuple& y) + { + return x([&](const auto&... xs) { + return y([&](const auto&... ys) { return ((xs == ys) and ...); }); + }); + } + friend constexpr bool operator!=(const tuple& x, const tuple& y) { return not(x == y); } + friend constexpr bool operator<(const tuple& x, const tuple& y) + { + return x([&](const auto&... xs) { + return y([&](const auto&... ys) { + return fold([&](auto a, auto b) { return a == 0 ? b() : a; })(0, [&] { + return (xs < ys) ? -1 : (ys < xs) ? 1 : 0; + }...); + }); + }) < 0; + } + friend constexpr bool operator>(const tuple& x, const tuple& y) { return y < x; } + friend constexpr bool operator<=(const tuple& x, const tuple& y) { return not(x > y); } + friend constexpr bool operator>=(const tuple& x, const tuple& y) { return not(x < y); } +}; + +template +constexpr tuple make_tuple(Ts... xs) +{ + return {xs...}; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_TUPLE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp new file mode 100644 index 000000000..24b7d4a5b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp @@ -0,0 +1,289 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. 
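// [Illustrative sketch, not part of this patch.] MIGRAPHX_DEVICE_TUPLE_OP above generates an
// elementwise compound assignment plus the matching binary operator, so the device tuple behaves
// like a small value type. A stripped-down host analogue of the same pattern for two elements:
struct pair2
{
    int a;
    float b;
    constexpr pair2& operator+=(const pair2& rhs)
    {
        a += rhs.a; // every element combined independently, as the unpacking lambdas do
        b += rhs.b;
        return *this;
    }
    friend constexpr pair2 operator+(pair2 lhs, const pair2& rhs)
    {
        lhs += rhs;
        return lhs;
    }
};
// pair2 z = pair2{1, 2.0f} + pair2{3, 4.0f}; // z holds {4, 6.0f}; the generated comparison
// operators additionally give the device tuple lexicographic ordering.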
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPE_TRAITS_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPE_TRAITS_HPP + +#include +#include + +namespace migraphx { + +template +using void_t = void; + +template +U private_declval(int); + +template +T private_declval(long); + +template +auto declval() noexcept -> decltype(private_declval(0)); + +template +struct is_callable_impl : false_type +{ +}; + +template +struct is_callable_impl()(declval()...))>, F, Ts...> : true_type +{ +}; + +template +using is_callable = is_callable_impl; + +template +struct type_identity +{ + using type = T; +}; + +template +struct enable_if +{ +}; + +template +struct enable_if +{ + using type = T; +}; + +template +using enable_if_t = typename enable_if::type; + +template +struct conditional +{ + using type = T; +}; + +template +struct conditional +{ + using type = F; +}; + +template +using conditional_t = typename conditional::type; + +// NOLINTNEXTLINE +#define MIGRAPHX_BUILTIN_TYPE_TRAIT1(name) \ + template \ + struct name : bool_constant<__##name(T)> \ + { \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_BUILTIN_TYPE_TRAIT2(name) \ + template \ + struct name : bool_constant<__##name(T, U)> \ + { \ + } + +// NOLINTNEXTLINE +#define MIGRAPHX_BUILTIN_TYPE_TRAITN(name) \ + template \ + struct name : bool_constant<__##name(Ts...)> \ + { \ + } + +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_arithmetic); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_destructible); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_nothrow_destructible); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_pointer); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_scalar); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_signed); +// MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_void); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_abstract); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_aggregate); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_array); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_class); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_compound); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_const); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_empty); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_enum); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_final); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_floating_point); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_function); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_fundamental); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_integral); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_literal_type); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_lvalue_reference); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_member_function_pointer); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_member_object_pointer); 
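// [Illustrative note, not part of this patch.] Each MIGRAPHX_BUILTIN_TYPE_TRAIT1/2/N invocation in
// the list below defines a trait directly on top of the matching compiler builtin, so the kernels
// get <type_traits>-style queries without pulling in the standard header under hiprtc. For
// example, MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_same) expands to roughly:
//
//   template <class T, class U>
//   struct is_same : bool_constant<__is_same(T, U)> {};
//
// (template heads shown here for clarity since they were lost in this extraction; bool_constant is
// assumed to come from the kernels' own integral-constant utilities).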
+MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_member_pointer); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_object); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_pod); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_polymorphic); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_reference); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_rvalue_reference); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_standard_layout); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_trivial); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_trivially_copyable); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_trivially_destructible); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_union); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_unsigned); +MIGRAPHX_BUILTIN_TYPE_TRAIT1(is_volatile); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_assignable); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_base_of); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_convertible); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_nothrow_assignable); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_same); +MIGRAPHX_BUILTIN_TYPE_TRAIT2(is_trivially_assignable); +MIGRAPHX_BUILTIN_TYPE_TRAITN(is_constructible); +MIGRAPHX_BUILTIN_TYPE_TRAITN(is_nothrow_constructible); +MIGRAPHX_BUILTIN_TYPE_TRAITN(is_trivially_constructible); + +template +struct remove_cv +{ + using type = T; +}; + +template +struct remove_cv : remove_cv +{ +}; + +template +struct remove_cv : remove_cv +{ +}; + +template +using remove_cv_t = typename remove_cv::type; + +template +struct remove_reference +{ + using type = T; +}; +template +struct remove_reference +{ + using type = T; +}; +template +struct remove_reference +{ + using type = T; +}; + +template +using remove_reference_t = typename remove_reference::type; + +template +struct add_pointer : type_identity::type*> +{ +}; + +template +using add_pointer_t = typename add_pointer::type; + +template +struct is_void : is_same> +{ +}; + +template +struct common_type; + +template +struct common_type +{ + using type = T; +}; + +template +struct common_type +{ + using type = decltype(true ? declval() : declval()); +}; + +template +struct common_type +{ + using type = typename common_type::type, Us...>::type; +}; + +template +using common_type_t = typename common_type::type; + +#define MIGRAPHX_REQUIRES(...) 
enable_if_t<__VA_ARGS__, int> = 0 + +constexpr unsigned long long int_max(unsigned long n) +{ + // Note, left shift cannot be used to get the maximum value of int64_type or + // uint64_type because it is undefined behavior to left shift 64 bits for + // these types + if(n == sizeof(int64_t)) + return -1; + return (1ull << (n * 8)) - 1; +} + +template {} or is_floating_point{} or + is_same{})> +constexpr T numeric_max() +{ + if constexpr(is_integral{}) + { + if constexpr(is_unsigned{}) + return int_max(sizeof(T)); + else + return int_max(sizeof(T)) / 2; + } + else if constexpr(is_same{}) + return __DBL_MAX__; + else if constexpr(is_same{}) + return __FLT_MAX__; + else if constexpr(is_same{}) + return __FLT16_MAX__; + else if constexpr(is_same{}) + return 338953138925153547590470800371487866880.000000; + else + return 0; +} + +template +constexpr auto numeric_lowest() -> decltype(numeric_max()) +{ + if constexpr(is_integral{}) + { + if constexpr(is_unsigned{}) + return 0; + else + return -numeric_max() - 1; + } + else + { + return -numeric_max(); + } +} + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/types.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/types.hpp new file mode 100644 index 000000000..c88343ce1 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/types.hpp @@ -0,0 +1,83 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
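// [Illustrative sketch, not part of this patch.] int_max(n) above computes the largest unsigned
// value representable in n bytes, special-casing the 8-byte width because shifting 1 left by 64
// bits would be undefined behaviour; numeric_max/numeric_lowest then derive signed limits from it.
// Host-side spot checks of the same arithmetic (names local to this sketch):
#include <cstdint>

constexpr unsigned long long host_int_max(unsigned long n)
{
    if(n == sizeof(std::int64_t))
        return ~0ull;             // all 64 bits set, same as returning -1 above
    return (1ull << (n * 8)) - 1; // n == 1 -> 255, n == 2 -> 65535, n == 4 -> 4294967295
}

static_assert(host_int_max(1) == 255);                                   // uint8_t max
static_assert(static_cast<long long>(host_int_max(1) / 2) == 127);       // int8_t max
static_assert(-static_cast<long long>(host_int_max(1) / 2) - 1 == -128); // int8_t lowest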
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP + +#include + +namespace migraphx { + +#if defined(MIGRAPHX_USE_HIPRTC) +using int8_t = signed char; +using uint8_t = unsigned char; +using int16_t = signed short; +using uint16_t = unsigned short; +using int32_t = signed int; +using uint32_t = unsigned int; +using int64_t = signed long long; +using uint64_t = unsigned long long; +#elif defined(MIGRAPHX_USE_HIPRTC) +using int8_t = __hip_int8_t; +using uint8_t = __hip_uint8_t; +using int16_t = __hip_int16_t; +using uint16_t = __hip_uint16_t; +using int32_t = __hip_int32_t; +using uint32_t = __hip_uint32_t; +using int64_t = __hip_int64_t; +using uint64_t = __hip_uint64_t; +#else +using int8_t = std::int8_t; +using uint8_t = std::uint8_t; +using int16_t = std::int16_t; +using uint16_t = std::uint16_t; +using int32_t = std::int32_t; +using uint32_t = std::uint32_t; +using int64_t = std::int64_t; +using uint64_t = std::uint64_t; +#endif // MIGRAPHX_USE_HIPRTC +using index_int = uint32_t; +using diff_int = int32_t; +using uintptr_t = uint64_t; + +static_assert(sizeof(int8_t) == 1, "int8_t must be 1 bytes"); +static_assert(sizeof(uint8_t) == 1, "uint8_t must be 1 bytes"); +static_assert(sizeof(int16_t) == 2, "int16_t must be 2 bytes"); +static_assert(sizeof(uint16_t) == 2, "uint16_t must be 2 bytes"); +static_assert(sizeof(int32_t) == 4, "int32_t must be 4 bytes"); +static_assert(sizeof(uint32_t) == 4, "uint32_t must be 4 bytes"); +static_assert(sizeof(int64_t) == 8, "int64_t must be 8 bytes"); +static_assert(sizeof(uint64_t) == 8, "uint64_t must be 8 bytes"); + +#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT + +template +using vec = T __attribute__((ext_vector_type(N))); + +using half = _Float16; +using half2 = migraphx::vec; +using bf16 = __bf16; + +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/unpack_int4.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/unpack_int4.hpp new file mode 100644 index 000000000..35ffcff7a --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/unpack_int4.hpp @@ -0,0 +1,57 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
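// [Illustrative sketch, not part of this patch.] vec<T, N> above is a Clang ext_vector_type, so a
// vector value supports elementwise arithmetic and [] indexing directly, and scalars are splatted
// across lanes in mixed expressions. A host-compilable sketch using the same extension (clang
// only; names local to this sketch):
using float4_t = float __attribute__((ext_vector_type(4)));

inline float4_t axpy4(float a, float4_t x, float4_t y)
{
    return a * x + y; // one expression updates all four lanes
}
// float4_t v = axpy4(2.0f, float4_t{1, 2, 3, 4}, float4_t{}); // v lanes: {2, 4, 6, 8}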
+ */ +#ifndef MIGRAPHX_GUARD_KERNELS_UNPACK_INT4_HPP +#define MIGRAPHX_GUARD_KERNELS_UNPACK_INT4_HPP + +#include "migraphx/kernels/types.hpp" +#include +#include + +namespace migraphx { + +template +__device__ void unpack_int4(Output output, Input input) +{ + const auto input_shape = input.get_shape(); + + make_index().global_stride(input_shape.elements(), [&](auto i) { + auto idx = input_shape.multi(i); + idx[Axis] *= 2; + const auto input_val = input[i]; + + // unpack_int4 op's normalize_compute_shape will ensure that Input::type is either uint8_t + // or int8_t + if constexpr(is_unsigned{}) + output[idx] = input_val & 0xfu; + else + // NOLINTNEXTLINE (hicpp-signed-bitwise) + output[idx] = static_cast(static_cast(input_val) << 4) >> 4; + + idx[Axis] += 1; + output[idx] = input_val >> 4; + }); +} + +} // namespace migraphx +#endif diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vec.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vec.hpp new file mode 100644 index 000000000..ae453ff40 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vec.hpp @@ -0,0 +1,228 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_VEC_HPP +#define MIGRAPHX_GUARD_KERNELS_VEC_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { + +template +constexpr auto vec_size(vec) +{ + return index_constant{}; +} + +template +constexpr auto vec_size(T, ...) // NOLINT +{ + return index_constant<0>{}; +} + +template +constexpr auto vec_size() +{ + return decltype(vec_size(T{})){}; +} + +template +constexpr auto is_any_vec() +{ + if constexpr(sizeof...(Ts) == 0) + return false_type{}; + else + return bool_constant<((vec_size() + ...) 
> 0)>{}; +} + +template +constexpr auto vec_at(T x, I i) +{ + if constexpr(vec_size() == 0) + return x; + else + { + MIGRAPHX_ASSERT(i < vec_size()); + return x[i]; + } +} + +template +using vec_type = decltype(vec_at(T{}, 0)); + +template +constexpr auto common_vec_size() +{ + return fold([](auto x, auto y) { + if constexpr(x > y) + return x; + else + return y; + })(vec_size()...); +} + +// Bools can not be used as a vector type so convert it to uint8 +template +__device__ __host__ T* remove_bool(T* x) +{ + return x; +} + +inline __device__ __host__ uint8_t* remove_bool(bool* x) { return reinterpret_cast(x); } + +template +__device__ __host__ auto as_vec(T* x) +{ + if constexpr(N < 2) + return x; + else + return reinterpret_cast*>(x); +} + +template +using safe_vec = vec{}, uint8_t, T>, N>; + +template +constexpr auto vec_transform(Ts... xs) +{ + return [=](auto f) { + if constexpr(is_any_vec()) + { + using type = decltype(f(vec_at(xs, 0)...)); + constexpr auto size = common_vec_size(); + safe_vec result = {0}; + for(int i = 0; i < size; i++) + result[i] = f(vec_at(xs, i)...); + return result; + } + else + { + return f(xs...); + } + }; +} + +// Return a vector type of N from index i in another larger vector +// N will be 2 for half2 packing +template +constexpr vec, N> vec_packed_at(T x, I i) +{ + if constexpr(vec_size() == 0) + return vec{x}; + else + { + MIGRAPHX_ASSERT((i + N) <= vec_size()); + vec, N> result = {0}; + for(int j = 0; j < N; j++) + { + result[j] = x[i + j]; + } + return result; + } +} + +template +constexpr auto vec_packed_transform(Ts... xs) +{ + return [=](auto f) { + if constexpr(is_any_vec()) + { + using type = vec_type(xs, 0)...))>; + constexpr auto size = common_vec_size(); + safe_vec result = {0}; + for(int i = 0; i < size / N; i++) + { + // Call the function with packed vectors + safe_vec r = f(vec_packed_at(xs, i * N)...); + // Copy the packed vectors to the result + for(int j = 0; j < N; j++) + result[i * N + j] = r[j]; + } + return result; + } + else + { + return f(xs...); + } + }; +} + +template +constexpr auto vec_reduce(T x, Op op) +{ + if constexpr(vec_size() < 2) + return vec_type{x}; + else + { + vec_type result = x[0]; + for(int i = 1; i < vec_size(); i++) + result = op(result, x[i]); + return result; + } +} + +template +constexpr auto vec_generate(F f) +{ + using type = decltype(f(_c<0>)); + return sequence_c([&](auto... is) { return safe_vec{f(is)...}; }); +} + +template +struct implicit_conversion_op +{ + T x; + + template + constexpr operator vec() const + { + if constexpr(vec_size() == 0) + { + return x; + } + else + { + static_assert(vec_size() == N, "Vector mismatch size"); + return __builtin_convertvector(x, vec); + } + } + + template + constexpr operator U() const + { + return static_cast(x); + } +}; + +template +constexpr implicit_conversion_op implicit_conversion(T x) +{ + return {x}; +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_VEC_HPP diff --git a/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp new file mode 100644 index 000000000..b456b5c6e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp @@ -0,0 +1,263 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_KERNELS_VECTORIZE_HPP +#define MIGRAPHX_GUARD_KERNELS_VECTORIZE_HPP + +#include +#include + +namespace migraphx { + +template +constexpr auto tensor_vec_size() +{ + return vec_size(); +} + +template +constexpr auto tensor_vec_size(T) +{ + return tensor_vec_size(); +} + +template +constexpr auto shape_step(Shape s, Axis) +{ + static_assert(N > 0, "Vector size must be non-zero"); + return sequence(s.lens.size(), [&](auto... is) { + auto lens = transform(s.lens, index_ints{}, [&](auto i, auto j) { + constexpr auto axis = Axis::to(); + MIGRAPHX_ASSERT(i != 0); + MIGRAPHX_ASSERT(j != axis or i % N == 0); + if(j == axis) + return i / N; + else + return i; + }); + auto strides = transform(s.strides, index_ints{}, [&](auto i, auto j) { + constexpr auto axis = Axis::to(); + // If stride of the axis is zero then we dont need to adjust the other strides + if(Shape{}.strides[axis] == 0) + return i; + MIGRAPHX_ASSERT(j == axis or i % N == 0); + if(j == axis) + return i; + else + return i / N; + }); + MIGRAPHX_ASSERT(make_shape(lens, strides).elements() * N == s.elements()); + MIGRAPHX_ASSERT(strides[Axis{}] == 0 or + make_shape(lens, strides).element_space() * N == s.element_space()); + return make_shape(lens, strides); + }); +} + +template +__device__ __host__ auto as_vec(T x, Axis axis) +{ + if constexpr(N < 2) + return x; + else + return make_tensor_view(as_vec(remove_bool(x.data())), + shape_step(x.get_shape(), axis)); +} + +template +constexpr auto tensor_step(T x, Axis axis) +{ + if constexpr(N < 2) + { + return x; + } + else + { + constexpr auto s = decltype(x.get_shape()){}; + MIGRAPHX_ASSERT(s.strides[axis] == 0); + return make_tensor_view(x.data(), shape_step(s, axis)); + } +} + +template +__device__ __host__ auto as_vec(IntegralConstant ic, T&& x) +{ + return as_vec(x); +} + +template +constexpr index_int find_vector_axis_c(Shape s) +{ + // Find the fastest axis that is not broadcasted + index_int axis = 0; + for(index_int i = 1; i < s.lens.size(); i++) + { + if(s.strides[i] == 0) + continue; + if(s.strides[axis] == 0 or + pack_compare(less{}, pack(s.strides[i], s.lens[i]), pack(s.strides[axis], s.lens[axis]))) + axis = i; + } + return axis; +} + +template +constexpr index_int find_vector_axis_c(Shapes... 
ss) +{ + const bool all_broadcasted = (ss.broadcasted() and ...); + index_int axis = 0; + bool b = false; + by([&](auto s) { + if(b) + return; + // Skip broadcasted shapes if there are shapes not broadcasted + if(not all_broadcasted and s.broadcasted()) + return; + axis = find_vector_axis_c(s); + if(s.strides[axis] == 1) + b = true; + })(ss...); + if(not b) + return -1; + return axis; +} + +template +constexpr auto find_vector_axis(Shapes...) +{ + return _c; +} + +template +constexpr auto is_vectorizable_c(Axis axis, Shapes... ss) +{ + return ((axis < ss.lens.size() and ss.lens[axis] % N == 0 and + // Only vectorize broadcasted types with stride 0, since this causes issues in the + // preloader + ((not ss.broadcasted() and ss.strides[axis] == 1) or ss.strides[axis] == 0)) and + ...); +} + +template +constexpr auto is_vectorizable(Axis, Shapes...) +{ + return _c(Axis::to(), Shapes{}...)>; +} + +template +constexpr auto find_vectorize_size(P pred) +{ + if constexpr(decltype(pred(_c<4>)){}) + return _c<4>; + else if constexpr(decltype(pred(_c<2>)){}) + return _c<2>; + else + return _c<1>; +} + +template +__host__ __device__ auto auto_vectorize(T x) +{ + if constexpr(tensor_vec_size() == 0) + { + constexpr auto axis = find_vector_axis(x.get_shape()); + constexpr auto n = + find_vectorize_size([&](auto i) { return is_vectorizable(axis, x.get_shape()); }); + return as_vec(x, axis); + } + else + { + return x; + } +} + +template +inline __device__ __host__ auto auto_vectorize_impl(F f, Ts... xs) +{ + // TODO: Just check there a single axis of 1 + constexpr bool packed_or_broadcasted = + ((xs.get_shape().packed() or xs.get_shape().broadcasted()) and ...); + if constexpr(packed_or_broadcasted) + { + constexpr auto axis = decltype(find_vector_axis(xs.get_shape()...)){}; + constexpr auto n = find_vectorize_size( + [&](auto i) { return is_vectorizable(axis, xs.get_shape()...); }); + by( + [&](auto x) { + constexpr auto s = decltype(x.get_shape()){}; + if constexpr(axis < s.strides.size()) + { + MIGRAPHX_ASSERT(s.strides[axis] == 0 or s.strides[axis] == 1); + MIGRAPHX_ASSERT(s.lens[axis] > 0); + MIGRAPHX_ASSERT(n == 1 or s.lens[axis] % n == 0); + if constexpr(s.strides[axis] == 0) + return tensor_step(x, axis); + else + return as_vec(x, axis); + } + else + { + return x; + } + }, + f)(xs...); + } + else + { + f(xs...); + } +} + +inline __device__ __host__ auto auto_vectorize() +{ + return make_transform([](auto f, auto... xs) { auto_vectorize_impl(f, xs...); }); +} + +template +__device__ __host__ auto vectorize_tensor(T x) +{ + constexpr auto shape = get_shape_c{}; + if constexpr(shape.lens[Axis] == 1) + return x; + else if constexpr(shape.strides[Axis] == 0) + return tensor_step(x, _c); + else + return as_vec(x, _c); +} + +template +__device__ __host__ auto vectorize() +{ + return make_transform([](auto f, auto... xs) { + if constexpr(N < 2) + { + f(xs...); + } + else + { + f(vectorize_tensor(xs)...); + } + }); +} + +} // namespace migraphx +#endif // MIGRAPHX_GUARD_KERNELS_VECTORIZE_HPP diff --git a/docker/rocm/migraphx/targets/gpu/logsoftmax.cpp b/docker/rocm/migraphx/targets/gpu/logsoftmax.cpp new file mode 100644 index 000000000..63fc5eb5e --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/logsoftmax.cpp @@ -0,0 +1,53 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_logsoftmax::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2).standard(); + return op.normalize_compute_shape({inputs.at(0)}); +} + +argument +hip_logsoftmax::compute(context& ctx, const shape&, const std::vector& args) const +{ + auto n_dim = args.front().get_shape().lens().size(); + auto tuned_axis = tune_axis(n_dim, op.axis, op.name()); + device::logsoftmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis); + return args.back(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/loop.cpp b/docker/rocm/migraphx/targets/gpu/loop.cpp new file mode 100644 index 000000000..ad5fc210c --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/loop.cpp @@ -0,0 +1,126 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_loop::compute_shape(std::vector inputs, std::vector mods) const +{ + auto input_num = (inputs.size() - 2) / 2; + inputs.erase(inputs.begin() + input_num, inputs.end()); + return op.compute_shape(inputs, std::move(mods)); +} + +struct gpu_loop +{ + int64_t max_iterations = 0; + + template + void copy(context& ctx, const argument& src, T& dst) const + { + argument arg_dst{src.get_shape(), &dst}; + copy_from_gpu(ctx, src, arg_dst); + } + + template + void copy(context& ctx, T src, const argument& dst) const + { + argument arg_src{dst.get_shape(), &src}; + copy_to_gpu(ctx, arg_src, dst); + } + + void append(const std::vector&, + const std::vector&, + const std::vector&, + int64_t, + int64_t) const + { + } + + void set_zero(context& ctx, const std::vector& concatenated_outputs, int iter) const + { + if(iter >= max_iterations) + return; + + auto elem_num = max_iterations - iter; + for(const auto& out : concatenated_outputs) + { + auto s = out.get_shape(); + auto size = s.bytes() / max_iterations; + auto lens = s.lens(); + lens[0] = elem_num; + shape ss{s.type(), lens}; + assert(ss.bytes() + iter * size <= out.get_shape().bytes()); + device::fill(ctx.get_stream().get(), argument(ss, out.data() + iter * size), 0); + } + } + + std::unordered_map get_output_params(const module& m) const + { + auto get_output_index = [](const std::string& name) { + std::string out_prefix = "#output_"; + auto loc = name.find(out_prefix); + if(loc != std::string::npos) + { + return std::stoi(name.substr(loc + out_prefix.size())); + } + + return -1; + }; + + const auto& param_names = m.get_parameter_names(); + std::unordered_map result; + for(const auto& name : param_names) + { + auto index = get_output_index(name); + if(index == -1) + continue; + result[name] = index; + } + + return result; + } +}; + +argument +hip_loop::compute(context& ctx, + const shape&, + const std::vector& args, + const std::vector& mods, + const std::function( + module_ref&, const std::unordered_map&)>& run) const +{ + return run_loop(gpu_loop{op.max_iterations}, op.scan_output_directions, ctx, args, mods, run); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/lowering.cpp b/docker/rocm/migraphx/targets/gpu/lowering.cpp new file mode 100644 index 000000000..adba54661 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/lowering.cpp @@ -0,0 +1,599 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPBLASLT_GEMM); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_MIOPEN_POOLING) + +struct miopen_apply +{ + module* mod = nullptr; + module_pass_manager* mpm = nullptr; + const lowering* pass = nullptr; + std::unordered_map> apply_map{}; + instruction_ref last{}; + bool offload_copy = false; + bool compute_fp32 = false; + + context& get_context() const + { + assert(pass != nullptr); + assert(pass->ctx != nullptr); + return *pass->ctx; + } + + void check_shape(shape x, instruction_ref i) + { + assert(x == i->get_shape()); + (void)x; + (void)i; + } + + void init() + { + assert(mod != nullptr); + assert(pass != nullptr); +#if MIGRAPHX_USE_ROCBLAS + compute_fp32 = get_compute_fp32_flag(); +#endif + offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false; + + add_extend_op("argmax"); + add_extend_op("argmin"); + add_extend_op("logsoftmax"); + add_extend_op("multinomial"); + add_extend_op("nonzero"); + add_extend_op("prefix_scan_sum"); + add_extend_op("reverse"); + add_extend_op("rnn_var_sl_last_output"); + add_extend_op("rnn_var_sl_shift_output"); + add_extend_op("rnn_var_sl_shift_sequence"); + add_extend_op("topk"); + add_generic_op("contiguous"); + add_pooling_op(); +#if MIGRAPHX_USE_MIOPEN + add_convolution_op("convolution"); + add_convolution_op("convolution_backwards"); + add_convolution_op("quant_convolution"); + add_extend_op("lrn"); +#endif +#if MIGRAPHX_USE_ROCBLAS or MIGRAPHX_USE_HIPBLASLT + add_gemm_op("dot"); + add_gemm_op("quant_dot"); +#endif + add_if_op(); + add_loop_op(); + add_neg_op(); + add_nms_op(); + add_lrn_op(); + add_convolution_backwards_op(); + add_select_module_op(); + add_reshape_lazy_op(); + add_group_query_attention_op(); + add_scan_slice_op(); + } + + void copy_params() const + { + if(not offload_copy) + return; + + for(auto ins : iterator_for(*mod)) + { + if(ins->name() != "@param") + continue; + + // parameter no outputs, no need to insert copy to gpu + if(ins->outputs().empty()) + continue; + + auto pos = std::next(ins); + auto a = insert_allocation(pos, ins->get_shape()); + auto c = mod->insert_instruction(pos, make_op("hip::copy_to_gpu"), ins, a); + mod->replace_instruction(ins, c); + } + + // return instruction + auto ret = std::prev(mod->end()); + if(ret->name() == "@return") + { + const auto& inputs = ret->inputs(); + + // each input of ret need to be copied from gpu to host, and replace + // output with copy output + for(const auto& in : inputs) + { + auto p_output = mod->insert_instruction(ret, make_op("hip::copy_from_gpu"), in); + instruction::replace_argument(ret, in, p_output); + } + } + // else branch to handle legacy program without the return instruction + else + { + mod->add_instruction(make_op("hip::copy_from_gpu"), ret); + } + } + + void apply() + { + init(); + for(auto it = mod->begin(); it != mod->end(); it++) + { + auto s = it->get_shape(); + auto attrs = 
it->get_operator().attributes(); + if(apply_map.count(it->name()) > 0) + { + check_shape(s, apply_map.at(it->name())(it)); + } + else if(has_compiler_for(it->name())) + { + check_shape(s, insert_precompile_op(it)); + } + else if(attrs.contains("target")) + { + check_shape(s, insert_custom_op(it, attrs)); + } + if(attrs.contains("prefill")) + { + insert_fill(it, attrs.at("prefill")); + } + } + copy_params(); + } + + void insert_fill(instruction_ref ins, value v) const + { + instruction_ref alloc = instruction::get_output_alias(ins, true); + if(alloc == ins) + return; + auto fill = mod->insert_instruction(ins, make_op("hip::fill", {{"value", v}}), alloc); + instruction::replace_argument(ins, alloc, fill); + } + + instruction_ref insert_custom_op(instruction_ref ins, const value& attrs) const + { + const auto& custom_op = ins->get_operator(); + if(attrs.at("target") == "cpu") + { + auto s = ins->get_shape(); + std::vector cpu_inputs; + auto inputs = ins->inputs(); + auto output = inputs.back(); + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) { + return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in); + }); + cpu_inputs.front() = + mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs); + auto cpu_out = mod->insert_instruction(ins, custom_op, cpu_inputs); + auto gpu_out = + mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output); + return mod->replace_instruction(ins, gpu_out); + } + return ins; + } + + instruction_ref insert_precompile_op(instruction_ref ins) const + { + auto output = insert_allocation(ins, ins->get_shape()); + std::vector refs = ins->inputs(); + refs.push_back(output); + + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + refs, + ins->module_inputs()); + } + + instruction_ref insert_allocation(instruction_ref ins, const shape& s) const + { + return mod->insert_instruction(ins, make_op("allocate", {{"shape", to_value(s)}})); + } + +#if MIGRAPHX_USE_ROCBLAS or MIGRAPHX_USE_HIPBLASLT + template + void add_gemm_op(const std::string& name) + { + apply_map.emplace(name, [=](instruction_ref ins) { + std::vector refs = ins->inputs(); + assert(refs.size() == 2); + auto output = insert_allocation(ins, ins->get_shape()); + refs.push_back(output); +#if MIGRAPHX_USE_HIPBLASLT + if(not enabled(MIGRAPHX_ENABLE_HIPBLASLT_GEMM{}) or not hipblaslt_supported()) + { +#endif + return mod->replace_instruction( + ins, rocblas_gemm{Op{}, 1, 0, compute_fp32}, refs); +#if MIGRAPHX_USE_HIPBLASLT + } + std::string op_name = "gpu::hip_gemm"; + if(contains(name, "quant_")) + { + op_name = "gpu::hip_quant_gemm"; + } + operation gemm_op = make_op(op_name); + return mod->replace_instruction( + ins, + make_op("gpu::hipblaslt_op", {{"op", to_value(gemm_op)}}), + ins->inputs().at(0), + ins->inputs().at(1), + output); +#endif + }); + } +#endif + +#if MIGRAPHX_USE_MIOPEN + void add_convolution_op(const std::string& name) + { + apply_map.emplace(name, [=](instruction_ref ins) { + operation conv = make_op("gpu::" + name, {{"op", ins->get_operator().to_value()}}); + auto output = insert_allocation(ins, ins->get_shape()); + + return mod->replace_instruction(ins, + make_op("gpu::miopen_op", {{"op", to_value(conv)}}), + ins->inputs().at(0), + ins->inputs().at(1), + output); + }); + } +#endif + // add_generic_op just constructs the operator with no fields whereas add_extend_op copies over + // the fields Since it doesn't have fields its default constructed + 
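+    // For illustration (an editor's gloss on the comment above, using two ops that init()
+    // actually registers): add_generic_op("contiguous") lowers an instruction to
+    // make_op("gpu::contiguous") with no fields copied, while add_extend_op("logsoftmax")
+    // lowers to make_op("gpu::logsoftmax", op.to_value()) so that fields such as the axis
+    // carry over. In both cases the buffer created by insert_allocation is appended as the
+    // last argument of the replaced instruction.
+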
+ void add_generic_op(const std::string& name) { add_generic_op(name, "gpu::" + name); } + + void add_generic_op(const std::string& op_name, const std::string& gpu_name) + { + apply_map.emplace(op_name, [=](instruction_ref ins) { + auto output = insert_allocation(ins, ins->get_shape()); + std::vector refs = ins->inputs(); + refs.push_back(output); + + return mod->replace_instruction(ins, make_op(gpu_name), refs); + }); + } + + void add_extend_op(const std::string& name) { add_extend_op(name, "gpu::" + name); } + + void add_extend_op(const std::string& op_name, const std::string& gpu_name) + { + apply_map.emplace(op_name, [=](instruction_ref ins) { + auto&& op = ins->get_operator(); + auto output = insert_allocation(ins, ins->get_shape()); + std::vector refs = ins->inputs(); + refs.push_back(output); + + return mod->replace_instruction(ins, make_op(gpu_name, op.to_value()), refs); + }); + } + + static bool use_miopen_pooling(instruction_ref ins) + { + if(enabled(MIGRAPHX_DISABLE_MIOPEN_POOLING{}) or + not contains({shape::float_type, shape::half_type}, ins->get_shape().type())) + return false; + auto&& op = ins->get_operator(); + auto op_val = op.to_value(); + auto mode = op_val.at("mode").to(); + if(op_val.at("count_include_pad").to() and mode == op::pooling_mode::average) + return false; + if(mode == op::pooling_mode::lpnorm) + return false; + auto op_padding = op_val.at("padding").to_vector(); + auto kdims = ins->get_shape().lens().size() - 2; + return std::equal(op_padding.begin(), + op_padding.begin() + kdims, + op_padding.begin() + kdims, + op_padding.end()); + } + + void add_pooling_op() + { + apply_map.emplace("pooling", [=](instruction_ref ins) { + if(not use_miopen_pooling(ins)) + return insert_precompile_op(ins); +#if MIGRAPHX_USE_MIOPEN + auto output = insert_allocation(ins, ins->get_shape()); + std::vector refs = ins->inputs(); + auto&& op = ins->get_operator(); + refs.push_back(output); + return mod->replace_instruction(ins, make_op("gpu::pooling", op.to_value()), refs); +#else + return insert_precompile_op(ins); +#endif + }); + } + + // use 0 - input to represent neg + void add_neg_op() + { + apply_map.emplace("neg", [=](instruction_ref ins) { + auto s = ins->get_shape(); + std::vector zeros(s.elements(), 0.0f); + auto l0 = mod->add_literal(literal(s, zeros)); + auto output = insert_allocation(ins, s); + return mod->replace_instruction( + ins, make_op("gpu::sub"), l0, ins->inputs().front(), output); + }); + } + + // add input and output argument for the if operator + void add_if_op() + { + apply_map.emplace("if", [=](instruction_ref ins) { + std::vector inputs = ins->inputs(); + auto cpu_cond = + mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.front()); + auto sync_cond = mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_cond); + inputs.front() = sync_cond; + + return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs()); + }); + } + + // replace the loop operator with gpu_loop operator + void add_loop_op() + { + apply_map.emplace("loop", [=](instruction_ref ins) { + std::vector inputs = ins->inputs(); + // copy max_iter from gpu to cpu + auto cpu_max_iter = + mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.at(0)); + auto cpu_cond = + mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs.at(1)); + auto synced_max_iter = + mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_max_iter, cpu_cond); + inputs.at(0) = synced_max_iter; + inputs.at(1) = cpu_cond; + auto copy_inputs = 
inputs; + std::transform(copy_inputs.begin(), + copy_inputs.end(), + std::back_inserter(inputs), + [&](auto in) { return insert_allocation(ins, in->get_shape()); }); + + auto mod_args = ins->module_inputs(); + auto output = insert_allocation(ins, ins->get_shape()); + + const auto* sub_mod = mod_args.front(); + auto cond_out = insert_allocation(ins, sub_mod->get_output_shapes().front()); + + // add cond and mod outputs to the argument list + inputs.push_back(cond_out); + inputs.push_back(output); + + return mod->replace_instruction( + ins, make_op("gpu::loop", ins->get_operator().to_value()), inputs, mod_args); + }); + } + + void add_nms_op() + { + apply_map.emplace("nonmaxsuppression", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + std::vector cpu_inputs; + auto inputs = ins->inputs(); + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) { + return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in); + }); + cpu_inputs.front() = + mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs); + auto cpu_out = mod->insert_instruction(ins, ins->get_operator(), cpu_inputs); + auto gpu_out = + mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output); + return mod->replace_instruction(ins, gpu_out); + }); + } + + void add_lrn_op() + { + apply_map.emplace("lrn", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + std::vector cpu_inputs; + auto inputs = ins->inputs(); + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) { + return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in); + }); + cpu_inputs.front() = + mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs); + auto cpu_out = mod->insert_instruction(ins, ins->get_operator(), cpu_inputs); + auto gpu_out = + mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output); + return mod->replace_instruction(ins, gpu_out); + }); + } + + void add_convolution_backwards_op() + { + apply_map.emplace("convolution_backwards", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + std::vector cpu_inputs; + auto inputs = ins->inputs(); + std::transform( + inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) { + return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in); + }); + cpu_inputs.front() = + mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs); + auto cpu_out = mod->insert_instruction(ins, ins->get_operator(), cpu_inputs); + auto gpu_out = + mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output); + return mod->replace_instruction(ins, gpu_out); + }); + } + + /** + * Adds dynamic allocation for submodule output parameter. + */ + void add_select_module_op() + { + apply_map.emplace("select_module", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + std::vector inputs = ins->inputs(); + inputs.push_back(output); + return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs()); + }); + } + + /** + * Adds reshape lazy to reshape ops that can be aliased instead of copied. + * `gpu::contiguous` are added before and after the reshape; these contiguous + * instructions can be removed by the eliminate_contiguous pass. 
+ */ + void add_reshape_lazy_op() + { + apply_map.emplace("reshape", [=](instruction_ref ins) { + std::vector before_contiguous_args = ins->inputs(); + auto before_alloc = insert_allocation(ins, std::prev(ins)->get_shape()); + before_contiguous_args.push_back(before_alloc); + auto before_contig = + mod->insert_instruction(ins, make_op("gpu::contiguous"), {before_contiguous_args}); + + auto new_lazy_reshape = mod->insert_instruction( + ins, + make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}), + before_contig); + + std::vector after_contiguous_args = {new_lazy_reshape}; + auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape()); + after_contiguous_args.push_back(after_alloc); + return mod->replace_instruction(ins, make_op("gpu::contiguous"), after_contiguous_args); + }); + } + + void add_group_query_attention_op() + { + apply_map.emplace("gpu::gqa_rotary_embedding", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + auto new_inputs = ins->inputs(); + new_inputs.push_back(output); + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + new_inputs); + }); + + apply_map.emplace("gpu::concat_past_present", [=](instruction_ref ins) { + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + ins->inputs()); + }); + + apply_map.emplace("gpu::compute_attention_probabilities", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + auto new_inputs = ins->inputs(); + new_inputs.push_back(output); + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + new_inputs); + }); + + apply_map.emplace("gpu::gqa_softmax", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto inputs = ins->inputs(); + + auto new_inputs = ins->inputs(); + new_inputs.push_back(inputs.at(2)); + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + new_inputs); + }); + + apply_map.emplace("gpu::compute_attention_scores", [=](instruction_ref ins) { + auto s = ins->get_shape(); + auto output = insert_allocation(ins, s); + auto new_inputs = ins->inputs(); + new_inputs.push_back(output); + return mod->replace_instruction( + ins, + make_op("gpu::precompile_op", {{"op", to_value(ins->get_operator())}}), + new_inputs); + }); + } + + void add_scan_slice_op() + { + apply_map.emplace("scan_slice", [=](instruction_ref ins) { + auto inputs = ins->inputs(); + auto cpu_idx = mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), inputs[1]); + inputs[1] = mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_idx); + return mod->replace_instruction( + ins, mod->insert_instruction(ins, ins->get_operator(), inputs)); + }); + } +}; + +void lowering::apply(module_pass_manager& mpm) const +{ + miopen_apply{&mpm.get_module(), &mpm, this}.apply(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/lrn.cpp b/docker/rocm/migraphx/targets/gpu/lrn.cpp new file mode 100644 index 000000000..2e99c208d --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/lrn.cpp @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_MIOPEN +shape miopen_lrn::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2).not_broadcasted(); + return inputs.at(1); +} + +argument miopen_lrn::compute(context& ctx, + const shape& output_shape, + const std::vector& args) const +{ + float alpha = 1; + float beta = 0; + auto x_desc = make_tensor(args[0].get_shape()); + auto y_desc = make_tensor(output_shape); + miopenLRNForward(ctx.get_stream().get_miopen(), + ldesc.get(), + &alpha, + x_desc.get(), + args[0].implicit(), + &beta, + y_desc.get(), + args[1].implicit(), + false, + nullptr); + + return args[1]; +} + +void miopen_lrn::finalize(context&, const shape&, const std::vector&) +{ + ldesc = make_lrn(op); +} +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/mlir.cpp b/docker/rocm/migraphx/targets/gpu/mlir.cpp new file mode 100644 index 000000000..61e0325ac --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/mlir.cpp @@ -0,0 +1,1300 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef MIGRAPHX_MLIR +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 4 +#warning "Incompatible version of rocMLIR library used, disabling" +// Only undefine when not using cppcheck +#ifndef CPPCHECK +#undef MIGRAPHX_MLIR +#endif +#else +#include +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MLIR); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNE_LIMIT); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_DB); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_CFG); +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_ENABLE_SPLITK); + +#ifdef MIGRAPHX_MLIR +template // NOLINT +struct mlir_handle +{ + struct ptr + { + ptr() = default; + ptr(std::nullptr_t) {} + ptr(T x) : obj(x) {} + + std::intptr_t get_value() const + { + static_assert(sizeof(T) == sizeof(std::intptr_t), "MLIR Handle different size"); + return reinterpret_cast(obj); + } + + T get() const { return obj; } + + friend bool operator==(ptr x, ptr y) { return x.get_value() == y.get_value(); } + + friend bool operator!=(ptr x, ptr y) { return not(x == y); } + + explicit operator bool() const noexcept { return obj != ptr(); } + T obj{}; + }; + + struct deleter + { + using pointer = ptr; + + void operator()(pointer x) const + { + if(x != nullptr) + { + (void)f(x.obj); + } + } + }; + + mlir_handle() : handle(nullptr) {} + + mlir_handle(T p) : handle(ptr{p}) {} + + T get() const + { + return handle.get().get(); // NOLINT(readability-redundant-smartptr-get) + } + + T release() { return handle.release().get(); } + + private: + std::unique_ptr handle; +}; + +#define MIGRAPHX_MANAGE_MLIR_HANDLE(T, F) migraphx::gpu::mlir_handle // NOLINT + +using mlir_context = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirContext, mlirContextDestroy); +using mlir_thread_pool = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirLlvmThreadPool, mlirLlvmThreadPoolDestroy); +using mlir_dialect_registry = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirDialectRegistry, + mlirDialectRegistryDestroy); +using mlir_module = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirModule, mlirModuleDestroy); +using mlir_operation = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOperation, mlirOperationDestroy); +using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags, + mlirOpPrintingFlagsDestroy); +using mlir_region = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRegion, mlirRegionDestroy); +using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockDestroy); +using mlir_pass_manager = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirPassManager, mlirPassManagerDestroy); +using mlir_tuning_table = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRockTuningTable, + mlirRockTuningTableDestroy); +using mlir_tuning_space = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRockTuningSpace, + mlirRockTuningSpaceDestroy); +using mlir_tuning_param = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRockTuningParam, + mlirRockTuningParamDestroy); + +std::string_view to_string_view(MlirStringRef s) { return {s.data, s.length}; } + +MlirStringRef make_mlir_string_ref(const std::string_view& s) +{ + return 
mlirStringRefCreate(s.data(), s.size()); +} + +template +void mlir_print(F f, T x, Printer printer) +{ + f( + x, + +[](MlirStringRef s, void* data) { + (*reinterpret_cast(data))(to_string_view(s)); + }, + &printer); +} + +template +void mlir_print(F f, T x, std::ostream& os) +{ + mlir_print(f, x, [&](auto s) { os << s; }); +} + +template +std::string mlir_print(F f, T x) +{ + std::stringstream ss; + mlir_print(f, x, [&](auto s) { ss << s; }); + return ss.str(); +} + +struct mlir_logger +{ + std::stringstream ss; + mlir_context* ctx; + std::optional id; + + mlir_logger() : ctx(nullptr), id(std::nullopt) {} + + mlir_logger(mlir_context* context) : ctx(context) + { + id = + mlirContextAttachDiagnosticHandler(ctx->get(), mlir_diagnostic_print_cb, this, nullptr); + } + + ~mlir_logger() + { + if(id.has_value()) + mlirContextDetachDiagnosticHandler(ctx->get(), *id); + } + + mlir_logger(const mlir_logger& other) = delete; + mlir_logger& operator=(const mlir_logger& other) = delete; + + mlir_logger(mlir_logger&& other) noexcept + : ss(std::move(other.ss)), ctx(other.ctx), id(other.id) + { + other.ctx = nullptr; + other.id = std::nullopt; + } + + mlir_logger& operator=(mlir_logger other) noexcept + { + std::swap(ss, other.ss); + std::swap(ctx, other.ctx); + std::swap(id, other.id); + return *this; + } + + std::string str() const { return ss.str(); } + + void clear() { ss = std::stringstream{}; } + + static MlirLogicalResult mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger); + + MlirLogicalResult handle(MlirDiagnostic diag); +}; + +MlirLogicalResult mlir_logger::mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger) +{ + return reinterpret_cast(logger)->handle(diag); +} + +MlirLogicalResult mlir_logger::handle(MlirDiagnostic diag) +{ + MlirDiagnosticSeverity sev = mlirDiagnosticGetSeverity(diag); + switch(sev) + { + case MlirDiagnosticSeverity::MlirDiagnosticError: ss << "Error: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticWarning: ss << "Warning: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticNote: ss << "Note: "; break; + case MlirDiagnosticSeverity::MlirDiagnosticRemark: ss << "Remark: "; break; + } + mlir_print(mlirDiagnosticPrint, diag, [&](auto s) { ss << s; }); + ss << std::endl; + for(intptr_t i = 0, e = mlirDiagnosticGetNumNotes(diag); i < e; ++i) + { + (void)handle(mlirDiagnosticGetNote(diag, i)); + } + return mlirLogicalResultSuccess(); +} + +struct mlir_program +{ + mlir_program() + : ctx(mlirContextCreateWithRegistry(get_dialect_registry().get(), + /*threadingEnable=*/false)), + location(mlirLocationUnknownGet(ctx.get())), + mmodule(mlirModuleCreateEmpty(location)), + logger(&ctx) + { + mlirContextSetThreadPool(ctx.get(), get_thread_pool().get()); + mlirContextLoadAllAvailableDialects(ctx.get()); + } + + static mlir_dialect_registry& get_dialect_registry() + { + static std::once_flag init_guard; + static mlir_dialect_registry the_registry; + // The MLIR registration functions (for dialects and passes) are not + // necessarily thread-safe and need to be executed exactly once + // (especially since they eventually call non-thread-safe LLVM + // initilizations). + std::call_once(init_guard, [&]() { + the_registry = mlirDialectRegistryCreate(); + mlirRegisterRocMLIRDialects(the_registry.get()); + mlirRegisterRocMLIRPasses(); + }); + return the_registry; + } + + static mlir_thread_pool& get_thread_pool() + { + // To save on overhead, we create one LLVM thread pool and reuse it + // across all MLIR contexts as recommended by MLIR upstream. 
+ // Note that this is thread-safe as of C++11. + static mlir_thread_pool the_pool = mlirLlvmThreadPoolCreate(); + return the_pool; + } + + MlirType make_type(shape::type_t t) const + { + MlirType result; + shape::visit(t, [&](auto as) { + if(as.type_enum() == shape::float_type) + result = mlirF32TypeGet(ctx.get()); + else if(as.type_enum() == shape::half_type) + result = mlirF16TypeGet(ctx.get()); + else if(as.type_enum() == shape::bf16_type) + result = mlirBF16TypeGet(ctx.get()); + else if(as.type_enum() == shape::fp8e4m3fnuz_type) + result = mlirFloat8E4M3FNUZTypeGet(ctx.get()); + else if(as.type_enum() == shape::fp8e5m2fnuz_type) + result = mlirFloat8E5M2FNUZTypeGet(ctx.get()); + else if(as.type_enum() == shape::fp8e4m3fn_type) + result = mlirFloat8E4M3FNTypeGet(ctx.get()); + else if(as.type_enum() == shape::fp8e5m2_type) + result = mlirFloat8E5M2TypeGet(ctx.get()); + else if(as.type_enum() == shape::double_type) + result = mlirF64TypeGet(ctx.get()); + else if(as.is_integral()) + { + if(as.is_unsigned()) + { + result = mlirIntegerTypeUnsignedGet(ctx.get(), as.size() * 8); + } + else + { + result = mlirIntegerTypeSignedGet(ctx.get(), as.size() * 8); + } + } + else + MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum())); + }); + return result; + } + + MlirType make_mlir_shaped(const shape& s) const + { + if(s.dynamic()) + MIGRAPHX_THROW("MLIR does not support dynamic shapes"); + std::vector lens(s.lens().begin(), s.lens().end()); + std::vector strides(s.strides().begin(), s.strides().end()); + return rocmlirMIXRShapedTypeGet( + lens.size(), lens.data(), strides.data(), make_type(s.type())); + } + + template + std::vector make_mlir_shapeds(const Range& r) + { + std::vector result; + std::transform(r.begin(), r.end(), std::back_inserter(result), [&](const auto& s) { + return make_mlir_shaped(s); + }); + return result; + } + + MlirType make_function_type(const std::vector& inputs, const std::vector& outputs) + { + auto in = make_mlir_shapeds(inputs); + auto out = make_mlir_shapeds(outputs); + return mlirFunctionTypeGet(ctx.get(), in.size(), in.data(), out.size(), out.data()); + } + + MlirIdentifier id(const std::string_view& s) const + { + return mlirIdentifierGet(ctx.get(), make_mlir_string_ref(s)); + } + + MlirAttribute attribute(std::int64_t i) const + { + return mlirIntegerAttrGet(mlirIntegerTypeGet(ctx.get(), 64), i); + } + MlirAttribute attribute(std::uint64_t i) const + { + if(i > (std::numeric_limits::max() / 2)) + MIGRAPHX_THROW("MLIR cant handle large integer values since they are ambiguous"); + return mlirIntegerAttrGet(mlirIntegerTypeGet(ctx.get(), 64), i); + } + MlirAttribute attribute(unsigned char i) const { return attribute(std::uint64_t(i)); } + MlirAttribute attribute(bool b) const { return mlirBoolAttrGet(ctx.get(), b ? 
1 : 0); } + MlirAttribute attribute(double d) const + { + return mlirFloatAttrDoubleGet(ctx.get(), mlirF64TypeGet(ctx.get()), d); + } + MlirAttribute attribute(const std::string& s) const + { + return mlirStringAttrGet(ctx.get(), make_mlir_string_ref(s)); + } + MlirAttribute attribute(std::nullptr_t) const { return {}; } + template + MlirAttribute attribute(const std::vector& v) const + { + std::vector attributes; + attributes.reserve(v.size()); + std::transform(v.begin(), v.end(), std::back_inserter(attributes), [&](auto&& x) { + return attribute(x); + }); + return mlirArrayAttrGet(ctx.get(), attributes.size(), attributes.data()); + } + MlirAttribute attribute(const value& v) const + { + MlirAttribute attr; + v.visit_value([&](auto&& x) { attr = attribute(x); }); + return attr; + } + MlirAttribute attribute(const std::vector& v) const + { + if(v.empty()) + { + return mlirArrayAttrGet(ctx.get(), 0, nullptr); + } + if(not v.front().get_key().empty()) + { + std::vector attributes = name_attributes(v); + return mlirDictionaryAttrGet(ctx.get(), attributes.size(), attributes.data()); + } + else + { + std::vector attributes; + attributes.reserve(v.size()); + std::transform(v.begin(), v.end(), std::back_inserter(attributes), [&](auto&& x) { + return attribute(x); + }); + return mlirArrayAttrGet(ctx.get(), attributes.size(), attributes.data()); + } + } + + MlirAttribute attribute(MlirType t) const { return mlirTypeAttrGet(t); } + + MlirAttribute attribute(MlirAttribute a) const { return a; } + + template + MlirNamedAttribute name_attribute(const std::string_view& key, const T& x) const + { + MlirNamedAttribute attr; + attr.name = id(key); + attr.attribute = attribute(x); + return attr; + } + + using attribute_t = std::variant, + MlirType, + MlirAttribute>; + using named_attribute_t = std::pair; + + MlirNamedAttribute name_attribute(const named_attribute_t& na) const + { + return name_attribute(na.first, + std::visit([&](const auto& x) { return attribute(x); }, na.second)); + } + + std::vector + name_attributes(const std::vector& named_attrs) const + { + std::vector attributes; + attributes.reserve(named_attrs.size()); + std::transform(named_attrs.begin(), + named_attrs.end(), + std::back_inserter(attributes), + [&](const named_attribute_t& a) { return name_attribute(a); }); + return attributes; + } + + std::vector name_attributes(const value& v) const + { + std::vector attributes; + attributes.reserve(v.size()); + migraphx::transform_if( + v.begin(), + v.end(), + std::back_inserter(attributes), + [&](const value& x) { return not x.is_null(); }, + [&](const value& x) { return name_attribute(x.get_key(), x.without_key()); }); + return attributes; + } + + struct mlir_operation_state + { + mlir_operation_state(mlir_program& p, const std::string_view& name) + : prog(&p), op_state(mlirOperationStateGet(make_mlir_string_ref(name), p.location)) + { + } + + mlir_operation_state& add_attributes(const std::vector& named_attrs) + { + auto attributes = prog->name_attributes(named_attrs); + if(not attributes.empty()) + { + mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data()); + } + return *this; + } + + mlir_operation_state& add_attribute_value(const value& v) + { + auto attributes = prog->name_attributes(v); + if(not attributes.empty()) + { + mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data()); + } + return *this; + } + + mlir_operation_state& add_regions(std::vector rs) + { + regions = std::move(rs); + return *this; + } + + mlir_operation_state& 
add_region(mlir_region r) + { + regions.emplace_back(std::move(r)); + return *this; + } + + mlir_operation_state& add_results(const std::vector& outputs) + { + auto x = prog->make_mlir_shapeds(outputs); + if(not x.empty()) + { + mlirOperationStateAddResults(&op_state, x.size(), x.data()); + } + return *this; + } + + mlir_operation_state& add_operands(const std::vector& inputs) + { + if(not inputs.empty()) + { + mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data()); + } + return *this; + } + + mlir_operation create_operation() + { + std::vector mregions(regions.size()); + std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) { + return r.get(); + }); + if(not mregions.empty()) + { + mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data()); + } + mlir_operation op(mlirOperationCreate(&op_state)); + // Release memory since mlir_operation owns it + for(auto& r : regions) + r.release(); + regions.clear(); + return op; + } + + mlir_program* prog; + MlirOperationState op_state; + std::vector regions = {}; + }; + + mlir_operation_state create_operation_state(const std::string_view& name) + { + return {*this, name}; + } + + std::vector insert(MlirBlock body, mlir_operation_state ops) + { + std::vector result; + mlir_operation op = ops.create_operation(); + auto weak_op = op.get(); + mlirBlockAppendOwnedOperation(body, op.release()); + + auto n = mlirOperationGetNumResults(weak_op); + result.reserve(n); + transform(range(n), std::back_inserter(result), [&](auto i) { + return mlirOperationGetResult(weak_op, i); + }); + return result; + } + + MlirBlock + insert(MlirBlock body, const module& m, std::unordered_map& ins_map) + { + auto names = m.get_parameter_names(); + std::sort(names.begin(), names.end()); + std::vector inputs; + std::transform(names.begin(), + names.end(), + std::back_inserter(inputs), + [&](const std::string& name) { return m.get_parameter_shape(name); }); + std::vector outputs = m.get_output_shapes(); + + std::vector arg_locs(inputs.size(), location); + auto body_inputs = make_mlir_shapeds(inputs); + mlir_region region = mlirRegionCreate(); + mlir_block fbody = mlirBlockCreate(body_inputs.size(), body_inputs.data(), arg_locs.data()); + MlirBlock result = fbody.get(); + mlirRegionAppendOwnedBlock(region.get(), fbody.release()); + + auto ops = create_operation_state("func.func"); + ops.add_attributes({{"function_type", make_function_type(inputs, outputs)}, + {"sym_name", sym_name}, + {"kernel", std::string("mixr")}, + {"arch", target_arch}, + {"num_cu", num_cu}}); + if(enabled(MIGRAPHX_MLIR_ENABLE_SPLITK{})) + { + ops.add_attributes({{"enable_splitk_for_tuning", mlirUnitAttrGet(ctx.get())}}); + } + ops.add_region(std::move(region)); + insert(body, std::move(ops)); + + for(auto i : range(names.size())) + ins_map[m.get_parameter(names[i])] = mlirBlockGetArgument(result, i); + return result; + } + + static bool is_reshape(const std::string& name) + { + return contains({"reshape", "lazy_reshape", "squeeze", "unsqueeze", "flatten"}, name); + } + + static std::string get_name(instruction_ref ins) + { + if(ins->name() == "@return") + return "func.return"; + if(ins->name() == "@literal") + return "migraphx.literal"; + if(ins->name() == "unpack_int4") + return "migraphx.unpack"; + if(is_reshape(ins->name())) + return "migraphx.reshape"; + return "migraphx." 
+ ins->name(); + } + + static value get_operator_value(instruction_ref ins) + { + const operation& op = ins->get_operator(); + auto v = op.to_value(); + + // Reshape operator can have dim 0 or -1. + // Avoid passing those on to MLIR: + if(is_reshape(op.name())) + v = {{"dims", ins->get_shape().lens()}}; + + if(op.name() == "convolution" or op.name() == "quant_convolution") + { + // Adjust symetrical padding + if(v.at("padding").size() == v.at("stride").size()) + { + auto padding = v.at("padding"); + std::copy(padding.begin(), padding.end(), std::back_inserter(v.at("padding"))); + } + } + + if(op.name() == "unpack_int4") + v["axis"] = ins->get_shape().ndim() - 1; + + return v; + } + + static shape get_shape(instruction_ref ins) + { + if(ins->name() == "@return") + { + assert(ins->inputs().size() == 1); + return ins->inputs().front()->get_shape(); + } + return ins->get_shape(); + } + + static std::string get_symbol_name(const module& m) + { + return "mlir_" + gen::generate_name_from_ops(m); + } + + static void validate(const module& m) + { + if(m.begin() == m.end()) + MIGRAPHX_THROW("Empty module"); + auto last = std::prev(m.end()); + if(last->name() != "@return") + MIGRAPHX_THROW("Missing @return as last instruction."); + } + + void parse(const module& m) + { + validate(m); + sym_name = get_symbol_name(m); + auto mbody = mlirModuleGetBody(mmodule.get()); + std::unordered_map ins_map; + auto fbody = insert(mbody, m, ins_map); + + for(auto ins : iterator_for(m)) + { + if(ins->name() == "@param") + continue; + if(ins->name() == "contiguous") + { + ins_map[ins] = ins_map[ins->inputs().at(0)]; + continue; + } + auto name = get_name(ins); + auto ops = create_operation_state(name); + ops.add_attribute_value(get_operator_value(ins)); + if(ins->name() != "@return") + ops.add_results({get_shape(ins)}); + + if(ins->name() == "@literal") + { + literal r = ins->get_literal(); + auto sh = ins->get_shape(); + + MlirType shaped_type = make_mlir_shaped(sh); + MlirType tensor_type = rocmlirMIXRShapedTypeAsTensor(shaped_type); + MlirAttribute mlir_value_attr = + mlirDenseElementsAttrRawBufferGet(tensor_type, r.get_shape().bytes(), r.data()); + ops.add_attributes({{"value", mlir_value_attr}}); + } + + if(ins->name() == "convolution" or ins->name() == "dot") + { + pp = + problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()}; + } + + std::vector inputs; + transform( + ins->inputs(), std::back_inserter(inputs), [&](auto i) { return ins_map.at(i); }); + ops.add_operands(inputs); + + auto outputs = insert(fbody, std::move(ops)); + if(ins->name() != "@return") + { + assert(outputs.size() == 1); + ins_map[ins] = outputs.front(); + } + } + } + + void run_high_level_pipeline() + { + mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())}; + mlirMIGraphXAddHighLevelPipeline(pm_front.get()); + logger.clear(); + if(mlirLogicalResultIsFailure( + mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get())))) + { + std::string error = "Invalid MLIR created: " + logger.str(); + if(enabled(MIGRAPHX_TRACE_MLIR{})) + { + std::cout << error << std::endl; + } + MIGRAPHX_THROW(error); + } + } + + void run_backend_pipeline() + { + mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())}; + mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str()); + logger.clear(); + const size_t trace = value_of(MIGRAPHX_TRACE_MLIR{}); + static std::mutex mutex; + auto mod_op = mlirModuleGetOperation(mmodule.get()); + if(trace >= 2) + { + const std::lock_guard lock(mutex); + std::cout << 
mlir_print(&mlirOperationPrint, mod_op) << std::endl; + } + + if(mlirLogicalResultIsFailure(mlirPassManagerRunOnOp(pm_back.get(), mod_op))) + { + std::string error = "MLIR backend compilation failed: " + logger.str(); + if(enabled(MIGRAPHX_TRACE_MLIR{})) + { + std::cout << error << std::endl; + } + MIGRAPHX_THROW(error); + } + } + + code_object_op compile(const value& solution) + { + // 1st pipeline to call + run_high_level_pipeline(); + if(solution.is_null()) + get_module_tuned(); + else + set_tuning(solution); + // 2nd pipeline to call + run_backend_pipeline(); + + code_object_op op{}; + op.symbol_name = sym_name; + op.code_object = get_binary(); + std::tie(op.global, op.local) = get_launch_params(); + return op; + } + + void set_gpu_properties(const context& migraphx_ctx) + { + const auto& device = migraphx_ctx.get_current_device(); + target_arch = device.get_device_name(); + num_cu = device.get_cu_count(); + } + + std::pair get_launch_params() const + { + uint32_t attrs[2]; + // returns block and grid sizes + mlirGetKernelAttrs(mmodule.get(), attrs); + std::size_t local = attrs[0]; + std::size_t global = local * attrs[1]; + return {global, local}; + } + + value::binary get_binary() const + { + size_t size = 0; + mlirGetBinary(mmodule.get(), &size, nullptr); + value::binary result(size); + if(mlirGetBinary(mmodule.get(), &size, reinterpret_cast(result.data()))) + return result; + MIGRAPHX_THROW("Failed to compile mlir program"); + } + + void set_tuning(const value& v) MIGRAPHX_TIDY_CONST + { + const auto* str = v.if_string(); + if(str == nullptr) + MIGRAPHX_THROW("mlir tuning solutions must be strings"); + if(not mlirRockTuningSetFromStr(mmodule.get(), make_mlir_string_ref(*str))) + MIGRAPHX_THROW("Failed setting tuning key: " + *str); + } + + tuning_config get_tuning_config(bool exhaustive) + { + tuning_config tc; + run_high_level_pipeline(); + auto tuning_mode = + exhaustive ? RocmlirTuningParamSetKindFull : RocmlirTuningParamSetKindQuick; + if(enabled(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE{})) + tuning_mode = RocmlirTuningParamSetKindExhaustive; + mlir_tuning_space params{mlirRockTuningSpaceCreate(mmodule.get(), tuning_mode)}; + const auto limit = + value_of(MIGRAPHX_MLIR_TUNE_LIMIT{}, std::numeric_limits::max()); + for(auto i : range(std::min(limit, mlirRockTuningGetNumParams(params.get())))) + { + mlir_tuning_param param{mlirRockTuningParamCreate()}; + if(not mlirRockTuningParamGet(params.get(), i, param.get())) + MIGRAPHX_THROW("Incorrect mlir tuning parameter: " + std::to_string(i)); + std::array perf_key; + size_t perf_key_bytes = + mlirRockTuningParamToString(param.get(), perf_key.data(), perf_key.size()); + if(perf_key_bytes > perf_key.size()) + MIGRAPHX_THROW("Tuning perf key was " + std::to_string(perf_key_bytes) + + " bytes and thus too long"); + tc.solutions.emplace_back( + std::string(perf_key.begin(), perf_key.begin() + perf_key_bytes)); + } + std::array tuning_key; + size_t tuning_key_bytes = + mlirRockTuningGetKey(mmodule.get(), tuning_key.data(), tuning_key.size()); + if(tuning_key_bytes > tuning_key.size()) + MIGRAPHX_THROW("Tuning table key was " + std::to_string(tuning_key_bytes) + + " bytes and thus too long"); + tc.problem = std::string(tuning_key.begin(), tuning_key.begin() + tuning_key_bytes); + return tc; + } + + std::string get_tune_params(bool xdlops) const { return get_mlir_perf_for_conv(pp, xdlops); } + + // This function appends to tuning cfg file that could be + // used with rocMLIR tuning scripts. 
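+    // The prob_config key is expected to be tab-separated as "<arch>\t<num_cu>\t<problem>",
+    // matching the rows read back by load_tuning_table(). The problem token decides the
+    // output file: keys whose problem string starts with "conv" are appended to the
+    // ".conv" cfg, everything else to the ".gemm" cfg.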
+ void dump_tuning_cfg(const std::string& prob_config) const + { + std::string tuning_cfg_path = string_value_of(MIGRAPHX_MLIR_TUNING_CFG{}); + if(not tuning_cfg_path.empty()) + { + std::vector tokens = split_string(prob_config, '\t'); + std::string prob = tokens[2]; + + if(starts_with(prob, "conv")) + { + tuning_cfg_path += ".conv"; + } + else + { + tuning_cfg_path += ".gemm"; + } + std::ofstream tuning_cfg(tuning_cfg_path, std::ios::app); + prob = + trim(prob, [](unsigned char c) { return (c == '\0') or (std::isspace(c) != 0); }); + tuning_cfg << prob << std::endl; + } + } + + static std::pair load_tuning_table() + { + mlir_tuning_table tuning_table{mlirRockTuningTableCreate()}; + bool found_table = false; + std::string tuning_db_path = string_value_of(MIGRAPHX_MLIR_TUNING_DB{}); + if(not tuning_db_path.empty()) + { + std::ifstream tuning_db_tsv(tuning_db_path); + if(tuning_db_tsv) + { + found_table = true; + std::string line; + while(std::getline(tuning_db_tsv, line)) + { + std::vector tokens = split_string(line, '\t'); + std::string arch = tokens[0]; + std::string num_cu = tokens[1]; + std::string prob = tokens[2]; + std::string perf = tokens[3]; + std::string key = arch.append("\t").append(num_cu).append("\t").append(prob); + mlirRockTuningUpdateTable(tuning_table.get(), + make_mlir_string_ref(key), + make_mlir_string_ref(perf), + 1.0); + } + } + } + else + { + found_table = false; + std::cerr + << "WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for " + "optimal performance." + << std::endl; + } + return std::make_pair(std::move(tuning_table), found_table); + } + + bool get_module_tuned() const + { + static std::pair tuning_table = load_tuning_table(); + if(not mlirRockTuningSetFromTable(tuning_table.first.get(), mmodule.get())) + { + std::array prob_config; + size_t prob_config_bytes = + mlirRockTuningGetKey(mmodule.get(), prob_config.data(), prob_config.size()); + if(prob_config_bytes >= prob_config.size()) + { + std::cerr << "MLIR tuning key overflowed buffer, needed " << prob_config_bytes + << " bytes" << std::endl; + return false; + } + std::string prob_config_str(prob_config.begin(), + prob_config.begin() + prob_config_bytes); + if(tuning_table.second) + { + std::cerr << "NOTE: MLIR tuning table did not include a key for " << prob_config_str + << std::endl; + } + dump_tuning_cfg(prob_config_str); + return false; + } + return true; + } + + mlir_context ctx; + MlirLocation location; + mlir_module mmodule; + mlir_logger logger; + problem_params pp; + std::deque strings{}; + std::string target_arch = ""; + std::size_t num_cu = 0; + std::string sym_name; +}; + +bool is_reduce(const instruction& ins) { return contains(ins.name(), "reduce"); } + +static void rewrite_reduce(module& m) +{ + for(auto i : iterator_for(m)) + { + if(is_reduce(*i)) + { + auto reduce_op = i->get_operator().to_value(); + auto reduce_axes = reduce_op["axes"].to_vector(); + auto reduce_lens = i->get_shape().lens(); + auto in_shape = i->inputs().front()->get_shape(); + auto in_lens = in_shape.lens(); + assert(in_shape.standard()); + assert(reduce_lens.size() == in_lens.size()); + assert(std::adjacent_find( + reduce_axes.begin(), reduce_axes.end(), [](auto axis_1, auto axis_2) { + return axis_2 - axis_1 > 1; + }) == reduce_axes.end()); + + std::vector new_rsp_dims; + std::vector new_reduce_axes; + for(const auto axis : range(in_shape.ndim())) + { + if(reduce_lens[axis] == in_lens[axis]) + { + new_rsp_dims.push_back(in_lens[axis]); + } + else if(new_reduce_axes.empty()) + { + 
assert(reduce_lens[axis] == 1); + new_rsp_dims.push_back(-1); + new_reduce_axes.push_back(axis); + } + } + auto rsp_ins = m.insert_instruction( + i, migraphx::make_op("reshape", {{"dims", new_rsp_dims}}), i->inputs().front()); + auto collapsed_reduce = m.insert_instruction( + i, migraphx::make_op("reduce_sum", {{"axes", new_reduce_axes}}), rsp_ins); + auto rsp_back = m.insert_instruction( + i, migraphx::make_op("reshape", {{"dims", reduce_lens}}), collapsed_reduce); + m.replace_instruction(i, rsp_back); + } + } + migraphx::run_passes(m, {migraphx::dead_code_elimination{}}); +} + +bool is_module_fusible(const module& m, const context& migraphx_ctx, const value& solution) +{ + auto mm = m; + rewrite_reduce(mm); + mlir_program mp; + mp.set_gpu_properties(migraphx_ctx); + mp.parse(mm); + mp.run_high_level_pipeline(); + return mlirIsModuleFusible(mp.mmodule.get(), make_mlir_string_ref(*solution.if_string())); +} + +void adjust_param_shapes(module& m, const std::vector& inputs) +{ + auto names = m.get_parameter_names(); + std::sort(names.begin(), names.end()); + for(auto i : range(names.size())) + { + const auto& name = names[i]; + const auto& input = inputs[i]; + auto param = m.get_parameter(name); + assert(param->get_shape().standard()); + if(input.standard()) + continue; + auto new_param = m.add_parameter(name + ".0", input); + m.replace_instruction(param, new_param); + m.remove_instruction(param); + } +} + +void replace_params_with_literals(module& m, const std::vector& inputs) +{ + auto names = m.get_parameter_names(); + std::sort(names.begin(), names.end()); + for(auto i : range(names.size())) + { + const auto& name = names[i]; + const auto& input = inputs[i]; + if(input->name() != "@literal") + continue; + auto param = m.get_parameter(name); + auto lit = m.add_literal(input->get_literal()); + m.replace_instruction(param, lit); + m.remove_instruction(param); + } +} + +std::string dump_mlir(module m, const std::vector& inputs) +{ + const_module_ref mr = &m; + if(not inputs.empty()) + { + adjust_param_shapes(m, inputs); + } + rewrite_reduce(m); + mlir_program mp; + mp.parse(*mr); + auto mod_op = mlirModuleGetOperation(mp.mmodule.get()); + return mlir_print(&mlirOperationPrint, mod_op); +} + +static std::string compute_dump_name(const module& m, const std::string& ext) +{ + std::vector sizes; + for(auto ins : iterator_for(m)) + { + if(contains({"quant_convolution", "quant_dot", "convolution", "dot"}, ins->name())) + sizes.insert(sizes.end(), ins->inputs().begin(), ins->inputs().end()); + } + auto name = + mlir_program::get_symbol_name(m) + "_" + shape::to_sizes_string(to_shapes(sizes)) + ext; + replace_string_inplace(name, ", ", "_"); + replace_string_inplace(name, ":", "s"); + return name; +} + +void dump_mlir_to_file(module m, const std::vector& inputs, const fs::path& location) +{ + static std::mutex mutex; + const std::lock_guard lock(mutex); + + if(not inputs.empty()) + { + adjust_param_shapes(m, inputs); + } + rewrite_reduce(m); + + auto name = compute_dump_name(m, ".mlir"); + auto f = location / name; + std::cout << "Dumping MLIR file to: " << f << std::endl; + + mlir_program mp; + mp.parse(m); + auto mod_op = mlirModuleGetOperation(mp.mmodule.get()); + + std::string mlir_str = mlir_print(&mlirOperationPrint, mod_op); + + write_string(f, mlir_str); +} + +std::string dump_mlir(module m) { return dump_mlir(std::move(m), {}); } + +mlir_code_object compile_mlir(const context& migraphx_ctx, + module m, + const std::vector& in_shapes, + const value& solution) +{ + adjust_param_shapes(m, 
in_shapes); + rewrite_reduce(m); + const bool trace = enabled(MIGRAPHX_TRACE_MLIR{}); + + static std::mutex mutex; + if(trace) + { + const std::lock_guard lock(mutex); + std::cout << m << std::endl; + } + + mlir_program mp; + + mp.set_gpu_properties(migraphx_ctx); + mp.parse(m); + auto mod_op = mlirModuleGetOperation(mp.mmodule.get()); + if(trace) + { + const std::lock_guard lock(mutex); + std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl; + } + auto co = mp.compile(solution); + + co.expected_inputs = in_shapes; + auto out_shapes = m.get_output_shapes(); + if(out_shapes.size() == 1) + { + co.output = m.get_output_shapes().front(); + } + else + { + co.output = shape{out_shapes}; + } + mlir_code_object mco; + mco.cop = co; + size_t num_prefill_args = mlirGetNumPrefillArgs(mp.mmodule.get()); + if(num_prefill_args > 0) + { + std::vector prefill_indices(num_prefill_args); + std::vector prefill_mlir_values(num_prefill_args); + mlirGetPrefillArgsInfo( + mp.mmodule.get(), prefill_indices.data(), prefill_mlir_values.data(), num_prefill_args); + std::vector prefill_values(prefill_mlir_values.size()); + std::transform(prefill_mlir_values.begin(), + prefill_mlir_values.end(), + prefill_values.begin(), + [](const auto& v) { + // mlir sets fill attribute as float but migx hip::fill operator only + // supports integer type. + // TODO: Need to add checks that it is indeed an integer. + double dv = mlirFloatAttrGetValueDouble(v); + return static_cast(dv); + }); + mco.prefill_indices = prefill_indices; + mco.prefill_values = prefill_values; + } + return mco; +} + +instruction_ref insert_mlir(module& m, + instruction_ref ins, + code_object_op co, + const std::vector& inputs) +{ + + std::vector refs; + std::size_t last = 0; + refs.reserve(inputs.size()); + std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs)); + last = refs.size() - 1; + co.expected_inputs = to_shapes(refs); + co.output_arg = last; + return m.insert_instruction(ins, co, refs); +} + +tuning_config get_tuning_config_mlir(const context& migraphx_ctx, + module m, + const std::vector& inputs, + bool exhaustive) +{ + adjust_param_shapes(m, inputs); + rewrite_reduce(m); + mlir_program mp; + mp.set_gpu_properties(migraphx_ctx); + mp.parse(m); + auto tc = mp.get_tuning_config(exhaustive); + const bool trace = enabled(MIGRAPHX_TRACE_MLIR{}); + static std::mutex mutex; + if(trace) + { + const std::lock_guard lock(mutex); + std::cout << "Problem: " << tc.problem << std::endl; + auto mod_op = mlirModuleGetOperation(mp.mmodule.get()); + std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl; + } + return tc; +} + +void dump_mlir_to_mxr(module m, + const std::vector& inputs, + const fs::path& location) +{ + static std::mutex mutex; + const std::lock_guard lock(mutex); + + adjust_param_shapes(m, to_shapes(inputs)); + replace_params_with_literals(m, inputs); + std::vector sizes; + for(auto ins : iterator_for(m)) + { + if(contains({"quant_convolution", "quant_dot", "convolution", "dot"}, ins->name())) + sizes.insert(sizes.end(), ins->inputs().begin(), ins->inputs().end()); + } + auto name = compute_dump_name(m, ".mxr"); + auto f = location / name; + std::cout << "Dumping MXR file to: " << f << std::endl; + save(program{std::move(m)}, f.string()); +} + +#else + +template +void use(T&) +{ +} + +std::string dump_mlir(module) { return {}; } + +std::string dump_mlir(module m, const std::vector& inputs) +{ + use(m); + use(inputs); + return {}; +} + +// Disabling clang-tidy warning on non-real useage. 
+// NOLINTBEGIN(performance-unnecessary-value-param) +mlir_code_object compile_mlir(const context&, module, const std::vector&, const value&) +{ + return {}; +} + +instruction_ref +// cppcheck-suppress funcArgNamesDifferent +insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector&) +{ + use(co); + use(m); + return m.end(); +} + +tuning_config get_tuning_config_mlir(const context&, module, const std::vector&, bool) +{ + return {}; +} +// NOLINTEND(performance-unnecessary-value-param) + +#endif + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/multinomial.cpp b/docker/rocm/migraphx/targets/gpu/multinomial.cpp new file mode 100644 index 000000000..51e5c48b4 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/multinomial.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_multinomial::compute_shape(std::vector inputs) const +{ + check_shapes{inputs, *this}.has(3).only_dims(2).standard(); + inputs.pop_back(); + return op.compute_shape(inputs); +} + +argument +hip_multinomial::compute(context& ctx, const shape&, const std::vector& args) const +{ + device::multinomial(ctx.get_stream().get(), args.back(), args.front(), args[1]); + return args.back(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/no_device.cpp b/docker/rocm/migraphx/targets/gpu/no_device.cpp new file mode 100644 index 000000000..0ccdbac74 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/no_device.cpp @@ -0,0 +1,28 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifdef __HIP_DEVICE_COMPILE__ +#error \ + "Device compilation not allowed for migraphx_gpu. Do not link with hip::device. Device code should go into migraphx_device or migraphx_kernels" +#endif diff --git a/docker/rocm/migraphx/targets/gpu/nonzero.cpp b/docker/rocm/migraphx/targets/gpu/nonzero.cpp new file mode 100644 index 000000000..0ff281f88 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/nonzero.cpp @@ -0,0 +1,44 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_nonzero::compute_shape(std::vector inputs) const +{ + return op.compute_shape({inputs.front()}); +} + +argument hip_nonzero::compute(context& ctx, const shape&, const std::vector& args) const +{ + return device::nonzero(ctx.get_stream().get(), args.back(), args.front()); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/pack_args.cpp b/docker/rocm/migraphx/targets/gpu/pack_args.cpp new file mode 100644 index 000000000..2c3f41cf6 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/pack_args.cpp @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +std::vector pack_args(const std::vector& args) +{ + std::vector kernargs; + for(auto&& arg : args) + { + std::size_t n = arg.size; + const auto* p = static_cast(arg.data); + // Insert padding + std::size_t padding = (arg.align - (kernargs.size() % arg.align)) % arg.align; + kernargs.insert(kernargs.end(), padding, 0); + kernargs.insert(kernargs.end(), p, p + n); + } + return kernargs; +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/perfdb.cpp b/docker/rocm/migraphx/targets/gpu/perfdb.cpp new file mode 100644 index 000000000..bdad925db --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/perfdb.cpp @@ -0,0 +1,133 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +namespace { + +std::string get_layout(const shape& s, std::string labels) +{ + auto result = labels; + auto p = find_permutation(s); + std::transform(p.begin(), p.end(), result.begin(), [&](auto i) { return labels[i]; }); + return "'" + result + "'"; +} + +std::string get_type(const shape& s) +{ + static const std::unordered_map m = { + {shape::float_type, "'FP32'"}, + {shape::half_type, "'FP16'"}, + {shape::double_type, "'FP64'"}, + {shape::int8_type, "'INT8'"}, + {shape::int32_type, "'INT32'"}, + }; + auto it = m.find(s.type()); + if(it == m.end()) + return "UNKNOWN"; + return it->second; +} + +std::string generate_miopen_config(const problem_params& pp) +{ + value v = pp.op.to_value(); + auto input = pp.inputs[0].lens(); + auto weights = pp.inputs[1].lens(); + auto padding = v["padding"].to_vector(); + auto stride = v["stride"].to_vector(); + auto dilation = v["dilation"].to_vector(); + if(padding.size() != stride.size()) + padding.erase(padding.begin() + padding.size() / 2, padding.end()); + return to_string_range({std::string{" C.in_channels="}, to_string(input[1]), + std::string{" AND C.in_h="}, to_string(input[2]), + std::string{" AND C.in_w="}, to_string(input[3]), + std::string{" AND C.fil_h="}, to_string(weights[2]), + std::string{" AND C.fil_w="}, to_string(weights[3]), + std::string{" AND C.out_channels="}, to_string(weights[0]), + std::string{" AND C.batchsize="}, to_string(input[0]), + std::string{" AND C.pad_h="}, to_string(padding[0]), + std::string{" AND C.pad_w="}, to_string(padding[2]), + std::string{" AND C.dilation_h="}, to_string(dilation[0]), + std::string{" AND C.dilation_w="}, to_string(dilation[1]), + std::string{" AND C.conv_stride_h="}, to_string(stride[0]), + std::string{" AND C.conv_stride_w="}, to_string(stride[1]), + std::string{" AND C.layout="}, get_layout(pp.inputs[0], "NCHW"), + std::string{" AND C.data_type="}, get_type(pp.inputs[0]), + std::string{" AND C.direction="}, std::string{"'F'"}}, + " "); +} + +auto query_miopen_db(const std::string& query) +{ + static std::mutex g_db_mutex; // NOLINT + const std::lock_guard lock(g_db_mutex); + + // TODO: Store db as a static variable + const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db"; + // Check if db file exists. + std::ifstream dbs(dbpath); + if(dbs.is_open()) + { + dbs.close(); + } + else + { + std::vector> empty; + return empty; + } + + auto db = sqlite::read(dbpath); + return db.execute(query); +} + +} // namespace + +std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops) +{ + std::string solver = xdlops ? "ConvMlirIgemmFwdXdlops" : "ConvMlirIgemmFwd"; + std::string query = "select P.* \ + from perf_db P, config C \ + where P.config = C.id AND \ + P.solver = '${solver}' AND \ + ${config}"; + + auto results = query_miopen_db( + interpolate_string(query, {{"config", generate_miopen_config(pp)}, {"solver", solver}})); + if(results.empty()) + return ""; + return results.front().at("params"); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/pooling.cpp b/docker/rocm/migraphx/targets/gpu/pooling.cpp new file mode 100644 index 000000000..a6f86f077 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/pooling.cpp @@ -0,0 +1,90 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_MIOPEN +shape miopen_pooling::compute_shape(const std::vector& inputs) const +{ + check_shapes{inputs, *this}.has(2).standard(); + std::vector pooling_input = {inputs.at(0)}; + check_shapes{pooling_input, *this}.max_ndims(5); + return op.normalize_compute_shape(pooling_input); +} + +inline void reshape_if_1d(shape& input) +{ + auto dims = input.lens(); + + if(dims.size() == 3) + { + std::vector new_dims = dims; + new_dims.insert(new_dims.begin() + 2, 1); + input = shape{input.type(), new_dims}; + } +} + +argument miopen_pooling::compute(context& ctx, + const shape& output_shape, + const std::vector& args) const +{ + shape x_shape = args[0].get_shape(); + shape y_shape = output_shape; + + reshape_if_1d(x_shape); + reshape_if_1d(y_shape); + + auto x_desc = make_tensor(x_shape); + auto y_desc = make_tensor(y_shape); + + float alpha = 1; + float beta = 0; + + miopenPoolingForward(ctx.get_stream().get_miopen(), + pd.get(), + &alpha, + x_desc.get(), + args[0].implicit(), + &beta, + y_desc.get(), + args[1].implicit(), + false, + nullptr, + 0); + + return args[1]; +} + +void miopen_pooling::finalize(context&, const shape&, const std::vector&) +{ + if(pd == nullptr) + pd = make_pooling(op); +} +#endif +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/prefuse_ops.cpp b/docker/rocm/migraphx/targets/gpu/prefuse_ops.cpp new file mode 100644 index 000000000..f8a8f8375 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/prefuse_ops.cpp @@ -0,0 +1,400 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL +#include +#endif +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_LAYERNORM_FUSION); + +namespace { + +template +struct layernorm_base +{ + float epsilon = 1e-12f; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.epsilon, "epsilon")); + } + shape compute_shape(std::vector inputs, std::vector mods) const + { + std::size_t nargs = N; + if(not mods.empty()) + { + auto* pm = mods.front(); + nargs += pm->get_parameter_names().size() - 1; + } + check_shapes{inputs, static_cast(*this)}.has(nargs); + auto s = inputs.front(); + auto t = s.type(); + if(not mods.empty()) + t = mods.front()->get_output_shapes().front().type(); + + // Scalar output if all inputs are scalar + if(inputs.front().elements() == 1 and + all_of(inputs, [](const auto& ss) { return ss.scalar(); })) + return inputs.front(); + auto l_s = shape::from_permutation( + t, s.lens(), find_permutation(std::vector(inputs.begin(), inputs.begin() + N))); + // just prelayernorm or preadd_layernorm + if(nargs <= N) + return l_s; + // else, layernorm + pointwise fusion, preserve layout of fused op + std::vector lp_s(inputs.begin() + N, inputs.end()); + lp_s.insert(lp_s.begin(), l_s); + return shape::from_permutation(t, s.lens(), find_permutation(lp_s)); + } +}; + +struct layernorm : layernorm_base +{ + + std::string name() const { return "gpu::prelayernorm"; } +}; +MIGRAPHX_REGISTER_OP(layernorm); + +struct add_layernorm : layernorm_base +{ + std::string name() const { return "gpu::preadd_layernorm"; } +}; +MIGRAPHX_REGISTER_OP(add_layernorm); + +struct find_layernorm +{ + auto matcher() const { return match::layernorm(); } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto x_ins = r.instructions["x"]; + float eps = 0; + if(contains(r.instructions, "eps")) + eps = r.instructions["eps"]->eval().at(); + + m.replace_instruction(ins, layernorm{eps}, x_ins); + } +}; + +struct find_add_layernorm +{ + auto matcher() const + { + return match::name("gpu::prelayernorm")( + match::args(match::name("add")(match::used_once()).bind("add"))); + } + + void apply(module& m, const match::matcher_result& r) const + { + auto ins = r.result; + auto add_ins = r.instructions["add"]; + auto op = any_cast(ins->get_operator()); + + m.replace_instruction(ins, add_layernorm{op.epsilon}, add_ins->inputs()); + } +}; + +struct pre_gemm_softmax_gemm : gemm_softmax_gemm +{ + std::string name() const { return "gpu::pre_gemm_softmax_gemm"; } +}; +MIGRAPHX_REGISTER_OP(pre_gemm_softmax_gemm); + +auto is_ck_gemm() +{ + return match::make_basic_pred_matcher([=](instruction_ref ins) { +#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL + if(not enabled(MIGRAPHX_ENABLE_CK{})) + return false; + if(ins->name() != "dot") + return false; + if(not pre_gemm_softmax_gemm::is_ck_supported_type(ins->get_shape().type())) + return false; 
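+        // All checks passed: CK is enabled and this is a dot instruction with a
+        // CK-supported element type.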
+ return true; +#else + (void)ins; + return false; +#endif + }); +} + +auto is_test_gemm(bool enable_attention) +{ + return match::make_basic_pred_matcher([=](instruction_ref ins) { + if(ins->name() != "dot") + return false; + return enable_attention; + }); +} + +auto is_bias_supported() +{ + return match::make_basic_pred_matcher([=](instruction_ref) { +#ifdef MIGRAPHX_USE_COMPOSABLEKERNEL + return not enabled(MIGRAPHX_ENABLE_CK{}); +#else + return true; +#endif + }); +} + +struct find_gemm_softmax_gemm +{ + bool enable_attention = false; + + auto matcher() const + { + auto gemm1 = match::skip(match::name("contiguous"))(match::name("dot")( + match::any_of(is_ck_gemm(), is_test_gemm(enable_attention)).bind("gemm1"))); + auto mul = match::name("mul")( + match::nargs(2), match::either_arg(0, 1)(match::is_constant().bind("scale"), gemm1)); + auto where = match::name("where")(match::arg(2)(match::is_constant().bind("select_const")), + match::arg(1)(mul), + match::arg(0)(match::any().bind("select_cond"))); + auto add = + match::name("add")(is_bias_supported(), + match::nargs(2), + match::either_arg(0, 1)(match::none_of(mul).bind("bias"), mul)); + auto softmax = match::name("softmax")(match::arg(0)(match::any_of(mul, add, gemm1, where))) + .bind("softmax"); + + return match::name("dot")( + match::any_of(is_ck_gemm(), is_test_gemm(enable_attention)).bind("gemm2"))( + match::arg(0)(softmax)); + } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + auto gemm2_ins = r.instructions["gemm2"]; + auto gemm1_ins = r.instructions["gemm1"]; + + float scale = 1.0; + if(contains(r.instructions, "scale")) + { + auto scale_lit = r.instructions["scale"]; + // CK only supports single-valued scale + scale_lit->eval().visit([&](const auto s) { + // CK only supports single-valued scale + if(not std::all_of( + s.begin() + 1, s.end(), [&](auto v) { return float_equal(v, s.front()); })) + return; + scale = s.front(); + }); + } + + auto inputs = gemm1_ins->inputs(); // A, B + if(contains(r.instructions, "select_cond")) + { + inputs.push_back(r.instructions["select_cond"]); + inputs.push_back(r.instructions["select_const"]); + } + if(contains(r.instructions, "bias")) + { + inputs.push_back(r.instructions["bias"]); + } + + inputs.push_back(gemm2_ins->inputs().back()); // B1 + + mpm.get_module().replace_instruction( + ins, pre_gemm_softmax_gemm{gemm2_ins->get_operator(), scale}, inputs); + } +}; + +struct gpu_compute_attention_probabilities : op::group_query_attention +{ + std::string name() const { return "gpu::compute_attention_probabilities"; } + + shape compute_shape(std::vector inputs) const + { + auto query_lens = inputs.front().lens(); + auto present_kv_seqlen = inputs.at(1).lens().at(2); + std::vector output_lens{ + query_lens.at(0), num_heads, query_lens.at(2), present_kv_seqlen}; + shape output_shape{inputs.front().type(), output_lens}; + return output_shape; + } +}; +MIGRAPHX_REGISTER_OP(gpu_compute_attention_probabilities); + +struct gpu_compute_attention_scores : op::group_query_attention +{ + std::string name() const { return "gpu::compute_attention_scores"; } + + shape compute_shape(std::vector inputs) const + { + auto query_lens = inputs.front().lens(); + std::size_t q_hidden_size = + (query_lens[1] * query_lens[3] * num_heads) / (num_heads + 2 * kv_num_heads); + std::vector output_lens{query_lens.at(0), query_lens.at(2), q_hidden_size}; + shape output_shape{inputs.front().type(), output_lens}; + return output_shape; + } +}; 
+MIGRAPHX_REGISTER_OP(gpu_compute_attention_scores); + +struct gpu_gqa_rotary_embedding : op::group_query_attention +{ + std::string name() const { return "gpu::gqa_rotary_embedding"; } + + shape compute_shape(std::vector inputs) const { return inputs.front(); } +}; +MIGRAPHX_REGISTER_OP(gpu_gqa_rotary_embedding); + +struct gpu_gqa_softmax : op::group_query_attention +{ + std::string name() const { return "gpu::gqa_softmax"; } + + shape compute_shape(std::vector inputs) const { return inputs.at(2); } +}; +MIGRAPHX_REGISTER_OP(gpu_gqa_softmax); + +struct gpu_concat_past_present : op::group_query_attention +{ + std::string name() const { return "gpu::concat_past_present"; } + + shape compute_shape(std::vector inputs) const { return inputs[0]; } +}; +MIGRAPHX_REGISTER_OP(gpu_concat_past_present); + +struct find_group_query_attention +{ + auto matcher() const { return match::name("group_query_attention"); } + + void apply(module_pass_manager& mpm, const match::matcher_result& r) const + { + auto ins = r.result; + auto inputs = ins->inputs(); + auto v = ins->get_operator().to_value(); + + auto num_heads = v.at("num_heads").to(); + auto kv_num_heads = v.at("kv_num_heads").to(); + auto do_rotary = v.at("do_rotary").to(); + auto local_window_size = v.at("local_window_size").to(); + auto rotary_interleaved = v.at("rotary_interleaved").to(); + auto scale = v.at("scale").to(); + + auto q_shape = inputs[0]->get_shape(); + auto q_lens = q_shape.lens(); + const std::size_t batch_size = q_lens[0]; + const std::size_t sequence_length = q_lens[1]; + std::size_t q_hidden_size = q_lens[2]; + std::size_t head_size = q_hidden_size / (num_heads + 2 * kv_num_heads); + + std::vector bsnh{ + batch_size, sequence_length, num_heads + 2 * kv_num_heads, head_size}; + + auto transposed_qkv = mpm.get_module().insert_instruction( + ins, make_op("reshape", {{"dims", bsnh}}), inputs.at(0)); + + transposed_qkv = mpm.get_module().insert_instruction( + ins, make_op("transpose", {{"permutation", {0, 2, 1, 3}}}), transposed_qkv); + + auto rotary_qkv = transposed_qkv; + if(do_rotary) + { + std::vector rotary_inputs{ + transposed_qkv, inputs.at(5), inputs.at(7), inputs.at(8)}; + rotary_qkv = + mpm.get_module().insert_instruction(ins, + gpu_gqa_rotary_embedding{do_rotary, + kv_num_heads, + local_window_size, + num_heads, + rotary_interleaved, + scale}, + rotary_inputs); + } + + auto pres_k = inputs.at(3); + auto pres_v = inputs.at(4); + std::vector concat_inputs{rotary_qkv, pres_k, pres_v, inputs.at(5)}; + + auto concat = mpm.get_module().insert_instruction( + ins, + gpu_concat_past_present{ + do_rotary, kv_num_heads, local_window_size, num_heads, rotary_interleaved, scale}, + concat_inputs); + auto id = + mpm.get_module().insert_instruction(ins, make_op("identity"), concat, pres_k, pres_v); + + std::vector attn_probs_inputs{id, pres_k, pres_v, inputs.at(5)}; + auto attn_probs = mpm.get_module().insert_instruction( + ins, + gpu_compute_attention_probabilities{ + do_rotary, kv_num_heads, local_window_size, num_heads, rotary_interleaved, scale}, + attn_probs_inputs); + + std::vector softmax_inputs{rotary_qkv, pres_k, attn_probs, inputs.at(5)}; + auto softmax = mpm.get_module().insert_instruction( + ins, + gpu_gqa_softmax{ + do_rotary, kv_num_heads, local_window_size, num_heads, rotary_interleaved, scale}, + softmax_inputs); + std::vector new_inputs{rotary_qkv, pres_k, pres_v, inputs.at(5), softmax}; + + auto get_tuple_elm_0 = std::next(ins); + auto get_tuple_elm_1 = std::next(get_tuple_elm_0); + auto get_tuple_elm_2 = 
std::next(get_tuple_elm_1); + mpm.get_module().replace_instruction(get_tuple_elm_2, pres_v); + mpm.get_module().replace_instruction(get_tuple_elm_1, pres_k); + mpm.get_module().replace_instruction( + get_tuple_elm_0, + gpu_compute_attention_scores{ + do_rotary, kv_num_heads, local_window_size, num_heads, rotary_interleaved, scale}, + new_inputs); + } +}; + +} // namespace + +void prefuse_ops::apply(module_pass_manager& mpm) const +{ + if(not enabled(MIGRAPHX_DISABLE_LAYERNORM_FUSION{})) + { + match::find_matches(mpm.get_module(), find_layernorm{}); + mpm.run_pass(dead_code_elimination{}); + match::find_matches(mpm.get_module(), find_add_layernorm{}); + } + match::find_matches(mpm, find_gemm_softmax_gemm{enable_attention}); + match::find_matches(mpm, find_group_query_attention{}); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/prepare_reduce.cpp b/docker/rocm/migraphx/targets/gpu/prepare_reduce.cpp new file mode 100644 index 000000000..bd5abd42b --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/prepare_reduce.cpp @@ -0,0 +1,122 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct parallel_reduce +{ + operation op; + + template + static auto reflect(Self& self, F f) + { + return pack(f(self.op, "op")); + } + + std::string name() const { return "gpu::parallel_reduce"; } + + shape compute_shape(const std::vector& inputs) const + { + std::vector result; + std::transform(inputs.begin(), inputs.end(), std::back_inserter(result), [&](auto input) { + return op.compute_shape({input}); + }); + return shape{result}; + } +}; +MIGRAPHX_REGISTER_OP(parallel_reduce); + +namespace { + +std::vector find_reduce(module& m) +{ + std::vector result; + auto im = iterator_for(m); + std::copy_if(im.begin(), im.end(), std::back_inserter(result), [](auto ins) { + if(contains({"gpu::parallel_reduce", "reduce_mean"}, ins->name())) + return false; + return contains(ins->name(), "reduce"); + }); + return result; +} + +std::vector find_parallel_reduce(const std::vector& r) +{ + std::vector result; + auto ir = iterator_for(r); + transform_if( + ir.begin(), + ir.end(), + std::back_inserter(result), + [&](auto x) { + return std::none_of( + std::next(x), r.end(), [&](auto reduce) { return reaches(*x, reduce); }); + }, + [](auto x) { return *x; }); + return result; +} + +void fuse_reductions(module& m) +{ + auto rs = find_parallel_reduce(find_reduce(m)); + if(rs.size() < 2) + return; + // Only handle the same reduction operator for now + if(std::any_of(std::next(rs.begin()), rs.end(), [&](auto r) { + return rs.front()->name() != r->name(); + })) + return; + auto last = rs.front(); + auto op = last->get_operator(); + std::vector inputs; + std::transform(rs.begin(), rs.end(), std::back_inserter(inputs), [&](auto r) { + return r->inputs().front(); + }); + auto pr = m.insert_instruction(last, parallel_reduce{op}, inputs); + int i = 0; + for(auto r : rs) + { + m.replace_instruction(r, make_op("get_tuple_elem", {{"index", i}}), pr); + i++; + } + m.sort(); +} + +} // namespace + +void prepare_reduce::apply(module& m) const { fuse_reductions(m); } + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/problem_cache.cpp b/docker/rocm/migraphx/targets/gpu/problem_cache.cpp new file mode 100644 index 000000000..8eb25f3b8 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/problem_cache.cpp @@ -0,0 +1,90 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_PROBLEM_CACHE) + +void problem_cache::load() +{ + auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); + if(pc_path.empty()) + return; + if(not fs::exists(pc_path)) + { + std::cout << "Problem cache not found. Creating new file.\n"; + return; + } + from_value(from_json_string(read_string(pc_path)), cache); +} +void problem_cache::save() const +{ + auto pc_path = string_value_of(MIGRAPHX_PROBLEM_CACHE{}); + if(pc_path.empty()) + return; + write_string(pc_path, to_pretty_json_string(to_value(cache))); +} + +static value create_key(const std::string& name, const value& problem) +{ + return {{"name", name}, {"problem", problem}}; +} + +bool problem_cache::has(const std::string& name, const value& problem) const +{ + return contains(cache, create_key(name, problem)); +} + +void problem_cache::insert(const std::string& name, const value& problem, const value& solution) +{ + assert(not solution.is_null()); + cache[create_key(name, problem)] = solution; +} + +void problem_cache::mark(const std::string& name, const value& problem) +{ + cache.insert(std::make_pair(create_key(name, problem), value{})); +} + +optional problem_cache::get(const std::string& name, const value& problem) const +{ + auto it = cache.find(create_key(name, problem)); + if(it == cache.end()) + return nullopt; + return it->second; +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/reverse.cpp b/docker/rocm/migraphx/targets/gpu/reverse.cpp new file mode 100644 index 000000000..ea70e3fbb --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/reverse.cpp @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_reverse::compute_shape(std::vector inputs) const +{ + inputs.pop_back(); + return op.normalize_compute_shape(inputs); +} + +argument hip_reverse::compute(context& ctx, const shape&, const std::vector& args) const +{ + return device::reverse(ctx.get_stream().get(), args.back(), args[0], op.axes); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/rnn_variable_seq_lens.cpp b/docker/rocm/migraphx/targets/gpu/rnn_variable_seq_lens.cpp new file mode 100644 index 000000000..e251716f9 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/rnn_variable_seq_lens.cpp @@ -0,0 +1,84 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_rnn_var_sl_shift_output::compute_shape(std::vector inputs) const +{ + inputs.pop_back(); + return op.compute_shape(inputs); +} + +argument hip_rnn_var_sl_shift_output::compute(context& ctx, + const shape&, + const std::vector& args) const +{ + device::rnn_var_sl_shift_output(ctx.get_stream().get(), + args.back(), + args.at(0), + args.at(1), + (op.direction == op::rnn_direction::reverse)); + return args.back(); +} + +shape hip_rnn_var_sl_shift_sequence::compute_shape(std::vector inputs) const +{ + inputs.pop_back(); + return op.compute_shape(inputs); +} + +argument hip_rnn_var_sl_shift_sequence::compute(context& ctx, + const shape&, + const std::vector& args) const +{ + device::rnn_var_sl_shift_sequence(ctx.get_stream().get(), args.back(), args.at(0), args.at(1)); + return args.back(); +} + +shape hip_rnn_var_sl_last_output::compute_shape(std::vector inputs) const +{ + inputs.pop_back(); + return op.compute_shape(inputs); +} + +argument hip_rnn_var_sl_last_output::compute(context& ctx, + const shape&, + const std::vector& args) const +{ + device::rnn_var_sl_last_output(ctx.get_stream().get(), + args.back(), + args.at(0), + args.at(1), + (op.direction == op::rnn_direction::reverse)); + return args.back(); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/rocblas.cpp b/docker/rocm/migraphx/targets/gpu/rocblas.cpp new file mode 100644 index 000000000..8c06ad51f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/rocblas.cpp @@ -0,0 +1,72 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { +#if MIGRAPHX_USE_ROCBLAS +rocblas_handle_ptr create_rocblas_handle_ptr() +{ + // add a call to rocblas_initialize() to workaround a rocblas bug SWDEV-438929 + rocblas_initialize(); + rocblas_handle handle; + rocblas_create_handle(&handle); + return rocblas_handle_ptr{handle}; +} + +rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s) +{ + rocblas_handle_ptr rb = create_rocblas_handle_ptr(); + rocblas_set_stream(rb.get(), s); + return rb; +} +#endif +bool get_compute_fp32_flag() +{ + const auto device_name = trim(split_string(get_device_name(), ':').front()); + return (starts_with(device_name, "gfx9") and device_name >= "gfx908"); +} + +bool rocblas_fp8_available() +{ +#if MIGRAPHX_USE_ROCBLAS +#ifndef MIGRAPHX_USE_ROCBLAS_FP8_API + return false; +#else + return gfx_has_fp8fnuz_intrinsics(); +#endif +#else + return false; +#endif +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/schedule_model.cpp b/docker/rocm/migraphx/targets/gpu/schedule_model.cpp new file mode 100644 index 000000000..aa59a693f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/schedule_model.cpp @@ -0,0 +1,156 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +struct record_event +{ + std::size_t event = 0; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.event, "event")); + } + std::string name() const { return "gpu::record_event"; } + shape compute_shape(const std::vector&) const { return {}; } + + argument compute(context& ctx, const shape&, const std::vector&) const + { + ctx.get_stream().record(ctx.get_event(event)); + return {}; + } + + void finalize(context& ctx, const shape&, const std::vector&) const + { + ctx.create_events(event); + } +}; + +struct wait_event +{ + std::size_t event = 0; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.event, "event")); + } + std::string name() const { return "gpu::wait_event"; } + shape compute_shape(const std::vector&) const { return {}; } + + argument compute(context& ctx, const shape&, const std::vector&) const + { + ctx.get_stream().wait(ctx.get_event(event)); + return {}; + } +}; + +struct set_stream +{ + std::size_t stream = 0; + template + static auto reflect(Self& self, F f) + { + return pack(f(self.stream, "stream")); + } + std::string name() const { return "gpu::set_stream"; } + shape compute_shape(const std::vector&) const { return {}; } + + argument compute(context& ctx, const shape&, const std::vector&) const + { + ctx.set_stream(stream); + return {}; + } + void finalize(context& ctx, const shape&, const std::vector&) const + { + ctx.set_stream(stream); + } +}; + +MIGRAPHX_REGISTER_OP(record_event) +MIGRAPHX_REGISTER_OP(wait_event) +MIGRAPHX_REGISTER_OP(set_stream) + +std::size_t schedule_model::concurrency() const { return streams; } +void schedule_model::sched(module& m, instruction_ref ins, std::size_t n) const +{ + auto last_stream = std::find_if(std::make_reverse_iterator(ins), + std::make_reverse_iterator(m.begin()), + [&](auto&& i) { return i.name() == "gpu::set_stream"; }); + if(last_stream != std::make_reverse_iterator(m.begin())) + { + auto&& op = any_cast(last_stream->get_operator()); + // If the same stream was set earlier then skip + if(op.stream == n) + return; + } + m.insert_instruction(ins, set_stream{n}); +} + +void schedule_model::wait(module& m, instruction_ref ins, std::size_t wait_id) const +{ + m.insert_instruction(ins, wait_event{wait_id}); +} +void schedule_model::record(module& m, instruction_ref ins, std::size_t wait_id) const +{ + m.insert_instruction(std::next(ins), record_event{wait_id}); +} + +static std::unordered_map create_weight_map() +{ + return {{"hip::load_literal", 0}, + {"hip::hip_allocate_memory", 0}, + {"hip::hip_load_memory", 0}, + {"hip::allocate", 0}, + {"gpu::convolution", 8}, + {"gpu::conv_bias_relu", 8}, + {"gpu::pooling", 4}, + {"gpu::gemm", 4}}; +} + +static const std::unordered_map& weight_map() +{ + static const std::unordered_map m = create_weight_map(); + return m; +} + +std::size_t schedule_model::weight(const operation& op) const +{ + if(weight_map().count(op.name()) == 0) + { + return 2; + } + return weight_map().at(op.name()); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/sync_device.cpp b/docker/rocm/migraphx/targets/gpu/sync_device.cpp new file mode 100644 index 000000000..4e8a176eb --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/sync_device.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, 
Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +void sync_device::apply(module& m) const +{ + auto last = std::prev(m.end()); + if(last->name() == "@return") + { + auto inputs = last->inputs(); + if(std::any_of(inputs.begin(), inputs.end(), [](auto i) { + return (i->name() == "hip::copy_from_gpu"); + })) + { + auto sync_in = m.insert_instruction(last, make_op("hip::sync_stream"), inputs); + if(not inputs.empty()) + { + m.replace_instruction(inputs.front(), sync_in); + } + } + } +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/target.cpp b/docker/rocm/migraphx/targets/gpu/target.cpp new file mode 100644 index 000000000..bf2fc3e86 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/target.cpp @@ -0,0 +1,280 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
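The sync_device pass above appends a hip::sync_stream before @return whenever one of the returned values was produced by hip::copy_from_gpu: a device-to-host copy enqueued on a stream is not safe to read on the host until that stream has been synchronized. A standalone HIP sketch of the hazard this pass guards against (error checking omitted for brevity):

#include <hip/hip_runtime.h>
#include <cstdio>
#include <vector>

int main()
{
    hipStream_t stream;
    hipStreamCreate(&stream);

    std::vector<float> host_in(1024, 1.0f), host_out(1024, 0.0f);
    float* device_buf = nullptr;
    hipMalloc(reinterpret_cast<void**>(&device_buf), host_in.size() * sizeof(float));

    hipMemcpyAsync(device_buf, host_in.data(), host_in.size() * sizeof(float),
                   hipMemcpyHostToDevice, stream);
    // ... kernels writing device_buf would be enqueued on the same stream here ...
    hipMemcpyAsync(host_out.data(), device_buf, host_out.size() * sizeof(float),
                   hipMemcpyDeviceToHost, stream);

    // Without this synchronization the host could read host_out before the
    // asynchronous copy has finished; this is the role of hip::sync_stream.
    hipStreamSynchronize(stream);

    std::printf("%f\n", host_out.front());
    hipFree(device_buf);
    hipStreamDestroy(stream);
}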
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS) +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC) +#ifndef _WIN32 +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK) +#endif +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPBLASLT_GEMM) + +std::vector target::get_passes(migraphx::context& gctx, const compile_options& options) const +{ + auto& ctx = any_cast(gctx); + ctx.set_exhaustive_tune_flag(options.exhaustive_tune); + ctx.load_problem_cache(); + std::set unsupported_types(shape::types().begin(), shape::types().end()); + unsupported_types.erase(shape::type_t::float_type); + unsupported_types.erase(shape::type_t::fp8e4m3fnuz_type); + unsupported_types.erase(shape::type_t::fp8e5m2fnuz_type); + unsupported_types.erase(shape::type_t::fp8e4m3fn_type); + unsupported_types.erase(shape::type_t::fp8e5m2_type); + unsupported_types.erase(shape::type_t::half_type); + unsupported_types.erase(shape::type_t::bool_type); + unsupported_types.erase(shape::type_t::int8_type); + unsupported_types.erase(shape::type_t::uint8_type); + unsupported_types.erase(shape::type_t::int32_type); + unsupported_types.erase(shape::type_t::tuple_type); + unsupported_types.erase(shape::type_t::bf16_type); + + // whiltelist supported Ops for the FP8 types + // different between fp8e4m3fnuz and OCP types because rocBLAS only has + // support for fp8e4m3fnuz + std::set unsupported_fp8e4m3fnuz_ops = {}; + if(not enabled(MIGRAPHX_ENABLE_HIPBLASLT_GEMM{}) and not gpu::rocblas_fp8_available()) + { + unsupported_fp8e4m3fnuz_ops.insert("dot"); + unsupported_fp8e4m3fnuz_ops.insert("quant_dot"); + } +#if MIGRAPHX_USE_MIOPEN + // MIOpen doesn't have support for fp8 pooling yet. 
+ unsupported_fp8e4m3fnuz_ops.insert("pooling"); +#endif + if(not gpu::gfx_has_fp8fnuz_intrinsics()) + { + unsupported_fp8e4m3fnuz_ops.insert("convolution"); + unsupported_fp8e4m3fnuz_ops.insert("quant_convolution"); + } + // add all device kernels + unsupported_fp8e4m3fnuz_ops.insert("logsoftmax"); + unsupported_fp8e4m3fnuz_ops.insert("nonzero"); + unsupported_fp8e4m3fnuz_ops.insert("prefix_scan_sum"); + unsupported_fp8e4m3fnuz_ops.insert("scatter_none"); + unsupported_fp8e4m3fnuz_ops.insert("topk"); + unsupported_fp8e4m3fnuz_ops.insert("rnn_var_sl_shift_output"); + unsupported_fp8e4m3fnuz_ops.insert("multinomial"); + unsupported_fp8e4m3fnuz_ops.insert("argmax"); + unsupported_fp8e4m3fnuz_ops.insert("argmin"); + + std::set unsupported_fp8e5m2fnuz_ops = unsupported_fp8e4m3fnuz_ops; + // disable gemm for fp8e5m2fnuz if rocBLAS is being used + if(not enabled(MIGRAPHX_ENABLE_HIPBLASLT_GEMM{})) + { + unsupported_fp8e5m2fnuz_ops.insert("dot"); + unsupported_fp8e5m2fnuz_ops.insert("quant_dot"); + } + + std::set unsupported_fp8ocp_ops = {}; + // TODO: remove this when the flag is removed + if(not enabled(MIGRAPHX_ENABLE_HIPBLASLT_GEMM{})) + { + unsupported_fp8ocp_ops.insert("dot"); + unsupported_fp8ocp_ops.insert("quant_dot"); + } +#if MIGRAPHX_USE_MIOPEN + // MIOpen doesn't have support for fp8 pooling yet. + unsupported_fp8ocp_ops.insert("pooling"); +#endif + if(not gpu::gfx_has_fp8ocp_intrinsics()) + { + unsupported_fp8ocp_ops.insert("convolution"); + unsupported_fp8ocp_ops.insert("quant_convolution"); + unsupported_fp8ocp_ops.insert("dot"); + unsupported_fp8ocp_ops.insert("quant_dot"); + } + // add all device kernels + unsupported_fp8ocp_ops.insert("logsoftmax"); + unsupported_fp8ocp_ops.insert("nonzero"); + unsupported_fp8ocp_ops.insert("prefix_scan_sum"); + unsupported_fp8ocp_ops.insert("scatter_none"); + unsupported_fp8ocp_ops.insert("topk"); + unsupported_fp8ocp_ops.insert("rnn_var_sl_shift_output"); + unsupported_fp8ocp_ops.insert("multinomial"); + unsupported_fp8ocp_ops.insert("argmax"); + unsupported_fp8ocp_ops.insert("argmin"); + + // clang-format off + return + { + split_single_dyn_dim{}, + dead_code_elimination{}, + simplify_dyn_ops{}, + dead_code_elimination{}, + normalize_ops{}, + dead_code_elimination{}, + eliminate_identity{}, + dead_code_elimination{}, + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), fp8_ocp_to_fnuz{}), + enable_pass(not gpu::gfx_has_fp8ocp_intrinsics() and gpu::gfx_has_fp8fnuz_intrinsics(), dead_code_elimination{}), + simplify_qdq{}, + enable_pass(not mlir_enabled(), rewrite_quantization{}), + dead_code_elimination{}, + // workaround for rocBLAS unsupported error when using uint8 in quant_dot, quant_convolution & pooling + eliminate_data_type{{migraphx::shape::uint8_type}, shape::float_type, {"quant_convolution", "quant_dot", "pooling"}}, + eliminate_data_type{unsupported_types, shape::type_t::float_type}, + simplify_reshapes{}, + eliminate_identity{}, + eliminate_pad{}, + dead_code_elimination{}, + insert_pad{{"convolution"}}, + dead_code_elimination{}, + rewrite_rnn{}, + dead_code_elimination{}, + inline_module{}, + rewrite_pooling{}, + dead_code_elimination{}, + rewrite_gelu{options.fast_math}, + optimize_module{}, + layout_convolution{.channels_last = enabled(MIGRAPHX_ENABLE_NHWC{})}, + dead_code_elimination{}, + prefuse_ops{}, + dead_code_elimination{}, + eliminate_data_type{{migraphx::shape::fp8e4m3fnuz_type}, shape::float_type, unsupported_fp8e4m3fnuz_ops}, + 
eliminate_data_type{{migraphx::shape::fp8e5m2fnuz_type}, shape::float_type, unsupported_fp8e5m2fnuz_ops}, + eliminate_data_type{{migraphx::shape::fp8e4m3fn_type, migraphx::shape::fp8e5m2_type}, shape::float_type, unsupported_fp8ocp_ops}, + dead_code_elimination{}, + rewrite_reduce{}, + rewrite_low_precision{}, + dead_code_elimination{}, + optimize_module{}, + fuse_pointwise_reduce{}, + dead_code_elimination{}, +#ifndef _WIN32 + enable_pass(enabled(MIGRAPHX_ENABLE_CK{}), fuse_ck{}), +#endif + dead_code_elimination{}, + enable_pass(mlir_enabled(), fuse_mlir{&ctx}), + dead_code_elimination{}, + fuse_concat{}, + dead_code_elimination{}, + auto_contiguous{}, + dead_code_elimination{}, + lowering{&ctx, options.offload_copy}, + eliminate_contiguous{"gpu::contiguous"}, + dead_code_elimination{}, + eliminate_concat{concat_gpu_optimization{}}, + dead_code_elimination{}, +#if MIGRAPHX_USE_MIOPEN + compile_miopen{&gctx}, + dead_code_elimination{}, +#endif + fuse_ops{&ctx, options.fast_math}, + dead_code_elimination{}, +#if MIGRAPHX_USE_HIPBLASLT + compile_hipblaslt{&gctx}, + dead_code_elimination{}, +#endif + replace_allocate{gpu_allocation_model{}, options.offload_copy}, + dead_code_elimination{}, + adjust_allocation{gpu_allocation_model{}}, + dead_code_elimination{}, + compile_ops{&ctx, options.exhaustive_tune}, + dead_code_elimination{}, + promote_literals{}, + dead_code_elimination{}, + write_literals{&ctx}, + schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})}, + memory_coloring{"hip::allocate"}, + sync_device{}, + preallocate_param{"scratch", gpu_allocation_model{}}, + dead_code_elimination{}, + eliminate_allocation{"hip::allocate"}, + check_context{}, + normalize_ops{}, + dead_code_elimination{}, + eliminate_identity{} + }; + // clang-format on +} + +std::string target::name() const { return "gpu"; } + +migraphx::context target::get_context() const { return context(gpu::get_device_id()); } + +argument target::copy_to(const argument& arg) const { return gpu::to_gpu(arg); } + +argument target::copy_from(const argument& arg) const { return gpu::from_gpu(arg); } + +argument target::allocate(const shape& s) const { return gpu::allocate_gpu(s); } + +MIGRAPHX_REGISTER_TARGET(target); + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/time_op.cpp b/docker/rocm/migraphx/targets/gpu/time_op.cpp new file mode 100644 index 000000000..3b37cfb1f --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/time_op.cpp @@ -0,0 +1,101 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +std::vector generate_arguments(const std::vector& shapes, + unsigned long seed = 0, + random_mode rm = random_mode::random) +{ + std::vector args; + std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](const auto& s) { + return to_gpu(generate_argument(s, seed++, rm)); + }); + return args; +} + +template +double time_loop(migraphx::gpu::context& gctx, int n, F f) +{ + auto start = context::create_event_for_timing(); + auto stop = context::create_event_for_timing(); + f(); + gctx.get_stream().record(start.get()); + for(auto i : range(n)) + { + (void)i; + f(); + } + gctx.get_stream().record(stop.get()); + gctx.finish(); + return context::get_elapsed_ms(start.get(), stop.get()) / n; +} + +double time_op(const context& ictx, operation op, const std::vector& inputs, int n) +{ + // TODO: Use std::ref + migraphx::context ctx = ictx; + auto& gctx = any_cast(ctx); + auto output = op.compute_shape(inputs); + op.finalize(ctx, output, inputs); + auto args = generate_arguments(inputs); + auto run = [&] { op.compute(ctx, output, args); }; + return time_loop(gctx, n, run); +} + +double time_op(const context& ictx, operation op, int n) +{ + auto inputs = any_cast(op).expected_inputs; + return time_op(ictx, op, inputs, n); +} + +double time_program(const context& ictx, program p, int n) +{ + std::vector ctx_vec = {ictx}; + auto& gctx = any_cast(ctx_vec.front()); + auto* mm = p.get_main_module(); + mm->finalize(ctx_vec); + auto in_shapes = p.get_parameter_shapes(); + std::unordered_map param_map; + unsigned long seed = 0; + for(const auto& [name, shape] : in_shapes) + { + param_map[name] = to_gpu(generate_argument(shape, seed++, random_mode::random)); + } + auto run = [&] { p.eval_with_context(ctx_vec, param_map); }; + return time_loop(gctx, n, run); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/topk.cpp b/docker/rocm/migraphx/targets/gpu/topk.cpp new file mode 100644 index 000000000..2e799c650 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/topk.cpp @@ -0,0 +1,56 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
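time_loop above amortizes per-launch overhead by recording a single event pair around n repeated invocations (after one warm-up call) and dividing the elapsed time by n. A standalone HIP sketch of the same pattern; it assumes that whatever f() enqueues targets the given stream:

#include <hip/hip_runtime.h>

// Average the per-iteration time of a callable by bracketing n launches
// with one pair of HIP events, mirroring time_loop above.
template <class F>
double time_ms(hipStream_t stream, int n, F f)
{
    hipEvent_t start;
    hipEvent_t stop;
    hipEventCreate(&start);
    hipEventCreate(&stop);

    f(); // warm-up run, excluded from the measurement
    hipEventRecord(start, stream);
    for(int i = 0; i < n; ++i)
        f();
    hipEventRecord(stop, stream);
    hipEventSynchronize(stop);

    float ms = 0.0f;
    hipEventElapsedTime(&ms, start, stop);
    hipEventDestroy(start);
    hipEventDestroy(stop);
    return ms / n;
}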
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +shape hip_topk::compute_shape(std::vector inputs) const +{ + return op.normalize_compute_shape({inputs.front()}); +} + +argument hip_topk::compute(context& ctx, const shape&, const std::vector& args) const +{ + auto outputs = args.back().get_sub_objects(); + return op.largest ? device::topk_largest(ctx.get_stream().get(), + outputs.front(), + outputs.back(), + args[0], + op.k, + op.axis) + : device::topk_smallest(ctx.get_stream().get(), + outputs.front(), + outputs.back(), + args[0], + op.k, + op.axis); +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/gpu/write_literals.cpp b/docker/rocm/migraphx/targets/gpu/write_literals.cpp new file mode 100644 index 000000000..cbc776737 --- /dev/null +++ b/docker/rocm/migraphx/targets/gpu/write_literals.cpp @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
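hip_topk above writes into the two sub-objects of a tuple-shaped output: the selected values and their indices. As a host-side reference for what the device kernel computes along the chosen axis, here is a 1-D sketch using only the standard library (it assumes k does not exceed the input size):

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Reference semantics of a 1-D top-k (largest): returns {values, indices},
// matching the two sub-objects of the tuple output produced by hip_topk.
std::pair<std::vector<float>, std::vector<int>>
topk_largest(const std::vector<float>& x, std::size_t k)
{
    std::vector<int> idx(x.size());
    std::iota(idx.begin(), idx.end(), 0);
    std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                      [&](int a, int b) { return x[a] > x[b]; });
    idx.resize(k);
    std::vector<float> vals(k);
    std::transform(idx.begin(), idx.end(), vals.begin(), [&](int i) { return x[i]; });
    return {vals, idx};
}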
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace gpu { + +MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_COPY_LITERALS) + +void write_literals::apply(module& m) const +{ + assert(ctx != nullptr); + std::size_t n = 0; + for(auto ins : iterator_for(m)) + { + if(ins->name() == "@literal") + { + if(enabled(MIGRAPHX_COPY_LITERALS{})) + { + literal l = ins->get_literal(); + auto pre = m.add_literal(l); + auto alloc = m.insert_instruction(std::next(pre), hip_allocate{l.get_shape()}); + m.replace_instruction(ins, hip_copy_to_gpu{}, pre, alloc); + } + else + { + std::string id = m.name() + ":@literal:" + std::to_string(n); + m.replace_instruction(ins, hip_copy_literal{ins->get_literal(), id}); + n++; + } + } + } +} + +} // namespace gpu +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/ref/CMakeLists.txt b/docker/rocm/migraphx/targets/ref/CMakeLists.txt new file mode 100644 index 000000000..d4b3e63c7 --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/CMakeLists.txt @@ -0,0 +1,44 @@ +##################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +##################################################################################### + +add_library(migraphx_ref + target.cpp + lowering.cpp +) +set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref) +rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION}) + +rocm_clang_tidy_check(migraphx_ref) +target_link_libraries(migraphx_ref PRIVATE Threads::Threads) +target_link_libraries(migraphx_ref PUBLIC migraphx) + +migraphx_generate_export_header(migraphx_ref) + +rocm_install_targets( + PRIVATE + TARGETS migraphx_ref + INCLUDE + ${CMAKE_CURRENT_SOURCE_DIR}/include +) + diff --git a/docker/rocm/migraphx/targets/ref/include/migraphx/ref/context.hpp b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/context.hpp new file mode 100644 index 000000000..8c2cdfe9d --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/context.hpp @@ -0,0 +1,43 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP +#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace ref { + +struct context +{ + void finish() const {} +}; + +} // namespace ref +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/ref/include/migraphx/ref/lowering.hpp b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/lowering.hpp new file mode 100644 index 000000000..a775fed15 --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/lowering.hpp @@ -0,0 +1,44 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP +#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace ref { + +struct MIGRAPHX_REF_EXPORT lowering +{ + std::string name() const { return "ref::lowering"; } + void apply(module& m) const; +}; + +} // namespace ref +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/ref/include/migraphx/ref/target.hpp b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/target.hpp new file mode 100644 index 000000000..b31b7f9d1 --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/include/migraphx/ref/target.hpp @@ -0,0 +1,53 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP +#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +struct pass; +namespace ref { + +struct MIGRAPHX_REF_EXPORT target +{ + std::string name() const; + std::vector get_passes(migraphx::context& ctx, const compile_options&) const; + migraphx::context get_context() const { return context{}; } + + argument copy_to(const argument& arg) const { return arg; } + argument copy_from(const argument& arg) const { return arg; } + argument allocate(const shape& s) const; +}; + +} // namespace ref +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/targets/ref/lowering.cpp b/docker/rocm/migraphx/targets/ref/lowering.cpp new file mode 100644 index 000000000..a0b6b4bd0 --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/lowering.cpp @@ -0,0 +1,504 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace ref { + +template +T zero(const T&) +{ + return T(0); +} + +template +typename std::conditional_t{}, std::make_signed, std::enable_if>:: + type + make_signed(T x) +{ + return x; +} + +struct ref_lrn +{ + op::lrn op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "ref::lrn"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + argument compute(context&, shape output_shape, std::vector args) const + { + argument result{output_shape}; + visit_all(result, args[0])([&](auto output, auto input) { + int n_batch = output_shape.lens()[0]; + int channels = output_shape.lens()[1]; + int height = output_shape.lens()[2]; + int width = output_shape.lens()[3]; + float alphaoverarea = op.alpha / float(op.size); + int radius_lower = (op.size - 1) / 2; + int radius_upper = op.size / 2 + 1; + + par_dfor(n_batch, height, width)([&](int b, int h, int w) { + float scale = 0; + dfor(channels)([&](int c) { + auto start = (c - radius_lower) < 0 ? 0 : (c - radius_lower); + auto end = (c + radius_upper) > channels ? channels : (c + radius_upper); + for(auto k = start; k < end; ++k) + { + scale += std::pow(input(b, k, h, w), 2); + } + scale *= alphaoverarea; + scale += op.bias; + scale = std::pow(scale, -op.beta); + output(b, c, h, w) = input(b, c, h, w) * scale; + }); + }); + }); + return result; + } +}; +MIGRAPHX_REGISTER_OP(ref_lrn) + +template +void visit_quantize_impl(V&& v, T&& x, Ts&&... xs) +{ + x.visit([&](auto y) { visit_all(xs...)([&](auto... ys) { v(y, ys...); }); }); +} + +template +auto visit_quantize(T&& x, Ts&&... 
xs) +{ + return [&](auto v) { + // Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70100 + visit_quantize_impl(v, x, xs...); + }; +} + +struct ref_im2col +{ + op::im2col op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + static std::string name() { return "ref::im2col"; } + shape compute_shape(const std::vector& inputs) const + { + return op.normalize_compute_shape(inputs); + } + + argument compute(context&, const shape& output_shape, std::vector args) const + { + argument result{output_shape}; + auto input_shape = args[0].get_shape(); + auto weights_shape = args[1].get_shape(); + visit_all(result, args[0])([&](auto col, auto input) { + const std::size_t& height = input_shape.lens()[2]; + const std::size_t& width = input_shape.lens()[3]; + const std::size_t& channels = weights_shape.lens()[1]; + const std::size_t& kernel_h = weights_shape.lens()[2]; + const std::size_t& kernel_w = weights_shape.lens()[3]; + const std::size_t& pad_h = op.padding[0]; + const std::size_t& pad_w = op.padding[1]; + const std::size_t& stride_h = op.stride[0]; + const std::size_t& stride_w = op.stride[1]; + + long kdiv2_h = long(kernel_h) / 2; + long kdiv2_w = long(kernel_w) / 2; + // calculate output sizes + const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1; + const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1; + // account for padding for the starting position of the input pixels + long iinput = kdiv2_h - long(pad_h); + // loop over output pixels (ioutput, joutput) + for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h) + { + long jinput = kdiv2_w - long(pad_w); + for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w) + { + // compute linear index for output + std::size_t ldx = ioutput * col_width + joutput; + std::size_t p = 0; + dfor(channels, + kernel_h, + kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) { + auto idx = iinput + long(koffset) - kdiv2_h; + auto jdx = jinput + long(loffset) - kdiv2_w; + col(ldx, p) = + ((idx >= 0) and (idx < height) and (jdx >= 0) and (jdx < width)) + ? 
input(0, c, idx, jdx) + : 0; + p++; + }); + } + } + }); + return result; + } +}; +MIGRAPHX_REGISTER_OP(ref_im2col) + +struct ref_op +{ + operation op = op::identity{}; + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "ref::op"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + argument compute(context&, const shape& output_shape, const std::vector& args) const + { + return op.compute(output_shape, args); + } + value to_value() const + { + value v; + v["name"] = op.name(); + v["operator"] = op.to_value(); + return v; + } + void from_value(const value& v) + { + op = make_op(v.at("name").to(), v.at("operator")); + } + friend std::ostream& operator<<(std::ostream& os, const ref_op& x) + { + os << "ref::" << x.op; + return os; + } +}; +MIGRAPHX_REGISTER_OP(ref_op) + +struct ref_pad +{ + op::pad op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "ref::pad"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + argument compute(context&, const dyn_output& dyn_out, std::vector args) const + { + assert(dyn_out.computed_shape.standard()); + argument result{dyn_out.computed_shape}; + result.visit([&](auto output) { + using type = typename decltype(output)::value_type; + std::fill(output.begin(), output.end(), pad_clamp(op.value)); + }); + + visit_all(result, args[0])([&](auto output, auto input) { + shape_for_each(input.get_shape(), [&](const auto& idx) { + std::vector new_idx(idx.size()); + std::transform( + idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) { + return i + j; + }); + output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end()); + }); + }); + + return result; + } +}; +MIGRAPHX_REGISTER_OP(ref_pad) + +struct ref_gemm +{ + op::dot op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + std::string name() const { return "ref::dot"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + + argument compute(context&, const dyn_output& dyn_out, std::vector args) const + { + argument result{dyn_out.computed_shape}; + visit_all(result, args[0], args[1])( + [&](auto cmat, auto amat, auto bmat) { gemm(cmat, amat, bmat, 1.0f, 0.0f); }); + return result; + } +}; +MIGRAPHX_REGISTER_OP(ref_gemm) + +struct ref_quant_gemm +{ + op::quant_dot op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "ref::quant_dot"; } + shape compute_shape(const std::vector& inputs) const { return op.compute_shape(inputs); } + + argument compute(context&, const shape& output_shape, std::vector args) const + { + argument result{output_shape}; + result.visit([&](auto cmat) { + visit_all(args.at(0), args.at(1))( + [&](auto amat, auto bmat) { return gemm(cmat, amat, bmat, 1.0f, 0.0f); }); + }); + return result; + } +}; + +MIGRAPHX_REGISTER_OP(ref_gemm) + +template +struct ref_softmax : auto_register_op> +{ + ref_softmax() = default; + + ref_softmax(Op pop) : op(std::move(pop)) {} + + Op op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "ref::" + op.name(); } + shape compute_shape(const std::vector& inputs) const + { + return op.normalize_compute_shape(inputs); + } + 
argument compute(context&, const dyn_output& dyn_out, std::vector args) const + { + argument result{dyn_out.computed_shape}; + auto batch_lens = dyn_out.computed_shape.lens(); + int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name()); + std::size_t n_dims = batch_lens[tuned_axis]; + batch_lens[tuned_axis] = 1; + shape batch_shape{shape::int32_type, batch_lens}; + + visit_all(result, args[0])([&](auto output, auto input) { + using value_type = accumulator_type; + std::vector batch_max(batch_shape.elements(), + std::numeric_limits::lowest()); + std::vector batch_sum(batch_shape.elements(), value_type(0)); + par_for(batch_shape.elements(), [&](auto i) { + auto idx = batch_shape.multi(i); + for(std::size_t j = 0; j < n_dims; ++j) + { + idx[tuned_axis] = j; + batch_max[i] = + std::max(batch_max[i], input(idx.begin(), idx.end())); + } + + for(std::size_t j = 0; j < n_dims; ++j) + { + idx[tuned_axis] = j; + std::size_t index = dyn_out.computed_shape.index(idx); + output[index] = std::exp(input[index] - batch_max[i]); + } + + for(std::size_t j = 0; j < n_dims; ++j) + { + idx[tuned_axis] = j; + batch_sum[i] += output(idx.begin(), idx.end()); + } + + for(std::size_t j = 0; j < n_dims; ++j) + { + idx[tuned_axis] = j; + output(idx.begin(), idx.end()) = + op.output()(output(idx.begin(), idx.end()), batch_sum[i]); + } + }); + }); + + return result; + } +}; + +struct ref_rnn_var_sl_last_output +{ + op::rnn_var_sl_last_output op; + + template + static auto reflect(Self& self, F f) + { + return migraphx::reflect(self.op, f); + } + + std::string name() const { return "ref::rnn_var_sl_last_output"; } + + shape compute_shape(std::vector inputs) const + { + return op.compute_shape(std::move(inputs)); + } + + argument compute(const shape& output_shape, std::vector args) const + { + argument result{output_shape}; + auto out_comp_lens = args[0].get_shape().lens(); + out_comp_lens[0] = 1; + shape out_comp_s{output_shape.type(), out_comp_lens}; + + visit_all(result, args[0])([&](auto output, auto input) { + args[1].visit([&](auto seq_lens) { + par_for(output_shape.elements(), [&](auto i) { + auto idx = out_comp_s.multi(i); + auto b = idx[2]; + if(op.direction == op::rnn_direction::reverse or idx[1] == 1) + { + idx[0] = 0; + } + else + { + idx[0] = seq_lens[b] - 1; + } + output[i] = input(idx.begin(), idx.end()); + }); + }); + }); + + return result; + } +}; +MIGRAPHX_REGISTER_OP(ref_rnn_var_sl_last_output) + +struct ref_apply +{ + module* mod; + std::unordered_map> apply_map{}; + + template + auto simple_op() + { + return [this](instruction_ref ins) { apply_simple_op(ins); }; + } + + template + auto extend_op() + { + return [this](instruction_ref ins) { apply_extend_op(ins); }; + } + + void init() + { + apply_map["dot"] = extend_op(); + apply_map["quant_dot"] = extend_op(); + apply_map["im2col"] = extend_op(); + apply_map["logsoftmax"] = extend_op, op::logsoftmax>(); + apply_map["lrn"] = extend_op(); + apply_map["pad"] = extend_op(); + apply_map["softmax"] = extend_op, op::softmax>(); + apply_map["rnn_var_sl_last_output"] = + extend_op(); + } + + void apply() + { + init(); + for(auto it : iterator_for(*mod)) + { + if(apply_map.count(it->name()) > 0) + { + apply_map.at(it->name())(it); + } + else if(is_context_free(it->get_operator())) + { + apply_ref_op(it); + } + } + } + + void apply_ref_op(instruction_ref ins) const + { + mod->replace_instruction(ins, ref_op{ins->get_operator()}, ins->inputs()); + } + + template + void apply_simple_op(instruction_ref ins) + { + 
mod->replace_instruction(ins, T{}, ins->inputs()); + } + + template + void apply_extend_op(instruction_ref ins) + { + auto&& op = any_cast(ins->get_operator()); + mod->replace_instruction(ins, T{op}, ins->inputs()); + } +}; + +void lowering::apply(module& m) const { ref_apply{&m}.apply(); } + +} // namespace ref +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/targets/ref/target.cpp b/docker/rocm/migraphx/targets/ref/target.cpp new file mode 100644 index 000000000..13c15e541 --- /dev/null +++ b/docker/rocm/migraphx/targets/ref/target.cpp @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace ref { + +std::string target::name() const { return "ref"; } + +std::vector target::get_passes(migraphx::context&, const compile_options&) const +{ + return {normalize_ops{}, + eliminate_pad{}, + dead_code_elimination{}, + insert_pad{}, + dead_code_elimination{}, + rewrite_rnn{}, + dead_code_elimination{}, + auto_contiguous{}, + dead_code_elimination{}, + lowering{}, + dead_code_elimination{}}; +} + +argument target::allocate(const shape& s) const { return fill_argument(s, 0); } + +MIGRAPHX_REGISTER_TARGET(target); + +} // namespace ref +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/CMakeLists.txt b/docker/rocm/migraphx/tf/CMakeLists.txt new file mode 100644 index 000000000..49df6d39d --- /dev/null +++ b/docker/rocm/migraphx/tf/CMakeLists.txt @@ -0,0 +1,66 @@ +##################################################################################### +# The MIT License (MIT) +# +# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. 
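Because the ref target above runs everything on the host with identity copy_to/copy_from, it is the simplest way to exercise a program end to end. Below is a sketch of building, compiling, and evaluating a small graph against it; the header paths, make_target, and generate_argument usage reflect the library's internal C++ API as assumed here rather than anything shown verbatim in this diff:

#include <iostream>
#include <string>
#include <unordered_map>
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/register_target.hpp>

int main()
{
    // Build a two-input elementwise add.
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::float_type, {2, 3}};
    auto x = mm->add_parameter("x", s);
    auto y = mm->add_parameter("y", s);
    mm->add_return({mm->add_instruction(migraphx::make_op("add"), x, y)});

    // Run the ref target's pass list (normalize_ops, rewrite_rnn, lowering, ...).
    p.compile(migraphx::make_target("ref"));

    // Evaluate with generated host data; no device copies are involved.
    std::unordered_map<std::string, migraphx::argument> params;
    params["x"] = migraphx::generate_argument(s, 0);
    params["y"] = migraphx::generate_argument(s, 1);
    auto results = p.eval(params);
    std::cout << results.front().get_shape() << '\n';
}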
+# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +##################################################################################### +find_package(Protobuf REQUIRED) + +protobuf_generate_cpp( + PROTO_SRCS PROTO_HDRS + graph.proto + node_def.proto + attr_value.proto + tensor.proto + tensor_shape.proto + resource_handle.proto + types.proto + function.proto + op_def.proto + versions.proto +) +add_library(tf-proto STATIC ${PROTO_SRCS}) +target_include_directories(tf-proto SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${PROTOBUF_INCLUDE_DIR}) +if(MSVC) + target_compile_options(tf-proto PRIVATE /w) +else() + target_compile_options(tf-proto PRIVATE -w) +endif() +target_link_libraries(tf-proto PRIVATE ${PROTOBUF_LIBRARY}) +set_target_properties(tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On) + +file(GLOB TF_SRCS CONFIGURE_DEPENDS *.cpp) +add_library(migraphx_tf ${TF_SRCS}) +migraphx_generate_export_header(migraphx_tf) +target_include_directories(migraphx_tf PRIVATE include) +set_target_properties(migraphx_tf PROPERTIES EXPORT_NAME tf) +rocm_set_soversion(migraphx_tf ${MIGRAPHX_SO_VERSION}) +rocm_clang_tidy_check(migraphx_tf) +target_link_libraries(migraphx_tf PRIVATE tf-proto) +if(NOT WIN32) + target_link_libraries(migraphx_tf PRIVATE "-Wl,--exclude-libs,ALL") +endif() +target_link_libraries(migraphx_tf PUBLIC migraphx) + +rocm_install_targets( + PRIVATE + TARGETS migraphx_tf +) + diff --git a/docker/rocm/migraphx/tf/attr_value.proto b/docker/rocm/migraphx/tf/attr_value.proto new file mode 100644 index 000000000..76944f77b --- /dev/null +++ b/docker/rocm/migraphx/tf/attr_value.proto @@ -0,0 +1,62 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "AttrValueProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "tensor.proto"; +import "tensor_shape.proto"; +import "types.proto"; + +// Protocol buffer representing the value for an attr used to configure an Op. +// Comment indicates the corresponding attr type. Only the field matching the +// attr type may be filled. 
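On the C++ side, the oneof in AttrValue becomes value_case() plus one accessor per field in the generated code, and NodeDef's attr map is exposed as an associative container. A sketch of pulling a list(int) attribute such as "strides" off a node; the header name follows protobuf's usual <name>.pb.h convention for the node_def.proto listed in the CMake file above, and "strides" is just an illustrative attribute name:

#include <iostream>
#include "node_def.pb.h"

// Read a repeated-int attribute from a NodeDef, if present.
void print_strides(const tensorflow::NodeDef& node)
{
    auto it = node.attr().find("strides");
    if(it == node.attr().end())
        return;
    const tensorflow::AttrValue& value = it->second;
    if(value.value_case() == tensorflow::AttrValue::kList)
    {
        for(auto i : value.list().i())
            std::cout << i << ' ';
        std::cout << '\n';
    }
}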
+message AttrValue { + // LINT.IfChange + message ListValue { + repeated bytes s = 2; // "list(string)" + repeated int64 i = 3 [packed = true]; // "list(int)" + repeated float f = 4 [packed = true]; // "list(float)" + repeated bool b = 5 [packed = true]; // "list(bool)" + repeated DataType type = 6 [packed = true]; // "list(type)" + repeated TensorShapeProto shape = 7; // "list(shape)" + repeated TensorProto tensor = 8; // "list(tensor)" + repeated NameAttrList func = 9; // "list(attr)" + } + // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) + + oneof value { + bytes s = 2; // "string" + int64 i = 3; // "int" + float f = 4; // "float" + bool b = 5; // "bool" + DataType type = 6; // "type" + TensorShapeProto shape = 7; // "shape" + TensorProto tensor = 8; // "tensor" + ListValue list = 1; // any "list(...)" + + // "func" represents a function. func.name is a function's name or + // a primitive op's name. func.attr.first is the name of an attr + // defined for that function. func.attr.second is the value for + // that attr in the instantiation. + NameAttrList func = 10; + + // This is a placeholder only used in nodes defined inside a + // function. It indicates the attr value will be supplied when + // the function is instantiated. For example, let us suppose a + // node "N" in function "FN". "N" has an attr "A" with value + // placeholder = "foo". When FN is instantiated with attr "foo" + // set to "bar", the instantiated node N's attr A will have been + // given the value "bar". + string placeholder = 9; + } +} + +// A list of attr names and their values. The whole list is attached +// with a string name. E.g., MatMul[T=float]. +message NameAttrList { + string name = 1; + map attr = 2; +} diff --git a/docker/rocm/migraphx/tf/function.proto b/docker/rocm/migraphx/tf/function.proto new file mode 100644 index 000000000..ce7f8d60e --- /dev/null +++ b/docker/rocm/migraphx/tf/function.proto @@ -0,0 +1,102 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "FunctionProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "attr_value.proto"; +import "node_def.proto"; +import "op_def.proto"; + +// A library is a set of named functions. +message FunctionDefLibrary { + repeated FunctionDef function = 1; + repeated GradientDef gradient = 2; +} + +// A function can be instantiated when the runtime can bind every attr +// with a value. When a GraphDef has a call to a function, it must +// have binding for every attr defined in the signature. +// +// TODO(zhifengc): +// * device spec, etc. +message FunctionDef { + // The definition of the function's name, arguments, return values, + // attrs etc. + OpDef signature = 1; + + // Attributes specific to this function definition. + map attr = 5; + + // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. + reserved 2; + + // In both of the following fields, there is the need to specify an + // output that is used as either the input to another node (in + // `node_def`) or as a return value of the function (in `ret`). + // Unlike the NodeDefs in GraphDef, we need to be able to specify a + // list in some cases (instead of just single outputs). Also, we + // need to be able to deal with lists of unknown length (so the + // output index may not be known at function definition time). 
So + // we use the following format instead: + // * "fun_in" where "fun_in" is the name of a function input arg in + // the `signature` field above. This represents that input, whether + // it is a single tensor or a list. + // * "fun_in:0" gives the first element of a function input arg (a + // non-list input is considered a list of length 1 for these + // purposes). + // * "node:out" where "node" is the name of a node in `node_def` and + // "out" is the name one of its op's output arguments (the name + // comes from the OpDef of the node's op). This represents that + // node's output, whether it is a single tensor or a list. + // Note: We enforce that an op's output arguments are never + // renamed in the backwards-compatibility test. + // * "node:out:0" gives the first element of a node output arg (a + // non-list output is considered a list of length 1 for these + // purposes). + // + // NOT CURRENTLY SUPPORTED (but may be in the future): + // * "node:out:-1" gives last element in a node output list + // * "node:out:1:" gives a list with all but the first element in a + // node output list + // * "node:out::-1" gives a list with all but the last element in a + // node output list + + // The body of the function. Unlike the NodeDefs in a GraphDef, attrs + // may have values of type `placeholder` and the `input` field uses + // the "output" format above. + + // By convention, "op" in node_def is resolved by consulting with a + // user-defined library first. If not resolved, "func" is assumed to + // be a builtin op. + repeated NodeDef node_def = 3; + + // A mapping from the output arg names from `signature` to the + // outputs from `node_def` that should be returned by the function. + map ret = 4; +} + +// GradientDef defines the gradient function of a function defined in +// a function library. +// +// A gradient function g (specified by gradient_func) for a function f +// (specified by function_name) must follow the following: +// +// The function 'f' must be a numerical function which takes N inputs +// and produces M outputs. Its gradient function 'g', which is a +// function taking N + M inputs and produces N outputs. +// +// I.e. if we have +// (y1, y2, ..., y_M) = f(x1, x2, ..., x_N), +// then, g is +// (dL/dx1, dL/dx2, ..., dL/dx_N) = g(x1, x2, ..., x_N, +// dL/dy1, dL/dy2, ..., dL/dy_M), +// where L is a scalar-value function of (x1, x2, ..., xN) (e.g., the +// loss function). dL/dx_i is the partial derivative of L with respect +// to x_i. +message GradientDef { + string function_name = 1; // The function name. + string gradient_func = 2; // The gradient function's name. +} diff --git a/docker/rocm/migraphx/tf/graph.proto b/docker/rocm/migraphx/tf/graph.proto new file mode 100644 index 000000000..14d9edfab --- /dev/null +++ b/docker/rocm/migraphx/tf/graph.proto @@ -0,0 +1,56 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "GraphProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "node_def.proto"; +import "function.proto"; +import "versions.proto"; + +// Represents the graph of operations +message GraphDef { + repeated NodeDef node = 1; + + // Compatibility versions of the graph. See core/public/version.h for version + // history. The GraphDef version is distinct from the TensorFlow version, and + // each release of TensorFlow will support a range of GraphDef versions. 
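A GraphDef serialized by TensorFlow can be loaded with the generated classes and walked node by node, which is essentially where the importer in this directory starts. A small sketch, assuming the usual graph.pb.h generated header and the standard name/op fields of NodeDef:

#include <fstream>
#include <iostream>
#include "graph.pb.h"

// Load a frozen GraphDef and list each node's name and op.
int main(int argc, char** argv)
{
    if(argc < 2)
        return 1;
    GOOGLE_PROTOBUF_VERIFY_VERSION;
    std::ifstream input(argv[1], std::ios::binary);
    tensorflow::GraphDef graph;
    if(not graph.ParseFromIstream(&input))
    {
        std::cerr << "failed to parse GraphDef\n";
        return 1;
    }
    for(const auto& node : graph.node())
        std::cout << node.name() << " : " << node.op() << '\n';
}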
+ VersionDef versions = 4; + + // Deprecated single version field; use versions above instead. Since all + // GraphDef changes before "versions" was introduced were forward + // compatible, this field is entirely ignored. + int32 version = 3 [deprecated = true]; + + // EXPERIMENTAL. DO NOT USE OR DEPEND ON THIS YET. + // + // "library" provides user-defined functions. + // + // Naming: + // * library.function.name are in a flat namespace. + // NOTE: We may need to change it to be hierarchical to support + // different orgs. E.g., + // { "/google/nn", { ... }}, + // { "/google/vision", { ... }} + // { "/org_foo/module_bar", { ... }} + // map named_lib; + // * If node[i].op is the name of one function in "library", + // node[i] is deemed as a function call. Otherwise, node[i].op + // must be a primitive operation supported by the runtime. + // + // + // Function call semantics: + // + // * The callee may start execution as soon as some of its inputs + // are ready. The caller may want to use Tuple() mechanism to + // ensure all inputs are ready in the same time. + // + // * The consumer of return values may start executing as soon as + // the return values the consumer depends on are ready. The + // consumer may want to use Tuple() mechanism to ensure the + // consumer does not start until all return values of the callee + // function are ready. + FunctionDefLibrary library = 2; +}; diff --git a/docker/rocm/migraphx/tf/include/migraphx/tf/op_parser.hpp b/docker/rocm/migraphx/tf/include/migraphx/tf/op_parser.hpp new file mode 100644 index 000000000..7ac7af501 --- /dev/null +++ b/docker/rocm/migraphx/tf/include/migraphx/tf/op_parser.hpp @@ -0,0 +1,102 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_TF_REGISTER_OP_PARSER_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_TF_REGISTER_OP_PARSER_HPP + +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct op_desc +{ + std::string tf_name = ""; + std::string op_name = ""; +}; + +void register_op_parser(const std::string& name, tf_parser::op_func f); +tf_parser::op_func get_op_parser(const std::string& name); +std::vector get_op_parsers(); + +inline std::vector implicit_multi_op(std::vector inss) +{ + return inss; +} + +inline std::vector implicit_multi_op(instruction_ref ins) { return {ins}; } + +template +void register_op_parser() +{ + T parser; + for(auto&& opd : parser.operators()) + register_op_parser(opd.tf_name, + [opd, parser](auto&&... xs) { return parser.base_parse(opd, xs...); }); +} + +struct register_op_parser_action +{ + template + static void apply() + { + register_op_parser(); + } +}; + +template +struct op_parser : auto_register +{ + bool transpose() const { return false; } + std::vector base_parse(const op_desc& opd, + const tf_parser& parser, + tf_parser::node_info info, + const std::vector& args) const + { + std::vector result; + auto& self = static_cast(*this); + if(self.transpose()) + { + result = implicit_multi_op(self.parse(opd, parser, info, parser.to_nchw(args))); + std::transform(result.begin(), result.end(), result.begin(), [&](auto ins) { + return parser.to_nhwc(ins); + }); + } + else + { + result = implicit_multi_op(self.parse(opd, parser, info, args)); + } + return result; + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/tf/include/migraphx/tf/tf_parser.hpp b/docker/rocm/migraphx/tf/include/migraphx/tf/tf_parser.hpp new file mode 100644 index 000000000..99510512e --- /dev/null +++ b/docker/rocm/migraphx/tf/include/migraphx/tf/tf_parser.hpp @@ -0,0 +1,141 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_TF_PARSER_HPP +#define MIGRAPHX_GUARD_AMDMIGRAPHX_TF_PARSER_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +// namespace tf = tf_for_migraphx; + +struct tf_parser +{ + std::string filename; + std::string path = "."; + using attribute_map = std::unordered_map; + struct node_info + { + attribute_map attributes{}; + std::string name = ""; + module* mm = nullptr; + + instruction_ref make_contiguous(instruction_ref ins) const; + + instruction_ref add_broadcastable_binary_op(const std::string& op_name, + instruction_ref arg0, + instruction_ref arg1) const; + + instruction_ref add_common_op(const std::string& op_name, + std::vector inputs) const; + + template + instruction_ref add_common_op(const std::string& op_name, Ts... xs) const + { + return add_common_op(op_name, {xs...}); + } + + instruction_ref add_instruction(const operation& op, + const std::vector& args) const; + + template + instruction_ref add_instruction(const operation& op, Ts... xs) const + { + return add_instruction(op, {xs...}); + } + instruction_ref add_literal(literal l) const; + template + instruction_ref add_literal(Ts&&... xs) const + { + return add_literal(literal{std::forward(xs)...}); + } + }; + + using node_map = std::map; + using op_func = std::function( + const tf_parser&, const node_info&, std::vector)>; + node_map nodes; + std::vector input_nodes; + std::vector output_node_names; + std::unordered_map instructions; + program prog = program(); + module* mm = prog.get_main_module(); + bool is_nhwc = true; + unsigned int batch_size = 1; + std::size_t default_dim_value = 1; + std::unordered_map> map_input_dims; + + std::unordered_map ops; + + tf_parser(); + operation load(const std::string& name, const node_info& info) const; + bool should_transpose(instruction_ref ins) const; + instruction_ref to_nhwc(instruction_ref ins) const; + instruction_ref to_nchw(instruction_ref ins) const; + instruction_ref to_kcxy(instruction_ref ins) const; + std::vector to_nchw(const std::vector& args) const; + std::vector to_nhwc(const std::vector& args) const; + int64_t parse_axis(int64_t dim, size_t num_dims) const; + // tf stores certain attributes such as strides, dilations, as a 4D input. + // The first and last dims are equal to 1, and the relevant data is in dims 2 and 3. + // This helper function reorders the data to store for the respective operator member variables. 
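+    // Illustrative sketch (this assumes the default NHWC layout, where
+    // parse_axis maps TF axes {0, 1, 2, 3} to NCHW positions {0, 2, 3, 1}):
+    // a "strides" attribute of {1, 2, 3, 1} is reordered to {1, 1, 2, 3},
+    // leaving the spatial strides at indices 2 and 3, which is where the
+    // convolution and pooling parsers read them from.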
+ template + void reorder_data(std::vector& prev_data) const + { + std::vector new_data(prev_data.size()); + for(size_t i = 0; i < new_data.size(); i++) + { + auto new_idx = parse_axis(i, new_data.size()); + new_data.at(new_idx) = prev_data.at(i); + } + prev_data = new_data; + } + + void parse_undefined(module* mm, const std::string& name); + void parse_from(std::istream& is); + void parse_from(const void* data, std::size_t size); + void parse_graph(const tensorflow::GraphDef& graph); + void parse_node(const std::string& name); + literal parse_tensor(const tensorflow::TensorProto& t) const; + shape::type_t parse_type(tensorflow::DataType t) const; + std::vector find_outputs() const; +}; + +std::vector get_axes_from_mask(size_t num_axes, uint32_t mask); + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx + +#endif diff --git a/docker/rocm/migraphx/tf/node_def.proto b/docker/rocm/migraphx/tf/node_def.proto new file mode 100644 index 000000000..a79c0acd7 --- /dev/null +++ b/docker/rocm/migraphx/tf/node_def.proto @@ -0,0 +1,63 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "NodeProto"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "attr_value.proto"; + +message NodeDef { + // The name given to this operator. Used for naming inputs, + // logging, visualization, etc. Unique within a single GraphDef. + // Must match the regexp "[A-Za-z0-9.][A-Za-z0-9_./]*". + string name = 1; + + // The operation name. There may be custom parameters in attrs. + // Op names starting with an underscore are reserved for internal use. + string op = 2; + + // Each input is "node:src_output" with "node" being a string name and + // "src_output" indicating which output tensor to use from "node". If + // "src_output" is 0 the ":0" suffix can be omitted. Regular inputs + // may optionally be followed by control inputs that have the format + // "^node". + repeated string input = 3; + + // A (possibly partial) specification for the device on which this + // node should be placed. + // The expected syntax for this string is as follows: + // + // DEVICE_SPEC ::= PARTIAL_SPEC + // + // PARTIAL_SPEC ::= ("/" CONSTRAINT) * + // CONSTRAINT ::= ("job:" JOB_NAME) + // | ("replica:" [1-9][0-9]*) + // | ("task:" [1-9][0-9]*) + // | ("device:" [A-Za-z]* ":" ([1-9][0-9]* | "*") ) + // + // Valid values for this string include: + // * "/job:worker/replica:0/task:1/device:GPU:3" (full specification) + // * "/job:worker/device:GPU:3" (partial specification) + // * "" (no specification) + // + // If the constraints do not resolve to a single device (or if this + // field is empty or not present), the runtime will attempt to + // choose a device automatically. + string device = 4; + + // Operation-specific graph-construction-time configuration. + // Note that this should include all attrs defined in the + // corresponding OpDef, including those with a value matching + // the default -- this allows the default to change and makes + // NodeDefs easier to interpret on their own. However, if + // an attr with a default is not specified in this list, the + // default will be used. + // The "names" (keys) must match the regexp "[a-z][a-z0-9_]+" (and + // one of the names from the corresponding OpDef's attr field). + // The values must have a type matching the corresponding OpDef + // attr's type field. 
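+  // As a non-normative sketch (node, input and attr values here are invented
+  // for illustration), a Conv2D node rendered in text form could look like:
+  //   name: "conv1"
+  //   op: "Conv2D"
+  //   input: "images"
+  //   input: "conv1/weights"
+  //   device: "/job:worker/device:GPU:0"
+  //   attr { key: "T"       value { type: DT_FLOAT } }
+  //   attr { key: "strides" value { list { i: 1 i: 2 i: 2 i: 1 } } }
+  //   attr { key: "padding" value { s: "SAME" } }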
+ // TODO(josh11b): Add some examples here showing best practices. + map attr = 5; +}; diff --git a/docker/rocm/migraphx/tf/op_def.proto b/docker/rocm/migraphx/tf/op_def.proto new file mode 100644 index 000000000..86bea899a --- /dev/null +++ b/docker/rocm/migraphx/tf/op_def.proto @@ -0,0 +1,166 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "OpDefProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "attr_value.proto"; +import "types.proto"; + +// Defines an operation. A NodeDef in a GraphDef specifies an Op by +// using the "op" field which should match the name of a OpDef. +// LINT.IfChange +message OpDef { + // Op names starting with an underscore are reserved for internal use. + // Names should be CamelCase and match the regexp "[A-Z][a-zA-Z0-9_]*". + string name = 1; + + // For describing inputs and outputs. + message ArgDef { + // Name for the input/output. Should match the regexp "[a-z][a-z0-9_]*". + string name = 1; + + // Human readable description. + string description = 2; + + // Describes the type of one or more tensors that are accepted/produced + // by this input/output arg. The only legal combinations are: + // * For a single tensor: either the "type" field is set or the + // "type_attr" field is set to the name of an attr with type "type". + // * For a sequence of tensors with the same type: the "number_attr" + // field will be set to the name of an attr with type "int", and + // either the "type" or "type_attr" field will be set as for + // single tensors. + // * For a sequence of tensors, the "type_list_attr" field will be set + // to the name of an attr with type "list(type)". + DataType type = 3; + string type_attr = 4; // if specified, attr must have type "type" + string number_attr = 5; // if specified, attr must have type "int" + // If specified, attr must have type "list(type)", and none of + // type, type_attr, and number_attr may be specified. + string type_list_attr = 6; + + // For inputs: if true, the inputs are required to be refs. + // By default, inputs can be either refs or non-refs. + // For outputs: if true, outputs are refs, otherwise they are not. + bool is_ref = 16; + }; + + // Description of the input(s). + repeated ArgDef input_arg = 2; + + // Description of the output(s). + repeated ArgDef output_arg = 3; + + // Description of the graph-construction-time configuration of this + // Op. That is to say, this describes the attr fields that will + // be specified in the NodeDef. + message AttrDef { + // A descriptive name for the argument. May be used, e.g. by the + // Python client, as a keyword argument name, and so should match + // the regexp "[a-z][a-z0-9_]+". + string name = 1; + + // One of the type names from attr_value.proto ("string", "list(string)", + // "int", etc.). + string type = 2; + + // A reasonable default for this attribute if the user does not supply + // a value. If not specified, the user must supply a value. + AttrValue default_value = 3; + + // Human-readable description. + string description = 4; + + // TODO(josh11b): bool is_optional? + + // --- Constraints --- + // These constraints are only in effect if specified. Default is no + // constraints. + + // For type == "int", this is a minimum value. For "list(___)" + // types, this is the minimum length. + bool has_minimum = 5; + int64 minimum = 6; + + // The set of allowed values. 
Has type that is the "list" version + // of the "type" field above (uses the "list" field of AttrValue). + // If type == "type" or "list(type)" above, then the "type" field + // of "allowed_values.list" has the set of allowed DataTypes. + // If type == "string" or "list(string)", then the "s" field of + // "allowed_values.list" has the set of allowed strings. + AttrValue allowed_values = 7; + } + repeated AttrDef attr = 4; + + // Optional deprecation based on GraphDef versions. + OpDeprecation deprecation = 8; + + // One-line human-readable description of what the Op does. + string summary = 5; + + // Additional, longer human-readable description of what the Op does. + string description = 6; + + // ------------------------------------------------------------------------- + // Which optimizations this operation can participate in. + + // True if the operation is commutative ("op(a,b) == op(b,a)" for all inputs) + bool is_commutative = 18; + + // If is_aggregate is true, then this operation accepts N >= 2 + // inputs and produces 1 output all of the same type. Should be + // associative and commutative, and produce output with the same + // shape as the input. The optimizer may replace an aggregate op + // taking input from multiple devices with a tree of aggregate ops + // that aggregate locally within each device (and possibly within + // groups of nearby devices) before communicating. + // TODO(josh11b): Implement that optimization. + bool is_aggregate = 16; // for things like add + + // Other optimizations go here, like + // can_alias_input, rewrite_when_output_unused, partitioning_strategy, etc. + + // ------------------------------------------------------------------------- + // Optimization constraints. + + // Ops are marked as stateful if their behavior depends on some state beyond + // their input tensors (e.g. variable reading op) or if they have + // a side-effect (e.g. printing or asserting ops). Equivalently, stateless ops + // must always produce the same output for the same input and have + // no side-effects. + // + // By default Ops may be moved between devices. Stateful ops should + // either not be moved, or should only be moved if that state can also + // be moved (e.g. via some sort of save / restore). + // Stateful ops are guaranteed to never be optimized away by Common + // Subexpression Elimination (CSE). + bool is_stateful = 17; // for things like variables, queue + + // ------------------------------------------------------------------------- + // Non-standard options. + + // By default, all inputs to an Op must be initialized Tensors. Ops + // that may initialize tensors for the first time should set this + // field to true, to allow the Op to take an uninitialized Tensor as + // input. + bool allows_uninitialized_input = 19; // for Assign, etc. +}; +// LINT.ThenChange( +// https://www.tensorflow.org/code/tensorflow/core/framework/op_def_util.cc) + +// Information about version-dependent deprecation of an op +message OpDeprecation { + // First GraphDef version at which the op is disallowed. + int32 version = 1; + + // Explanation of why it was deprecated and what to use instead. 
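+  // For example, "Use the ResizeBilinear op instead." (illustrative text
+  // only; any human-readable guidance is acceptable here).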
+ string explanation = 2; +}; + +// A collection of OpDefs +message OpList { + repeated OpDef op = 1; +}; diff --git a/docker/rocm/migraphx/tf/op_parser.cpp b/docker/rocm/migraphx/tf/op_parser.cpp new file mode 100644 index 000000000..9a1b25c9b --- /dev/null +++ b/docker/rocm/migraphx/tf/op_parser.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +std::unordered_map& op_parser_map() +{ + static std::unordered_map m; // NOLINT + return m; +} + +void register_op_parser(const std::string& name, tf_parser::op_func f) +{ + op_parser_map()[name] = std::move(f); +} +tf_parser::op_func get_op_parser(const std::string& name) { return op_parser_map().at(name); } +std::vector get_op_parsers() +{ + std::vector result; + std::transform(op_parser_map().begin(), + op_parser_map().end(), + std::back_inserter(result), + [&](auto&& p) { return p.first; }); + std::sort(result.begin(), result.end()); + return result; +} + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_arg_op.cpp b/docker/rocm/migraphx/tf/parse_arg_op.cpp new file mode 100644 index 000000000..403382325 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_arg_op.cpp @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_arg_op : op_parser +{ + std::vector operators() const { return {{"ArgMax", "argmax"}, {"ArgMin", "argmin"}}; } + + instruction_ref parse(const op_desc& opd, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + const std::vector& args) const + { + int64_t axis = 0; + axis = args[1]->eval().at(); + auto ins = info.add_instruction(make_op(opd.op_name, {{"axis", axis}}), args.front()); + return info.add_instruction(make_op("squeeze", {{"axes", {axis}}}), ins); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_batchnorm.cpp b/docker/rocm/migraphx/tf/parse_batchnorm.cpp new file mode 100644 index 000000000..4c7772eca --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_batchnorm.cpp @@ -0,0 +1,77 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_batchnorm : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"FusedBatchNorm"}, {"FusedBatchNormV3"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + // different default epsilon than from ONNX + float epsilon = 1e-4f; + if(contains(info.attributes, "epsilon")) + { + epsilon = info.attributes.at("epsilon").f(); + } + + auto x_lens = args[0]->get_shape().lens(); + auto x_type = args[0]->get_shape().type(); + + // unsqueeze tensors of shape (C) to broadcast correctly + auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}}); + + auto scale_unsqueeze = + info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[1]); + auto bias_unsqueeze = + info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[2]); + auto mean_unsqueeze = + info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[3]); + auto var_unsqueeze = + info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]); + + auto x_sub_mean = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze); + auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps); + auto rsqrt = info.add_instruction(make_op("rsqrt"), var_eps); + auto mul0 = info.add_broadcastable_binary_op("mul", scale_unsqueeze, rsqrt); + auto r0 = info.add_broadcastable_binary_op("mul", x_sub_mean, mul0); + return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_biasadd.cpp b/docker/rocm/migraphx/tf/parse_biasadd.cpp new file mode 100644 index 000000000..3ecdf42cc --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_biasadd.cpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_biasadd : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"BiasAdd"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + uint64_t axis = 1; // assume output of previous layer is in NCHW (broadcast on channel) + + auto l0 = info.add_instruction( + make_op("broadcast", {{"axis", axis}, {"out_lens", args[0]->get_shape().lens()}}), + args[1]); + return info.add_instruction(make_op("add"), args[0], l0); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_binary_op.cpp b/docker/rocm/migraphx/tf/parse_binary_op.cpp new file mode 100644 index 000000000..0aa30f765 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_binary_op.cpp @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_binary_op : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const + { + return {{"Add", "add"}, + {"AddV2", "add"}, + {"Mul", "mul"}, + {"Pow", "pow"}, + {"SquaredDifference", "sqdiff"}, + {"Sub", "sub"}}; + } + + instruction_ref parse(const op_desc& opd, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + if(args.size() != 2) + MIGRAPHX_THROW("binary operators should have 2 operands"); + return info.add_broadcastable_binary_op(opd.op_name, args[0], args[1]); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_cast.cpp b/docker/rocm/migraphx/tf/parse_cast.cpp new file mode 100644 index 000000000..4bcd2905f --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_cast.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_cast : op_parser +{ + std::vector operators() const { return {{"Cast"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + tf_parser::node_info info, + const std::vector& args) const + { + shape::type_t type = parser.parse_type(info.attributes.at("DstT").type()); + return info.add_instruction(make_op("convert", {{"target_type", type}}), args); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_concat.cpp b/docker/rocm/migraphx/tf/parse_concat.cpp new file mode 100644 index 000000000..5b0fbb124 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_concat.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_concat : op_parser +{ + std::vector operators() const { return {{"ConcatV2"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + // get index for axis within args + size_t axis_idx = info.attributes.at("N").i(); + int64_t axis = args[axis_idx]->eval().at(); + auto op = make_op("concat", {{"axis", axis}}); + // return only first N arguments (assuming last index is the axis value) + return info.add_instruction( + op, std::vector(args.begin(), args.begin() + args.size() - 1)); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_constant.cpp b/docker/rocm/migraphx/tf/parse_constant.cpp new file mode 100644 index 000000000..5b1400b15 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_constant.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_constant_op : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"Const"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + tf_parser::node_info info, + const std::vector& /*args*/) const + { + literal v = parser.parse_tensor(info.attributes.at("value").tensor()); + return info.add_literal(v); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_conv.cpp b/docker/rocm/migraphx/tf/parse_conv.cpp new file mode 100644 index 000000000..cd7b2302c --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_conv.cpp @@ -0,0 +1,112 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_conv : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"Conv2D"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + tf_parser::node_info info, + std::vector args) const + { + op::convolution op; + if(contains(info.attributes, "strides")) + { + std::vector stride; + copy(info.attributes.at("strides").list().i(), std::back_inserter(stride)); + parser.reorder_data(stride); + if(stride.size() != 4) + { + MIGRAPHX_THROW("strides should have 4 values"); + } + op.stride[0] = stride[2]; + op.stride[1] = stride[3]; + } + if(contains(info.attributes, "dilations")) + { + std::vector dilation; + copy(info.attributes.at("dilations").list().i(), std::back_inserter(dilation)); + parser.reorder_data(dilation); + if(dilation.size() != 4) + { + MIGRAPHX_THROW("dilation should have 4 values"); + } + op.dilation[0] = dilation[2]; + op.dilation[1] = dilation[3]; + } + + auto weights = parser.to_kcxy(args[1]); + auto l0 = args[0]; + if(contains(info.attributes, "padding")) + { + const std::string& pad_mode = info.attributes.at("padding").s(); + if(pad_mode.find("SAME") != std::string::npos) + { + std::vector weight_dims = weights->get_shape().lens(); + size_t weight_h = weight_dims[2]; + size_t weight_w = weight_dims[3]; + + auto input_dims = l0->get_shape().lens(); + std::vector pads(input_dims.size()); + calculate_padding(0, pads, input_dims[2], op.stride[0], op.dilation[0], weight_h); + calculate_padding(1, pads, input_dims[3], op.stride[1], op.dilation[1], weight_w); + + op.padding = std::vector(pads.begin(), pads.end()); + } + else if(pad_mode.find("EXPLICIT") != std::string::npos) + { + std::vector padding; + copy(info.attributes.at("explicit_paddings").list().i(), + std::back_inserter(padding)); + if(padding.size() != 4) + { + MIGRAPHX_THROW("padding should have 4 values"); + } + if(padding[0] != padding[2] or padding[1] != padding[3]) + { + MIGRAPHX_THROW("migraphx does not support asymetric padding"); + } + op.padding[0] = padding[0]; + op.padding[1] = padding[1]; + } + } + return info.add_instruction(op, {l0, weights}); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_depthwiseconv.cpp 
b/docker/rocm/migraphx/tf/parse_depthwiseconv.cpp new file mode 100644 index 000000000..7474654ad --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_depthwiseconv.cpp @@ -0,0 +1,125 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_depthwiseconv : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"DepthwiseConv2dNative"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + tf_parser::node_info info, + std::vector args) const + { + op::convolution op; + size_t num_channels = args[0]->get_shape().lens()[1]; + op.group = num_channels; + + if(contains(info.attributes, "strides")) + { + std::vector stride; + copy(info.attributes.at("strides").list().i(), std::back_inserter(stride)); + parser.reorder_data(stride); + if(stride.size() != 4) + { + MIGRAPHX_THROW("strides should have 4 values"); + } + op.stride[0] = stride[2]; + op.stride[1] = stride[3]; + } + + auto weights = parser.to_kcxy(args[1]); + if(contains(info.attributes, "dilations")) + { + std::vector dilation; + copy(info.attributes.at("dilations").list().i(), std::back_inserter(dilation)); + parser.reorder_data(dilation); + if(dilation.size() != 4) + { + MIGRAPHX_THROW("dilation should have 4 values"); + } + op.dilation[0] = dilation[2]; + op.dilation[1] = dilation[3]; + } + + auto l0 = args[0]; + if(contains(info.attributes, "padding")) + { + const std::string& pad_mode = info.attributes.at("padding").s(); + + if(pad_mode.find("SAME") != std::string::npos) + { + std::vector weight_dims = weights->get_shape().lens(); + size_t weight_h = weight_dims[2]; + size_t weight_w = weight_dims[3]; + + auto input_dims = l0->get_shape().lens(); + std::vector pads(input_dims.size()); + calculate_padding(0, pads, input_dims[2], op.stride[0], op.dilation[0], weight_h); + calculate_padding(1, pads, input_dims[3], op.stride[1], op.dilation[1], weight_w); + + if(pads[0] != pads[2] or pads[1] != pads[3]) + { + std::vector padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]}; + l0 = info.add_instruction(migraphx::make_op("pad", {{"pads", padding}}), l0); + } + else + { + op.padding[0] = pads[0]; + op.padding[1] = pads[1]; + } + } + } + + std::vector new_weights_shape; + 
copy(weights->get_shape().lens(), std::back_inserter(new_weights_shape)); + + // weight format is (out_channels, in_channels, h, w), but in depthwise_conv, + // out_channels is equal to the multiplier. Adjust by inserting a reshape and + // setting in_channels to 1 + int64_t multiplier = new_weights_shape[0]; + int64_t out_channels = num_channels * multiplier; + new_weights_shape[0] = out_channels; + new_weights_shape[1] = 1; + // Make sure weights are contiguous before doing reshape + auto new_weights = info.add_instruction(make_op("reshape", {{"dims", new_weights_shape}}), + info.make_contiguous(weights)); + + return info.add_instruction(op, {l0, new_weights}); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_expanddims.cpp b/docker/rocm/migraphx/tf/parse_expanddims.cpp new file mode 100644 index 000000000..db74eb9f6 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_expanddims.cpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_expanddims : op_parser +{ + std::vector operators() const { return {{"ExpandDims"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + std::vector input_dims = args[0]->get_shape().lens(); + std::vector new_dims(input_dims.begin(), input_dims.end()); + size_t num_dims = input_dims.size(); + int32_t dim = args[1]->eval().at(); + + if(dim < 0) + { + new_dims.insert(new_dims.begin() + (num_dims + dim + 1), 1); + } + else + { + new_dims.insert(new_dims.begin() + dim, 1); + } + return info.add_instruction(make_op("reshape", {{"dims", new_dims}}), args[0]); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_gather.cpp b/docker/rocm/migraphx/tf/parse_gather.cpp new file mode 100644 index 000000000..965b8fa83 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_gather.cpp @@ -0,0 +1,50 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_gather : op_parser +{ + std::vector operators() const { return {{"GatherV2"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + int axis = args[2]->eval().at(); + return info.add_instruction(make_op("gather", {{"axis", axis}}), {args[0], args[1]}); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_generic_op.cpp b/docker/rocm/migraphx/tf/parse_generic_op.cpp new file mode 100644 index 000000000..e459147fc --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_generic_op.cpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_generic_op : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const + { + return {{"All", "identity"}, + {"Identity", "identity"}, + {"LessEqual", "identity"}, + {"Relu", "relu"}, + {"Rsqrt", "rsqrt"}, + {"Tanh", "tanh"}, + {"StopGradient", "identity"}}; + } + + instruction_ref parse(const op_desc& opd, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + const std::vector& args) const + { + return info.add_instruction(make_op(opd.op_name), args); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_matmul.cpp b/docker/rocm/migraphx/tf/parse_matmul.cpp new file mode 100644 index 000000000..7ca4c52b4 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_matmul.cpp @@ -0,0 +1,85 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_matmul : op_parser +{ + std::vector operators() const + { + return {{"BatchMatMul"}, {"BatchMatMulV2"}, {"MatMul"}}; + } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + bool transa = false; + bool transb = false; + + if(contains(info.attributes, "transpose_a")) + { + transa = info.attributes.at("transpose_a").b(); + } + if(contains(info.attributes, "transpose_b")) + { + transb = info.attributes.at("transpose_b").b(); + } + + if(contains(info.attributes, "adj_x")) + { + transa = info.attributes.at("adj_x").b(); + } + if(contains(info.attributes, "adj_y")) + { + transb = info.attributes.at("adj_y").b(); + } + + std::vector perm(args[0]->get_shape().lens().size()); + std::iota(perm.begin(), perm.end(), int64_t{0}); + // swap the last two elements + std::iter_swap(perm.end() - 1, perm.end() - 2); + + auto l1 = (transa) + ? info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[0]) + : args[0]; + auto l2 = (transb) + ? 
info.add_instruction(make_op("transpose", {{"permutation", perm}}), args[1]) + : args[1]; + + return info.add_instruction(make_op("dot"), l1, l2); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_mean.cpp b/docker/rocm/migraphx/tf/parse_mean.cpp new file mode 100644 index 000000000..d0c2b9952 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_mean.cpp @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_mean : op_parser +{ + std::vector operators() const { return {{"Mean"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + bool keep_dims = info.attributes.at("keep_dims").b(); + auto axes = args[1]->eval().get().to_vector(); + + auto ins = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), args[0]); + if(not keep_dims) + ins = info.add_instruction(make_op("squeeze", {{"axes", axes}}), ins); + return ins; + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_onehot.cpp b/docker/rocm/migraphx/tf/parse_onehot.cpp new file mode 100644 index 000000000..66f7d7d0a --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_onehot.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_onehot : op_parser +{ + std::vector operators() const { return {{"OneHot"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + size_t depth = args[1]->eval().at(); + + int64_t axis = -1; + float on_value = args[2]->eval().at(); + float off_value = args[3]->eval().at(); + + std::vector depth_input(depth * depth, off_value); + for(int i = 0; i < depth; i++) + { + depth_input[depth * i + i] = on_value; + } + + if(contains(info.attributes, "axis")) + axis = info.attributes.at("axis").i(); + if(axis == -1) + { + shape s{shape::float_type, {depth, depth}}; + auto l0 = info.add_literal({s, depth_input}); + return info.add_instruction(make_op("gather", {{"axis", 0}}), {l0, args[0]}); + } + MIGRAPHX_THROW("MIGraphX does not support axis != -1"); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_pack.cpp b/docker/rocm/migraphx/tf/parse_pack.cpp new file mode 100644 index 000000000..9766da1bc --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_pack.cpp @@ -0,0 +1,70 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
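// A standalone sketch of the lookup-table trick used by parse_onehot above:
// row i of a depth x depth table holds the one-hot encoding of index i, so a
// gather on axis 0 with the index tensor produces the one-hot result. The
// helper name is illustrative only.
#include <cassert>
#include <cstddef>
#include <vector>

std::vector<float> one_hot_table(std::size_t depth, float on_value, float off_value)
{
    std::vector<float> table(depth * depth, off_value);
    for(std::size_t i = 0; i < depth; i++)
        table[depth * i + i] = on_value; // diagonal entries get on_value
    return table;
}

int main()
{
    assert((one_hot_table(3, 1.0f, 0.0f) ==
            std::vector<float>{1, 0, 0, 0, 1, 0, 0, 0, 1}));
}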
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_pack : op_parser +{ + std::vector operators() const { return {{"Pack"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + tf_parser::node_info info, + std::vector args) const + { + // reinterpret as unsqueeze with concat + std::vector unsqueezed_args; + int64_t axis = 0; + if(contains(info.attributes, "axis")) + axis = info.attributes.at("axis").i(); + size_t input_size = args.front()->get_shape().lens().size(); + if(axis > input_size) + { + MIGRAPHX_THROW("TF_PARSER: axis value of " + to_string(axis) + + " must be smaller than input size " + to_string(input_size)); + } + + std::transform( + args.begin(), + args.end(), + std::back_inserter(unsqueezed_args), + [&](instruction_ref arg) { + return info.add_instruction(make_op("unsqueeze", {{"axes", {axis}}}), arg); + }); + return parser.to_nhwc( + info.add_instruction(make_op("concat", {{"axis", axis}}), unsqueezed_args)); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_pad.cpp b/docker/rocm/migraphx/tf/parse_pad.cpp new file mode 100644 index 000000000..b1d3a587d --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_pad.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
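// A standalone sketch of the shape effect of lowering Pack above to
// unsqueeze + concat: packing n tensors of identical shape along `axis`
// inserts a new dimension of size n at that axis. Names are illustrative only.
#include <cassert>
#include <cstddef>
#include <vector>

std::vector<std::size_t>
packed_shape(std::vector<std::size_t> dims, std::size_t n, std::size_t axis)
{
    dims.insert(dims.begin() + axis, n); // unsqueeze adds the axis, concat grows it to n
    return dims;
}

int main()
{
    // three [2, 3] tensors packed on axis 0 -> [3, 2, 3]
    assert((packed_shape({2, 3}, 3, 0) == std::vector<std::size_t>{3, 2, 3}));
    // packed on the trailing axis -> [2, 3, 3]
    assert((packed_shape({2, 3}, 3, 2) == std::vector<std::size_t>{2, 3, 3}));
}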
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_pad : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"Pad"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& parser, + const tf_parser::node_info& info, + std::vector args) const + { + size_t ndims = args.front()->get_shape().lens().size(); + + // in tf, the paddings are arranged as a 2d shape (ndims, 2), + // the last dim contains the left padding and right padding respectively + std::vector> pad_per_dim(ndims); + auto tf_padding = args[1]->eval().get().to_vector(); + for(size_t i = 0; i < 2 * ndims; i += 2) + { + pad_per_dim[i / 2].first = tf_padding[i]; + pad_per_dim[i / 2].second = tf_padding[i + 1]; + } + parser.reorder_data(pad_per_dim); + + std::vector pads(ndims * 2); + for(size_t i = 0; i < ndims; i++) + { + pads[i] = pad_per_dim[i].first; + pads[i + ndims] = pad_per_dim[i].second; + } + return info.add_instruction(make_op("pad", {{"pads", pads}}), args.front()); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_pooling.cpp b/docker/rocm/migraphx/tf/parse_pooling.cpp new file mode 100644 index 000000000..4baf09a86 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_pooling.cpp @@ -0,0 +1,97 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_pooling : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"AvgPool"}, {"MaxPool"}}; } + + instruction_ref parse(const op_desc& opd, + const tf_parser& parser, + tf_parser::node_info info, + std::vector args) const + { + if(not starts_with(opd.tf_name, "Max") and not starts_with(opd.tf_name, "Av")) + { + MIGRAPHX_THROW("tf pooling mode must be Max or Average"); + } + op::pooling op{starts_with(opd.tf_name, "Max") ? 
op::pooling_mode::max + : op::pooling_mode::average}; + + if(contains(info.attributes, "strides")) + { + std::vector stride; + copy(info.attributes.at("strides").list().i(), std::back_inserter(stride)); + parser.reorder_data(stride); + if(stride.size() != 4) + { + MIGRAPHX_THROW("strides should have 4 values"); + } + op.stride[0] = stride[2]; + op.stride[1] = stride[3]; + } + if(contains(info.attributes, "ksize")) + { + std::vector ksize; + copy(info.attributes.at("ksize").list().i(), std::back_inserter(ksize)); + parser.reorder_data(ksize); + if(ksize.size() != 4) + { + MIGRAPHX_THROW("ksize should have 4 values"); + } + op.lengths[0] = ksize[2]; + op.lengths[1] = ksize[3]; + } + + auto l0 = args[0]; + if(contains(info.attributes, "padding")) + { + const std::string& pad_mode = info.attributes.at("padding").s(); + if(pad_mode.find("SAME") != std::string::npos) + { + auto input_dims = l0->get_shape().lens(); + std::vector pads(input_dims.size()); + calculate_padding(0, pads, input_dims[2], op.stride[0], 1, op.lengths[0]); + calculate_padding(1, pads, input_dims[3], op.stride[1], 1, op.lengths[1]); + + op.padding = std::vector(pads.begin(), pads.end()); + } + } + return info.add_instruction(op, l0); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_relu6.cpp b/docker/rocm/migraphx/tf/parse_relu6.cpp new file mode 100644 index 000000000..75155b432 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_relu6.cpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
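// A standalone sketch of the padding relayout done in the Pad parser earlier:
// TF stores paddings as an (ndims, 2) tensor flattened to
// {d0_before, d0_after, d1_before, d1_after, ...}, while the pad operator
// takes {befores..., afters...}. The NHWC->NCHW reordering step is omitted
// here; the helper name is illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> to_flat_pads(const std::vector<int64_t>& tf_padding)
{
    std::size_t ndims = tf_padding.size() / 2;
    std::vector<int64_t> pads(ndims * 2);
    for(std::size_t i = 0; i < ndims; i++)
    {
        pads[i]         = tf_padding[2 * i];     // pad before dim i
        pads[i + ndims] = tf_padding[2 * i + 1]; // pad after dim i
    }
    return pads;
}

int main()
{
    // dims padded by (1,2) and (0,3) -> {1, 0, 2, 3}
    assert((to_flat_pads({1, 2, 0, 3}) == std::vector<int64_t>{1, 0, 2, 3}));
}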
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_relu6 : op_parser +{ + bool transpose() const { return true; } + std::vector operators() const { return {{"Relu6"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + shape::type_t output_type = args[0]->get_shape().type(); + auto min_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {0.0f}}); + auto max_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {6.0f}}); + + return info.add_common_op("clip", args[0], min_val, max_val); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_reshape.cpp b/docker/rocm/migraphx/tf/parse_reshape.cpp new file mode 100644 index 000000000..4a1c7e697 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_reshape.cpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_reshape : op_parser +{ + std::vector operators() const { return {{"Reshape"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + if(args.size() != 2) + MIGRAPHX_THROW("reshape needs 2 arguments (input, new_shape)"); + auto s = args[1]->eval(); + std::vector dims; + s.visit([&](auto v) { copy(v, std::back_inserter(dims)); }); + return info.add_instruction(make_op("reshape", {{"dims", dims}}), args[0]); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_shape.cpp b/docker/rocm/migraphx/tf/parse_shape.cpp new file mode 100644 index 000000000..bdc850ef3 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_shape.cpp @@ -0,0 +1,56 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. 
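// Relu6 above is lowered to clip(x, 0, 6), with the scalar bounds added as
// literals and broadcast through add_common_op. Element-wise the operation is
// the following; the function name is illustrative only.
#include <algorithm>
#include <cassert>

float relu6(float x) { return std::min(std::max(x, 0.0f), 6.0f); }

int main()
{
    assert(relu6(-1.0f) == 0.0f); // clamped below at 0
    assert(relu6(3.5f) == 3.5f);  // passed through
    assert(relu6(10.0f) == 6.0f); // clamped above at 6
}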
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_shape : op_parser +{ + std::vector operators() const { return {{"Shape"}}; } + + // Use a literal instruction to replace the shape since output of + // shape operator are literals in migraphx + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + std::vector arg_shape = args[0]->get_shape().lens(); + std::vector vec_shape(arg_shape.size()); + migraphx::shape s(migraphx::shape::int32_type, {arg_shape.size()}); + std::transform( + arg_shape.begin(), arg_shape.end(), vec_shape.begin(), [](auto i) { return i; }); + return info.add_literal(migraphx::literal{s, vec_shape}); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_slice.cpp b/docker/rocm/migraphx/tf/parse_slice.cpp new file mode 100644 index 000000000..4fd9be29c --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_slice.cpp @@ -0,0 +1,69 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
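// parse_shape above folds Shape into a literal because tensor shapes are
// static at parse time: the input's lens() are narrowed to int32 and stored
// as a 1-D literal. A standalone sketch of that narrowing; names are
// illustrative only.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int32_t> shape_as_int32(const std::vector<std::size_t>& lens)
{
    std::vector<int32_t> out(lens.size());
    std::transform(lens.begin(), lens.end(), out.begin(), [](std::size_t d) {
        return static_cast<int32_t>(d);
    });
    return out;
}

int main()
{
    assert((shape_as_int32({1, 3, 224, 224}) == std::vector<int32_t>{1, 3, 224, 224}));
}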
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_slice : op_parser +{ + std::vector operators() const { return {{"Slice"}}; } + + // Use a literal instruction to replace the shape since output of + // shape operator are literals in migraphx + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + auto starts = args[1]->eval().get().to_vector(); + auto size = args[2]->eval().get().to_vector(); + auto axes = args[0]->get_shape().lens(); + size_t num_axes = axes.size(); + + std::vector axes_int64(axes.begin(), axes.end()); + std::vector starts_int64(starts.begin(), starts.end()); + std::vector ends(num_axes); + std::vector op_axes(num_axes); + std::iota(op_axes.begin(), op_axes.end(), 0); + for(size_t i = 0; i < num_axes; i++) + { + if(size[i] == -1) + ends[i] = axes_int64[i]; + else + ends[i] = starts_int64[i] + size[i]; + } + auto op = make_op("slice", {{"starts", starts_int64}, {"ends", ends}, {"axes", op_axes}}); + return info.add_instruction(op, info.make_contiguous(args[0])); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_softmax.cpp b/docker/rocm/migraphx/tf/parse_softmax.cpp new file mode 100644 index 000000000..a136e5c6f --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_softmax.cpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
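// parse_slice above converts TF's (begin, size) pair into slice starts/ends;
// a size of -1 means "everything up to the end of that dimension". A
// standalone sketch of the end-point computation; names are illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> slice_ends(const std::vector<int64_t>& starts,
                                const std::vector<int64_t>& sizes,
                                const std::vector<int64_t>& dims)
{
    std::vector<int64_t> ends(starts.size());
    for(std::size_t i = 0; i < starts.size(); i++)
        ends[i] = (sizes[i] == -1) ? dims[i] : starts[i] + sizes[i];
    return ends;
}

int main()
{
    // dims {4, 6}, begin {1, 2}, size {2, -1} -> ends {3, 6}
    assert((slice_ends({1, 2}, {2, -1}, {4, 6}) == std::vector<int64_t>{3, 6}));
}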
+ */ +#include +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_softmax : op_parser +{ + std::vector operators() const { return {{"Softmax"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + int axis = -1; + auto num_dims = args[0]->get_shape().lens().size(); + if(contains(info.attributes, "axis")) + { + axis = static_cast(info.attributes.at("axis").i()); + } + + axis = tune_axis(num_dims, axis, "tf_parse_softmax"); + + return info.add_instruction(make_op("softmax", {{"axis", axis}}), + info.make_contiguous(args[0])); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_split.cpp b/docker/rocm/migraphx/tf/parse_split.cpp new file mode 100644 index 000000000..91aa23936 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_split.cpp @@ -0,0 +1,121 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
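// tune_axis is not shown in this patch; the usual behaviour it provides is
// normalizing a negative axis against the rank, so Softmax's default axis of
// -1 resolves to the last dimension. A sketch under that assumption; the
// function name is illustrative only.
#include <cassert>

int normalize_axis(int axis, int rank) { return axis < 0 ? axis + rank : axis; }

int main()
{
    assert(normalize_axis(-1, 4) == 3); // default Softmax axis -> last dim
    assert(normalize_axis(2, 4) == 2);  // non-negative axes pass through
}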
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_split : op_parser +{ + std::vector operators() const { return {{"Split"}, {"SplitV"}}; } + + std::vector parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + bool vector_as_input = args.size() == 3; + int num_outputs = 1; + auto axis_arg = args[0]; + auto input_arg = args[1]; + if(vector_as_input) + { + input_arg = args[0]; + axis_arg = args[2]; + } + + if(contains(info.attributes, "num_split")) + num_outputs = info.attributes.at("num_split").i(); + + std::vector splits(num_outputs); + std::vector slice_pos{0}; + if(vector_as_input) + { + splits = args[1]->eval().get().to_vector(); + num_outputs = splits.size(); + } + + assert(num_outputs > 0); + + if(num_outputs == 1) + return std::vector{ + info.add_instruction(make_op("identity"), input_arg)}; + + auto lens = input_arg->get_shape().lens(); + auto num_dims = lens.size(); + int axis = axis_arg->eval().at(); + + // ensure split is made evenly if "num_split" is used + assert(vector_as_input or lens[axis] % num_outputs == 0); + + auto split_size = lens[axis] / num_outputs; + + // push back first end point of slice + if(vector_as_input) + { + slice_pos.push_back(splits[0]); + } + else + { + slice_pos.push_back(split_size); + } + + // calculate remaining end points for each slice + for(auto i = 1; i < num_outputs; i++) + { + if(vector_as_input) + { + splits[i] += splits[i - 1]; + slice_pos.push_back(splits[i]); + } + else + { + slice_pos.push_back((i + 1) * split_size); + } + } + std::vector result; + for(auto i = 0; i < num_outputs; i++) + { + std::vector axes(num_dims); + std::iota(axes.begin(), axes.end(), 0); + std::vector starts(num_dims, 0); + std::vector ends(lens.begin(), lens.end()); + + starts[axis] = slice_pos[i]; + ends[axis] = slice_pos[i + 1]; + auto op = make_op("slice", {{"axes", axes}, {"starts", starts}, {"ends", ends}}); + result.push_back(info.add_instruction(op, input_arg)); + } + return result; + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_squeeze.cpp b/docker/rocm/migraphx/tf/parse_squeeze.cpp new file mode 100644 index 000000000..8936c0ac3 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_squeeze.cpp @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
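// parse_split above turns per-output sizes (SplitV) or an even num_split into
// cumulative slice positions along the split axis; adjacent positions become
// the starts/ends of each output slice. A standalone sketch of that prefix
// sum; names are illustrative only.
#include <cassert>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <vector>

std::vector<int64_t> slice_positions(const std::vector<int64_t>& split_sizes)
{
    std::vector<int64_t> pos{0};
    std::partial_sum(split_sizes.begin(), split_sizes.end(), std::back_inserter(pos));
    return pos;
}

int main()
{
    // SplitV sizes {2, 3, 5} -> positions {0, 2, 5, 10}
    assert((slice_positions({2, 3, 5}) == std::vector<int64_t>{0, 2, 5, 10}));
    // an even split of a length-9 axis into 3 -> {0, 3, 6, 9}
    assert((slice_positions({3, 3, 3}) == std::vector<int64_t>{0, 3, 6, 9}));
}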
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_squeeze : op_parser +{ + std::vector operators() const { return {{"Squeeze"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + auto input_dims = args[0]->get_shape().lens(); + auto axes = info.attributes.at("squeeze_dims").list().i(); + std::vector op_axes(axes.begin(), axes.end()); + + if(op_axes.empty()) // no squeeze_dims provided, remove any dim that equals 1 + { + for(size_t i = 0; i < input_dims.size(); i++) + { + if(input_dims.at(i) == 1) + { + op_axes.push_back(i); + } + } + } + return info.add_instruction(make_op("squeeze", {{"axes", op_axes}}), + info.make_contiguous(args[0])); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_stridedslice.cpp b/docker/rocm/migraphx/tf/parse_stridedslice.cpp new file mode 100644 index 000000000..c161e8953 --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_stridedslice.cpp @@ -0,0 +1,101 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
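// When Squeeze carries no squeeze_dims, the parser above removes every
// dimension of size 1. A standalone sketch of that axis selection; names are
// illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int64_t> default_squeeze_axes(const std::vector<std::size_t>& dims)
{
    std::vector<int64_t> axes;
    for(std::size_t i = 0; i < dims.size(); i++)
        if(dims[i] == 1)
            axes.push_back(static_cast<int64_t>(i));
    return axes;
}

int main()
{
    assert((default_squeeze_axes({1, 3, 1, 5}) == std::vector<int64_t>{0, 2}));
}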
+ */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_strideslice : op_parser +{ + std::vector operators() const { return {{"StridedSlice"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + tf_parser::node_info info, + std::vector args) const + { + auto starts = args[1]->eval().get().to_vector(); + auto ends = args[2]->eval().get().to_vector(); + auto l0 = args[0]; + size_t num_axes = l0->get_shape().lens().size(); + std::vector axes = l0->get_shape().lens(); + + std::vector op_starts(starts.begin(), starts.end()); + std::vector op_ends(ends.begin(), ends.end()); + std::vector op_axes(num_axes); + std::iota(op_axes.begin(), op_axes.end(), 0); + uint32_t begin_mask = 0; + uint32_t end_mask = 0; + uint32_t shrink_axis_mask = 0; + uint32_t bitwise_compare = 1; + std::vector squeeze_axes; + + if(contains(info.attributes, "begin_mask")) + begin_mask = static_cast(info.attributes.at("begin_mask").i()); + + if(contains(info.attributes, "end_mask")) + end_mask = static_cast(info.attributes.at("end_mask").i()); + + if(contains(info.attributes, "shrink_axis_mask")) + shrink_axis_mask = static_cast(info.attributes.at("shrink_axis_mask").i()); + + std::vector begin_axes = get_axes_from_mask(num_axes, begin_mask); + std::vector end_axes = get_axes_from_mask(num_axes, end_mask); + + for(size_t i = 0; i < num_axes; i++) + { + if(begin_axes.at(i) == 1) + { + op_starts.at(i) = 0; + } + if(end_axes.at(i) == 1) + { + op_ends.at(i) = axes.at(i); + } + } + + auto op = make_op("slice", {{"starts", op_starts}, {"ends", op_ends}, {"axes", op_axes}}); + auto l1 = info.add_instruction(op, l0); + if(shrink_axis_mask == 0) + return l1; + + for(size_t i = 0; i < num_axes; i++) + { + // the LSB corresponds to axis 0 when determining which axes to squeeze + if(((shrink_axis_mask >> i) & bitwise_compare) == 1) + squeeze_axes.push_back(i); + } + + return info.add_instruction(make_op("squeeze", {{"axes", squeeze_axes}}), l1); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/parse_transpose.cpp b/docker/rocm/migraphx/tf/parse_transpose.cpp new file mode 100644 index 000000000..9b306b97d --- /dev/null +++ b/docker/rocm/migraphx/tf/parse_transpose.cpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
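// StridedSlice above reads begin_mask, end_mask and shrink_axis_mask bit by
// bit, with the least significant bit corresponding to axis 0: a set bit in
// begin_mask resets that axis's start to 0, a set bit in end_mask resets its
// end to the full dimension, and a set bit in shrink_axis_mask squeezes that
// axis afterwards. A standalone sketch of the bit extraction; the name is
// illustrative only.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<int> axes_from_mask(std::size_t num_axes, uint32_t mask)
{
    std::vector<int> flags;
    for(std::size_t i = 0; i < num_axes; i++)
        flags.push_back(static_cast<int>((mask >> i) & 1u));
    return flags;
}

int main()
{
    // mask 0b0101 over 4 axes flags axes 0 and 2
    assert((axes_from_mask(4, 5) == std::vector<int>{1, 0, 1, 0}));
}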
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +struct parse_transpose : op_parser +{ + std::vector operators() const { return {{"Transpose"}}; } + + instruction_ref parse(const op_desc& /*opd*/, + const tf_parser& /*parser*/, + const tf_parser::node_info& info, + std::vector args) const + { + auto perm = args[1]->eval().get().to_vector(); + std::vector dims(perm.begin(), perm.end()); + + return info.add_instruction(make_op("transpose", {{"permutation", dims}}), args.front()); + } +}; + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/resource_handle.proto b/docker/rocm/migraphx/tf/resource_handle.proto new file mode 100644 index 000000000..a54d3d906 --- /dev/null +++ b/docker/rocm/migraphx/tf/resource_handle.proto @@ -0,0 +1,30 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "ResourceHandle"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; + +// Protocol buffer representing a handle to a tensorflow resource. Handles are +// not valid across executions, but can be serialized back and forth from within +// a single run. +message ResourceHandleProto { + // Unique name for the device containing the resource. + string device = 1; + + // Container in which this resource is placed. + string container = 2; + + // Unique name of this resource. + string name = 3; + + // Hash code for the type of the resource. Is only valid in the same device + // and in the same execution. + uint64 hash_code = 4; + + // For debug-only, the name of the type pointed to by this handle, if + // available. + string maybe_type_name = 5; +}; diff --git a/docker/rocm/migraphx/tf/tensor.proto b/docker/rocm/migraphx/tf/tensor.proto new file mode 100644 index 000000000..5d4d66aed --- /dev/null +++ b/docker/rocm/migraphx/tf/tensor.proto @@ -0,0 +1,94 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "TensorProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; +import "resource_handle.proto"; +import "tensor_shape.proto"; +import "types.proto"; + +// Protocol buffer representing a tensor. +message TensorProto { + DataType dtype = 1; + + // Shape of the tensor. TODO(touts): sort out the 0-rank issues. + TensorShapeProto tensor_shape = 2; + + // Only one of the representations below is set, one of "tensor_contents" and + // the "xxx_val" attributes. We are not using oneof because as oneofs cannot + // contain repeated fields it would require another extra set of messages. + + // Version number. + // + // In version 0, if the "repeated xxx" representations contain only one + // element, that element is repeated to fill the shape. This makes it easy + // to represent a constant Tensor with a single value. 
+ int32 version_number = 3; + + // Serialized raw tensor content from either Tensor::AsProtoTensorContent or + // memcpy in tensorflow::grpc::EncodeTensorToByteBuffer. This representation + // can be used for all tensor types. The purpose of this representation is to + // reduce serialization overhead during RPC call by avoiding serialization of + // many repeated small items. + bytes tensor_content = 4; + + // Type specific representations that make it easy to create tensor protos in + // all languages. Only the representation corresponding to "dtype" can + // be set. The values hold the flattened representation of the tensor in + // row major order. + + // DT_HALF, DT_BFLOAT16. Note that since protobuf has no int16 type, we'll + // have some pointless zero padding for each value here. + repeated int32 half_val = 13 [packed = true]; + + // DT_FLOAT. + repeated float float_val = 5 [packed = true]; + + // DT_DOUBLE. + repeated double double_val = 6 [packed = true]; + + // DT_INT32, DT_INT16, DT_INT8, DT_UINT8. + repeated int32 int_val = 7 [packed = true]; + + // DT_STRING + repeated bytes string_val = 8; + + // DT_COMPLEX64. scomplex_val(2*i) and scomplex_val(2*i+1) are real + // and imaginary parts of i-th single precision complex. + repeated float scomplex_val = 9 [packed = true]; + + // DT_INT64 + repeated int64 int64_val = 10 [packed = true]; + + // DT_BOOL + repeated bool bool_val = 11 [packed = true]; + + // DT_COMPLEX128. dcomplex_val(2*i) and dcomplex_val(2*i+1) are real + // and imaginary parts of i-th double precision complex. + repeated double dcomplex_val = 12 [packed = true]; + + // DT_RESOURCE + repeated ResourceHandleProto resource_handle_val = 14; + + // DT_VARIANT + repeated VariantTensorDataProto variant_val = 15; + + // DT_UINT32 + repeated uint32 uint32_val = 16 [packed = true]; + + // DT_UINT64 + repeated uint64 uint64_val = 17 [packed = true]; +}; + +// Protocol buffer representing the serialization format of DT_VARIANT tensors. +message VariantTensorDataProto { + // Name of the type of objects being serialized. + string type_name = 1; + // Portions of the object that are not Tensors. + bytes metadata = 2; + // Tensors contained within objects being serialized. + repeated TensorProto tensors = 3; +} diff --git a/docker/rocm/migraphx/tf/tensor_shape.proto b/docker/rocm/migraphx/tf/tensor_shape.proto new file mode 100644 index 000000000..286156a01 --- /dev/null +++ b/docker/rocm/migraphx/tf/tensor_shape.proto @@ -0,0 +1,46 @@ +// Protocol buffer representing the shape of tensors. + +syntax = "proto3"; +option cc_enable_arenas = true; +option java_outer_classname = "TensorShapeProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; + +package tensorflow; + +// Dimensions of a tensor. +message TensorShapeProto { + // One dimension of the tensor. + message Dim { + // Size of the tensor in that dimension. + // This value must be >= -1, but values of -1 are reserved for "unknown" + // shapes (values of -1 mean "unknown" dimension). Certain wrappers + // that work with TensorShapeProto may fail at runtime when deserializing + // a TensorShapeProto containing a dim value of -1. + int64 size = 1; + + // Optional name of the tensor dimension. + string name = 2; + }; + + // Dimensions of the tensor, such as {"input", 30}, {"output", 40} + // for a 30 x 40 2D tensor. If an entry has size -1, this + // corresponds to a dimension of unknown size. 
The names are + // optional. + // + // The order of entries in "dim" matters: It indicates the layout of the + // values in the tensor in-memory representation. + // + // The first entry in "dim" is the outermost dimension used to layout the + // values, the last entry is the innermost dimension. This matches the + // in-memory layout of RowMajor Eigen tensors. + // + // If "dim.size()" > 0, "unknown_rank" must be false. + repeated Dim dim = 2; + + // If true, the number of dimensions in the shape is unknown. + // + // If true, "dim.size()" must be 0. + bool unknown_rank = 3; +}; diff --git a/docker/rocm/migraphx/tf/tf.cpp b/docker/rocm/migraphx/tf/tf.cpp new file mode 100644 index 000000000..7b6c1322d --- /dev/null +++ b/docker/rocm/migraphx/tf/tf.cpp @@ -0,0 +1,85 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { + +template +program parse_tf_from(const tf_options& options, Ts&&... xs) +{ + tf::tf_parser parser; + parser.is_nhwc = options.is_nhwc; + parser.batch_size = options.batch_size; + parser.map_input_dims = options.map_input_dims; + parser.output_node_names = options.output_node_names; + +#ifndef NDEBUG + // Log the program when it can't be parsed + try + { + parser.parse_from(std::forward(xs)...); + } + catch(...) 
+ { + std::cerr << parser.prog << std::endl; + throw; + } +#else + parser.parse_from(std::forward(xs)...); +#endif + return std::move(parser.prog); +} + +program parse_tf(const std::string& name, const tf_options& options) +{ + std::fstream input(name.c_str(), std::ios::in | std::ios::binary); + return parse_tf_from(options, input); +} + +program parse_tf_buffer(const std::string& buffer, const tf_options& options) +{ + return parse_tf_from(options, buffer.data(), buffer.size()); +} + +program parse_tf_buffer(const void* data, std::size_t size, const tf_options& options) +{ + return parse_tf_from(options, data, size); +} + +std::vector get_tf_operators() { return tf::get_op_parsers(); } + +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/tf_parser.cpp b/docker/rocm/migraphx/tf/tf_parser.cpp new file mode 100644 index 000000000..a53c7b29e --- /dev/null +++ b/docker/rocm/migraphx/tf/tf_parser.cpp @@ -0,0 +1,605 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
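// A minimal caller sketch for the entry points defined in tf.cpp above,
// assuming they are exposed through the public migraphx/tf.hpp header; the
// model path is a placeholder and error handling is omitted.
#include <iostream>
#include <migraphx/tf.hpp>

int main()
{
    migraphx::tf_options options;
    options.is_nhwc    = true; // TF graphs are NHWC by default
    options.batch_size = 1;    // substituted for non-positive (dynamic) batch dims
    auto prog = migraphx::parse_tf("model.pb", options);
    std::cout << prog << std::endl;
}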
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace migraphx { +inline namespace MIGRAPHX_INLINE_NS { +namespace tf { + +bool tf_parser::should_transpose(instruction_ref ins) const +{ + return is_nhwc and ins->get_shape().lens().size() == 4; +} + +instruction_ref tf_parser::to_nhwc(instruction_ref ins) const +{ + if(should_transpose(ins)) + return mm->add_instruction(make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), ins); + return ins; +} + +instruction_ref tf_parser::to_nchw(instruction_ref ins) const +{ + if(should_transpose(ins)) + return mm->add_instruction(make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), ins); + return ins; +} + +instruction_ref tf_parser::to_kcxy(instruction_ref ins) const +{ + return mm->add_instruction(make_op("transpose", {{"permutation", {3, 2, 0, 1}}}), ins); +} + +std::vector tf_parser::to_nchw(const std::vector& args) const +{ + std::vector result(args.size()); + std::transform( + args.begin(), args.end(), result.begin(), [&](auto ins) { return this->to_nchw(ins); }); + return result; +} + +std::vector tf_parser::to_nhwc(const std::vector& args) const +{ + std::vector result(args.size()); + std::transform( + args.begin(), args.end(), result.begin(), [&](auto ins) { return this->to_nhwc(ins); }); + return result; +} + +instruction_ref tf_parser::node_info::make_contiguous(instruction_ref ins) const +{ + if(ins->get_shape().standard()) + return ins; + else + return mm->add_instruction(make_op("contiguous"), ins); +} + +instruction_ref tf_parser::node_info::add_broadcastable_binary_op(const std::string& op_name, + instruction_ref arg0, + instruction_ref arg1) const +{ + return this->add_common_op(op_name, arg0, arg1); +} + +instruction_ref tf_parser::node_info::add_common_op(const std::string& op_name, + std::vector inputs) const +{ + return migraphx::add_common_op(*mm, make_op(op_name), std::move(inputs)); +} + +int64_t tf_parser::parse_axis(const int64_t dim, const size_t num_dims) const +{ + int64_t new_dim = dim; + if(is_nhwc and num_dims >= 4) + { + switch(dim) + { + case 0: new_dim = 0; break; + case 1: new_dim = 2; break; + case 2: new_dim = 3; break; + case 3: new_dim = 1; break; + default: break; + } + } + return new_dim; +} + +instruction_ref +tf_parser::node_info::add_instruction(const operation& op, + const std::vector& args) const +{ + return mm->add_instruction(op, args); +} + +instruction_ref tf_parser::node_info::add_literal(literal l) const +{ + return mm->add_literal(std::move(l)); +} + +std::vector get_axes_from_mask(const size_t num_axes, const uint32_t mask) +{ + uint32_t bitwise_compare = 1; + std::vector axes; + for(size_t i = 0; i < num_axes; i++) + { + // the LSB corresponds to axis 0 when determining which axes to begin + if(((mask >> i) & bitwise_compare) == 1) + axes.push_back(1); + else + axes.push_back(0); + } + return axes; +} + +tf_parser::tf_parser() +{ + // Add all registered op parsers + for(auto&& name : get_op_parsers()) + ops.emplace(name, get_op_parser(name)); +} + +static std::string get_name(const tensorflow::NodeDef& node) { return node.name(); } + +static tf_parser::node_map get_nodes(const tensorflow::GraphDef& graph, + std::vector& input_nodes) +{ + tf_parser::node_map result; + for(auto&& node : graph.node()) + { + auto node_name = get_name(node); + // assume each node in graph has an associated name + 
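// to_nhwc / to_nchw above use the permutations {0, 2, 3, 1} and {0, 3, 1, 2};
// applying one after the other restores the original layout, and parse_axis
// maps an NHWC axis index to its NCHW position (N->0, H->2, W->3, C->1). A
// standalone check of the permutation pair; names are illustrative only.
#include <cassert>
#include <cstddef>
#include <vector>

// output dim i takes input dim perm[i], matching the transpose operator
std::vector<int> apply_perm(const std::vector<int>& dims, const std::vector<int>& perm)
{
    std::vector<int> out(dims.size());
    for(std::size_t i = 0; i < perm.size(); i++)
        out[i] = dims[static_cast<std::size_t>(perm[i])];
    return out;
}

int main()
{
    std::vector<int> nchw{1, 16, 7, 9};         // N, C, H, W
    auto nhwc = apply_perm(nchw, {0, 2, 3, 1}); // -> {1, 7, 9, 16}
    assert((nhwc == std::vector<int>{1, 7, 9, 16}));
    assert(apply_perm(nhwc, {0, 3, 1, 2}) == nchw); // round-trips to NCHW
}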
if(node_name.empty()) + MIGRAPHX_THROW("tf node with no name found"); + result[node_name] = node; + if(node.op() == "Placeholder") + { + input_nodes.push_back(node); + } + } + return result; +} + +static tf_parser::attribute_map get_attributes(const tensorflow::NodeDef& node) +{ + tf_parser::attribute_map result; + for(auto&& attr : node.attr()) + { + result[attr.first] = attr.second; + } + + return result; +} + +static std::vector parse_dims(const tensorflow::TensorShapeProto& s) +{ + std::vector dims; + auto input_dims = s.dim(); + std::transform(input_dims.begin(), + input_dims.end(), + std::back_inserter(dims), + [](const tensorflow::TensorShapeProto_Dim& dim) { return dim.size(); }); + return dims; +} + +template +static std::vector get_data_vals(const google::protobuf::RepeatedField& data, + const size_t& shape_size) +{ + std::vector data_vals(shape_size); + // check if shape has enough data values given existing fields + if(data.size() == 1) + { + std::fill(data_vals.begin(), data_vals.end(), data[0]); + } + else + copy(data.begin(), data.end(), data_vals.begin()); + return data_vals; +} + +template +static literal +create_literal(shape::type_t shape_type, const std::vector& dims, std::vector data) +{ + // assume if explicit value is mentioned in protobuf and dim size <= 1, treat as scalar + if(dims.empty() or (dims.size() == 1 and dims.front() == 1)) + return literal{{shape_type}, data}; + return literal{{shape_type, dims}, data}; +} + +static bool is_valid_op(const tensorflow::NodeDef& node) +{ + std::vector ignored{"NoOp", "Assert"}; + return none_of(ignored, [&](const auto& op) { + const auto& name = get_name(node); + return node.op() == op or contains(name, op); + }); +} + +std::vector tf_parser::find_outputs() const +{ + std::unordered_set inputs; + for(auto&& p : nodes) + { + auto&& node = p.second; + std::copy(node.input().begin(), node.input().end(), std::inserter(inputs, inputs.end())); + } + std::vector outputs; + for(auto&& p : nodes) + { + const auto& name = p.first; + const auto& node = p.second; + if(not is_valid_op(node)) + continue; + // control flow related, ignore this node + if(contains(name, "^")) + continue; + // literals are valid ops, but they are not outputs unless specified + if(node.op() == "Const") + continue; + if(inputs.count(name) == 0) + outputs.push_back(name); + } + return outputs; +} + +void tf_parser::parse_graph(const tensorflow::GraphDef& graph) +{ + nodes = get_nodes(graph, input_nodes); + for(auto&& input : input_nodes) + { + const std::string& name = input.name(); + attribute_map input_attrs = get_attributes(input); + shape::type_t shape_type = parse_type(input_attrs.at("dtype").type()); + std::vector dims = parse_dims(input_attrs.at("shape").shape()); + + if(contains(map_input_dims, name)) + { + dims = map_input_dims.at(name); + } + else + { + if(is_nhwc and dims.size() >= 4) + { + this->reorder_data(dims); + } + std::transform(dims.begin(), dims.end(), dims.begin(), [&](auto dim) { + return static_cast(dim) <= 0 ? 
batch_size : dim; + }); + } + + shape s = shape{shape_type, dims}; + instructions[name] = to_nhwc(mm->add_parameter(name, s)); + } + for(auto&& p : nodes) + { + this->parse_node(p.first); + } + if(mm->size() == 0) + return; + + // Needs to add a ret instruction at the end of + // the program + if(output_node_names.empty()) + { + output_node_names = find_outputs(); + } + + std::vector output_ins; + std::transform(output_node_names.begin(), + output_node_names.end(), + std::back_inserter(output_ins), + [&](auto output_name) { + if(not contains(instructions, output_name)) + MIGRAPHX_THROW("PARSE_TF: output name " + output_name + + " not found in graph!"); + return this->to_nchw(instructions[output_name]); + }); + mm->add_return(output_ins); +} + +void tf_parser::parse_node(const std::string& name) +{ + if(instructions.count(name) == 0) + { + auto&& node = nodes.at(name); + if(not is_valid_op(node)) + return; + std::vector args; + for(auto&& input : node.input()) + { + // control dependencies (signified by ^ before the name) are ignored + if(contains(input, "^")) + continue; + std::string input_name = input; + // if input has trailing `:0` index then remove it + auto multi_out_idx = input.find(':'); + if(multi_out_idx != std::string::npos and input.substr(multi_out_idx + 1) == "0") + { + input_name = input.substr(0, multi_out_idx); + } + if(nodes.count(input_name) > 0) + { + // input was from a node with multiple outputs + if(contains(input_name, ':')) + { + input_name.resize(input.find(':')); + } + else + { + input_name = get_name(nodes.at(input_name)); + } + assert(name != input_name); + this->parse_node(input_name); + args.push_back(instructions.at(input_name)); + } + else + { + args.push_back(instructions.at(input_name)); + } + } + std::vector result; + if(ops.count(node.op()) == 0) + { + result.push_back(mm->add_instruction(op::unknown{node.op()}, args)); + } + else + { + result = ops[node.op()](*this, {get_attributes(node), node.op(), mm}, args); + } + assert(not result.empty()); + // First output has no ":" delimiter + instructions[name] = result.front(); + for(size_t i = 1; i < result.size(); i++) + { + instructions[name + ":" + std::to_string(i)] = result.at(i); + } + } +} + +void tf_parser::parse_from(std::istream& is) +{ + tensorflow::GraphDef graph; + if(graph.ParseFromIstream(&is)) + { + this->parse_graph(graph); + } + else + { + throw std::runtime_error("Failed reading tf file"); + } +} + +void tf_parser::parse_from(const void* data, std::size_t size) +{ + tensorflow::GraphDef graph; + if(graph.ParseFromArray(data, size)) + { + this->parse_graph(graph); + } + else + { + throw std::runtime_error("Failed reading tf buffer array"); + } +} + +shape::type_t tf_parser::parse_type(const tensorflow::DataType t) const +{ + shape::type_t shape_type{}; + switch(t) + { + case tensorflow::DataType::DT_FLOAT: shape_type = shape::float_type; break; + case tensorflow::DataType::DT_DOUBLE: shape_type = shape::double_type; break; + case tensorflow::DataType::DT_INT32: shape_type = shape::int32_type; break; + case tensorflow::DataType::DT_INT16: shape_type = shape::int16_type; break; + case tensorflow::DataType::DT_INT8: shape_type = shape::int8_type; break; + case tensorflow::DataType::DT_INT64: shape_type = shape::int64_type; break; + case tensorflow::DataType::DT_UINT16: shape_type = shape::uint16_type; break; + case tensorflow::DataType::DT_HALF: shape_type = shape::half_type; break; + case tensorflow::DataType::DT_UINT32: shape_type = shape::uint32_type; break; + case 
tensorflow::DataType::DT_UINT64: shape_type = shape::uint64_type; break; + + case tensorflow::DataType::DT_INVALID: + case tensorflow::DataType::DT_UINT8: + case tensorflow::DataType::DT_STRING: + case tensorflow::DataType::DT_COMPLEX64: + case tensorflow::DataType::DT_BOOL: + case tensorflow::DataType::DT_QINT8: + case tensorflow::DataType::DT_QUINT8: + case tensorflow::DataType::DT_QINT32: + case tensorflow::DataType::DT_BFLOAT16: + case tensorflow::DataType::DT_QINT16: + case tensorflow::DataType::DT_QUINT16: + case tensorflow::DataType::DT_COMPLEX128: + case tensorflow::DataType::DT_RESOURCE: + case tensorflow::DataType::DT_VARIANT: + // tf pb should not use these types + case tensorflow::DataType::DT_FLOAT_REF: + case tensorflow::DataType::DT_DOUBLE_REF: + case tensorflow::DataType::DT_INT32_REF: + case tensorflow::DataType::DT_UINT8_REF: + case tensorflow::DataType::DT_INT16_REF: + case tensorflow::DataType::DT_INT8_REF: + case tensorflow::DataType::DT_STRING_REF: + case tensorflow::DataType::DT_COMPLEX64_REF: + case tensorflow::DataType::DT_INT64_REF: + case tensorflow::DataType::DT_BOOL_REF: + case tensorflow::DataType::DT_QINT8_REF: + case tensorflow::DataType::DT_QUINT8_REF: + case tensorflow::DataType::DT_QINT32_REF: + case tensorflow::DataType::DT_BFLOAT16_REF: + case tensorflow::DataType::DT_QINT16_REF: + case tensorflow::DataType::DT_QUINT16_REF: + case tensorflow::DataType::DT_UINT16_REF: + case tensorflow::DataType::DT_COMPLEX128_REF: + case tensorflow::DataType::DT_HALF_REF: + case tensorflow::DataType::DT_RESOURCE_REF: + case tensorflow::DataType::DT_VARIANT_REF: + case tensorflow::DataType::DT_UINT32_REF: + case tensorflow::DataType::DT_UINT64_REF: + case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_: + case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_: break; + } + return shape_type; +} + +literal tf_parser::parse_tensor(const tensorflow::TensorProto& t) const +{ + std::vector dims = parse_dims(t.tensor_shape()); + size_t shape_size = std::accumulate(dims.begin(), dims.end(), 1, std::multiplies()); + if(not t.tensor_content().empty()) // has raw data + { + const std::string& s = t.tensor_content(); + switch(t.dtype()) + { + case tensorflow::DataType::DT_FLOAT: return literal{{shape::float_type, dims}, s.data()}; + case tensorflow::DataType::DT_BOOL: + case tensorflow::DataType::DT_INT8: return literal{{shape::int8_type, dims}, s.data()}; + case tensorflow::DataType::DT_UINT16: + case tensorflow::DataType::DT_INT16: return literal{{shape::int16_type, dims}, s.data()}; + case tensorflow::DataType::DT_INT32: return literal{{shape::int32_type, dims}, s.data()}; + case tensorflow::DataType::DT_INT64: return literal{{shape::int64_type, dims}, s.data()}; + case tensorflow::DataType::DT_HALF: return literal{{shape::half_type, dims}, s.data()}; + case tensorflow::DataType::DT_DOUBLE: return literal{{shape::double_type, dims}, s.data()}; + case tensorflow::DataType::DT_INVALID: + case tensorflow::DataType::DT_UINT8: + case tensorflow::DataType::DT_STRING: + case tensorflow::DataType::DT_UINT32: + case tensorflow::DataType::DT_UINT64: + case tensorflow::DataType::DT_COMPLEX64: + case tensorflow::DataType::DT_COMPLEX128: + case tensorflow::DataType::DT_QINT8: + case tensorflow::DataType::DT_QUINT8: + case tensorflow::DataType::DT_QINT32: + case tensorflow::DataType::DT_BFLOAT16: + case tensorflow::DataType::DT_QINT16: + case tensorflow::DataType::DT_QUINT16: + case tensorflow::DataType::DT_RESOURCE: + case tensorflow::DataType::DT_VARIANT: + case 
tensorflow::DataType::DT_FLOAT_REF: + case tensorflow::DataType::DT_DOUBLE_REF: + case tensorflow::DataType::DT_INT32_REF: + case tensorflow::DataType::DT_UINT8_REF: + case tensorflow::DataType::DT_INT16_REF: + case tensorflow::DataType::DT_INT8_REF: + case tensorflow::DataType::DT_STRING_REF: + case tensorflow::DataType::DT_COMPLEX64_REF: + case tensorflow::DataType::DT_INT64_REF: + case tensorflow::DataType::DT_BOOL_REF: + case tensorflow::DataType::DT_QINT8_REF: + case tensorflow::DataType::DT_QUINT8_REF: + case tensorflow::DataType::DT_QINT32_REF: + case tensorflow::DataType::DT_BFLOAT16_REF: + case tensorflow::DataType::DT_QINT16_REF: + case tensorflow::DataType::DT_QUINT16_REF: + case tensorflow::DataType::DT_UINT16_REF: + case tensorflow::DataType::DT_COMPLEX128_REF: + case tensorflow::DataType::DT_HALF_REF: + case tensorflow::DataType::DT_RESOURCE_REF: + case tensorflow::DataType::DT_VARIANT_REF: + case tensorflow::DataType::DT_UINT32_REF: + case tensorflow::DataType::DT_UINT64_REF: + case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_: + case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_: + throw std::runtime_error(""); + } + MIGRAPHX_THROW("Invalid tensor type"); + } + switch(t.dtype()) + { + case tensorflow::DataType::DT_FLOAT: + return create_literal(shape::float_type, dims, get_data_vals(t.float_val(), shape_size)); + case tensorflow::DataType::DT_INT8: + return create_literal(shape::int8_type, dims, get_data_vals(t.int_val(), shape_size)); + case tensorflow::DataType::DT_UINT16: + return create_literal(shape::uint16_type, dims, get_data_vals(t.int_val(), shape_size)); + case tensorflow::DataType::DT_INT16: + return create_literal(shape::int16_type, dims, get_data_vals(t.int_val(), shape_size)); + case tensorflow::DataType::DT_INT32: + return create_literal(shape::int32_type, dims, get_data_vals(t.int_val(), shape_size)); + case tensorflow::DataType::DT_INT64: + return create_literal(shape::int64_type, dims, get_data_vals(t.int64_val(), shape_size)); + case tensorflow::DataType::DT_BOOL: + return create_literal(shape::int32_type, dims, get_data_vals(t.bool_val(), shape_size)); + case tensorflow::DataType::DT_HALF: { + // each half_val entry carries an fp16 bit pattern in the low 16 bits of an int32 + std::vector<int> data_int32 = get_data_vals(t.half_val(), shape_size); + std::vector<uint16_t> data_uint16(data_int32.begin(), data_int32.end()); + std::vector<half> data_half; + std::transform(data_uint16.begin(), + data_uint16.end(), + std::back_inserter(data_half), + [](uint16_t raw_val) { return *reinterpret_cast<half*>(&raw_val); }); + return create_literal(shape::half_type, dims, data_half); + } + case tensorflow::DataType::DT_DOUBLE: + return literal{{shape::double_type, dims}, get_data_vals(t.double_val(), shape_size)}; + case tensorflow::DataType::DT_INVALID: + case tensorflow::DataType::DT_UINT8: + case tensorflow::DataType::DT_STRING: + case tensorflow::DataType::DT_UINT32: + case tensorflow::DataType::DT_UINT64: + case tensorflow::DataType::DT_COMPLEX64: + case tensorflow::DataType::DT_COMPLEX128: + case tensorflow::DataType::DT_QINT8: + case tensorflow::DataType::DT_QUINT8: + case tensorflow::DataType::DT_QINT32: + case tensorflow::DataType::DT_BFLOAT16: + case tensorflow::DataType::DT_QINT16: + case tensorflow::DataType::DT_QUINT16: + case tensorflow::DataType::DT_RESOURCE: + case tensorflow::DataType::DT_VARIANT: + case tensorflow::DataType::DT_FLOAT_REF: + case tensorflow::DataType::DT_DOUBLE_REF: + case tensorflow::DataType::DT_INT32_REF: + case tensorflow::DataType::DT_UINT8_REF: + case tensorflow::DataType::DT_INT16_REF: + case
tensorflow::DataType::DT_INT8_REF: + case tensorflow::DataType::DT_STRING_REF: + case tensorflow::DataType::DT_COMPLEX64_REF: + case tensorflow::DataType::DT_INT64_REF: + case tensorflow::DataType::DT_BOOL_REF: + case tensorflow::DataType::DT_QINT8_REF: + case tensorflow::DataType::DT_QUINT8_REF: + case tensorflow::DataType::DT_QINT32_REF: + case tensorflow::DataType::DT_BFLOAT16_REF: + case tensorflow::DataType::DT_QINT16_REF: + case tensorflow::DataType::DT_QUINT16_REF: + case tensorflow::DataType::DT_UINT16_REF: + case tensorflow::DataType::DT_COMPLEX128_REF: + case tensorflow::DataType::DT_HALF_REF: + case tensorflow::DataType::DT_RESOURCE_REF: + case tensorflow::DataType::DT_VARIANT_REF: + case tensorflow::DataType::DT_UINT32_REF: + case tensorflow::DataType::DT_UINT64_REF: + case tensorflow::DataType::DataType_INT_MAX_SENTINEL_DO_NOT_USE_: + case tensorflow::DataType::DataType_INT_MIN_SENTINEL_DO_NOT_USE_: throw std::runtime_error(""); + } + MIGRAPHX_THROW("Invalid tensor type"); +} + +} // namespace tf +} // namespace MIGRAPHX_INLINE_NS +} // namespace migraphx diff --git a/docker/rocm/migraphx/tf/types.proto b/docker/rocm/migraphx/tf/types.proto new file mode 100644 index 000000000..03835d1b9 --- /dev/null +++ b/docker/rocm/migraphx/tf/types.proto @@ -0,0 +1,75 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "TypesProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; + +// LINT.IfChange +enum DataType { + // Not a legal value for DataType. Used to indicate a DataType field + // has not been set. + DT_INVALID = 0; + + // Data types that all computation devices are expected to be + // capable to support. + DT_FLOAT = 1; + DT_DOUBLE = 2; + DT_INT32 = 3; + DT_UINT8 = 4; + DT_INT16 = 5; + DT_INT8 = 6; + DT_STRING = 7; + DT_COMPLEX64 = 8; // Single-precision complex + DT_INT64 = 9; + DT_BOOL = 10; + DT_QINT8 = 11; // Quantized int8 + DT_QUINT8 = 12; // Quantized uint8 + DT_QINT32 = 13; // Quantized int32 + DT_BFLOAT16 = 14; // Float32 truncated to 16 bits. Only for cast ops. + DT_QINT16 = 15; // Quantized int16 + DT_QUINT16 = 16; // Quantized uint16 + DT_UINT16 = 17; + DT_COMPLEX128 = 18; // Double-precision complex + DT_HALF = 19; + DT_RESOURCE = 20; + DT_VARIANT = 21; // Arbitrary C++ data types + DT_UINT32 = 22; + DT_UINT64 = 23; + + // Do not use! These are only for parameters. Every enum above + // should have a corresponding value below (verified by types_test). 
+ DT_FLOAT_REF = 101; + DT_DOUBLE_REF = 102; + DT_INT32_REF = 103; + DT_UINT8_REF = 104; + DT_INT16_REF = 105; + DT_INT8_REF = 106; + DT_STRING_REF = 107; + DT_COMPLEX64_REF = 108; + DT_INT64_REF = 109; + DT_BOOL_REF = 110; + DT_QINT8_REF = 111; + DT_QUINT8_REF = 112; + DT_QINT32_REF = 113; + DT_BFLOAT16_REF = 114; + DT_QINT16_REF = 115; + DT_QUINT16_REF = 116; + DT_UINT16_REF = 117; + DT_COMPLEX128_REF = 118; + DT_HALF_REF = 119; + DT_RESOURCE_REF = 120; + DT_VARIANT_REF = 121; + DT_UINT32_REF = 122; + DT_UINT64_REF = 123; +} +// LINT.ThenChange( +// https://www.tensorflow.org/code/tensorflow/c/c_api.h, +// https://www.tensorflow.org/code/tensorflow/go/tensor.go, +// https://www.tensorflow.org/code/tensorflow/core/framework/tensor.cc, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.h, +// https://www.tensorflow.org/code/tensorflow/core/framework/types.cc, +// https://www.tensorflow.org/code/tensorflow/python/framework/dtypes.py, +// https://www.tensorflow.org/code/tensorflow/python/framework/function.py) diff --git a/docker/rocm/migraphx/tf/versions.proto b/docker/rocm/migraphx/tf/versions.proto new file mode 100644 index 000000000..dd2ec5523 --- /dev/null +++ b/docker/rocm/migraphx/tf/versions.proto @@ -0,0 +1,32 @@ +syntax = "proto3"; + +package tensorflow; +option cc_enable_arenas = true; +option java_outer_classname = "VersionsProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.framework"; +option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework"; + +// Version information for a piece of serialized data +// +// There are different types of versions for each type of data +// (GraphDef, etc.), but they all have the same common shape +// described here. +// +// Each consumer has "consumer" and "min_producer" versions (specified +// elsewhere). A consumer is allowed to consume this data if +// +// producer >= min_producer +// consumer >= min_consumer +// consumer not in bad_consumers +// +message VersionDef { + // The version of the code that produced this data. + int32 producer = 1; + + // Any consumer below this version is not allowed to consume this data. + int32 min_consumer = 2; + + // Specific consumer versions which are disallowed (e.g. due to bugs). + repeated int32 bad_consumers = 3; +};
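
Note on the VersionDef message above: its comments describe the compatibility rule (producer >= min_producer, consumer >= min_consumer, consumer not in bad_consumers). The following C++ sketch is illustrative only and is not part of the diff; the names version_def, is_compatible, consumer_version, and consumer_min_producer are hypothetical stand-ins for whatever the consuming code actually uses.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical in-memory mirror of the VersionDef message defined above.
struct version_def
{
    int32_t producer     = 0;            // version of the code that produced the data
    int32_t min_consumer = 0;            // consumers below this version must reject the data
    std::vector<int32_t> bad_consumers;  // specific consumer versions that are disallowed
};

// A consumer may use the data if:
//   producer >= min_producer, consumer >= min_consumer, consumer not in bad_consumers
bool is_compatible(const version_def& data, int32_t consumer_version, int32_t consumer_min_producer)
{
    if(data.producer < consumer_min_producer)
        return false;
    if(consumer_version < data.min_consumer)
        return false;
    return std::find(data.bad_consumers.begin(), data.bad_consumers.end(), consumer_version) ==
           data.bad_consumers.end();
}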