mirror of
https://github.com/blakeblackshear/frigate.git
synced 2026-02-18 17:14:26 +03:00
upload2
This commit is contained in:
parent
2c3d0a980e
commit
931b31452a
105
docker/rocm/migraphx/targets/cpu/CMakeLists.txt
Normal file
105
docker/rocm/migraphx/targets/cpu/CMakeLists.txt
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
#####################################################################################
|
||||||
|
# The MIT License (MIT)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
#####################################################################################
|
||||||
|
|
||||||
|
include(CheckCXXCompilerFlag)
|
||||||
|
|
||||||
|
# CPU target library. Sources are listed explicitly so that adding or removing
# a file shows up in review.
add_library(migraphx_cpu
    allocate.cpp
    allocation_model.cpp
    binary.cpp
    concat.cpp
    convolution.cpp
    copy.cpp
    deconvolution.cpp
    dnnl.cpp
    eltwise.cpp
    erf.cpp
    fmod.cpp
    fuse_ops.cpp
    gather.cpp
    gemm.cpp
    layernorm.cpp
    logsoftmax.cpp
    lowering.cpp
    lrn.cpp
    mod.cpp
    preallocate.cpp
    pooling.cpp
    reduction.cpp
    reorder.cpp
    softmax.cpp
    sub.cpp
    target.cpp
    write_literals.cpp
)

# The target is exported under the short name "cpu" rather than "migraphx_cpu".
set_property(TARGET migraphx_cpu PROPERTY EXPORT_NAME cpu)

# Shared-object versioning follows the project-wide MIGRAPHX_SO_VERSION.
rocm_set_soversion(migraphx_cpu ${MIGRAPHX_SO_VERSION})
|
||||||
|
|
||||||
|
# Backend selection for the CPU target: when MIGRAPHX_ENABLE_ZENDNN is ON the
# library is built against ZenDNN and BLIS located on the search path,
# otherwise against the DNNL CMake package.
set(MIGRAPHX_ENABLE_ZENDNN Off CACHE BOOL "Build the CPU target against ZenDNN instead of DNNL")

if(MIGRAPHX_ENABLE_ZENDNN)
    find_path(ZENDNN_INC_PATH zendnn.hpp)
    find_library(ZENDNN_LIB amdZenDNN)
    find_library(BLIS_LIB blis)
    # Fail right here with a message that names the missing pieces instead of
    # relying on the generic "set to NOTFOUND" error raised at generate time.
    if(NOT ZENDNN_INC_PATH OR NOT ZENDNN_LIB OR NOT BLIS_LIB)
        message(FATAL_ERROR
            "MIGRAPHX_ENABLE_ZENDNN is ON but ZenDNN/BLIS could not be located: "
            "ZENDNN_INC_PATH=${ZENDNN_INC_PATH}, ZENDNN_LIB=${ZENDNN_LIB}, BLIS_LIB=${BLIS_LIB}")
    endif()
else()
    find_package(dnnl REQUIRED)
endif()

rocm_clang_tidy_check(migraphx_cpu)
if(MIGRAPHX_ENABLE_ZENDNN)
    # target_compile_definitions takes the bare macro name; no -D prefix needed.
    target_compile_definitions(migraphx_cpu PRIVATE MIGRAPHX_ENABLE_ZENDNN)
    target_include_directories(migraphx_cpu PRIVATE "${ZENDNN_INC_PATH}")
    message(STATUS "ZENDNN_LIB: ${ZENDNN_LIB}")
    # BLIS is listed before ZenDNN, matching the original link order.
    target_link_libraries(migraphx_cpu PRIVATE "${BLIS_LIB}")
    target_link_libraries(migraphx_cpu PRIVATE "${ZENDNN_LIB}")
else()
    # PUBLIC: consumers of migraphx_cpu also get the DNNL usage requirements.
    target_link_libraries(migraphx_cpu PUBLIC DNNL::dnnl)
endif()
target_link_libraries(migraphx_cpu PRIVATE migraphx)
|
||||||
|
|
||||||
|
# Invokes the project helper migraphx_generate_export_header for this target.
# The helper is defined elsewhere; presumably it produces the header with the
# symbol-export macros for migraphx_cpu -- confirm against its definition.
migraphx_generate_export_header(migraphx_cpu)
|
||||||
|
|
||||||
|
# OpenMP wiring for migraphx_cpu.
#  * Windows: links the runtime by name (libomp) and forwards the include
#    directories and compiler flags reported by FindOpenMP.
#  * Elsewhere: links the imported OpenMP::OpenMP_CXX target and adds the
#    directory of any libomp it resolved to as an rpath.
find_package(OpenMP)
if(WIN32)
    target_link_libraries(migraphx_cpu PUBLIC libomp)
    target_include_directories(migraphx_cpu PUBLIC ${OpenMP_CXX_INCLUDE_DIRS})
    target_compile_options(migraphx_cpu PUBLIC ${OpenMP_CXX_FLAGS})
else()
    # Linking a missing imported target only fails at generate time with an
    # unrelated-looking error; report the real cause up front.
    if(NOT TARGET OpenMP::OpenMP_CXX)
        message(FATAL_ERROR "OpenMP with C++ support is required to build migraphx_cpu")
    endif()
    target_link_libraries(migraphx_cpu PUBLIC OpenMP::OpenMP_CXX)
    # Add library path to rpath to workaround issues with our broken packages
    foreach(LIBRARY ${OpenMP_CXX_LIBRARIES})
        if(LIBRARY MATCHES "libomp")
            # DIRECTORY is the documented replacement for the legacy PATH mode.
            get_filename_component(LIBRARY_PATH "${LIBRARY}" DIRECTORY)
            # Quoted so a directory containing spaces stays a single argument.
            target_link_libraries(migraphx_cpu PUBLIC
                "-Wl,-rpath=${LIBRARY_PATH}"
                "-Wl,-rpath-link=${LIBRARY_PATH}")
        endif()
    endforeach()
endif()
|
||||||
|
|
||||||
|
# Install rules for the CPU target, passing this directory's include/ folder
# as the INCLUDE argument of rocm_install_targets.
rocm_install_targets(
    PRIVATE
    TARGETS migraphx_cpu
    INCLUDE ${CMAKE_CURRENT_SOURCE_DIR}/include
)
|
||||||
|
|
||||||
60
docker/rocm/migraphx/targets/cpu/allocate.cpp
Normal file
60
docker/rocm/migraphx/targets/cpu/allocate.cpp
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/check_shapes.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Operator "cpu::allocate": takes no inputs and yields an argument built from
// a fixed shape. Derives from auto_register_op<cpu_allocate> (defined
// elsewhere; presumably this registers the operator by name).
struct cpu_allocate : auto_register_op<cpu_allocate>
{
    // Shape reported by compute_shape(); exposed to reflection as "shape".
    shape s;

    // Reflection hook: visits the single attribute "shape".
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.s, "shape"));
    }

    std::string name() const { return "cpu::allocate"; }

    // Requires an empty input list and returns the stored shape.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(0);
        return s;
    }

    // Builds the result directly from output_shape; the context and the
    // argument list are not used.
    argument compute(context&, const shape& output_shape, const std::vector<argument>&) const
    {
        return argument{output_shape};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
46
docker/rocm/migraphx/targets/cpu/allocation_model.cpp
Normal file
46
docker/rocm/migraphx/targets/cpu/allocation_model.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/cpu/allocation_model.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Name of the allocation operator; allocate() forwards it to make_op.
std::string cpu_allocation_model::name() const { return "cpu::allocate"; }
|
||||||
|
// Creates the operator returned by name() with its "shape" attribute set to s.
operation cpu_allocation_model::allocate(const shape& s) const
{
    const std::string op_name = name();
    return make_op(op_name, {{"shape", to_value(s)}});
}
|
||||||
|
|
||||||
|
// Creates a "cpu::preallocate" operator carrying the shape s and the
// identifier id as its "shape" and "id" attributes.
operation cpu_allocation_model::preallocate(const shape& s, const std::string& id) const
{
    const std::string op_name = "cpu::preallocate";
    return make_op(op_name, {{"shape", to_value(s)}, {"id", id}});
}
|
||||||
|
|
||||||
|
// Name of the copy operator ("cpu::copy"); presumably resolved by name by the
// caller -- confirm against the users of the allocation model.
std::string cpu_allocation_model::copy() const { return "cpu::copy"; }
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
83
docker/rocm/migraphx/targets/cpu/binary.cpp
Normal file
83
docker/rocm/migraphx/targets/cpu/binary.cpp
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// DNNL-backed binary operator. The concrete algorithm is chosen at runtime by
// the string attribute `algo`, which get_desc() converts with to_dnnl_algo().
struct dnnl_binary : dnnl_op<dnnl_binary, dnnl::binary>
{
    // Algorithm name, reflected as "algo".
    std::string algo;

    // Reflection hook: the base-class attributes followed by "algo".
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack_join(self.reflect_base(self, f), pack(f(self.algo, "algo")));
    }

    // Operator name qualified with the algorithm, e.g. "dnnl::binary::<algo>".
    std::string group() const { return this->name() + "::" + algo; }

    std::string name() const { return "dnnl::binary"; }

    // Computes the output shape from the two operand shapes. The trailing
    // input is dropped first (the original comment calls this compensating
    // for the allocation).
    shape compute_shape(std::vector<shape> inputs) const
    {
        // Compensate for allocation
        inputs.pop_back();
        check_shapes{this->trim_post_op_inputs(inputs), *this}.has(2);

        const auto& lhs = inputs.at(0);
        const auto& rhs = inputs.at(1);
        auto out        = lhs;
        if(lhs != rhs or not lhs.packed())
        {
            if(lhs.packed() != rhs.packed())
            {
                // Exactly one operand is packed: use that one.
                out = lhs.packed() ? lhs : rhs;
            }
            else if(lhs.broadcasted() != rhs.broadcasted())
            {
                // Exactly one operand is broadcasted: derive the result from
                // the other one, using the lengths of the first operand.
                out = lhs.broadcasted() ? rhs.with_lens(lhs.lens()) : lhs.with_lens(lhs.lens());
            }
            else
            {
                // Otherwise build a shape from type and lengths only.
                out = shape{lhs.type(), lhs.lens()};
            }
        }
        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(out, inputs));
        return out;
    }

    // Assembles the dnnl descriptor from the two sources and the destination.
    dnnl::binary::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        const auto kind  = to_dnnl_algo(algo);
        const auto& src0 = m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0));
        const auto& src1 = m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_1));
        const auto& dst  = m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST));
        return {kind, src0, src1, dst};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
67
docker/rocm/migraphx/targets/cpu/concat.cpp
Normal file
67
docker/rocm/migraphx/targets/cpu/concat.cpp
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
#include <migraphx/op/concat.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// DNNL-backed concat operator wrapping op::concat.
struct dnnl_concat : dnnl_extend_op<dnnl_concat, dnnl::concat, op::concat>
{
    // Returns `size` consecutive argument ids starting at ARG_MULTIPLE_SRC,
    // one per input.
    std::vector<int> arg_map(int size) const
    {
        std::vector<int> result(size);
        std::iota(result.begin(), result.end(), MIGRAPHX_DNNL_PREFIX(ARG_MULTIPLE_SRC));
        return result;
    }

    // Custom desc class since its missing in dnnl
    struct desc
    {
        dnnl::memory::desc dst;
        std::size_t axis = 1;
        std::vector<dnnl::memory::desc> srcs;
    };

    // Collects the source descriptors (every map entry except the
    // destination) in argument-id order, together with the destination and
    // the concat axis.
    desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        // Count in a signed type: `m.size() - 1` on an empty map would wrap
        // around to a huge unsigned value, and the loop index is an int that
        // is added to an int argument id.
        const int nsrcs = static_cast<int>(m.size()) - 1;

        std::vector<dnnl::memory::desc> srcs;
        if(nsrcs > 0)
            srcs.reserve(static_cast<std::size_t>(nsrcs));

        for(int i = 0; i < nsrcs; i++)
        {
            srcs.push_back(m.at(MIGRAPHX_DNNL_PREFIX(ARG_MULTIPLE_SRC) + i));
        }
        return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), std::size_t(op.axis), srcs};
    }

    // Builds the concat primitive descriptor on the shared dnnl engine.
    auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const
    {
        return dnnl::concat::primitive_desc(d.dst, d.axis, d.srcs, get_dnnl_context().engine, attr);
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
86
docker/rocm/migraphx/targets/cpu/convolution.cpp
Normal file
86
docker/rocm/migraphx/targets/cpu/convolution.cpp
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/reflect.hpp>
|
||||||
|
#include <migraphx/par_for.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/convolution.hpp>
|
||||||
|
#include <migraphx/op/quant_convolution.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct dnnl_convolution
|
||||||
|
: dnnl_extend_op<dnnl_convolution, dnnl::convolution_forward, op::convolution>
|
||||||
|
{
|
||||||
|
std::vector<int> arg_map(int) const
|
||||||
|
{
|
||||||
|
return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)};
|
||||||
|
}
|
||||||
|
|
||||||
|
shape adjust_shape(const shape& x, int i, const shape& output) const
|
||||||
|
{
|
||||||
|
auto s = base_adjust_shape(x, output);
|
||||||
|
if(i == 1 and op.group > 1)
|
||||||
|
{
|
||||||
|
// TODO: Add support for transposed weights
|
||||||
|
if(not s.standard())
|
||||||
|
MIGRAPHX_THROW("Weights for grouped convolution must be standard");
|
||||||
|
auto lens = s.lens();
|
||||||
|
lens.insert(lens.begin(), op.group);
|
||||||
|
lens.at(1) /= op.group;
|
||||||
|
return shape{s.type(), lens};
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
dnnl::convolution_forward::desc
|
||||||
|
get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
|
||||||
|
{
|
||||||
|
// In DNNL dilation is zero-based
|
||||||
|
auto dilation = op.dilation;
|
||||||
|
std::transform(
|
||||||
|
dilation.begin(), dilation.end(), dilation.begin(), [](auto x) { return x - 1; });
|
||||||
|
auto kdims = op.kdims();
|
||||||
|
std::vector<size_t> padding_l(op.padding.begin(), op.padding.begin() + kdims);
|
||||||
|
std::vector<size_t> padding_r(op.padding.begin() + kdims, op.padding.end());
|
||||||
|
return {dnnl::prop_kind::forward_inference,
|
||||||
|
dnnl::algorithm::convolution_auto,
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)),
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
|
||||||
|
to_dnnl_dims(op.stride),
|
||||||
|
to_dnnl_dims(dilation),
|
||||||
|
to_dnnl_dims(padding_l),
|
||||||
|
to_dnnl_dims(padding_r)};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
65
docker/rocm/migraphx/targets/cpu/copy.cpp
Normal file
65
docker/rocm/migraphx/targets/cpu/copy.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Operator "cpu::copy": copies the first argument element-wise into the last
// argument and returns the last argument.
struct cpu_copy : reduce_dims_base, auto_register_op<cpu_copy>
{
    // No attributes to reflect.
    template <class Self, class F>
    static auto reflect(Self&, F)
    {
        return pack();
    }

    std::string name() const { return "cpu::copy"; }

    // Expects exactly two inputs; the result has the shape of the second.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        return inputs.at(1);
    }

    // Assigns every element of argument 0 to the matching element of the
    // last argument, then returns that argument reshaped to output_shape.
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        const auto dst_index = args.size() - 1;
        argument dst         = get_arg(args, dst_index);

        visit_all(dst, get_arg(args, 0))([&](auto output, auto input) {
            auto assign = [](auto& to, auto from) { to = from; };
            pointwise(output, input)(ctx, output.get_shape(), 1024, assign);
        });

        return dst.reshape(output_shape);
    }

    // Index of the input the output refers to: the last one.
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        const auto last = shapes.size() - 1;
        return last;
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
76
docker/rocm/migraphx/targets/cpu/deconvolution.cpp
Normal file
76
docker/rocm/migraphx/targets/cpu/deconvolution.cpp
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/convolution_backwards.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct dnnl_deconvolution
|
||||||
|
: dnnl_extend_op<dnnl_deconvolution, dnnl::deconvolution_forward, op::convolution_backwards>
|
||||||
|
{
|
||||||
|
std::vector<int> arg_map(int) const
|
||||||
|
{
|
||||||
|
return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)};
|
||||||
|
}
|
||||||
|
|
||||||
|
shape adjust_shape(const shape& x, int i, const shape& output) const
|
||||||
|
{
|
||||||
|
auto s = base_adjust_shape(x, output);
|
||||||
|
if(i == 1)
|
||||||
|
{
|
||||||
|
// The input and output channels are flipped for dnnl
|
||||||
|
auto lens = s.lens();
|
||||||
|
std::swap(lens[0], lens[1]);
|
||||||
|
auto strides = s.strides();
|
||||||
|
std::swap(strides[0], strides[1]);
|
||||||
|
return {s.type(), lens, strides};
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
dnnl::deconvolution_forward::desc
|
||||||
|
get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
|
||||||
|
{
|
||||||
|
// In DNNL dilation is zero-based
|
||||||
|
auto dilation = op.dilation;
|
||||||
|
std::transform(
|
||||||
|
dilation.begin(), dilation.end(), dilation.begin(), [](auto x) { return x - 1; });
|
||||||
|
return {dnnl::prop_kind::forward_inference,
|
||||||
|
dnnl::algorithm::deconvolution_direct,
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)),
|
||||||
|
m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
|
||||||
|
to_dnnl_dims(op.stride),
|
||||||
|
to_dnnl_dims(dilation),
|
||||||
|
to_dnnl_dims(op.padding),
|
||||||
|
to_dnnl_dims(op.padding)};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
205
docker/rocm/migraphx/targets/cpu/dnnl.cpp
Normal file
205
docker/rocm/migraphx/targets/cpu/dnnl.cpp
Normal file
@ -0,0 +1,205 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
|
||||||
|
// std::hash specialization for dnnl::algorithm, compiled only for GCC 5 and
// older (presumably because those releases ship no std::hash for enum types
// -- confirm). The value is hashed through its underlying integer type.
#if defined(__GNUC__) && __GNUC__ <= 5
namespace std {
#ifdef MIGRAPHX_ENABLE_ZENDNN
// With ZenDNN enabled, `dnnl` is made an alias of the zendnn namespace here.
namespace dnnl = zendnn;
#endif
template <>
struct hash<dnnl::algorithm>
{
    using argument_type = dnnl::algorithm;
    using result_type   = std::size_t;
    result_type operator()(const argument_type& x) const noexcept
    {
        using raw_type = underlying_type_t<argument_type>;
        return std::hash<raw_type>{}(static_cast<raw_type>(x));
    }
};

} // namespace std
#endif
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Returns a reference to a single function-local static dnnl_context that is
// constructed on first use.
dnnl_context& get_dnnl_context()
{
    static dnnl_context instance{}; // NOLINT
    return instance;
}
|
||||||
|
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic push
|
||||||
|
#pragma clang diagnostic ignored "-Wswitch-enum"
|
||||||
|
#endif
|
||||||
|
dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t)
|
||||||
|
{
|
||||||
|
using dt = dnnl::memory::data_type;
|
||||||
|
using st = shape::type_t;
|
||||||
|
switch(t)
|
||||||
|
{
|
||||||
|
case st::half_type: return dt::f16;
|
||||||
|
case st::float_type: return dt::f32;
|
||||||
|
case st::int32_type: return dt::s32;
|
||||||
|
case st::int8_type: return dt::s8;
|
||||||
|
case st::uint8_type: return dt::u8;
|
||||||
|
case st::fp8e4m3fnuz_type: MIGRAPHX_THROW("fp8e4m3fnuz unsupported in DNNL");
|
||||||
|
default: MIGRAPHX_THROW("Unsupported data type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Returns the plain dnnl format tag (a, ab, abc, ...) with `n` letters.
// Supports n from 1 to 6; any other value throws via MIGRAPHX_THROW.
dnnl::memory::format_tag to_dnnl_memory_format_tag(std::size_t n)
{
    using tag = dnnl::memory::format_tag;
    // Entry k holds the tag with k + 1 letters.
    const tag by_count[]        = {tag::a, tag::ab, tag::abc, tag::abcd, tag::abcde, tag::abcdef};
    const std::size_t max_count = sizeof(by_count) / sizeof(by_count[0]);
    if(n < 1 or n > max_count)
        MIGRAPHX_THROW("Unsupported tensor size: " + std::to_string(n));
    return by_count[n - 1];
}
|
||||||
|
|
||||||
|
// Builds a dnnl memory descriptor from a shape: its lengths, its element
// type and its strides.
dnnl::memory::desc to_dnnl_memory_desc(const shape& s)
{
    const auto dims      = to_dnnl_dims(s.lens());
    const auto data_type = to_dnnl_memory_data_type(s.type());
    const auto strides   = to_dnnl_dims(s.strides());
    return {dims, data_type, strides};
}
|
||||||
|
|
||||||
|
// Creates a dnnl memory object on the shared context's engine from the given
// descriptor and the pointer returned by a.data().
dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a)
{
    auto& engine = get_dnnl_context().engine;
    return {desc, engine, a.data()};
}
|
||||||
|
|
||||||
|
// Convenience overload: derives the memory descriptor from the argument's own
// shape and forwards to the two-parameter overload.
dnnl::memory to_dnnl_memory(const argument& a)
{
    const auto desc = to_dnnl_memory_desc(a.get_shape());
    return to_dnnl_memory(desc, a);
}
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
// X-macro: applies the caller-supplied macro `m` once to each algorithm name
// listed below. It is expanded with a per-entry generator to build the
// name -> dnnl::algorithm table and the dnnl::algorithm -> name table, so each
// name must be an enumerator of dnnl::algorithm.
#define MIGRAPHX_VISIT_DNNL_ALGO(m) \
|
||||||
|
m(undef) \
|
||||||
|
m(convolution_auto) \
|
||||||
|
m(convolution_direct) \
|
||||||
|
m(convolution_winograd) \
|
||||||
|
m(deconvolution_direct) \
|
||||||
|
m(deconvolution_winograd) \
|
||||||
|
m(eltwise_relu) \
|
||||||
|
m(eltwise_tanh) \
|
||||||
|
m(eltwise_elu) \
|
||||||
|
m(eltwise_square) \
|
||||||
|
m(eltwise_abs) \
|
||||||
|
m(eltwise_sqrt) \
|
||||||
|
m(eltwise_swish) \
|
||||||
|
m(eltwise_linear) \
|
||||||
|
m(eltwise_bounded_relu) \
|
||||||
|
m(eltwise_soft_relu) \
|
||||||
|
m(eltwise_logistic) \
|
||||||
|
m(eltwise_exp) \
|
||||||
|
m(eltwise_gelu) \
|
||||||
|
m(eltwise_gelu_tanh) \
|
||||||
|
m(eltwise_gelu_erf) \
|
||||||
|
m(eltwise_log) \
|
||||||
|
m(eltwise_clip) \
|
||||||
|
m(eltwise_pow) \
|
||||||
|
m(eltwise_round) \
|
||||||
|
m(eltwise_relu_use_dst_for_bwd) \
|
||||||
|
m(eltwise_tanh_use_dst_for_bwd) \
|
||||||
|
m(eltwise_elu_use_dst_for_bwd) \
|
||||||
|
m(eltwise_sqrt_use_dst_for_bwd) \
|
||||||
|
m(eltwise_logistic_use_dst_for_bwd) \
|
||||||
|
m(eltwise_exp_use_dst_for_bwd) \
|
||||||
|
m(lrn_across_channels) \
|
||||||
|
m(lrn_within_channel) \
|
||||||
|
m(pooling_max) \
|
||||||
|
m(pooling_avg) \
|
||||||
|
m(pooling_avg_include_padding) \
|
||||||
|
m(pooling_avg_exclude_padding) \
|
||||||
|
m(vanilla_rnn) \
|
||||||
|
m(vanilla_lstm) \
|
||||||
|
m(vanilla_gru) \
|
||||||
|
m(lbr_gru) \
|
||||||
|
m(binary_add) \
|
||||||
|
m(binary_mul) \
|
||||||
|
m(binary_max) \
|
||||||
|
m(binary_min) \
|
||||||
|
m(binary_div) \
|
||||||
|
m(resampling_nearest) \
|
||||||
|
m(resampling_linear) \
|
||||||
|
m(reduction_max) \
|
||||||
|
m(reduction_min) \
|
||||||
|
m(reduction_sum) \
|
||||||
|
m(reduction_mul) \
|
||||||
|
m(reduction_mean) \
|
||||||
|
m(reduction_norm_lp_max) \
|
||||||
|
m(reduction_norm_lp_sum) \
|
||||||
|
m(reduction_norm_lp_power_p_max) \
|
||||||
|
m(reduction_norm_lp_power_p_sum)
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
const std::unordered_map<std::string, dnnl::algorithm>& dnnl_algo_map()
|
||||||
|
{
|
||||||
|
static const std::unordered_map<std::string, dnnl::algorithm> m = {
|
||||||
|
#define MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR(x) {#x, dnnl::algorithm::x},
|
||||||
|
MIGRAPHX_VISIT_DNNL_ALGO(MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR)
|
||||||
|
#undef MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR
|
||||||
|
};
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve an algorithm name (e.g. "eltwise_relu") to its dnnl::algorithm
// enumerator.
//
// Throws (via MIGRAPHX_THROW) with "Missing dnnl algo: <name>" when the name is
// not present in dnnl_algo_map().
dnnl::algorithm to_dnnl_algo(const std::string& name)
{
    // One find() replaces the former count() + at() pair, which hashed the
    // key twice and called the map accessor twice.
    const auto& algos = dnnl_algo_map();
    const auto found  = algos.find(name);
    if(found == algos.end())
        MIGRAPHX_THROW("Missing dnnl algo: " + name);
    return found->second;
}
|
||||||
|
|
||||||
|
const std::unordered_map<dnnl::algorithm, std::string>& dnnl_algo_string_map()
|
||||||
|
{
|
||||||
|
static const std::unordered_map<dnnl::algorithm, std::string> m = {
|
||||||
|
#define MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR(x) {dnnl::algorithm::x, #x},
|
||||||
|
MIGRAPHX_VISIT_DNNL_ALGO(MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR)
|
||||||
|
#undef MIGRAPHX_DNNL_ALGO_GENERATE_VISITOR
|
||||||
|
};
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the textual name of a dnnl::algorithm value.
//
// Values that are not present in dnnl_algo_string_map() are rendered as
// "unknown_<integer value>" instead of raising an error.
std::string to_string(const dnnl::algorithm& algo)
{
    // One find() replaces the former count() + at() pair, which hashed the
    // key twice and called the map accessor twice.
    const auto& names = dnnl_algo_string_map();
    const auto found  = names.find(algo);
    if(found == names.end())
        return "unknown_" + std::to_string(static_cast<int>(algo));
    return found->second;
}
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
73
docker/rocm/migraphx/targets/cpu/eltwise.cpp
Normal file
73
docker/rocm/migraphx/targets/cpu/eltwise.cpp
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// DNNL elementwise operator. `algo` names the dnnl eltwise algorithm and
// `alpha` / `beta` are forwarded unchanged to the DNNL primitive descriptor.
struct dnnl_eltwise : dnnl_op<dnnl_eltwise, dnnl::eltwise_forward>
{
    std::string algo;
    float alpha = 0;
    float beta  = 0;

    // Expose the base-class fields followed by algo/alpha/beta for reflection.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack_join(self.reflect_base(self, f),
                         pack(f(self.algo, "algo"), f(self.alpha, "alpha"), f(self.beta, "beta")));
    }

    std::string name() const { return "dnnl::eltwise"; }

    // Group key is the operator name qualified by the selected algorithm.
    std::string group() const { return this->name() + "::" + algo; }

    // Validate the inputs and compute the output shape.
    shape compute_shape(std::vector<shape> inputs) const
    {
        // The last input is the output allocation, so it is not an operand.
        inputs.pop_back();
        check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1).packed();

        shape result = inputs.at(0);
        if(not result.packed())
            result = shape{result.type(), result.lens()};

        // Building the primitive here makes sure an algo is available.
        this->get_primitive(this->to_memory_desc(result, inputs));
        return result;
    }

    // Describe a forward-inference eltwise primitive on the first source.
    dnnl::eltwise_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        // Resolve the algorithm before the source lookup, as the original
        // braced initializer did, so a failure surfaces in the same order.
        const auto algorithm = to_dnnl_algo(algo);
        const auto& src      = m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0));
        return dnnl::eltwise_forward::desc(
            dnnl::prop_kind::forward_inference, algorithm, src, alpha, beta);
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
36
docker/rocm/migraphx/targets/cpu/erf.cpp
Normal file
36
docker/rocm/migraphx/targets/cpu/erf.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
#include <migraphx/op/erf.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Explicit instantiation of the cpu_unary template for op::erf, which forces
// its members to be generated in this translation unit.
// NOTE(review): presumably this is what makes the CPU erf operator available
// (registration would live in cpu/pointwise.hpp) — confirm there.
template struct cpu_unary<op::erf>;
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
36
docker/rocm/migraphx/targets/cpu/fmod.cpp
Normal file
36
docker/rocm/migraphx/targets/cpu/fmod.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
#include <migraphx/op/fmod.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Explicit instantiation of the cpu_binary template for op::fmod, which forces
// its members to be generated in this translation unit.
// NOTE(review): presumably this is what makes the CPU fmod operator available
// (registration would live in cpu/pointwise.hpp) — confirm there.
template struct cpu_binary<op::fmod>;
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
134
docker/rocm/migraphx/targets/cpu/fuse_ops.cpp
Normal file
134
docker/rocm/migraphx/targets/cpu/fuse_ops.cpp
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/cpu/fuse_ops.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/operation.hpp>
|
||||||
|
#include <migraphx/value.hpp>
|
||||||
|
#include <migraphx/matcher.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/env.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/dead_code_elimination.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Environment flag queried with enabled(...) in find_post_ops::matcher(): when
// it is enabled, the broader eltwise/binary post-op matcher is used instead of
// the restricted "eltwise after binary" matcher.
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_DNNL_POST_OPS_WORKAROUND);
|
||||||
|
|
||||||
|
// Matches instructions whose operator value has a "post_ops" entry.
MIGRAPHX_PRED_MATCHER(has_post_ops, instruction_ref ins)
{
    return ins->get_operator().to_value().contains("post_ops");
}
|
||||||
|
|
||||||
|
// Matches instructions whose operator value has a "post_ops" entry that is
// currently empty.
MIGRAPHX_PRED_MATCHER(without_post_ops, instruction_ref ins)
{
    auto op_value = ins->get_operator().to_value();
    if(not op_value.contains("post_ops"))
        return false;
    return op_value["post_ops"].empty();
}
|
||||||
|
|
||||||
|
bool workaround_dnnl_broken_post_ops(const operation& op, const operation& post_op)
|
||||||
|
{
|
||||||
|
if(contains({"dnnl::dot", "dnnl::convolution"}, op.name()))
|
||||||
|
return true;
|
||||||
|
auto pv = post_op.to_value();
|
||||||
|
if(not pv.at("post_ops").empty())
|
||||||
|
return true;
|
||||||
|
auto v = op.to_value();
|
||||||
|
auto last_op = v.at("post_ops").empty() ? v : v.at("post_ops").back();
|
||||||
|
auto algo = last_op.contains("algo") ? last_op.at("algo").to<std::string>() : op.name();
|
||||||
|
auto post_algo = pv["algo"].to<std::string>();
|
||||||
|
if(starts_with(algo, "eltwise") and starts_with(post_algo, "eltwise"))
|
||||||
|
return true;
|
||||||
|
if(algo == post_algo)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a copy of `op` whose "post_ops" list is extended with `post_op`
// itself (algo, alpha, beta; the latter two default to 0 when unset) followed
// by every post op that `post_op` already carried.
operation merge_post_ops(const operation& op, const operation& post_op)
{
    auto appended = post_op.to_value();
    auto merged   = op.to_value();

    merged["post_ops"].push_back({{"algo", appended["algo"]},
                                  {"alpha", appended["alpha"].value_or(0.0f)},
                                  {"beta", appended["beta"].value_or(0.0f)}});

    auto inherited = appended.at("post_ops");
    for(const auto& entry : inherited)
    {
        merged["post_ops"].push_back(entry);
    }
    return make_op(op.name(), merged);
}
|
||||||
|
|
||||||
|
struct find_post_ops
|
||||||
|
{
|
||||||
|
context* ctx = nullptr;
|
||||||
|
match::any_matcher matcher() const
|
||||||
|
{
|
||||||
|
if(enabled(MIGRAPHX_DISABLE_DNNL_POST_OPS_WORKAROUND{}))
|
||||||
|
return match::name("dnnl::eltwise",
|
||||||
|
"dnnl::binary")(match::arg(0)(has_post_ops(), match::used_once()));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto dnnl_binary = match::name("dnnl::binary")(without_post_ops(), match::used_once());
|
||||||
|
return match::name("dnnl::eltwise")(without_post_ops(), match::arg(0)(dnnl_binary));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply(module& m, const match::matcher_result& r) const
|
||||||
|
{
|
||||||
|
auto ins = r.result;
|
||||||
|
auto x_ins = ins->inputs().front();
|
||||||
|
auto x = x_ins->get_operator();
|
||||||
|
|
||||||
|
if(workaround_dnnl_broken_post_ops(x, ins->get_operator()))
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto op = merge_post_ops(x, ins->get_operator());
|
||||||
|
auto inputs = x_ins->inputs();
|
||||||
|
inputs.back() = ins->inputs().back();
|
||||||
|
if(ins->name() == "dnnl::binary")
|
||||||
|
inputs.insert(std::prev(inputs.end()), ins->inputs().at(1));
|
||||||
|
auto input_shapes = to_shapes(inputs);
|
||||||
|
auto new_shape = try_compute_shape(op, input_shapes);
|
||||||
|
if(new_shape.empty() or new_shape.front() != ins->get_shape())
|
||||||
|
return;
|
||||||
|
auto info = compile(op, *ctx, new_shape.front(), input_shapes);
|
||||||
|
if(info.contains("impl") and starts_with(info.at("impl").to<std::string>(), "ref:"))
|
||||||
|
return;
|
||||||
|
m.replace_instruction(ins, op, inputs);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Run the post-op fusion matcher followed by dead code elimination, repeated
// for a fixed number of rounds.
void fuse_ops::apply(module& m) const
{
    constexpr std::size_t rounds = 4;
    for(std::size_t round = 0; round < rounds; ++round)
    {
        match::find_matches(m, find_post_ops{ctx});
        dead_code_elimination{}.apply(m);
    }
}
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
88
docker/rocm/migraphx/targets/cpu/gather.cpp
Normal file
88
docker/rocm/migraphx/targets/cpu/gather.cpp
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/op/gather.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// CPU implementation of gather. It wraps op::gather and writes the result into
// the trailing (pre-allocated) argument.
struct cpu_gather : auto_register_op<cpu_gather>
{
    op::gather op;

    // Reflection is delegated to the wrapped operator.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::" + op.name(); }

    shape compute_shape(std::vector<shape> inputs) const
    {
        // The last input is the output allocation, so it is not an operand.
        inputs.pop_back();
        check_shapes(inputs, *this).standard();
        return migraphx::compute_shape(op, inputs);
    }

    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
        const std::size_t total = output_shape.elements();

        // Iteration space: the data lens with the gather axis replaced by the
        // number of indices, so the coordinate along that axis is a flat
        // position in the index tensor.
        auto space_lens      = args[0].get_shape().lens();
        const auto axis_len  = space_lens[op.axis];
        space_lens[op.axis]  = args[1].get_shape().elements();
        const shape index_space{output_shape.type(), space_lens};

        visit_all(args.back(), args[0])([&](auto result, auto data) {
            args[1].visit([&](auto index_view) {
                const auto* index_data = index_view.data();
                auto* result_data      = result.data();
                ctx.bulk_execute(total, 1024, [=](auto first, auto last) {
                    for(auto pos = first; pos < last; pos++)
                    {
                        auto coord  = index_space.multi(pos);
                        auto picked = index_data[coord[op.axis]];
                        // Negative indices count from the end of the axis.
                        if(picked < 0)
                            picked = picked + axis_len;
                        coord[op.axis]   = picked;
                        result_data[pos] = data(coord.begin(), coord.end());
                    }
                });
            });
        });

        return args.back();
    }

    // The output aliases the last input (the allocation).
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
        return shapes.size() - 1;
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
62
docker/rocm/migraphx/targets/cpu/gemm.cpp
Normal file
62
docker/rocm/migraphx/targets/cpu/gemm.cpp
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/reflect.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/dot.hpp>
|
||||||
|
#include <migraphx/op/quant_dot.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// DNNL matmul-backed implementation of op::dot.
struct dnnl_gemm : dnnl_extend_op<dnnl_gemm, dnnl::matmul, op::dot>
{
    // DNNL argument ids, listed in order: source, weights, bias.
    std::vector<int> arg_map(int) const
    {
        const std::vector<int> slots = {MIGRAPHX_DNNL_PREFIX(ARG_SRC),
                                        MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS),
                                        MIGRAPHX_DNNL_PREFIX(ARG_BIAS)};
        return slots;
    }

    // Extra shape requirement: inputs must not be broadcasted.
    template <class T>
    void required(const check_shapes<T>& cs) const
    {
        cs.not_broadcasted();
    }

    // Build the matmul descriptor from the source, weights and destination
    // memory descriptors (looked up in that order).
    dnnl::matmul::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        const auto& src     = m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC));
        const auto& weights = m.at(MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS));
        const auto& dst     = m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST));
        return dnnl::matmul::desc(src, weights, dst);
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_ALLOCATION_MODEL_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_ALLOCATION_MODEL_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/operation.hpp>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Allocation model for the CPU target. Apart from needs_out_params(), the
// members are only declared here; their definitions are not in this header.
struct cpu_allocation_model
|
||||||
|
{
|
||||||
|
// NOTE(review): presumably the name of the allocation operator — confirm
// against the definition.
std::string name() const;
|
||||||
|
// NOTE(review): presumably the name of the copy operator — confirm against
// the definition.
std::string copy() const;
|
||||||
|
// Returns an operation for shape `s` (declared only; see the definition).
operation allocate(const shape& s) const;
|
||||||
|
// Returns an operation for shape `s` and identifier `id` (declared only; see
// the definition).
operation preallocate(const shape& s, const std::string& id) const;
|
||||||
|
// Always false for this model.
bool needs_out_params() const { return false; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/cpu/parallel.hpp>
|
||||||
|
#include <migraphx/par_for.hpp>
|
||||||
|
#include <migraphx/cpu/export.h>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Execution context of the CPU target.
struct context
|
||||||
|
{
|
||||||
|
// No-op: this context has nothing to wait for.
void finish() const {}
|
||||||
|
|
||||||
|
// Forward `n`, `min_grain` and `f` to cpu::parallel_for.
// NOTE(review): presumably `f` is invoked as f(start, end) over sub-ranges of
// [0, n) with at least `min_grain` items each — confirm in cpu/parallel.hpp.
template <class F>
|
||||||
|
void bulk_execute(std::size_t n, std::size_t min_grain, F f)
|
||||||
|
{
|
||||||
|
cpu::parallel_for(n, min_grain, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same as above with a default minimum grain of 256.
template <class F>
|
||||||
|
void bulk_execute(std::size_t n, F f)
|
||||||
|
{
|
||||||
|
this->bulk_execute(n, 256, f);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
441
docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/dnnl.hpp
Normal file
441
docker/rocm/migraphx/targets/cpu/include/migraphx/cpu/dnnl.hpp
Normal file
@ -0,0 +1,441 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_DNNL_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AMDMIGRAPHX_DNNL_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/reflect.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/check_shapes.hpp>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <migraphx/errors.hpp>
|
||||||
|
#include <migraphx/assert.hpp>
|
||||||
|
#ifdef MIGRAPHX_ENABLE_ZENDNN
|
||||||
|
#include <zendnn.hpp>
|
||||||
|
#else
|
||||||
|
#include <dnnl.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_ENABLE_ZENDNN
|
||||||
|
namespace dnnl = zendnn;
|
||||||
|
#define MIGRAPHX_CONCAT_PREFIX(b) ZENDNN_##b // NOLINT
|
||||||
|
#else
|
||||||
|
#define MIGRAPHX_CONCAT_PREFIX(b) DNNL_##b // NOLINT
|
||||||
|
#endif
|
||||||
|
#define MIGRAPHX_DNNL_PREFIX(b) MIGRAPHX_CONCAT_PREFIX(b) // NOLINT
|
||||||
|
|
||||||
|
// Bundles the DNNL engine and the stream created on that engine.
struct dnnl_context
|
||||||
|
{
|
||||||
|
// Declared before `stream` because `stream` is constructed from it.
dnnl::engine engine;
|
||||||
|
dnnl::stream stream;
|
||||||
|
// Create a CPU engine with index 0 and a stream bound to that engine.
dnnl_context() : engine(dnnl::engine::kind::cpu, 0), stream(engine) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
dnnl_context& get_dnnl_context();
|
||||||
|
|
||||||
|
dnnl::memory::data_type to_dnnl_memory_data_type(shape::type_t t);
|
||||||
|
|
||||||
|
dnnl::memory::format_tag to_dnnl_memory_format_tag(std::size_t n);
|
||||||
|
|
||||||
|
template <class R>
|
||||||
|
inline dnnl::memory::dims to_dnnl_dims(R&& r)
|
||||||
|
{
|
||||||
|
return {r.begin(), r.end()};
|
||||||
|
}
|
||||||
|
|
||||||
|
dnnl::memory::desc to_dnnl_memory_desc(const shape& s);
|
||||||
|
|
||||||
|
dnnl::memory to_dnnl_memory(const dnnl::memory::desc& desc, const argument& a);
|
||||||
|
|
||||||
|
dnnl::memory to_dnnl_memory(const argument& a);
|
||||||
|
|
||||||
|
dnnl::algorithm to_dnnl_algo(const std::string& name);
|
||||||
|
|
||||||
|
std::string to_string(const dnnl::algorithm& algo);
|
||||||
|
|
||||||
|
/// Description of one operation fused after a dnnl primitive: the algorithm
/// name plus its alpha/beta parameters.
struct post_op : reflect_equality<post_op>, reflect_stream<post_op>
{
    // Algorithm name; dnnl_op inspects it for the substrings "binary",
    // "binary_add" and "eltwise".
    std::string algo;
    // Passed to append_eltwise for eltwise post ops.
    float alpha = 0;
    float beta  = 0;

    /// Exposes the three fields to the reflection machinery under the names
    /// "algo", "alpha" and "beta".
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.algo, "algo"),
                    f(self.alpha, "alpha"),
                    f(self.beta, "beta"));
    }
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
struct execute_wrapper
|
||||||
|
{
|
||||||
|
F f;
|
||||||
|
argument operator()(context&, const std::vector<argument>& args) const { return f(args); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
execute_wrapper<F> make_execute_wrapper(F f)
|
||||||
|
{
|
||||||
|
return {std::move(f)};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Derived, class Primitive>
|
||||||
|
struct dnnl_op : auto_register_op<Derived>
|
||||||
|
{
|
||||||
|
std::vector<post_op> post_ops;
|
||||||
|
std::function<argument(context& ctx, const std::vector<argument>& args)> execute;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect_base(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack(f(self.post_ops, "post_ops"));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return reflect_base(self, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string group() const
|
||||||
|
{
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
return self.name();
|
||||||
|
}
|
||||||
|
|
||||||
|
value attributes() const
|
||||||
|
{
|
||||||
|
std::vector<std::string> names;
|
||||||
|
std::transform(post_ops.begin(), post_ops.end(), std::back_inserter(names), [](auto&& op) {
|
||||||
|
return op.algo;
|
||||||
|
});
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
auto g = self.group();
|
||||||
|
if(not names.empty())
|
||||||
|
g += "<" + join_strings(names, ",") + ">";
|
||||||
|
return {{"group", g}};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t get_extra_post_op_args() const
|
||||||
|
{
|
||||||
|
return std::count_if(post_ops.begin(), post_ops.end(), [](const auto& po) {
|
||||||
|
return contains(po.algo, "binary");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::size_t get_binary_post_op_arg(std::size_t pos)
|
||||||
|
{
|
||||||
|
return MIGRAPHX_DNNL_PREFIX(ARG_ATTR_MULTIPLE_POST_OP)(pos) | // NOLINT
|
||||||
|
MIGRAPHX_DNNL_PREFIX(ARG_SRC_1); // NOLINT
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<shape> to_shapes(const std::vector<argument>& args)
|
||||||
|
{
|
||||||
|
std::vector<shape> shapes(args.size());
|
||||||
|
std::transform(args.begin(), args.end(), shapes.begin(), [](const argument& a) {
|
||||||
|
return a.get_shape();
|
||||||
|
});
|
||||||
|
return shapes;
|
||||||
|
}
|
||||||
|
static std::string impl(const Primitive& prim)
|
||||||
|
{
|
||||||
|
auto desc = prim.get_primitive_desc();
|
||||||
|
const char* str = nullptr;
|
||||||
|
#ifdef MIGRAPHX_ENABLE_ZENDNN
|
||||||
|
zendnn_primitive_desc_query(
|
||||||
|
desc, zendnn_query_impl_info_str, 0, reinterpret_cast<void*>(&str));
|
||||||
|
#else
|
||||||
|
dnnl_primitive_desc_query(desc, dnnl_query_impl_info_str, 0, reinterpret_cast<void*>(&str));
|
||||||
|
#endif
|
||||||
|
return str == nullptr ? "" : str;
|
||||||
|
}
|
||||||
|
// Map arg index to arg in dnnl
|
||||||
|
std::vector<int> arg_map(int size) const
|
||||||
|
{
|
||||||
|
std::vector<int> result(size);
|
||||||
|
std::iota(result.begin(), result.end(), MIGRAPHX_DNNL_PREFIX(ARG_SRC_0));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
shape base_adjust_shape(const shape& s, const shape& output) const
|
||||||
|
{
|
||||||
|
if(s.broadcasted())
|
||||||
|
{
|
||||||
|
auto lens = s.lens();
|
||||||
|
auto strides = s.strides();
|
||||||
|
std::transform(strides.begin(),
|
||||||
|
strides.end(),
|
||||||
|
lens.begin(),
|
||||||
|
lens.begin(),
|
||||||
|
[](auto stride, auto len) -> std::size_t {
|
||||||
|
if(stride == 0)
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return len;
|
||||||
|
});
|
||||||
|
// Use the permutation of the output
|
||||||
|
return output.with_lens(s.type(), lens);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
template <class F>
|
||||||
|
void for_each_post_op(F f) const
|
||||||
|
{
|
||||||
|
int i = 0;
|
||||||
|
for(auto&& op : post_ops)
|
||||||
|
{
|
||||||
|
if(contains(op.algo, "binary"))
|
||||||
|
{
|
||||||
|
f(op, get_binary_post_op_arg(i));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
f(op, -1);
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shape adjust_shape(const shape& s, int, const shape& output) const
|
||||||
|
{
|
||||||
|
return base_adjust_shape(s, output);
|
||||||
|
}
|
||||||
|
std::vector<int> create_arg_map(std::size_t input_size) const
|
||||||
|
{
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
auto npost_ops = get_extra_post_op_args();
|
||||||
|
auto prim_input_size = input_size - npost_ops;
|
||||||
|
auto m = self.arg_map(prim_input_size);
|
||||||
|
for_each_post_op([&](auto&&, auto arg) {
|
||||||
|
if(arg < 0)
|
||||||
|
return;
|
||||||
|
m.push_back(arg);
|
||||||
|
});
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
std::unordered_map<int, dnnl::memory::desc>
|
||||||
|
to_memory_desc(const shape& output_shape, const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
std::unordered_map<int, dnnl::memory::desc> result;
|
||||||
|
result[MIGRAPHX_DNNL_PREFIX(ARG_DST)] =
|
||||||
|
to_dnnl_memory_desc(self.adjust_shape(output_shape, inputs.size(), output_shape));
|
||||||
|
auto m = create_arg_map(inputs.size());
|
||||||
|
assert(m.size() >= inputs.size());
|
||||||
|
for(int i = 0; i < inputs.size(); i++)
|
||||||
|
{
|
||||||
|
result[m[i]] = to_dnnl_memory_desc(self.adjust_shape(inputs[i], i, output_shape));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
dnnl::primitive_attr
|
||||||
|
get_primitive_attr(const std::unordered_map<int, dnnl::memory::desc>& m) const
|
||||||
|
{
|
||||||
|
dnnl::primitive_attr result;
|
||||||
|
dnnl::post_ops po;
|
||||||
|
for_each_post_op([&](auto&& op, auto arg) {
|
||||||
|
if(contains(op.algo, "binary_add"))
|
||||||
|
{
|
||||||
|
auto desc = m.at(arg);
|
||||||
|
if(desc == m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)))
|
||||||
|
po.append_sum(1.0f);
|
||||||
|
else
|
||||||
|
po.append_binary(to_dnnl_algo(op.algo), m.at(arg));
|
||||||
|
}
|
||||||
|
else if(contains(op.algo, "binary"))
|
||||||
|
{
|
||||||
|
po.append_binary(to_dnnl_algo(op.algo), m.at(arg));
|
||||||
|
}
|
||||||
|
else if(contains(op.algo, "eltwise"))
|
||||||
|
po.append_eltwise(1.0f, to_dnnl_algo(op.algo), op.alpha, op.beta);
|
||||||
|
else
|
||||||
|
MIGRAPHX_THROW("Unknown post op algo: " + op.algo);
|
||||||
|
});
|
||||||
|
result.set_post_ops(po);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
template <class T>
|
||||||
|
auto get_primitive_desc(const T& desc, const dnnl::primitive_attr& attr) const
|
||||||
|
-> decltype(typename Primitive::primitive_desc(desc, attr, get_dnnl_context().engine))
|
||||||
|
{
|
||||||
|
return typename Primitive::primitive_desc(desc, attr, get_dnnl_context().engine);
|
||||||
|
}
|
||||||
|
Primitive get_primitive(const std::unordered_map<int, dnnl::memory::desc>& m) const
|
||||||
|
{
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
auto desc = self.get_desc(m);
|
||||||
|
auto attr = MIGRAPHX_ASSERT_NO_THROW(this->get_primitive_attr(m));
|
||||||
|
auto pd = self.get_primitive_desc(desc, attr);
|
||||||
|
return Primitive(pd);
|
||||||
|
}
|
||||||
|
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
return execute(ctx, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
|
||||||
|
{
|
||||||
|
return shapes.size() - 1;
|
||||||
|
}
|
||||||
|
value compile(context&, const shape& output_shape, std::vector<shape> inputs)
|
||||||
|
{
|
||||||
|
// Compensate for allocation
|
||||||
|
inputs.pop_back();
|
||||||
|
auto md = to_memory_desc(output_shape, inputs);
|
||||||
|
auto prim = get_primitive(md);
|
||||||
|
auto impl_name = impl(prim);
|
||||||
|
return {{"impl", impl_name}};
|
||||||
|
}
|
||||||
|
|
||||||
|
void finalize(context&, const shape& output_shape, std::vector<shape> inputs)
|
||||||
|
{
|
||||||
|
// Compensate for allocation
|
||||||
|
inputs.pop_back();
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
auto name = self.name();
|
||||||
|
auto md = to_memory_desc(output_shape, inputs);
|
||||||
|
auto prim = get_primitive(md);
|
||||||
|
auto arg_lookup = create_arg_map(inputs.size());
|
||||||
|
#ifndef NDEBUG
|
||||||
|
auto prim_attr = get_primitive_attr(md);
|
||||||
|
#endif
|
||||||
|
execute = make_execute_wrapper([=](const std::vector<argument>& args) {
|
||||||
|
#ifndef NDEBUG
|
||||||
|
// Check that the memory descriptors have not changed
|
||||||
|
auto debug_args = args;
|
||||||
|
debug_args.pop_back();
|
||||||
|
auto debug_md = to_memory_desc(output_shape, to_shapes(debug_args));
|
||||||
|
for(auto&& p : debug_md)
|
||||||
|
{
|
||||||
|
if(md.count(p.first) == 0)
|
||||||
|
MIGRAPHX_THROW(name +
|
||||||
|
": Missing memory descriptor for: " + std::to_string(p.first));
|
||||||
|
if(p.second == md.at(p.first))
|
||||||
|
continue;
|
||||||
|
MIGRAPHX_THROW(name +
|
||||||
|
": Memory descriptor has changed for: " + std::to_string(p.first));
|
||||||
|
}
|
||||||
|
// Check post_ops args are correct
|
||||||
|
auto pos = prim_attr.get_post_ops();
|
||||||
|
auto prim_input_size = inputs.size() - this->get_extra_post_op_args();
|
||||||
|
int j = 0;
|
||||||
|
for(int i = 0; i < pos.len(); i++)
|
||||||
|
{
|
||||||
|
auto arg = j + prim_input_size;
|
||||||
|
auto kind = pos.kind(i);
|
||||||
|
std::string mesg =
|
||||||
|
"Post op " + std::to_string(i) + "@" + std::to_string(arg) + ": ";
|
||||||
|
try
|
||||||
|
{
|
||||||
|
dnnl::algorithm algo;
|
||||||
|
dnnl::memory::desc mdesc;
|
||||||
|
float scale = 0;
|
||||||
|
float alpha = 0;
|
||||||
|
float beta = 0;
|
||||||
|
if(kind == dnnl::primitive::kind::binary)
|
||||||
|
{
|
||||||
|
pos.get_params_binary(i, algo, mdesc);
|
||||||
|
if(mdesc != md.at(arg_lookup.at(arg)))
|
||||||
|
MIGRAPHX_THROW(mesg +
|
||||||
|
"Memory descriptor doesn't match for binary post op");
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
else if(kind == dnnl::primitive::kind::eltwise)
|
||||||
|
{
|
||||||
|
pos.get_params_eltwise(i, scale, algo, alpha, beta);
|
||||||
|
}
|
||||||
|
else if(kind == dnnl::primitive::kind::sum)
|
||||||
|
{
|
||||||
|
pos.get_params_sum(i, scale);
|
||||||
|
algo = dnnl::algorithm::binary_add;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW("Unknown kind");
|
||||||
|
}
|
||||||
|
if(to_dnnl_algo(post_ops[i].algo) != algo)
|
||||||
|
MIGRAPHX_THROW(mesg + "Algorithm doesn't match for post op " +
|
||||||
|
post_ops[i].algo + " != " + to_string(algo));
|
||||||
|
}
|
||||||
|
catch(const dnnl::error& e)
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW(mesg + "Failed to get post ops argument " + ": " + e.what());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
std::unordered_map<int, dnnl::memory> m;
|
||||||
|
m[MIGRAPHX_DNNL_PREFIX(ARG_DST)] =
|
||||||
|
to_dnnl_memory(md.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)), args.back());
|
||||||
|
for(int i = 0; i < args.size() - 1; i++)
|
||||||
|
m[arg_lookup[i]] = to_dnnl_memory(md.at(arg_lookup[i]), args[i]);
|
||||||
|
prim.execute(get_dnnl_context().stream, m);
|
||||||
|
return args.back();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
std::vector<shape> trim_post_op_inputs(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
auto prim_input_size = inputs.size() - this->get_extra_post_op_args();
|
||||||
|
return {inputs.begin(), inputs.begin() + prim_input_size};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Derived, class Primitive, class Op>
|
||||||
|
struct dnnl_extend_op : dnnl_op<Derived, Primitive>
|
||||||
|
{
|
||||||
|
Op op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack_join(self.reflect_base(self, f), migraphx::reflect(self.op, f));
|
||||||
|
}
|
||||||
|
|
||||||
|
// dnnl has some issues with non-packed inputs
|
||||||
|
template <class T>
|
||||||
|
void required(const check_shapes<T>& cs) const
|
||||||
|
{
|
||||||
|
cs.packed_or_broadcasted();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "dnnl::" + op.name(); }
|
||||||
|
shape compute_shape(std::vector<shape> inputs) const
|
||||||
|
{
|
||||||
|
const auto& self = static_cast<const Derived&>(*this);
|
||||||
|
// Compensate for allocation
|
||||||
|
inputs.pop_back();
|
||||||
|
self.required(check_shapes(inputs, self));
|
||||||
|
auto r = migraphx::compute_shape(op, this->trim_post_op_inputs(inputs));
|
||||||
|
// Call to get_primitive to make sure an algo is available
|
||||||
|
this->get_primitive(this->to_memory_desc(r, inputs));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP
|
||||||
|
#define MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP
|
||||||
|
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
|
||||||
|
struct module;
|
||||||
|
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct MIGRAPHX_CPU_EXPORT fuse_ops
|
||||||
|
{
|
||||||
|
context* ctx = nullptr;
|
||||||
|
std::string name() const { return "cpu::fuse_ops"; }
|
||||||
|
void apply(module& m) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_CPU_FUSE_OPS_HPP
|
||||||
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
|
||||||
|
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
|
||||||
|
struct module;
|
||||||
|
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct MIGRAPHX_CPU_EXPORT lowering
|
||||||
|
{
|
||||||
|
std::string name() const { return "cpu::lowering"; }
|
||||||
|
void apply(module& m) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,125 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_PARALLEL_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_PARALLEL_HPP
|
||||||
|
|
||||||
|
// #define MIGRAPHX_DISABLE_OMP
|
||||||
|
#include <cmath>
|
||||||
|
#include <cassert>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#ifdef MIGRAPHX_DISABLE_OMP
|
||||||
|
#include <migraphx/par_for.hpp>
|
||||||
|
#else
|
||||||
|
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic push
|
||||||
|
#pragma clang diagnostic ignored "-Wreserved-identifier"
|
||||||
|
#endif
|
||||||
|
#include <omp.h>
|
||||||
|
#ifdef __clang__
|
||||||
|
#pragma clang diagnostic pop
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_DISABLE_OMP
|
||||||
|
|
||||||
|
/// Thread budget for the std::thread based build: whatever
/// std::thread::hardware_concurrency() reports.
inline std::size_t max_threads()
{
    const std::size_t hw_threads = std::thread::hardware_concurrency();
    return hw_threads;
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
void parallel_for_impl(std::size_t n, std::size_t threadsize, F f)
|
||||||
|
{
|
||||||
|
if(threadsize <= 1)
|
||||||
|
{
|
||||||
|
f(std::size_t{0}, n);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::vector<joinable_thread> threads(threadsize);
|
||||||
|
// Using const here causes gcc 5 to ICE
|
||||||
|
#if(!defined(__GNUC__) || __GNUC__ != 5)
|
||||||
|
const
|
||||||
|
#endif
|
||||||
|
std::size_t grainsize = std::ceil(static_cast<double>(n) / threads.size());
|
||||||
|
|
||||||
|
std::size_t work = 0;
|
||||||
|
std::generate(threads.begin(), threads.end(), [=, &work] {
|
||||||
|
auto result = joinable_thread([=]() mutable {
|
||||||
|
assert(work < n);
|
||||||
|
f(work, std::min(n, work + grainsize));
|
||||||
|
});
|
||||||
|
work += grainsize;
|
||||||
|
return result;
|
||||||
|
});
|
||||||
|
// cppcheck-suppress unsignedLessThanZero
|
||||||
|
assert(work >= n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
|
||||||
|
/// Thread budget for the OpenMP build: the value of omp_get_max_threads().
inline std::size_t max_threads()
{
    const std::size_t omp_threads = omp_get_max_threads();
    return omp_threads;
}
|
||||||
|
|
||||||
|
/// Splits [0, n) into chunks of ceil(n / threadsize) elements and runs
/// f(begin, end) for each chunk inside an OpenMP parallel loop with
/// `threadsize` threads.
///
/// With threadsize <= 1 the whole range is processed inline as f(0, n).
/// Because the chunk size is rounded up, trailing iterations may start at or
/// beyond n (e.g. n = 520, threadsize = 64 gives chunks of 9, so iteration 58
/// starts at 522). Those iterations are skipped; previously they tripped an
/// assert and invoked `f` with begin > end.
template <class F>
void parallel_for_impl(std::size_t n, std::size_t threadsize, F f)
{
    if(threadsize <= 1)
    {
        f(std::size_t{0}, n);
    }
    else
    {
        std::size_t grainsize = std::ceil(static_cast<double>(n) / threadsize);
#pragma omp parallel for num_threads(threadsize) schedule(static, 1)
        for(std::size_t tid = 0; tid < threadsize; tid++)
        {
            std::size_t work = tid * grainsize;
            // Nothing left for this iteration once the range is exhausted
            if(work >= n)
                continue;
            f(work, std::min(n, work + grainsize));
        }
    }
}
|
||||||
|
#endif
|
||||||
|
template <class F>
|
||||||
|
void parallel_for(std::size_t n, std::size_t min_grain, F f)
|
||||||
|
{
|
||||||
|
const auto threadsize = std::min<std::size_t>(max_threads(), n / min_grain);
|
||||||
|
parallel_for_impl(n, threadsize, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience overload of parallel_for that uses a minimum grain of 8.
template <class F>
void parallel_for(std::size_t n, F f)
{
    constexpr int default_min_grain = 8;
    parallel_for(n, default_min_grain, f);
}
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,414 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_POINTWISE_HPP
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/check_shapes.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/reduce_dims.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct multi_index
|
||||||
|
{
|
||||||
|
constexpr multi_index() = default;
|
||||||
|
|
||||||
|
multi_index(const shape& s, std::size_t i) : n(s.lens().size())
|
||||||
|
{
|
||||||
|
assert(n < max_size);
|
||||||
|
std::copy(s.lens().begin(), s.lens().end(), dims);
|
||||||
|
s.multi_copy(i, index, index + max_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr std::size_t size() const { return n; }
|
||||||
|
|
||||||
|
constexpr std::size_t* begin() { return index; }
|
||||||
|
constexpr const std::size_t* begin() const { return index; }
|
||||||
|
|
||||||
|
constexpr std::size_t* end() { return index + size(); }
|
||||||
|
constexpr const std::size_t* end() const { return index + size(); }
|
||||||
|
|
||||||
|
std::size_t offset(const shape& s) const { return s.index(begin(), end()); }
|
||||||
|
|
||||||
|
constexpr void carry()
|
||||||
|
{
|
||||||
|
std::size_t overflow = 0;
|
||||||
|
for(std::ptrdiff_t i = size() - 1; i > 0; i--)
|
||||||
|
{
|
||||||
|
auto z = index[i] + overflow;
|
||||||
|
// Reset overflow
|
||||||
|
overflow = 0;
|
||||||
|
// Compute overflow using while loop instead of mod
|
||||||
|
// overflow = z / dims[i];
|
||||||
|
// z = z % dims[i];
|
||||||
|
while(z >= dims[i])
|
||||||
|
{
|
||||||
|
z -= dims[i];
|
||||||
|
overflow += 1;
|
||||||
|
}
|
||||||
|
index[i] = z;
|
||||||
|
// Exit if there is no overflow
|
||||||
|
if(overflow == 0)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
index[0] += overflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr void increment(std::size_t i)
|
||||||
|
{
|
||||||
|
index[size() - 1] += i;
|
||||||
|
carry();
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr multi_index& operator+=(std::size_t i)
|
||||||
|
{
|
||||||
|
increment(i);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr multi_index& operator++()
|
||||||
|
{
|
||||||
|
increment(1);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
multi_index operator++(int) // NOLINT
|
||||||
|
{
|
||||||
|
multi_index result = *this;
|
||||||
|
increment(1);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static const std::size_t max_size = 5;
|
||||||
|
std::size_t index[max_size] = {};
|
||||||
|
std::size_t dims[max_size] = {};
|
||||||
|
std::size_t n = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct reduce_dims_base
|
||||||
|
{
|
||||||
|
std::vector<shape> reduce_shapes;
|
||||||
|
|
||||||
|
void finalize(context&, const shape&, const std::vector<shape>& inputs)
|
||||||
|
{
|
||||||
|
reduce_shapes = reduce_dims(inputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
argument get_arg(const std::vector<argument>& args, std::size_t i) const
|
||||||
|
{
|
||||||
|
if(reduce_shapes.empty())
|
||||||
|
return args[i];
|
||||||
|
return args.at(i).reshape(reduce_shapes.at(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
argument get_output() const
|
||||||
|
{
|
||||||
|
argument a{reduce_shapes[0]};
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// N elements of T, viewable either as a std::array or as a compiler vector
/// type (GCC/Clang vector_size extension) through an anonymous union.
template <class T, std::size_t N>
struct vec
{
    typedef std::array<T, N> array_type;
    using vector_type __attribute__((vector_size(N * sizeof(T)))) = T;

    // Both views must cover the same number of bytes
    static_assert(sizeof(array_type) == sizeof(vector_type), "Not the same size");

    union
    {
        array_type array;
        vector_type vector;
    };
};
|
||||||
|
|
||||||
|
/// Fallback overload: any type that is not a vec reports a vector size of 0.
template <class T>
constexpr std::integral_constant<std::size_t, 0> vec_size(const T&)
{
    return std::integral_constant<std::size_t, 0>{};
}
|
||||||
|
|
||||||
|
template <class T, std::size_t N>
|
||||||
|
constexpr std::integral_constant<std::size_t, N> vec_size(const vec<T, N>&)
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Type-only form: the vector size of T, taken from whichever vec_size(value)
/// overload matches a T.
template <class T>
constexpr std::size_t vec_size()
{
    using size_constant = decltype(vec_size(std::declval<T>()));
    return size_constant::value;
}
|
||||||
|
|
||||||
|
/// Vector case: calls `f` once per lane, passing lane `i` of `v` (through its
/// array view) and lane `i` of every remaining operand (through its vector view).
template <class F, class V, class... Vs, MIGRAPHX_REQUIRES((vec_size<V>() > 0))>
void vec_apply(F f, V& v, Vs... vs)
{
    assert(all_of({vec_size<Vs>()...}, [&](auto n) { return n == vec_size<V>(); }));
    assert(vec_size<V>() == v.array.size());
    constexpr std::size_t lanes = vec_size<V>();
    for(std::size_t lane = 0; lane < lanes; lane++)
    {
        f(v.array[lane], vs.vector[lane]...);
    }
}
|
||||||
|
|
||||||
|
/// Scalar case: operands are not vec types, so `f` is applied to them directly.
template <class F, class V, class... Vs, MIGRAPHX_REQUIRES((vec_size<V>() == 0))>
void vec_apply(F f, V& v, Vs&... vs)
{
    // No lanes to iterate over: a single call on the operands themselves
    f(v, vs...);
}
|
||||||
|
|
||||||
|
inline std::size_t find_packed_len(const shape& s)
|
||||||
|
{
|
||||||
|
for(std::size_t i = 0; i < s.lens().size(); i++)
|
||||||
|
{
|
||||||
|
if(s.lens()[i] > 1 and s.strides()[i] == 1)
|
||||||
|
{
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <std::size_t N>
|
||||||
|
shape vectorize(const shape& s)
|
||||||
|
{
|
||||||
|
assert(s.standard() or s.broadcasted());
|
||||||
|
auto lens = s.lens();
|
||||||
|
if(s.broadcasted())
|
||||||
|
{
|
||||||
|
auto n = find_packed_len(s);
|
||||||
|
assert(n != -1);
|
||||||
|
assert((lens[n] % N) == 0);
|
||||||
|
lens[n] /= N;
|
||||||
|
return {s.type(), lens, s.strides()};
|
||||||
|
}
|
||||||
|
assert((lens.back() % N) == 0);
|
||||||
|
lens.back() /= N;
|
||||||
|
return {s.type(), lens};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <std::size_t N, class T>
|
||||||
|
tensor_view<vec<T, N>> vectorize(tensor_view<T> tv)
|
||||||
|
{
|
||||||
|
return {vectorize<N>(tv.get_shape()), reinterpret_cast<vec<T, N>*>(tv.data())};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Trait marking element types eligible for vectorization. False by default.
template <class T>
struct is_vector_type : std::integral_constant<bool, false>
{
};

/// float is the only type enabled here.
template <>
struct is_vector_type<float> : std::integral_constant<bool, true>
{
};
|
||||||
|
|
||||||
|
template <class... Ts>
|
||||||
|
struct is_vector_tensor_view : and_<is_vector_type<typename Ts::value_type>{}...>
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Checks whether every given shape can be regrouped into N-element vectors.
///
/// A shape qualifies when it is standard with a last length divisible by N, or
/// when it is broadcasted, its strides (ignoring positive strides on length-1
/// axes) sum to exactly 1, and the length of its stride-1 axis is divisible
/// by N.
template <std::size_t N, class... Xs>
bool is_vectorizable(const Xs&... xs)
{
    // Stride contribution of one axis; length-1 axes with a positive stride count as 0
    const auto effective_stride = [](auto len, auto stride) -> std::size_t {
        if(stride > 0 and len == 1)
            return 0;
        return stride;
    };
    const auto shape_ok = [&](const auto& s) -> bool {
        if(s.standard() and (s.lens().back() % N) == 0)
            return true;
        if(not s.broadcasted())
            return false;
        auto stride_sum = std::inner_product(s.lens().begin(),
                                             s.lens().end(),
                                             s.strides().begin(),
                                             0,
                                             std::plus<>{},
                                             effective_stride);
        if(stride_sum != 1)
            return false;
        auto axis = find_packed_len(s);
        assert(axis != -1);
        return (s.lens()[axis] % N) == 0;
    };
    return all_of({xs...}, shape_ok);
}
|
||||||
|
|
||||||
|
/// Returns a callable that invokes `f` with the widest applicable
/// vectorization of the base shape and views: 32 lanes, then 8 lanes, and
/// finally the unmodified inputs.
template <class... Ts, MIGRAPHX_REQUIRES(is_vector_tensor_view<Ts...>{})>
auto auto_vectorize(const shape& base_shape, Ts... xs)
{
    return [=](auto f) {
        if(is_vectorizable<32>(base_shape, xs.get_shape()...))
        {
            f(vectorize<32>(base_shape), vectorize<32>(xs)...);
            return;
        }
        if(is_vectorizable<8>(base_shape, xs.get_shape()...))
        {
            f(vectorize<8>(base_shape), vectorize<8>(xs)...);
            return;
        }
        // Neither width applies: pass everything through as-is
        f(base_shape, xs...);
    };
}
|
||||||
|
|
||||||
|
// Overload chosen when at least one tensor view holds an element type that is
// not vectorizable. The returned callable forwards the shape and views to the
// callback unchanged.
template <class... Ts, MIGRAPHX_REQUIRES(not is_vector_tensor_view<Ts...>{})>
auto auto_vectorize(const shape& base_shape, Ts... xs)
{
    return [=](auto callback) {
        callback(base_shape, xs...);
    };
}
|
||||||
|
|
||||||
|
// Decides whether all shapes can be addressed with one shared linear index.
// That holds when every shape is standard, or when every shape is packed and
// all of the trailing shapes compare equal to the first one.
template <class X, class... Xs>
bool is_standard_offset(const X& x, const Xs&... xs)
{
    const bool all_standard = all_of({x, xs...}, [](const auto& s) { return s.standard(); });
    if(all_standard)
        return true;

    const bool all_packed = all_of({x, xs...}, [](const auto& s) { return s.packed(); });
    if(not all_packed)
        return false;

    // Packed layouts only share offsets when the shapes are identical.
    return all_of({xs...}, [&](const auto& s) { return s == x; });
}
|
||||||
|
|
||||||
|
template <class... Ts>
|
||||||
|
auto pointwise_apply(Ts... ts)
|
||||||
|
{
|
||||||
|
return [=](context& ctx, const shape& base_shape, std::size_t min_grain, auto f) mutable {
|
||||||
|
if(is_standard_offset(ts.get_shape()...))
|
||||||
|
{
|
||||||
|
ctx.bulk_execute(base_shape.elements(), min_grain, [=](auto start, auto end) mutable {
|
||||||
|
for(auto i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
vec_apply(f, ts.data()[i]...);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
assert(base_shape.lens().size() <= 6);
|
||||||
|
ctx.bulk_execute(base_shape.elements(), min_grain, [=](auto start, auto end) mutable {
|
||||||
|
multi_index mi(base_shape, start);
|
||||||
|
for(auto i = start; i < end; i++)
|
||||||
|
{
|
||||||
|
vec_apply(f, ts.data()[mi.offset(ts.get_shape())]...);
|
||||||
|
++mi;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class... Ts>
|
||||||
|
auto pointwise(Ts... ts)
|
||||||
|
{
|
||||||
|
return [=](context& ctx, const shape& base_shape, std::size_t min_grain, auto f) mutable {
|
||||||
|
auto_vectorize(base_shape, ts...)(
|
||||||
|
[&](auto bs, auto... xs) { pointwise_apply(xs...)(ctx, bs, min_grain, f); });
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Op>
|
||||||
|
struct cpu_unary : reduce_dims_base, auto_register_op<cpu_unary<Op>>
|
||||||
|
{
|
||||||
|
Op op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
std::string name() const { return "cpu::" + op.name(); }
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
check_shapes{inputs, *this}.has(2);
|
||||||
|
const auto& s = inputs.at(0);
|
||||||
|
return {s.type(), s.lens()};
|
||||||
|
}
|
||||||
|
argument
|
||||||
|
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
argument result = get_arg(args, args.size() - 1);
|
||||||
|
|
||||||
|
visit_all(result, get_arg(args, 0))([&](auto output, auto input) {
|
||||||
|
auto op2 = op;
|
||||||
|
pointwise(output, input)(
|
||||||
|
ctx, output.get_shape(), 1024, [op2](auto& y, auto x) { y = op2.apply()(x); });
|
||||||
|
});
|
||||||
|
|
||||||
|
return result.reshape(output_shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
|
||||||
|
{
|
||||||
|
return shapes.size() - 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class Op>
|
||||||
|
struct cpu_binary : reduce_dims_base, auto_register_op<cpu_binary<Op>>
|
||||||
|
{
|
||||||
|
Op op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
std::string name() const { return "cpu::" + op.name(); }
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
check_shapes{inputs, *this}.has(3);
|
||||||
|
const auto& s = inputs.at(0);
|
||||||
|
return {s.type(), s.lens()};
|
||||||
|
}
|
||||||
|
|
||||||
|
argument
|
||||||
|
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
argument result = get_arg(args, args.size() - 1);
|
||||||
|
|
||||||
|
visit_all(result, get_arg(args, 0), get_arg(args, 1))(
|
||||||
|
[&](auto output, auto input1, auto input2) {
|
||||||
|
auto op2 = op;
|
||||||
|
pointwise(output, input1, input2)(
|
||||||
|
ctx, output.get_shape(), 1024, [op2](auto& z, auto x, auto y) {
|
||||||
|
z = op2.apply()(x, y);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return result.reshape(output_shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
|
||||||
|
{
|
||||||
|
return shapes.size() - 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
|
||||||
|
#define MIGRAPHX_GUARD_MIGRAPHLIB_CPU_TARGET_HPP
|
||||||
|
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/register_target.hpp>
|
||||||
|
#include <migraphx/compile_options.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
struct pass;
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct MIGRAPHX_CPU_EXPORT target
|
||||||
|
{
|
||||||
|
std::string name() const;
|
||||||
|
std::vector<pass> get_passes(migraphx::context& gctx, const compile_options&) const;
|
||||||
|
migraphx::context get_context() const { return context{}; }
|
||||||
|
argument copy_to(const argument& arg) const { return arg; }
|
||||||
|
argument copy_from(const argument& arg) const { return arg; }
|
||||||
|
argument allocate(const shape& s) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_WRITE_LITERALS_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AMDMIGRAPHX_CPU_WRITE_LITERALS_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
struct module;
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct write_literals
|
||||||
|
{
|
||||||
|
std::string name() const { return "cpu::write_literals"; }
|
||||||
|
void apply(module& m) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
65
docker/rocm/migraphx/targets/cpu/layernorm.cpp
Normal file
65
docker/rocm/migraphx/targets/cpu/layernorm.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Layer normalization lowered to oneDNN's layer_normalization_forward primitive.
struct dnnl_layernorm : dnnl_op<dnnl_layernorm, dnnl::layer_normalization_forward>
{
    // Epsilon forwarded to the oneDNN descriptor. Exposed through reflection so
    // it can be set when the operator is constructed or deserialized.
    float epsilon = 1e-12f;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.epsilon, "epsilon"));
    }

    std::string name() const { return "dnnl::layernorm"; }

    // Validates the inputs and returns the shape of the first input.
    // The trailing input is dropped before validation because it is the
    // output allocation rather than a real operand.
    shape compute_shape(std::vector<shape> inputs) const
    {
        // Compensate for allocation
        inputs.pop_back();
        check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1);
        auto s = inputs.at(0);
        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(s, inputs));
        return s;
    }

    // Builds the forward-inference descriptor for the source memory descriptor.
    // The configured epsilon member is passed through. A literal 1e-12f used to
    // be hard-coded here, which silently discarded any non-default value.
    dnnl::layer_normalization_forward::desc
    get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {dnnl::prop_kind::forward_inference,
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
                epsilon,
                dnnl::normalization_flags::none};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
44
docker/rocm/migraphx/targets/cpu/logsoftmax.cpp
Normal file
44
docker/rocm/migraphx/targets/cpu/logsoftmax.cpp
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/logsoftmax.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Log-softmax lowered to oneDNN's logsoftmax_forward primitive, reusing the
// attributes of op::logsoftmax.
struct dnnl_logsoftmax : dnnl_extend_op<dnnl_logsoftmax, dnnl::logsoftmax_forward, op::logsoftmax>
{
    // Builds the forward-inference descriptor from the first source memory
    // descriptor and the wrapped operator's axis.
    dnnl::logsoftmax_forward::desc
    get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        const auto& src_desc = m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0));
        int softmax_axis     = this->op.axis;
        return {dnnl::prop_kind::forward_inference, src_desc, softmax_axis};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
502
docker/rocm/migraphx/targets/cpu/lowering.cpp
Normal file
502
docker/rocm/migraphx/targets/cpu/lowering.cpp
Normal file
@ -0,0 +1,502 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/cpu/lowering.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/dfor.hpp>
|
||||||
|
#include <migraphx/op/identity.hpp>
|
||||||
|
#include <migraphx/op/convolution.hpp>
|
||||||
|
#include <migraphx/op/convolution_backwards.hpp>
|
||||||
|
#include <migraphx/op/quant_convolution.hpp>
|
||||||
|
#include <migraphx/op/dot.hpp>
|
||||||
|
#include <migraphx/op/quant_dot.hpp>
|
||||||
|
#include <migraphx/op/elu.hpp>
|
||||||
|
#include <migraphx/op/im2col.hpp>
|
||||||
|
#include <migraphx/op/leaky_relu.hpp>
|
||||||
|
#include <migraphx/op/logsoftmax.hpp>
|
||||||
|
#include <migraphx/op/lrn.hpp>
|
||||||
|
#include <migraphx/op/pad.hpp>
|
||||||
|
#include <migraphx/op/pooling.hpp>
|
||||||
|
#include <migraphx/op/softmax.hpp>
|
||||||
|
#include <migraphx/op/argmax.hpp>
|
||||||
|
#include <migraphx/op/argmin.hpp>
|
||||||
|
#include <migraphx/op/rnn_var_sl_last_output.hpp>
|
||||||
|
#include <migraphx/op/mod.hpp>
|
||||||
|
#include <migraphx/op/fmod.hpp>
|
||||||
|
#include <migraphx/shape_for_each.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/par_dfor.hpp>
|
||||||
|
#include <migraphx/clamp.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/tune_axis.hpp>
|
||||||
|
#include <migraphx/match/layernorm.hpp>
|
||||||
|
#include <migraphx/match/gelu_erf.hpp>
|
||||||
|
#include <migraphx/match/gelu_tanh.hpp>
|
||||||
|
#include <migraphx/matcher.hpp>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Returns the value 0 converted to T. The argument is used only to deduce T;
// its value is ignored.
template <typename T>
T zero(const T&)
{
    return static_cast<T>(0);
}
|
||||||
|
|
||||||
|
// Returns x converted to the signed counterpart of T when T is an integral
// type (via std::make_signed). For any other T the value is returned as T.
template <class T>
auto make_signed(T x) -> typename std::
    conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::enable_if<true, T>>::type
{
    return x;
}
|
||||||
|
|
||||||
|
struct cpu_im2col
|
||||||
|
{
|
||||||
|
op::im2col op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string name() { return "cpu::im2col"; }
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
return op.normalize_compute_shape(inputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
|
||||||
|
{
|
||||||
|
argument result{output_shape};
|
||||||
|
auto input_shape = args[0].get_shape();
|
||||||
|
auto weights_shape = args[1].get_shape();
|
||||||
|
visit_all(result, args[0])([&](auto col, auto input) {
|
||||||
|
const std::size_t& height = input_shape.lens()[2];
|
||||||
|
const std::size_t& width = input_shape.lens()[3];
|
||||||
|
const std::size_t& channels = weights_shape.lens()[1];
|
||||||
|
const std::size_t& kernel_h = weights_shape.lens()[2];
|
||||||
|
const std::size_t& kernel_w = weights_shape.lens()[3];
|
||||||
|
const std::size_t& pad_h = op.padding[0];
|
||||||
|
const std::size_t& pad_w = op.padding[1];
|
||||||
|
const std::size_t& stride_h = op.stride[0];
|
||||||
|
const std::size_t& stride_w = op.stride[1];
|
||||||
|
|
||||||
|
long kdiv2_h = long(kernel_h) / 2;
|
||||||
|
long kdiv2_w = long(kernel_w) / 2;
|
||||||
|
// calculate output sizes
|
||||||
|
const std::size_t col_height = (height - kernel_h + 2 * pad_h) / stride_h + 1;
|
||||||
|
const std::size_t col_width = (width - kernel_w + 2 * pad_w) / stride_w + 1;
|
||||||
|
// account for padding for the starting position of the input pixels
|
||||||
|
long iinput = kdiv2_h - long(pad_h);
|
||||||
|
// loop over output pixels (ioutput, joutput)
|
||||||
|
for(std::size_t ioutput = 0; ioutput < col_height; ioutput++, iinput += stride_h)
|
||||||
|
{
|
||||||
|
long jinput = kdiv2_w - long(pad_w);
|
||||||
|
for(std::size_t joutput = 0; joutput < col_width; joutput++, jinput += stride_w)
|
||||||
|
{
|
||||||
|
// compute linear index for output
|
||||||
|
std::size_t ldx = ioutput * col_width + joutput;
|
||||||
|
std::size_t p = 0;
|
||||||
|
dfor(channels,
|
||||||
|
kernel_h,
|
||||||
|
kernel_w)([&](std::size_t c, std::size_t koffset, std::size_t loffset) {
|
||||||
|
auto idx = iinput + long(koffset) - kdiv2_h;
|
||||||
|
auto jdx = jinput + long(loffset) - kdiv2_w;
|
||||||
|
col(ldx, p) =
|
||||||
|
((idx >= 0) and (idx < height) and (jdx >= 0) and (jdx < width))
|
||||||
|
? input(0, c, idx, jdx)
|
||||||
|
: 0;
|
||||||
|
p++;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(cpu_im2col)
|
||||||
|
|
||||||
|
struct cpu_op
|
||||||
|
{
|
||||||
|
operation op = op::identity{};
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
std::string name() const { return "cpu::op"; }
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
|
||||||
|
argument compute(context&, const shape& output_shape, const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
return op.compute(output_shape, args);
|
||||||
|
}
|
||||||
|
value to_value() const
|
||||||
|
{
|
||||||
|
value v;
|
||||||
|
v["name"] = op.name();
|
||||||
|
v["operator"] = op.to_value();
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
void from_value(const value& v)
|
||||||
|
{
|
||||||
|
op = make_op(v.at("name").to<std::string>(), v.at("operator"));
|
||||||
|
}
|
||||||
|
friend std::ostream& operator<<(std::ostream& os, const cpu_op& x)
|
||||||
|
{
|
||||||
|
os << "cpu::" << x.op;
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(cpu_op)
|
||||||
|
|
||||||
|
struct cpu_pad
|
||||||
|
{
|
||||||
|
op::pad op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "cpu::pad"; }
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
|
||||||
|
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
|
||||||
|
{
|
||||||
|
assert(output_shape.standard());
|
||||||
|
argument result{output_shape};
|
||||||
|
result.visit([&](auto output) {
|
||||||
|
using type = typename decltype(output)::value_type;
|
||||||
|
std::fill(output.begin(), output.end(), pad_clamp<type>(op.value));
|
||||||
|
});
|
||||||
|
|
||||||
|
visit_all(result, args[0])([&](auto output, auto input) {
|
||||||
|
shape_for_each(input.get_shape(), [&](const auto& idx) {
|
||||||
|
std::vector<std::size_t> new_idx(idx.size());
|
||||||
|
std::transform(
|
||||||
|
idx.begin(), idx.end(), op.pads.begin(), new_idx.begin(), [](auto i, auto j) {
|
||||||
|
return i + j;
|
||||||
|
});
|
||||||
|
output(new_idx.begin(), new_idx.end()) = input(idx.begin(), idx.end());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(cpu_pad)
|
||||||
|
|
||||||
|
struct cpu_rnn_var_sl_last_output
|
||||||
|
{
|
||||||
|
op::rnn_var_sl_last_output op;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return migraphx::reflect(self.op, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "cpu::rnn_var_sl_last_output"; }
|
||||||
|
|
||||||
|
shape compute_shape(std::vector<shape> inputs) const
|
||||||
|
{
|
||||||
|
return op.compute_shape(std::move(inputs));
|
||||||
|
}
|
||||||
|
|
||||||
|
argument compute(const shape& output_shape, std::vector<argument> args) const
|
||||||
|
{
|
||||||
|
argument result{output_shape};
|
||||||
|
auto out_comp_lens = args[0].get_shape().lens();
|
||||||
|
out_comp_lens[0] = 1;
|
||||||
|
shape out_comp_s{output_shape.type(), out_comp_lens};
|
||||||
|
|
||||||
|
visit_all(result, args[0])([&](auto output, auto input) {
|
||||||
|
args[1].visit([&](auto seq_lens) {
|
||||||
|
par_for(output_shape.elements(), [&](auto i) {
|
||||||
|
auto idx = out_comp_s.multi(i);
|
||||||
|
auto b = idx[2];
|
||||||
|
if(op.direction == op::rnn_direction::reverse or idx[1] == 1)
|
||||||
|
{
|
||||||
|
idx[0] = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
idx[0] = seq_lens[b] - 1;
|
||||||
|
}
|
||||||
|
output[i] = input(idx.begin(), idx.end());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(cpu_rnn_var_sl_last_output)
|
||||||
|
|
||||||
|
struct cpu_apply
|
||||||
|
{
|
||||||
|
module* modl;
|
||||||
|
std::unordered_map<std::string, std::function<instruction_ref(instruction_ref)>> apply_map{};
|
||||||
|
instruction_ref last{};
|
||||||
|
|
||||||
|
void extend_op(const std::string& op_name, const std::string& cpu_name, bool allocate = true)
|
||||||
|
{
|
||||||
|
apply_map.emplace(op_name, [=](instruction_ref ins) {
|
||||||
|
auto&& op = ins->get_operator();
|
||||||
|
if(allocate)
|
||||||
|
return replace(ins, make_op(cpu_name, op.to_value()));
|
||||||
|
return modl->replace_instruction(ins, make_op(cpu_name, op.to_value()), ins->inputs());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void extend_dnnl_algos(const std::string& dnnl_name,
|
||||||
|
const std::vector<std::pair<std::string, std::string>>& algos)
|
||||||
|
{
|
||||||
|
for(auto&& pp : algos)
|
||||||
|
{
|
||||||
|
std::string op_name = pp.first;
|
||||||
|
std::string algo = pp.second;
|
||||||
|
apply_map.emplace(op_name, [=](instruction_ref ins) {
|
||||||
|
auto v = ins->get_operator().to_value();
|
||||||
|
if(not v.is_object())
|
||||||
|
return ins;
|
||||||
|
v["algo"] = algo;
|
||||||
|
auto op = make_op(dnnl_name, v);
|
||||||
|
return replace(ins, op);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class M>
|
||||||
|
auto fuse_match(M matcher, const operation& op, const std::vector<std::string>& bind_inputs)
|
||||||
|
{
|
||||||
|
return match::make_match_finder(matcher, [=](auto&, const auto& r) {
|
||||||
|
auto ins = r.result;
|
||||||
|
std::vector<instruction_ref> inputs;
|
||||||
|
std::transform(bind_inputs.begin(),
|
||||||
|
bind_inputs.end(),
|
||||||
|
std::back_inserter(inputs),
|
||||||
|
[&](const auto& s) { return r.instructions[s]; });
|
||||||
|
inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
|
||||||
|
modl->replace_instruction(ins, op, inputs);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void init()
|
||||||
|
{
|
||||||
|
extend_dnnl_algos("dnnl::binary",
|
||||||
|
{
|
||||||
|
{"add", "binary_add"},
|
||||||
|
{"div", "binary_div"},
|
||||||
|
{"max", "binary_max"},
|
||||||
|
{"min", "binary_min"},
|
||||||
|
{"mul", "binary_mul"},
|
||||||
|
});
|
||||||
|
|
||||||
|
extend_dnnl_algos("dnnl::eltwise",
|
||||||
|
{
|
||||||
|
{"abs", "eltwise_abs"},
|
||||||
|
{"elu", "eltwise_elu"},
|
||||||
|
{"exp", "eltwise_exp"},
|
||||||
|
{"log", "eltwise_log"},
|
||||||
|
{"relu", "eltwise_relu"},
|
||||||
|
{"sqrt", "eltwise_sqrt"},
|
||||||
|
{"tanh", "eltwise_tanh"},
|
||||||
|
});
|
||||||
|
|
||||||
|
extend_dnnl_algos("dnnl::reduction",
|
||||||
|
{
|
||||||
|
{"reduce_max", "reduction_max"},
|
||||||
|
{"reduce_mean", "reduction_mean"},
|
||||||
|
{"reduce_min", "reduction_min"},
|
||||||
|
{"reduce_sum", "reduction_sum"},
|
||||||
|
});
|
||||||
|
extend_op("concat", "dnnl::concat");
|
||||||
|
extend_op("contiguous", "dnnl::reorder");
|
||||||
|
extend_op("convolution", "dnnl::convolution");
|
||||||
|
#ifndef MIGRAPHX_ENABLE_ZENDNN
|
||||||
|
extend_op("convolution_backwards", "dnnl::convolution_backwards");
|
||||||
|
extend_op("dot", "dnnl::dot");
|
||||||
|
#endif
|
||||||
|
extend_op("erf", "cpu::erf");
|
||||||
|
extend_op("gather", "cpu::gather");
|
||||||
|
extend_op("logsoftmax", "dnnl::logsoftmax");
|
||||||
|
extend_op("lrn", "dnnl::lrn");
|
||||||
|
extend_op("softmax", "dnnl::softmax");
|
||||||
|
|
||||||
|
extend_op("im2col", "cpu::im2col", false);
|
||||||
|
extend_op("leaky_relu", "cpu::leaky_relu", false);
|
||||||
|
extend_op("pad", "cpu::pad", false);
|
||||||
|
extend_op("rnn_var_sl_last_output", "cpu::rnn_var_sl_last_output", false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void apply()
|
||||||
|
{
|
||||||
|
init();
|
||||||
|
// Apply fusion matchers first
|
||||||
|
match::find_matches(*modl,
|
||||||
|
fuse_match(match::gelu_erf(),
|
||||||
|
make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_erf"}}),
|
||||||
|
{"x"}),
|
||||||
|
fuse_match(match::gelu_tanh(),
|
||||||
|
make_op("dnnl::eltwise", {{"algo", "eltwise_gelu_tanh"}}),
|
||||||
|
{"x"}),
|
||||||
|
fuse_match(match::layernorm(), make_op("dnnl::layernorm"), {"x"}));
|
||||||
|
// Apply these operators first so the inputs can be const folded
|
||||||
|
for(auto it : iterator_for(*modl))
|
||||||
|
{
|
||||||
|
// skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8
|
||||||
|
// supported yet.
|
||||||
|
if(std::any_of(it->inputs().begin(), it->inputs().end(), [](const auto& i) {
|
||||||
|
return contains(fp8_types{}.get(), i->get_shape().type());
|
||||||
|
}))
|
||||||
|
continue;
|
||||||
|
if(it->name() == "pow")
|
||||||
|
{
|
||||||
|
apply_pow(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(auto it : iterator_for(*modl))
|
||||||
|
{
|
||||||
|
// skip lowering if input has fp8 as one of the inputs since oneDNN doesn't have fp8
|
||||||
|
// supported yet.
|
||||||
|
if(std::any_of(it->inputs().begin(), it->inputs().end(), [](const auto& i) {
|
||||||
|
return contains(fp8_types{}.get(), i->get_shape().type());
|
||||||
|
}))
|
||||||
|
continue;
|
||||||
|
if(it->name() == "pooling")
|
||||||
|
{
|
||||||
|
apply_pooling(it);
|
||||||
|
}
|
||||||
|
else if(it->name() == "reshape")
|
||||||
|
{
|
||||||
|
apply_reshape(it);
|
||||||
|
}
|
||||||
|
else if(apply_map.count(it->name()) > 0)
|
||||||
|
{
|
||||||
|
apply_map.at(it->name())(it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
instruction_ref apply_pow(instruction_ref ins) const
|
||||||
|
{
|
||||||
|
auto beta = read_scalar<float>(ins->inputs()[1]);
|
||||||
|
if(beta.empty())
|
||||||
|
return ins;
|
||||||
|
return replace(ins,
|
||||||
|
make_op("dnnl::eltwise",
|
||||||
|
{{"algo", "eltwise_pow"}, {"alpha", 1.0}, {"beta", beta.front()}}),
|
||||||
|
{ins->inputs().front()});
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: update lowering to run the reference
|
||||||
|
// code when OneDNN can't execute pooling for a CPU
|
||||||
|
|
||||||
|
// OneDNN has a limitation on padding size for pooling. see
|
||||||
|
// https://oneapi-src.github.io/oneDNN/dev_guide_convolution.html#doxid-dev-guide-convolution
|
||||||
|
|
||||||
|
// padding = {2}; stride = {1}; lengths = {3} succeeds in oneDNN but
|
||||||
|
// padding = {2}; stride = {1}; lengths = {2} fails.
|
||||||
|
// Also, the referenced documentation contains a max. dimension size of 14 for the kernel
|
||||||
|
// ("weights tensor") that MIGraphX doesn't enforce.
|
||||||
|
// Replaces a pooling instruction with "dnnl::pooling" when all of the
// following hold, checked in this order:
//   - the "dnnl::pooling" operator is registered,
//   - the instruction's output type is float,
//   - ceil_mode is off,
//   - the pooling mode is not lpnorm.
// Otherwise the instruction is returned unchanged.
instruction_ref apply_pooling(instruction_ref ins) const
{
    auto&& pool_op = ins->get_operator();
    auto attrs     = pool_op.to_value();
    if(not has_op("dnnl::pooling"))
        return ins;
    if(ins->get_shape().type() != shape::type_t::float_type)
        return ins;
    if(attrs["ceil_mode"].to<bool>())
        return ins;
    if(attrs["mode"].to<op::pooling_mode>() == op::pooling_mode::lpnorm)
        return ins;
    return replace(ins, make_op("dnnl::pooling", pool_op.to_value()));
}
|
||||||
|
/*
|
||||||
|
Lowers reshape copy operator to reshape lazy by inserting contiguous operators around it.
|
||||||
|
Contiguous ops will later by removed by eliminate_contiguous pass.
|
||||||
|
*/
|
||||||
|
instruction_ref apply_reshape(instruction_ref ins) const
|
||||||
|
{
|
||||||
|
std::vector<instruction_ref> before_contiguous_args = ins->inputs();
|
||||||
|
auto before_alloc =
|
||||||
|
insert_allocation(ins, before_contiguous_args.front()->get_shape().as_standard());
|
||||||
|
before_contiguous_args.push_back(before_alloc);
|
||||||
|
auto before_contig =
|
||||||
|
modl->insert_instruction(ins, make_op("dnnl::reorder"), {before_contiguous_args});
|
||||||
|
|
||||||
|
auto new_lazy_reshape = modl->insert_instruction(
|
||||||
|
ins,
|
||||||
|
make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}),
|
||||||
|
before_contig);
|
||||||
|
|
||||||
|
std::vector<instruction_ref> after_contiguous_args = {new_lazy_reshape};
|
||||||
|
auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape());
|
||||||
|
after_contiguous_args.push_back(after_alloc);
|
||||||
|
return modl->replace_instruction(ins, make_op("dnnl::reorder"), after_contiguous_args);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
static std::vector<T> read_scalar(instruction_ref ins)
|
||||||
|
{
|
||||||
|
if(ins->name() == "contiguous")
|
||||||
|
return read_scalar<T>(ins->inputs().front());
|
||||||
|
if(ins->get_shape().elements() != 1 and not ins->get_shape().scalar())
|
||||||
|
return {};
|
||||||
|
auto r = ins->eval();
|
||||||
|
if(r.empty())
|
||||||
|
return {};
|
||||||
|
return {r.at<T>()};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience overload: replaces ins with op while keeping the instruction's
// current inputs (the three-argument overload appends the output allocation).
instruction_ref replace(instruction_ref ins, const operation& op) const
{
    return replace(ins, op, ins->inputs());
}
|
||||||
|
|
||||||
|
// Replaces ins with op applied to `inputs` plus one extra trailing argument:
// an allocation, inserted before ins, with the shape of ins.
instruction_ref
replace(instruction_ref ins, const operation& op, std::vector<instruction_ref> inputs) const
{
    auto output_buffer = insert_allocation(ins, ins->get_shape());
    inputs.push_back(output_buffer);
    return modl->replace_instruction(ins, op, inputs);
}
|
||||||
|
|
||||||
|
// Inserts an "allocate" instruction for shape s immediately before ins and
// returns a reference to the new instruction.
instruction_ref insert_allocation(instruction_ref ins, const shape& s) const
{
    auto allocate_op = make_op("allocate", {{"shape", to_value(s)}});
    return modl->insert_instruction(ins, allocate_op);
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Pass entry point: builds a cpu_apply helper bound to module m and runs it.
void lowering::apply(module& m) const { cpu_apply{&m}.apply(); }
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
48
docker/rocm/migraphx/targets/cpu/lrn.cpp
Normal file
48
docker/rocm/migraphx/targets/cpu/lrn.cpp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/lrn.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// oneDNN-backed LRN operator built on dnnl_extend_op around op::lrn.
struct dnnl_lrn : dnnl_extend_op<dnnl_lrn, dnnl::lrn_forward, op::lrn>
{
    // Builds the forward-inference, across-channels LRN descriptor from the
    // source memory descriptor in m and the wrapped op's size/alpha/beta/bias.
    dnnl::lrn_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        const auto& wrapped = this->op;
        return {dnnl::prop_kind::forward_inference,
                dnnl::algorithm::lrn_across_channels,
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)),
                wrapped.size,
                wrapped.alpha,
                wrapped.beta,
                wrapped.bias};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
36
docker/rocm/migraphx/targets/cpu/mod.cpp
Normal file
36
docker/rocm/migraphx/targets/cpu/mod.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
#include <migraphx/op/mod.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Explicit instantiation of the cpu_binary template for op::mod.
template struct cpu_binary<op::mod>;
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
83
docker/rocm/migraphx/targets/cpu/pooling.cpp
Normal file
83
docker/rocm/migraphx/targets/cpu/pooling.cpp
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/reflect.hpp>
|
||||||
|
#include <migraphx/par_for.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/pooling.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// oneDNN-backed pooling operator built on dnnl_extend_op around op::pooling.
struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_v2_forward, op::pooling>
{
    // Every input index maps to the single oneDNN source argument.
    std::vector<int> arg_map(int) const { return {MIGRAPHX_DNNL_PREFIX(ARG_SRC)}; }

    // Translates the MIGraphX pooling mode into the matching oneDNN algorithm.
    // Throws for lpnorm (not supported here) and for any unrecognised mode.
    dnnl::algorithm get_algo() const
    {
        switch(op.mode)
        {
        case op::pooling_mode::max: return dnnl::algorithm::pooling_max;
        case op::pooling_mode::average:
            return op.count_include_pad ? dnnl::algorithm::pooling_avg_include_padding
                                        : dnnl::algorithm::pooling_avg_exclude_padding;
        case op::pooling_mode::lpnorm: MIGRAPHX_THROW("Lpnorm pooling mode not supported");
        }
        MIGRAPHX_THROW("Unknown pooling mode");
    }

    // Builds the forward-inference pooling descriptor from the source and
    // destination memory descriptors in m and the wrapped op's attributes.
    dnnl::pooling_v2_forward::desc
    get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        auto algo  = get_algo();
        auto kdims = op.kdims();
        // op.padding stores the leading pads for all kdims dimensions first,
        // followed by the trailing pads.
        std::vector<size_t> padding_l(op.padding.begin(), op.padding.begin() + kdims);
        std::vector<size_t> padding_r(op.padding.begin() + kdims, op.padding.end());
        // Note: It is not documented, but the default dilation seems to be 0 instead of 1.
        // We need to offset dilations with -1.
        std::vector<size_t> dilations;
        dilations.reserve(op.dilations.size());
        std::transform(op.dilations.cbegin(),
                       op.dilations.cend(),
                       std::back_inserter(dilations),
                       [](size_t d) { return d - 1; });
        return {dnnl::prop_kind::forward_inference,
                algo,
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
                to_dnnl_dims(op.stride),
                to_dnnl_dims(op.lengths),
                to_dnnl_dims(dilations),
                to_dnnl_dims(padding_l),
                to_dnnl_dims(padding_r)};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
60
docker/rocm/migraphx/targets/cpu/preallocate.cpp
Normal file
60
docker/rocm/migraphx/targets/cpu/preallocate.cpp
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/check_shapes.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Operator "cpu::preallocate": takes no inputs and hands out a buffer of a
// fixed shape that is created once in finalize().
struct cpu_preallocate : auto_register_op<cpu_preallocate>
{
    shape s;             // shape reported by compute_shape and used to build `data`
    std::string id = ""; // identifier exposed through reflection
    argument data;       // buffer returned by compute(); set in finalize()

    // Only the shape and the id are reflected; `data` is not.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.s, "shape"), f(self.id, "id"));
    }

    std::string name() const { return "cpu::preallocate"; }

    // Requires zero inputs and always reports the stored shape.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(0);
        return s;
    }

    // Returns the argument created in finalize(); all parameters are unused.
    argument compute(context&, const shape&, const std::vector<argument>&) const
    {
        return data;
    }

    // Creates the argument for the stored shape.
    void finalize(context&, const shape&, const std::vector<shape>&)
    {
        data = argument(s);
    }

    lifetime get_lifetime() const { return lifetime::global; }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
73
docker/rocm/migraphx/targets/cpu/reduction.cpp
Normal file
73
docker/rocm/migraphx/targets/cpu/reduction.cpp
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// oneDNN-backed reduction operator ("dnnl::reduction"). The algorithm is
// chosen by name through `algo`; `axes` lists the dimensions that collapse to 1.
struct dnnl_reduction : dnnl_op<dnnl_reduction, dnnl::reduction>
{
    std::string algo;
    std::vector<std::int64_t> axes{};

    // Reflects the base-class fields followed by "algo" and "axes".
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack_join(self.reflect_base(self, f),
                         pack(f(self.algo, "algo"), f(self.axes, "axes")));
    }

    std::string name() const { return "dnnl::reduction"; }

    // Output shape: the first input's shape with every listed axis set to 1.
    shape compute_shape(std::vector<shape> inputs) const
    {
        // Compensate for allocation
        inputs.pop_back();
        check_shapes{this->trim_post_op_inputs(inputs), *this}.has(1).standard();

        auto input   = inputs.at(0);
        auto out_len = input.lens();
        for(auto axis : axes)
            out_len[axis] = 1;
        auto reduced = shape{input.type(), out_len};

        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(reduced, inputs));
        return reduced;
    }

    // Builds the reduction descriptor from the source and destination memory
    // descriptors in m; the last two arguments are passed as 0.
    dnnl::reduction::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {to_dnnl_algo(algo),
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
                0,
                0};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
65
docker/rocm/migraphx/targets/cpu/reorder.cpp
Normal file
65
docker/rocm/migraphx/targets/cpu/reorder.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// oneDNN-backed reorder operator ("dnnl::reorder"). It takes two inputs; the
// shape of the last one is the output shape.
struct dnnl_reorder : dnnl_op<dnnl_reorder, dnnl::reorder>
{
    std::string name() const { return "dnnl::reorder"; }

    // Shapes are passed through unmodified.
    shape adjust_shape(const shape& x, int, const shape&) const { return x; }

    // Requires exactly two inputs and returns the shape of the last one.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        check_shapes{inputs, *this}.has(2);
        auto output = inputs.back();
        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(output, inputs));
        return output;
    }

    // Custom desc class since it is missing in dnnl
    struct desc
    {
        dnnl::memory::desc src;
        dnnl::memory::desc dst;
    };

    // Pairs up the source and destination memory descriptors found in m.
    desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))};
    }

    // Builds the reorder primitive descriptor, using the engine from the dnnl
    // context for both the source and the destination side.
    auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const
    {
        auto& eng = get_dnnl_context().engine;
        return dnnl::reorder::primitive_desc(eng, d.src, eng, d.dst, attr);
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
43
docker/rocm/migraphx/targets/cpu/softmax.cpp
Normal file
43
docker/rocm/migraphx/targets/cpu/softmax.cpp
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/dnnl.hpp>
|
||||||
|
#include <migraphx/op/softmax.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// oneDNN-backed softmax operator built on dnnl_extend_op around op::softmax.
struct dnnl_softmax : dnnl_extend_op<dnnl_softmax, dnnl::softmax_forward, op::softmax>
{
    // Builds the forward-inference softmax descriptor from the source memory
    // descriptor in m and the wrapped op's axis.
    dnnl::softmax_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
    {
        // The axis is copied into an int local before the braced initialisation.
        int softmax_axis = this->op.axis;
        return {dnnl::prop_kind::forward_inference,
                m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)),
                softmax_axis};
    }
};
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
36
docker/rocm/migraphx/targets/cpu/sub.cpp
Normal file
36
docker/rocm/migraphx/targets/cpu/sub.cpp
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/cpu/pointwise.hpp>
|
||||||
|
#include <migraphx/op/sub.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Explicit instantiation of the cpu_binary template for op::sub.
template struct cpu_binary<op::sub>;
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
122
docker/rocm/migraphx/targets/cpu/target.cpp
Normal file
122
docker/rocm/migraphx/targets/cpu/target.cpp
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/auto_contiguous.hpp>
|
||||||
|
#include <migraphx/adjust_allocation.hpp>
|
||||||
|
#include <migraphx/dead_code_elimination.hpp>
|
||||||
|
#include <migraphx/eliminate_allocation.hpp>
|
||||||
|
#include <migraphx/eliminate_common_subexpression.hpp>
|
||||||
|
#include <migraphx/eliminate_concat.hpp>
|
||||||
|
#include <migraphx/eliminate_contiguous.hpp>
|
||||||
|
#include <migraphx/eliminate_data_type.hpp>
|
||||||
|
#include <migraphx/eliminate_identity.hpp>
|
||||||
|
#include <migraphx/eliminate_pad.hpp>
|
||||||
|
#include <migraphx/eliminate_convert.hpp>
|
||||||
|
#include <migraphx/memory_coloring.hpp>
|
||||||
|
#include <migraphx/propagate_constant.hpp>
|
||||||
|
#include <migraphx/register_target.hpp>
|
||||||
|
#include <migraphx/replace_allocate.hpp>
|
||||||
|
#include <migraphx/rewrite_pooling.hpp>
|
||||||
|
#include <migraphx/rewrite_quantization.hpp>
|
||||||
|
#include <migraphx/rewrite_rnn.hpp>
|
||||||
|
#include <migraphx/schedule.hpp>
|
||||||
|
#include <migraphx/simplify_algebra.hpp>
|
||||||
|
#include <migraphx/simplify_reshapes.hpp>
|
||||||
|
#include <migraphx/preallocate_param.hpp>
|
||||||
|
#include <migraphx/cpu/fuse_ops.hpp>
|
||||||
|
#include <migraphx/cpu/write_literals.hpp>
|
||||||
|
#include <migraphx/cpu/allocation_model.hpp>
|
||||||
|
#include <migraphx/cpu/target.hpp>
|
||||||
|
#include <migraphx/cpu/context.hpp>
|
||||||
|
#include <migraphx/cpu/lowering.hpp>
|
||||||
|
#include <migraphx/pass.hpp>
|
||||||
|
#include <migraphx/generate.hpp>
|
||||||
|
#include <migraphx/normalize_ops.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
// Name of this target: always "cpu".
std::string target::name() const { return "cpu"; }
|
||||||
|
|
||||||
|
// cppcheck-suppress constParameterReference
|
||||||
|
std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
|
||||||
|
{
|
||||||
|
auto& ctx = any_cast<context>(gctx);
|
||||||
|
std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
|
||||||
|
std::set<std::string> unsupported_ops{
|
||||||
|
"all", "scatternd_add", "scatternd_mul", "scatternd_none"};
|
||||||
|
unsupported_types.erase(shape::type_t::float_type);
|
||||||
|
return {normalize_ops{},
|
||||||
|
rewrite_quantization{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
eliminate_data_type{unsupported_types, shape::type_t::float_type, unsupported_ops},
|
||||||
|
dead_code_elimination{},
|
||||||
|
simplify_reshapes{},
|
||||||
|
eliminate_convert{},
|
||||||
|
eliminate_identity{},
|
||||||
|
eliminate_pad{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
rewrite_rnn{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
eliminate_common_subexpression{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
simplify_algebra{},
|
||||||
|
simplify_reshapes{},
|
||||||
|
eliminate_convert{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
simplify_reshapes{},
|
||||||
|
eliminate_convert{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
simplify_algebra{},
|
||||||
|
simplify_reshapes{},
|
||||||
|
eliminate_convert{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
propagate_constant{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
auto_contiguous{},
|
||||||
|
lowering{},
|
||||||
|
eliminate_contiguous{"dnnl::reorder"},
|
||||||
|
dead_code_elimination{},
|
||||||
|
replace_allocate{cpu_allocation_model{}},
|
||||||
|
dead_code_elimination{},
|
||||||
|
adjust_allocation{cpu_allocation_model{}},
|
||||||
|
dead_code_elimination{},
|
||||||
|
fuse_ops{&ctx},
|
||||||
|
dead_code_elimination{},
|
||||||
|
write_literals{},
|
||||||
|
dead_code_elimination{},
|
||||||
|
memory_coloring{"cpu::allocate"},
|
||||||
|
dead_code_elimination{},
|
||||||
|
preallocate_param{"scratch", cpu_allocation_model{}},
|
||||||
|
dead_code_elimination{}};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocation hook of the target: returns fill_argument(s, 0) for the requested
// shape (presumably a buffer of shape s filled with zeros — confirm against
// fill_argument).
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
|
||||||
|
|
||||||
|
// Registers the cpu target through the MIGRAPHX_REGISTER_TARGET macro.
MIGRAPHX_REGISTER_TARGET(target);
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
70
docker/rocm/migraphx/targets/cpu/write_literals.cpp
Normal file
70
docker/rocm/migraphx/targets/cpu/write_literals.cpp
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/cpu/write_literals.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace cpu {
|
||||||
|
|
||||||
|
struct cpu_literal
|
||||||
|
{
|
||||||
|
argument data;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack(f(self.data, "data"));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "cpu::literal"; }
|
||||||
|
|
||||||
|
shape compute_shape(const std::vector<shape>&) const { return data.get_shape(); }
|
||||||
|
|
||||||
|
argument compute(const shape&, const std::vector<argument>&) const { return data; }
|
||||||
|
|
||||||
|
friend std::ostream& operator<<(std::ostream& os, const cpu_literal& x)
|
||||||
|
{
|
||||||
|
os << x.name();
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(cpu_literal);
|
||||||
|
|
||||||
|
// Replace every "@literal" instruction in `m` with a cpu_literal operation that
// holds the literal's argument.
void write_literals::apply(module& m) const
{
    for(auto ins : iterator_for(m))
    {
        if(ins->name() == "@literal")
        {
            m.replace_instruction(ins, cpu_literal{ins->get_literal().get_argument()});
        }
    }
}
|
||||||
|
|
||||||
|
} // namespace cpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
43
docker/rocm/migraphx/targets/fpga/CMakeLists.txt
Normal file
43
docker/rocm/migraphx/targets/fpga/CMakeLists.txt
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
#####################################################################################
|
||||||
|
# The MIT License (MIT)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
#####################################################################################
|
||||||
|
|
||||||
|
# FPGA target library, built from the sources in this directory.
add_library(migraphx_fpga
    target.cpp
    lowering.cpp
    subgraph.cpp
    vitis_ai_adapter.cpp
)

# Exported under the short name "fpga".
set_target_properties(migraphx_fpga PROPERTIES EXPORT_NAME fpga)
rocm_set_soversion(migraphx_fpga ${MIGRAPHX_SO_VERSION})

rocm_clang_tidy_check(migraphx_fpga)
# Explicit keyword signature; PUBLIC keeps the transitive linking that the
# previous keyword-less call provided.
target_link_libraries(migraphx_fpga PUBLIC migraphx)

rocm_install_targets(
    PRIVATE
    TARGETS migraphx_fpga
    INCLUDE
        ${CMAKE_CURRENT_SOURCE_DIR}/include
)
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_FPGA_CONTEXT_HPP
|
||||||
|
#define MIGRAPHX_GUARD_FPGA_CONTEXT_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
// Context object for the FPGA target.
struct context
{
    // Integer identifier, 0 by default. fpga::lowering overwrites it and the
    // fpga::vitis_ai operation prints it.
    int id = 0;

    // Intentionally empty.
    void finish() const {}
};
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_FPGA_CONTEXT_HPP
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_FPGA_LOWERING_HPP
|
||||||
|
#define MIGRAPHX_GUARD_FPGA_LOWERING_HPP
|
||||||
|
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/fpga/context.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
struct lowering
|
||||||
|
{
|
||||||
|
context* ctx = nullptr;
|
||||||
|
std::string name() const { return "fpga::lowering"; }
|
||||||
|
void apply(module& m) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_FPGA_LOWERING_HPP
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
|
||||||
|
#define MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
|
||||||
|
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
struct subgraph
|
||||||
|
{
|
||||||
|
std::string name() const { return "fpga::subgraph"; }
|
||||||
|
void apply(module_pass_manager& mpm) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
|
||||||
@ -0,0 +1,55 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_FPGA_TARGET_HPP
|
||||||
|
#define MIGRAPHX_GUARD_FPGA_TARGET_HPP
|
||||||
|
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/register_target.hpp>
|
||||||
|
#include <migraphx/compile_options.hpp>
|
||||||
|
#include <migraphx/fpga/context.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/supported_segments.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
struct pass;
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
struct target
|
||||||
|
{
|
||||||
|
std::string name() const;
|
||||||
|
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
|
||||||
|
migraphx::context get_context() const { return context{}; }
|
||||||
|
supported_segments find_supported(const_module_ref mod, support_metric m) const;
|
||||||
|
argument copy_to(const argument& arg) const { return arg; }
|
||||||
|
argument copy_from(const argument& arg) const { return arg; }
|
||||||
|
argument allocate(const shape& s) const;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_FPGA_TARGET_HPP
|
||||||
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
|
||||||
|
#define MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/pass_manager.hpp>
|
||||||
|
|
||||||
|
namespace vitis_ai {
|
||||||
|
|
||||||
|
class x_model
|
||||||
|
{
|
||||||
|
migraphx::shape shape;
|
||||||
|
|
||||||
|
public:
|
||||||
|
migraphx::shape get_shape() const;
|
||||||
|
void set_shape(migraphx::shape);
|
||||||
|
};
|
||||||
|
|
||||||
|
x_model create_xmodel(migraphx::const_module_ref mod);
|
||||||
|
|
||||||
|
migraphx::argument execute(const x_model& xmodel,
|
||||||
|
const migraphx::shape& output_shape,
|
||||||
|
std::vector<migraphx::argument>& args);
|
||||||
|
|
||||||
|
} // namespace vitis_ai
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
|
||||||
91
docker/rocm/migraphx/targets/fpga/lowering.cpp
Normal file
91
docker/rocm/migraphx/targets/fpga/lowering.cpp
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/fpga/lowering.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include "migraphx/fpga/vitis_ai_adapter.hpp"
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
struct fpga_vitis_op
|
||||||
|
{
|
||||||
|
fpga_vitis_op() = default;
|
||||||
|
explicit fpga_vitis_op(vitis_ai::x_model model) : xmodel(std::move(model)){};
|
||||||
|
|
||||||
|
vitis_ai::x_model xmodel;
|
||||||
|
int dummy = 0;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
// return pack(f(self.xmodel, "xmodel"));
|
||||||
|
return pack(f(self.dummy, "dummy"));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "fpga::vitis_ai"; }
|
||||||
|
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
(void)inputs;
|
||||||
|
return xmodel.get_shape();
|
||||||
|
}
|
||||||
|
|
||||||
|
argument
|
||||||
|
compute(const context& ctx, const shape& output_shape, std::vector<argument> args) const
|
||||||
|
{
|
||||||
|
std::cout << "The context is " << ctx.id << std::endl;
|
||||||
|
return ::vitis_ai::execute(xmodel, output_shape, args);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(fpga_vitis_op)
|
||||||
|
|
||||||
|
// Replace every "fpga::vitis_placeholder" instruction in `m` with an
// fpga_vitis_op built from the placeholder's single submodule.
void lowering::apply(module& m) const
{
    // test modifying the context from a pass
    // `ctx` defaults to nullptr, so only write through it when one was supplied.
    if(ctx != nullptr)
    {
        ctx->id = 2;
    }

    for(auto it : iterator_for(m))
    {
        if(it->name() != "fpga::vitis_placeholder")
            continue;

        // A placeholder carries exactly one submodule holding the FPGA instructions.
        assert(it->module_inputs().size() == 1);
        auto xmodel = ::vitis_ai::create_xmodel(it->module_inputs()[0]);
        m.replace_instruction(it, fpga_vitis_op{xmodel}, it->inputs());
    }
}
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
133
docker/rocm/migraphx/targets/fpga/subgraph.cpp
Normal file
133
docker/rocm/migraphx/targets/fpga/subgraph.cpp
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/fpga/subgraph.hpp>
|
||||||
|
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include "migraphx/iterator.hpp"
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include "migraphx/make_op.hpp"
|
||||||
|
#include "migraphx/module.hpp"
|
||||||
|
#include "migraphx/ranges.hpp"
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <migraphx/pass_manager.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
struct fpga_placeholder_op
|
||||||
|
{
|
||||||
|
fpga_placeholder_op() = default;
|
||||||
|
|
||||||
|
int dummy = 0;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack(f(self.dummy, "dummy"));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "fpga::vitis_placeholder"; }
|
||||||
|
|
||||||
|
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
|
||||||
|
{
|
||||||
|
(void)inputs;
|
||||||
|
if(mods.size() != 1)
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW("should have one submodule.");
|
||||||
|
}
|
||||||
|
module_ref sm = mods.front();
|
||||||
|
if(sm->get_output_shapes().size() != 1)
|
||||||
|
MIGRAPHX_THROW("Only one return");
|
||||||
|
return sm->get_output_shapes().front();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(fpga_placeholder_op)
|
||||||
|
|
||||||
|
// True when `it` should be treated as an FPGA instruction.
// Assumes every instruction that is not an "@..." builtin (e.g. @param,
// @literal) and that has at least one input is an FPGA instruction; an
// instruction without inputs is taken to be input data.
bool is_fpga_instr(migraphx::instruction_ref it)
{
    const bool is_builtin = migraphx::starts_with(it->name(), "@");
    return not is_builtin and not it->inputs().empty();
}
|
||||||
|
|
||||||
|
// Move the FPGA instructions of the current module into a new bypass submodule
// named "<module>:fpga" and make the module return a single
// "fpga::vitis_placeholder" instruction that references that submodule.
//
// If the module has no FPGA instruction, it is left untouched. If it has FPGA
// instructions but no "@return", an exception is thrown before anything is
// modified. Both cases previously used a default-constructed instruction_ref.
void subgraph::apply(module_pass_manager& mpm) const
{
    auto& mod = mpm.get_module();

    migraphx::instruction_ref first      = mod.end();
    migraphx::instruction_ref last       = mod.end();
    migraphx::instruction_ref return_ins = mod.end();
    std::vector<migraphx::instruction_ref> literal_inputs;
    for(auto it : iterator_for(mod))
    {
        // assuming we want all the params/literals as inputs to the FPGA submodule
        if(migraphx::starts_with(it->name(), "@param") or
           migraphx::starts_with(it->name(), "@literal"))
        {
            literal_inputs.push_back(it);
        }
        // remember the first @return; the placeholder is inserted just before it
        if(return_ins == mod.end() and migraphx::starts_with(it->name(), "@return"))
        {
            return_ins = it;
        }
        if(is_fpga_instr(it))
        {
            if(first == mod.end())
            {
                first = it;
            }
            last = it;
        }
    }

    // Nothing to offload: leave the module untouched.
    if(first == mod.end())
    {
        return;
    }
    if(return_ins == mod.end())
    {
        MIGRAPHX_THROW("fpga::subgraph: module has no @return instruction");
    }

    auto* pm = mpm.create_module(mod.name() + ":fpga");
    pm->set_bypass();

    // TODO(varunsh): this code may be replaceable by code in the fuse_pointwise pass

    // assuming all FPGA instructions are in one contiguous range
    pm->insert_instructions(pm->end(), first, std::next(last), {});

    auto placeholder_ins = mod.insert_instruction(
        return_ins, migraphx::make_op("fpga::vitis_placeholder"), literal_inputs, {pm});

    mod.replace_return({placeholder_ins});
}
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
83
docker/rocm/migraphx/targets/fpga/target.cpp
Normal file
83
docker/rocm/migraphx/targets/fpga/target.cpp
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/fpga/target.hpp>
|
||||||
|
#include <migraphx/fpga/lowering.hpp>
|
||||||
|
#include <migraphx/fpga/subgraph.hpp>
|
||||||
|
#include <migraphx/register_target.hpp>
|
||||||
|
#include <migraphx/pass.hpp>
|
||||||
|
#include <migraphx/auto_contiguous.hpp>
|
||||||
|
#include <migraphx/rewrite_rnn.hpp>
|
||||||
|
#include <migraphx/eliminate_pad.hpp>
|
||||||
|
#include <migraphx/insert_pad.hpp>
|
||||||
|
#include <migraphx/dead_code_elimination.hpp>
|
||||||
|
#include <migraphx/generate.hpp>
|
||||||
|
#include <migraphx/normalize_ops.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace fpga {
|
||||||
|
|
||||||
|
// Name of this target: the fixed string "fpga".
std::string target::name() const { return "fpga"; }
|
||||||
|
|
||||||
|
// Build the ordered list of passes for the FPGA target. `gctx` must hold an
// fpga::context; its address is handed to the lowering pass.
std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
{
    // not sure if all these passes are needed but they were copied from ref/
    auto& fpga_ctx = any_cast<context>(gctx);

    std::vector<pass> passes = {normalize_ops{},
                                eliminate_pad{},
                                dead_code_elimination{},
                                insert_pad{},
                                dead_code_elimination{},
                                rewrite_rnn{},
                                dead_code_elimination{},
                                auto_contiguous{},
                                dead_code_elimination{},
                                subgraph{},
                                dead_code_elimination{},
                                lowering{&fpga_ctx},
                                dead_code_elimination{}};
    return passes;
}
|
||||||
|
|
||||||
|
// Allocate an argument for shape `s` by delegating to fill_argument(s, 0).
// NOTE(review): presumably every element is initialised to 0 - confirm against fill_argument's declaration.
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
|
||||||
|
|
||||||
|
// Report every instruction of `mod` as one supported segment. The requested
// metric `m` is ignored and the segment's metric is a fixed placeholder value.
supported_segments target::find_supported(const_module_ref mod, support_metric m) const
{
    (void)m;

    supported_segment whole_module;
    whole_module.metric = 1; // arbitrary value
    for(const auto ins : iterator_for(*mod))
    {
        whole_module.instructions.insert(ins);
    }
    return {whole_module};
}
|
||||||
|
|
||||||
|
MIGRAPHX_REGISTER_TARGET(target);
|
||||||
|
|
||||||
|
} // namespace fpga
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
65
docker/rocm/migraphx/targets/fpga/vitis_ai_adapter.cpp
Normal file
65
docker/rocm/migraphx/targets/fpga/vitis_ai_adapter.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "migraphx/fpga/vitis_ai_adapter.hpp"
|
||||||
|
|
||||||
|
#include "migraphx/module.hpp"
|
||||||
|
|
||||||
|
#include "migraphx/stringutils.hpp"
|
||||||
|
namespace vitis_ai {
|
||||||
|
|
||||||
|
migraphx::shape x_model::get_shape() const { return shape; };
|
||||||
|
|
||||||
|
void x_model::set_shape(migraphx::shape s) { shape = s; }
|
||||||
|
|
||||||
|
// Stub model creation: logs the call and returns an x_model whose shape is
// built from the output shapes of `mod`.
x_model create_xmodel(migraphx::const_module_ref mod)
{
    std::cout << "Calling an external function: create_xmodel!\n";

    x_model model;
    model.set_shape(migraphx::shape(mod->get_output_shapes()));
    return model;
}
|
||||||
|
|
||||||
|
// Stub execution: logs the output shape plus the count and shapes of `args`,
// then returns an argument constructed from `output_shape`. `xmodel` is unused.
migraphx::argument execute(const x_model& xmodel,
                           const migraphx::shape& output_shape,
                           std::vector<migraphx::argument>& args)
{
    (void)xmodel;

    std::cout << "Calling an external function: execute!\n";

    std::cout << "Output Shape: " << output_shape << std::endl;
    std::cout << "Args: " << args.size() << std::endl;
    for(const auto& input : args)
    {
        std::cout << "  " << input.get_shape() << std::endl;
    }
    std::cout << std::endl;

    return migraphx::argument{output_shape};
}
|
||||||
|
|
||||||
|
} // namespace vitis_ai
|
||||||
407
docker/rocm/migraphx/targets/gpu/CMakeLists.txt
Normal file
407
docker/rocm/migraphx/targets/gpu/CMakeLists.txt
Normal file
@ -0,0 +1,407 @@
|
|||||||
|
# ####################################################################################
|
||||||
|
# The MIT License (MIT)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
# ####################################################################################
|
||||||
|
|
||||||
|
find_package(hip REQUIRED)
|
||||||
|
if(NOT GPU_TARGETS)
|
||||||
|
set(fatal_msg "HIP package is broken and has no GPU_TARGETS. Please pass GPU_TARGETS to cmake.")
|
||||||
|
if(NOT WIN32)
|
||||||
|
set(fatal_msg "${fatal_msg}\nUse -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*') to build for your GPU.")
|
||||||
|
endif()
|
||||||
|
# Quote the expansion so the text is always passed to message() as a single argument.
message(FATAL_ERROR "${fatal_msg}")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_MIOPEN)
|
||||||
|
find_package(miopen REQUIRED)
|
||||||
|
message(STATUS "MIGraphX is using MIOpen")
|
||||||
|
else()
|
||||||
|
message(STATUS "MIGraphX is not using MIOpen")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_ROCBLAS)
|
||||||
|
# rocblas
|
||||||
|
find_package(rocblas REQUIRED)
|
||||||
|
message(STATUS "MIGraphX build with rocBLAS")
|
||||||
|
else()
|
||||||
|
message(STATUS "MIGraphX build without rocBLAS")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_HIPBLASLT)
|
||||||
|
# hipblaslt
|
||||||
|
find_package(hipblaslt REQUIRED)
|
||||||
|
# Require hipblas as well to work around the broken hipblaslt package.
|
||||||
|
find_package(hipblas REQUIRED)
|
||||||
|
message(STATUS "MIGraphx build with hipBLAS and hipBLASLt")
|
||||||
|
else()
|
||||||
|
message(STATUS "MIGraphX build without hipBLAS and hipBLASLt")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
|
||||||
|
find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(BUILD_DEV)
|
||||||
|
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
|
||||||
|
else()
|
||||||
|
set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
|
||||||
|
|
||||||
|
if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL)
|
||||||
|
list(REMOVE_ITEM KERNEL_FILES
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm.hpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck.hpp)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
include(Embed)
|
||||||
|
add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/)
|
||||||
|
|
||||||
|
configure_file(device/targets.hpp.in include/migraphx/gpu/device/targets.hpp)
|
||||||
|
file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
|
||||||
|
add_library(migraphx_device ${DEVICE_GPU_SRCS})
|
||||||
|
|
||||||
|
add_library(compile_for_gpu INTERFACE)
|
||||||
|
target_compile_features(compile_for_gpu INTERFACE cxx_std_17)
|
||||||
|
target_compile_options(compile_for_gpu INTERFACE -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fnative-half-arguments-and-returns)
|
||||||
|
target_link_options(compile_for_gpu INTERFACE -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
|
||||||
|
target_link_libraries(compile_for_gpu INTERFACE hip::device)
|
||||||
|
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
|
||||||
|
|
||||||
|
if(HAS_HIP_LAMBDA_HOST_DEVICE)
|
||||||
|
message(STATUS "Enable -fhip-lambda-host-device")
|
||||||
|
target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
|
||||||
|
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
|
||||||
|
rocm_clang_tidy_check(migraphx_device)
|
||||||
|
target_link_libraries(migraphx_device PUBLIC migraphx)
|
||||||
|
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
|
||||||
|
if(NOT MIGRAPHX_USE_MIOPEN AND NOT MIGRAPHX_USE_ROCBLAS)
|
||||||
|
target_link_libraries(migraphx_device INTERFACE hip::host)
|
||||||
|
endif()
|
||||||
|
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
|
||||||
|
# Headers generated at configure time (e.g. the configured device/targets.hpp) are written
# under the current binary dir, so that directory's include/ folder must be on the include path.
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>)
|
||||||
|
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
|
||||||
|
target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes)
|
||||||
|
migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device)
|
||||||
|
|
||||||
|
add_library(kernel_file_check EXCLUDE_FROM_ALL)
|
||||||
|
|
||||||
|
foreach(KERNEL_FILE ${KERNEL_FILES})
|
||||||
|
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
|
||||||
|
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
|
||||||
|
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256)
|
||||||
|
target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_WAVEFRONTSIZE=64)
|
||||||
|
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
|
||||||
|
# Use the keyword signature on every call for this target; no target in this file links against kernel_file_check, so PRIVATE is sufficient.
target_link_libraries(kernel_file_check PRIVATE compile_for_gpu)
|
||||||
|
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
|
||||||
|
target_link_libraries(kernel_file_check PRIVATE composable_kernel::jit_library)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
rocm_clang_tidy_check(kernel_file_check)
|
||||||
|
|
||||||
|
file(GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
|
||||||
|
|
||||||
|
if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL)
|
||||||
|
list(REMOVE_ITEM JIT_GPU_SRCS
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm.cpp
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm_softmax_gemm.cpp)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_MIOPEN)
|
||||||
|
set(MIOPEN_SRCS abs.cpp)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_library(migraphx_gpu
|
||||||
|
analyze_streams.cpp
|
||||||
|
allocation_model.cpp
|
||||||
|
argmax.cpp
|
||||||
|
argmin.cpp
|
||||||
|
code_object_op.cpp
|
||||||
|
compile_ops.cpp
|
||||||
|
compile_gen.cpp
|
||||||
|
compile_hip.cpp
|
||||||
|
compile_hip_code_object.cpp
|
||||||
|
compile_hipblaslt.cpp
|
||||||
|
compile_miopen.cpp
|
||||||
|
compile_pointwise.cpp
|
||||||
|
compiler.cpp
|
||||||
|
device_name.cpp
|
||||||
|
fuse_ck.cpp
|
||||||
|
fuse_mlir.cpp
|
||||||
|
fuse_ops.cpp
|
||||||
|
gemm_impl.cpp
|
||||||
|
hip.cpp
|
||||||
|
hipblaslt.cpp
|
||||||
|
hip_gemm_impl.cpp
|
||||||
|
kernel.cpp
|
||||||
|
lowering.cpp
|
||||||
|
logsoftmax.cpp
|
||||||
|
loop.cpp
|
||||||
|
lrn.cpp
|
||||||
|
mlir.cpp
|
||||||
|
multinomial.cpp
|
||||||
|
no_device.cpp
|
||||||
|
nonzero.cpp
|
||||||
|
pack_args.cpp
|
||||||
|
prefuse_ops.cpp
|
||||||
|
prepare_reduce.cpp
|
||||||
|
perfdb.cpp
|
||||||
|
pooling.cpp
|
||||||
|
problem_cache.cpp
|
||||||
|
reverse.cpp
|
||||||
|
rnn_variable_seq_lens.cpp
|
||||||
|
rocblas.cpp
|
||||||
|
schedule_model.cpp
|
||||||
|
sync_device.cpp
|
||||||
|
target.cpp
|
||||||
|
time_op.cpp
|
||||||
|
topk.cpp
|
||||||
|
write_literals.cpp
|
||||||
|
${JIT_GPU_SRCS}
|
||||||
|
${MIOPEN_SRCS}
|
||||||
|
)
|
||||||
|
|
||||||
|
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
|
||||||
|
migraphx_generate_export_header(migraphx_gpu)
|
||||||
|
|
||||||
|
function(register_migraphx_gpu_ops PREFIX)
|
||||||
|
foreach(OP ${ARGN})
|
||||||
|
register_op(migraphx_gpu HEADER migraphx/gpu/${OP}.hpp OPERATORS gpu::${PREFIX}${OP} INCLUDES migraphx/gpu/context.hpp)
|
||||||
|
endforeach()
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
register_migraphx_gpu_ops(hip_
|
||||||
|
argmax
|
||||||
|
argmin
|
||||||
|
logsoftmax
|
||||||
|
loop
|
||||||
|
multinomial
|
||||||
|
nonzero
|
||||||
|
prefix_scan_sum
|
||||||
|
reverse
|
||||||
|
topk
|
||||||
|
)
|
||||||
|
if (MIGRAPHX_USE_MIOPEN)
|
||||||
|
register_migraphx_gpu_ops(miopen_
|
||||||
|
abs
|
||||||
|
contiguous
|
||||||
|
lrn
|
||||||
|
pooling
|
||||||
|
)
|
||||||
|
else()
|
||||||
|
register_migraphx_gpu_ops(miopen_
|
||||||
|
contiguous
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
register_op(migraphx_gpu
|
||||||
|
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
|
||||||
|
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
|
||||||
|
INCLUDES migraphx/gpu/context.hpp)
|
||||||
|
if(MIGRAPHX_USE_ROCBLAS)
|
||||||
|
register_op(migraphx_gpu
|
||||||
|
HEADER migraphx/gpu/gemm.hpp
|
||||||
|
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
|
||||||
|
INCLUDES migraphx/gpu/context.hpp)
|
||||||
|
endif()
|
||||||
|
if(MIGRAPHX_USE_HIPBLASLT)
|
||||||
|
register_op(migraphx_gpu
|
||||||
|
HEADER migraphx/gpu/hip_gemm.hpp
|
||||||
|
OPERATORS gpu::hip_gemm<op::dot> gpu::hip_gemm<op::quant_dot>
|
||||||
|
INCLUDES migraphx/gpu/context.hpp)
|
||||||
|
endif()
|
||||||
|
if (MIGRAPHX_USE_MIOPEN)
|
||||||
|
register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
|
||||||
|
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::convolution_backwards> gpu::miopen_convolution<op::quant_convolution>
|
||||||
|
INCLUDES migraphx/gpu/context.hpp)
|
||||||
|
endif()
|
||||||
|
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
|
||||||
|
rocm_clang_tidy_check(migraphx_gpu)
|
||||||
|
|
||||||
|
set(MIGRAPHX_ENABLE_MLIR ON CACHE BOOL "")
|
||||||
|
|
||||||
|
if(MIGRAPHX_ENABLE_MLIR)
|
||||||
|
# Find package rocMLIR
|
||||||
|
find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
|
||||||
|
message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
|
||||||
|
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
|
||||||
|
# Make this private to avoid multiple inclusions of LLVM symbols.
|
||||||
|
# TODO: Fix rocMLIR's library to hide LLVM internals.
|
||||||
|
target_link_libraries(migraphx_gpu PRIVATE rocMLIR::rockCompiler)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_HIPRTC)
|
||||||
|
find_package(hiprtc REQUIRED)
|
||||||
|
message(STATUS "MIGraphX is using hipRTC")
|
||||||
|
target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1)
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC hiprtc::hiprtc)
|
||||||
|
else()
|
||||||
|
message(STATUS "MIGraphX is using HIP Clang")
|
||||||
|
|
||||||
|
# Get flags needed to compile hip
|
||||||
|
include(TargetFlags)
|
||||||
|
target_flags(HIP_COMPILER_FLAGS hip::device)
|
||||||
|
|
||||||
|
# Remove cuda arch flags
|
||||||
|
string(REGEX REPLACE "--cuda-gpu-arch=[a-z0-9]+ ?" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||||
|
string(REGEX REPLACE "--offload-arch=[a-z0-9:+-]+ ?" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||||
|
|
||||||
|
# Skip library paths since hip will incorrectly treat them as source files
|
||||||
|
string(APPEND HIP_COMPILER_FLAGS " ")
|
||||||
|
|
||||||
|
if(WIN32)
|
||||||
|
string(REPLACE "\\" "/" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||||
|
endif()
|
||||||
|
foreach(_unused RANGE 2)
|
||||||
|
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
message(STATUS "Hip compiler flags: \"${HIP_COMPILER_FLAGS}\"")
|
||||||
|
target_compile_definitions(migraphx_gpu PRIVATE
|
||||||
|
-DMIGRAPHX_HIP_COMPILER="${CMAKE_CXX_COMPILER}"
|
||||||
|
-DMIGRAPHX_HIP_COMPILER_FLAGS="${HIP_COMPILER_FLAGS}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
|
||||||
|
if(WIN32)
|
||||||
|
execute_process(COMMAND where ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER)
|
||||||
|
else()
|
||||||
|
execute_process(COMMAND which ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER)
|
||||||
|
endif()
|
||||||
|
string(STRIP "${MIGRAPHX_HIP_COMPILER_LAUNCHER}" MIGRAPHX_HIP_COMPILER_LAUNCHER)
|
||||||
|
target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_HIP_COMPILER_LAUNCHER="${MIGRAPHX_HIP_COMPILER_LAUNCHER}")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_CXX_COMPILER="${CMAKE_CXX_COMPILER}")
|
||||||
|
|
||||||
|
# Check miopen find mode api
|
||||||
|
|
||||||
|
include(CheckLibraryExists)
|
||||||
|
if (MIGRAPHX_USE_MIOPEN)
|
||||||
|
get_target_property(MIOPEN_LOCATION MIOpen LOCATION)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_MIOPEN=1)
|
||||||
|
check_library_exists(MIOpen "miopenHiddenSetConvolutionFindMode" "${MIOPEN_LOCATION}" HAS_FIND_MODE_API)
|
||||||
|
check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_2_API)
|
||||||
|
else()
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_MIOPEN=0)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_ROCBLAS)
|
||||||
|
get_target_property(ROCBLAS_LOCATION roc::rocblas LOCATION)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=1)
|
||||||
|
# Beta API for automated GEMM tuning
|
||||||
|
check_library_exists(roc::rocblas "rocblas_gemm_ex_get_solutions" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_TUNING_BETA_FEATURE_API)
|
||||||
|
# rocblas FP8 API
|
||||||
|
check_library_exists(roc::rocblas "rocblas_gemm_strided_batched_ex3" "${ROCBLAS_LOCATION}" HAS_ROCBLAS_FP8_BETA_API)
|
||||||
|
else()
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_ROCBLAS=0)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_HIPBLASLT)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_HIPBLASLT=1)
|
||||||
|
else()
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC MIGRAPHX_USE_HIPBLASLT=0)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_MIOPEN)
|
||||||
|
set(MIGRAPHX_USE_FIND_2_API "${HAS_FIND_2_API}" CACHE BOOL "")
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_FIND_2_API)
|
||||||
|
check_library_exists(MIOpen "miopenSetFindOptionPreallocatedTensor" "${MIOPEN_LOCATION}" HAS_PREALLOCATION_API)
|
||||||
|
if(HAS_PREALLOCATION_API)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API -DMIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS)
|
||||||
|
else()
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
|
||||||
|
endif()
|
||||||
|
message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
|
||||||
|
else()
|
||||||
|
message(STATUS "MIGraphx is using legacy Find API in MIOpen")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(HAS_FIND_MODE_API)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_MODE_API)
|
||||||
|
message(STATUS "MIGraphx is using Find Mode API of MIOpen")
|
||||||
|
else()
|
||||||
|
message(STATUS "MIOpen does not have find mode api")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC MIOpen)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_ROCBLAS)
|
||||||
|
if(HAS_ROCBLAS_TUNING_BETA_FEATURE_API)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_TUNING_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS)
|
||||||
|
message(STATUS "MIGraphx is using Beta API of rocBLAS")
|
||||||
|
else()
|
||||||
|
message(STATUS "rocBLAS does not have User Tuning Beta API")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(HAS_ROCBLAS_FP8_BETA_API)
|
||||||
|
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_USE_ROCBLAS_FP8_API -DROCBLAS_BETA_FEATURES_API -DROCBLAS_NO_DEPRECATED_WARNINGS)
|
||||||
|
message(STATUS "MIGraphX is using Beta API of rocBLAS for FP8 computations")
|
||||||
|
else()
|
||||||
|
message(STATUS "rocBLAS does not have Fp8 Beta API")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC roc::rocblas)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MIGRAPHX_USE_HIPBLASLT)
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC roc::hipblaslt)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(WIN32)
|
||||||
|
# Temporary workaround on rocMLIR not exporting correctly libraries it depends on.
|
||||||
|
target_link_libraries(migraphx_gpu PRIVATE ntdll)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC migraphx)
|
||||||
|
if(NOT MIGRAPHX_USE_MIOPEN AND NOT MIGRAPHX_USE_ROCBLAS)
|
||||||
|
target_link_libraries(migraphx_gpu PUBLIC migraphx_device)
|
||||||
|
else()
|
||||||
|
target_link_libraries(migraphx_gpu PRIVATE migraphx_device)
|
||||||
|
endif()
|
||||||
|
target_link_libraries(migraphx_gpu PRIVATE migraphx_kernels)
|
||||||
|
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
|
||||||
|
target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
|
||||||
|
target_compile_definitions(migraphx_gpu PRIVATE MIGRAPHX_USE_COMPOSABLEKERNEL=1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_subdirectory(driver)
|
||||||
|
add_subdirectory(hiprtc)
|
||||||
|
|
||||||
|
rocm_install_targets(
|
||||||
|
PRIVATE
|
||||||
|
TARGETS migraphx_gpu migraphx_device compile_for_gpu
|
||||||
|
INCLUDE
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||||
|
)
|
||||||
61
docker/rocm/migraphx/targets/gpu/abs.cpp
Normal file
61
docker/rocm/migraphx/targets/gpu/abs.cpp
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/abs.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
#if MIGRAPHX_USE_MIOPEN
|
||||||
|
shape miopen_abs::compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
check_shapes{inputs, *this}.has(2).packed();
|
||||||
|
return inputs.at(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
argument miopen_abs::compute(context& ctx,
|
||||||
|
const shape& output_shape,
|
||||||
|
const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
float alpha = 1;
|
||||||
|
float beta = 0;
|
||||||
|
auto x_desc = make_tensor(args[0].get_shape());
|
||||||
|
auto y_desc = make_tensor(output_shape);
|
||||||
|
miopenActivationForward(ctx.get_stream().get_miopen(),
|
||||||
|
ad.get(),
|
||||||
|
&alpha,
|
||||||
|
x_desc.get(),
|
||||||
|
args[0].implicit(),
|
||||||
|
&beta,
|
||||||
|
y_desc.get(),
|
||||||
|
args[1].implicit());
|
||||||
|
|
||||||
|
return args[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
void miopen_abs::finalize(context&, const shape&, const std::vector<shape>&) { ad = make_abs(); }
|
||||||
|
#endif
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
48
docker/rocm/migraphx/targets/gpu/allocation_model.cpp
Normal file
48
docker/rocm/migraphx/targets/gpu/allocation_model.cpp
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/allocation_model.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
std::string gpu_allocation_model::name() const { return "hip::allocate"; }
|
||||||
|
operation gpu_allocation_model::allocate(const shape& s) const
|
||||||
|
{
|
||||||
|
return make_op(name(), {{"shape", to_value(s)}});
|
||||||
|
}
|
||||||
|
|
||||||
|
operation gpu_allocation_model::preallocate(const shape& s, const std::string& id) const
|
||||||
|
{
|
||||||
|
return make_op("hip::hip_allocate_memory", {{"shape", to_value(s)}, {"id", id}});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string gpu_allocation_model::copy() const { return "hip::copy"; }
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
82
docker/rocm/migraphx/targets/gpu/analyze_streams.cpp
Normal file
82
docker/rocm/migraphx/targets/gpu/analyze_streams.cpp
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/analyze_streams.hpp>
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/value.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
struct hip_stream_model
|
||||||
|
{
|
||||||
|
std::size_t max_stream = 0;
|
||||||
|
std::unordered_map<migraphx::instruction_ref, std::size_t> ins2stream{};
|
||||||
|
std::size_t get_nstream() const { return max_stream + 1; }
|
||||||
|
std::size_t get_stream(migraphx::instruction_ref ins) const { return ins2stream.at(ins); }
|
||||||
|
std::size_t get_event_id(migraphx::instruction_ref ins) const
|
||||||
|
{
|
||||||
|
auto v = ins->get_operator().to_value();
|
||||||
|
return v["event"].to<std::size_t>();
|
||||||
|
}
|
||||||
|
bool has_stream(migraphx::instruction_ref ins) const { return ins2stream.count(ins) > 0; }
|
||||||
|
bool is_record(migraphx::instruction_ref ins) const
|
||||||
|
{
|
||||||
|
return ins->name() == "gpu::record_event";
|
||||||
|
}
|
||||||
|
bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "gpu::wait_event"; }
|
||||||
|
};
|
||||||
|
|
||||||
|
stream_model make_stream_model(const module& m)
|
||||||
|
{
|
||||||
|
hip_stream_model hsm;
|
||||||
|
std::size_t stream = 0;
|
||||||
|
for(auto ins : iterator_for(m))
|
||||||
|
{
|
||||||
|
if(ins->name() == "gpu::set_stream")
|
||||||
|
{
|
||||||
|
auto v = ins->get_operator().to_value();
|
||||||
|
stream = v["stream"].to<std::size_t>();
|
||||||
|
hsm.max_stream = std::max(stream, hsm.max_stream);
|
||||||
|
}
|
||||||
|
if(ins->get_operator().is_context_free())
|
||||||
|
continue;
|
||||||
|
if(contains({"hip::hip_allocate_memory", "hip::hip_copy_literal", "@param"}, ins->name()))
|
||||||
|
continue;
|
||||||
|
hsm.ins2stream[ins] = stream;
|
||||||
|
}
|
||||||
|
return hsm;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<stream_race> analyze_streams(const module& m)
|
||||||
|
{
|
||||||
|
return migraphx::analyze_streams(m, make_stream_model(m));
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
50
docker/rocm/migraphx/targets/gpu/argmax.cpp
Normal file
50
docker/rocm/migraphx/targets/gpu/argmax.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/argmax.hpp>
|
||||||
|
#include <migraphx/gpu/device/argmax.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/tune_axis.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
|
||||||
|
{
|
||||||
|
check_shapes{inputs, *this}.has(2);
|
||||||
|
return op.normalize_compute_shape({inputs.at(0)});
|
||||||
|
}
|
||||||
|
|
||||||
|
argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
|
||||||
|
{
|
||||||
|
auto n_dim = args.front().get_shape().lens().size();
|
||||||
|
int64_t tuned_axis = tune_axis(n_dim, op.axis, op.name());
|
||||||
|
device::argmax(
|
||||||
|
ctx.get_stream().get(), args.back(), args.front(), tuned_axis, op.select_last_index);
|
||||||
|
return args.back();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
50
docker/rocm/migraphx/targets/gpu/argmin.cpp
Normal file
50
docker/rocm/migraphx/targets/gpu/argmin.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/argmin.hpp>
|
||||||
|
#include <migraphx/gpu/device/argmin.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/tune_axis.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Computes the result shape of the GPU argmin operator.
// Two shapes are expected (checked through check_shapes::has(2)); only the
// first one is forwarded to the wrapped operator's shape computation.
shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2);
    const shape& data_shape = inputs.at(0);
    return op.normalize_compute_shape({data_shape});
}
|
||||||
|
|
||||||
|
// Runs the device argmin kernel on the context's stream.
// args.front() is read as the input and args.back() receives the result and
// is also the value returned to the caller.
argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& input  = args.front();
    const argument& result = args.back();

    // Resolve the operator's axis against the rank of the input.
    auto rank          = input.get_shape().lens().size();
    int64_t tuned_axis = tune_axis(rank, op.axis, op.name());

    device::argmin(ctx.get_stream().get(), result, input, tuned_axis, op.select_last_index);
    return result;
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
67
docker/rocm/migraphx/targets/gpu/code_object_op.cpp
Normal file
67
docker/rocm/migraphx/targets/gpu/code_object_op.cpp
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/code_object_op.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Presumably registers code_object_op with the operator registry declared in
// <migraphx/register_op.hpp> -- NOTE(review): confirm against that header.
MIGRAPHX_REGISTER_OP(code_object_op);
|
||||||
|
|
||||||
|
// Verifies that the supplied input shapes are still compatible with the
// shapes recorded in expected_inputs and returns the stored output shape.
// Both shape lists are normalized with shape::normalize_standard() before
// being flattened and compared element-wise with shape::is_compatible.
// Throws (MIGRAPHX_THROW) when the two lists do not match.
shape code_object_op::compute_shape(std::vector<shape> inputs) const
{
    // Normalizes every shape of a list in place.
    const auto normalize_all = [](auto& shapes) {
        for(auto& s : shapes)
            s = s.normalize_standard();
    };

    normalize_all(inputs);
    auto einputs = expected_inputs;
    normalize_all(einputs);

    const bool compatible =
        migraphx::equal(flatten(einputs), flatten(inputs), &shape::is_compatible);
    if(not compatible)
        MIGRAPHX_THROW("Input shapes have changed: [" + to_string_range(einputs) + "] -> [" +
                       to_string_range(inputs) + "]");
    return output;
}
|
||||||
|
// Launches the stored kernel `k` on the context's stream.
// The arguments are flattened and their data pointers are handed to the
// kernel together with the global/local sizes and the context's perf events.
// Returns the argument selected by get_output_arg(args.size()).
argument
code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    auto flat_args = flatten(args);

    // Collect the raw data pointer of every flattened argument.
    std::vector<void*> kernel_ptrs;
    kernel_ptrs.reserve(flat_args.size());
    for(const argument& a : flat_args)
        kernel_ptrs.push_back(a.data());

    auto [start, stop] = ctx.get_perf_events();
    k.launch(ctx.get_stream().get(), global, local, std::move(kernel_ptrs), start, stop);
    return args[get_output_arg(args.size())];
}
|
||||||
|
// Builds the kernel handle `k` from the stored code object and symbol name.
// The context and shape parameters are unused. A non-empty code object is a
// precondition (checked by assert only).
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
{
    assert(not code_object.empty());

    k = kernel(code_object, symbol_name);
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
576
docker/rocm/migraphx/targets/gpu/compile_gen.cpp
Normal file
576
docker/rocm/migraphx/targets/gpu/compile_gen.cpp
Normal file
@ -0,0 +1,576 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compile_gen.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/gpu/compile_hip_code_object.hpp>
|
||||||
|
#include <migraphx/gpu/prepare_reduce.hpp>
|
||||||
|
#include <migraphx/algorithm.hpp>
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/permutation.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/rewrite_quantization.hpp>
|
||||||
|
#include <migraphx/optimize_module.hpp>
|
||||||
|
#include <migraphx/cpp_generator.hpp>
|
||||||
|
#include <migraphx/pass_manager.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/array.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
#include <migraphx/fp8_types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace gen {
|
||||||
|
|
||||||
|
static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
|
||||||
|
{
|
||||||
|
// If all inputs are half then only use half2
|
||||||
|
if(std::all_of(inputs.begin(), inputs.end(), [](const auto& s) {
|
||||||
|
return s.type() == shape::half_type;
|
||||||
|
}))
|
||||||
|
return {2};
|
||||||
|
return {4, 2};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chooses a vector width for `axis` that is valid for every input.
//
// axis   - axis along which elements would be vectorized.
// inputs - shapes of all kernel arguments.
// sizes  - candidate vector widths; the first entry that fits is used, and
//          the first entry is also the width picked for inputs whose length
//          along `axis` is 1.
//
// Returns {width, axis}; width is 1 when vectorization is not possible
// (fp8 inputs, no candidate widths, every input has length 1 on `axis`, or
// some input has an unsuitable stride).
vectorize vectorize::elements(std::size_t axis,
                              const std::vector<shape>& inputs,
                              const std::vector<std::size_t>& sizes)
{
    // Without candidate widths nothing can be vectorized. Returning early also
    // keeps sizes.front() below from being evaluated on an empty vector.
    if(sizes.empty())
        return {1, axis};
    // disable vectorization for fp8 types
    if(std::any_of(inputs.begin(), inputs.end(), [&](const auto& ishape) {
           return contains(fp8_types{}.get(), ishape.type());
       }))
        return {1, axis};
    if(std::all_of(
           inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
        return {1, axis};
    // Largest usable width per input; the overall result is the minimum.
    std::vector<std::size_t> max_vec_size;
    std::transform(inputs.begin(),
                   inputs.end(),
                   std::back_inserter(max_vec_size),
                   [&](const auto& input) -> std::size_t {
                       auto stride = input.strides()[axis];
                       auto len    = input.lens()[axis];
                       // Only a stride of 0 or 1 along the axis can be vectorized
                       if(not contains({0, 1}, stride))
                           return 1;
                       if(len == 1 and input.elements() > sizes.front())
                           return sizes.front();
                       auto it = std::find_if(sizes.begin(), sizes.end(), [&](auto vsize) {
                           // The len is divisible by the size and all the strides are
                           // divisible by the size
                           return (len % vsize) == 0 and
                                  std::all_of(
                                      input.strides().begin(), input.strides().end(), [&](auto i) {
                                          return contains({0, 1}, i) or i % vsize == 0;
                                      });
                       });
                       if(it != sizes.end())
                           return *it;
                       return 1;
                   });
    return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis};
}
|
||||||
|
|
||||||
|
// Chooses a vector width for `axis`, picking the candidate widths from how
// far the largest input exceeds the device's work-item capacity
// (cu_count * max_workitems_per_cu of the current device).
// Width 8 is offered only for broadcasted inputs with a ratio above 8, width
// 4 for a ratio above 4, and width 2 always. fp8 inputs or an empty input
// list yield {1, axis}.
vectorize vectorize::elements(context& ctx, std::size_t axis, const std::vector<shape>& inputs)
{
    // disable vectorization for fp8 types
    const bool has_fp8 = std::any_of(inputs.begin(), inputs.end(), [&](const auto& ishape) {
        return contains(fp8_types{}.get(), ishape.type());
    });
    if(has_fp8)
        return {1, axis};
    if(inputs.empty())
        return {1, axis};

    // Element count of the largest input.
    std::size_t n = 0;
    for(const auto& s : inputs)
        n = std::max<std::size_t>(n, s.elements());

    std::size_t max_global = ctx.get_current_device().get_cu_count() *
                             ctx.get_current_device().get_max_workitems_per_cu();
    std::size_t over = n / max_global;

    const bool any_broadcasted =
        std::any_of(inputs.begin(), inputs.end(), [](const auto& s) { return s.broadcasted(); });

    // Candidate widths, widest first.
    std::vector<std::size_t> candidates;
    if(any_broadcasted and over > 8)
        candidates.push_back(8);
    if(over > 4)
        candidates.push_back(4);
    candidates.push_back(2);
    return elements(axis, inputs, candidates);
}
|
||||||
|
|
||||||
|
// Convenience overload: uses the default candidate widths from vector_sizes().
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
    const std::vector<std::size_t> candidates = vector_sizes(inputs);
    return elements(axis, inputs, candidates);
}
|
||||||
|
|
||||||
|
std::string vectorize::str() const
|
||||||
|
{
|
||||||
|
return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Selects which inputs should be preloaded.
// Candidates are the inputs whose stride along `axis` is 0. They are taken in
// order of increasing byte size for as long as the running byte total stays
// within max_lds_bytes (4096).
// Returns a preload whose flag vector has one entry per input.
preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
{
    const std::size_t max_lds_bytes = 4096;
    std::vector<bool> result(inputs.size());

    // Indices of the inputs that are broadcast along the axis.
    std::vector<std::size_t> candidates;
    for(std::size_t i = 0; i < inputs.size(); i++)
    {
        if(inputs[i].strides()[axis] == 0)
            candidates.push_back(i);
    }
    // Smallest inputs first so that as many as possible fit in the budget.
    std::sort(candidates.begin(), candidates.end(), by(std::less<>{}, [&](auto i) {
                  return inputs[i].bytes();
              }));

    std::size_t total_bytes = 0;
    for(auto i : candidates)
    {
        total_bytes += inputs[i].bytes();
        if(total_bytes > max_lds_bytes)
            break;
        result[i] = true;
    }
    return {result};
}
|
||||||
|
|
||||||
|
std::string preload::str() const
|
||||||
|
{
|
||||||
|
std::vector<std::string> bool_strs;
|
||||||
|
std::transform(args.begin(), std::prev(args.end()), std::back_inserter(bool_strs), [](bool b) {
|
||||||
|
if(b)
|
||||||
|
return "true";
|
||||||
|
return "false";
|
||||||
|
});
|
||||||
|
return "auto_preload<false, " + join_strings(bool_strs, ", ") + ">(idx)";
|
||||||
|
}
|
||||||
|
|
||||||
|
bool preload::is_preloading() const
|
||||||
|
{
|
||||||
|
return std::accumulate(args.begin(), args.end(), false, std::logical_or<>{});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Integer division of x by y, rounded up. y must be non-zero.
static std::size_t integer_divide_ceil(std::size_t x, std::size_t y)
{
    const std::size_t biased = x + (y - std::size_t{1});
    return biased / y;
}
|
||||||
|
|
||||||
|
// Builds a tile factor for a dimension of length r by repeatedly pulling the
// smallest of the primes 2, 3, 5, 7, 11 that still divides r and multiplying
// it into the result. Stops once the result reaches max_size or none of the
// primes divides the remainder. The result can exceed max_size because the
// bound is only checked before each multiplication.
static std::size_t compute_tile_factor(std::size_t r, std::size_t max_size = 64)
{
    const std::size_t primes[] = {2, 3, 5, 7, 11};
    std::size_t n              = 1;
    while(n < max_size)
    {
        // First prime dividing the remainder; 0 means none was found.
        std::size_t divisor = 0;
        for(std::size_t p : primes)
        {
            if(r % p == 0)
            {
                divisor = p;
                break;
            }
        }
        if(divisor == 0)
            break;
        r /= divisor;
        n *= divisor;
    }
    return n;
}
|
||||||
|
|
||||||
|
// Decides whether, and how, one argument of a kernel should be tiled.
//
// inputs   - shapes of all kernel arguments; the last `noutputs` entries are
//            treated as outputs (mode `store`), the rest as inputs (`load`).
// noutputs - number of trailing output shapes.
//
// Returns a default-constructed tile when tiling does not apply: no shapes,
// more outputs than shapes, the smallest fast axis is already one of the last
// two positions checked below, the number of tiled arguments is not exactly
// one, a tile dimension degenerates to 1, or the padded tile exceeds 65536
// bytes. Otherwise returns the tile description (axis, per-argument modes,
// inner/outer dims, ntiles, block_size).
tile tile::elements(const std::vector<shape>& inputs, std::size_t noutputs)
{
    // inputs.front() and the `end() - noutputs` iterator arithmetic below are
    // undefined for these inputs, so report "no tiling" instead.
    if(inputs.empty() or noutputs > inputs.size())
        return {};
    tile result;
    auto ndim = inputs.front().ndim();
    // Fast axis of every argument; the tiled axis is the smallest of them.
    std::vector<std::size_t> faxes;
    std::transform(
        inputs.begin(), inputs.end(), std::back_inserter(faxes), MIGRAPHX_LIFT(find_fast_axis));
    result.axis = std::accumulate(faxes.begin(), faxes.end(), ndim, MIGRAPHX_LIFT(std::min));
    if(result.axis >= (ndim - 1))
        return {};
    // Builds a selector that yields mode `m` for a non-broadcasted argument
    // whose fast axis is not the last axis, and `none` otherwise.
    auto select = [&](auto m) {
        return [&, m](std::size_t faxis, const shape& input) {
            if(input.broadcasted())
                return none;
            if(faxis < (ndim - 1))
                return m;
            return none;
        };
    };
    std::transform(faxes.begin(),
                   faxes.end() - noutputs,
                   inputs.begin(),
                   std::back_inserter(result.args),
                   select(load));
    std::transform(faxes.end() - noutputs,
                   faxes.end(),
                   inputs.end() - noutputs,
                   std::back_inserter(result.args),
                   select(store));

    auto nargs = std::count_if(
        result.args.begin(), result.args.end(), [](auto m) { return m != mode::none; });
    // TODO: Handle tiling more than one argument
    if(nargs != 1)
        return {};

    const auto& s = inputs.front();
    auto dim1     = compute_tile_factor(s.lens()[result.axis]);
    auto dim2     = compute_tile_factor(s.lens().back(), 4096 / dim1);
    if(dim1 == 1 or dim2 == 1)
        return {};

    // Inner dims: 1 everywhere except the tiled axis and the last axis.
    result.inner = s.lens();
    std::fill(result.inner.begin(), result.inner.end(), 1);
    result.inner[result.axis] = dim1;
    result.inner.back()       = dim2;

    // Outer dims: the original lens divided by the tile dims.
    result.outer = s.lens();
    result.outer[result.axis] /= dim1;
    result.outer.back() /= dim2;

    auto tile_size = dim1 * dim2;
    result.ntiles  = s.elements() / tile_size;
    // equivalent to dim1 * (dim2 + 1) to avoid bank conflicts
    auto tile_bytes = (tile_size + dim1) * s.type_size();
    if(tile_bytes > 65536)
        return {};

    result.block_size = std::min<std::size_t>(256, integer_divide_ceil(tile_size / 4, 64) * 64);
    return result;
}
|
||||||
|
|
||||||
|
std::string tile::str() const
|
||||||
|
{
|
||||||
|
if(args.empty())
|
||||||
|
return "transform_args()";
|
||||||
|
std::vector<std::string> strs;
|
||||||
|
std::transform(args.begin(), args.end(), std::back_inserter(strs), [](mode m) {
|
||||||
|
switch(m)
|
||||||
|
{
|
||||||
|
case load: return "tile::load";
|
||||||
|
case store: return "tile::store";
|
||||||
|
case none: return "tile::none";
|
||||||
|
}
|
||||||
|
MIGRAPHX_THROW("Invalid mode");
|
||||||
|
});
|
||||||
|
const std::string auto_tile = "auto_tile<${modes}>(${inner}, ${outer})";
|
||||||
|
return interpolate_string(auto_tile,
|
||||||
|
{{"modes", join_strings(strs, ", ")},
|
||||||
|
{"inner", generate_index_ints(inner)},
|
||||||
|
{"outer", generate_index_ints(outer)}});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t find_fast_axis(const shape& input)
|
||||||
|
{
|
||||||
|
if(input.scalar())
|
||||||
|
return input.ndim() - 1;
|
||||||
|
if(input.broadcasted())
|
||||||
|
{
|
||||||
|
auto stride_it = std::min_element(
|
||||||
|
input.strides().begin(), input.strides().end(), by(std::less<>{}, [](std::size_t i) {
|
||||||
|
if(i == 0)
|
||||||
|
return std::numeric_limits<std::size_t>::max();
|
||||||
|
return i;
|
||||||
|
}));
|
||||||
|
return stride_it - input.strides().begin();
|
||||||
|
}
|
||||||
|
auto permutation = invert_permutation(find_permutation(input));
|
||||||
|
auto it = std::max_element(permutation.begin(), permutation.end());
|
||||||
|
return it - permutation.begin();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t find_fast_axis(const std::vector<shape>& inputs)
|
||||||
|
{
|
||||||
|
auto permutation = invert_permutation(find_permutation(inputs));
|
||||||
|
auto it = std::max_element(permutation.begin(), permutation.end());
|
||||||
|
return it - permutation.begin();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string make_transformer_args(std::vector<std::string> transformers)
|
||||||
|
{
|
||||||
|
return join_strings(std::move(transformers), ", ");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generates a device function named `name` for the pointwise module `pm`
// and adds it to the generator `gg`.
// The module is copied, run through rewrite_quantization and optimize_module,
// and sorted before code generation. `always_return_tuple` is forwarded to
// the generator used for this module.
static void generate_pointwise(cpp_generator& gg,
                               const module& pm,
                               const std::string& name,
                               bool always_return_tuple = false)
{
    module mod = pm;
    run_passes(mod, {rewrite_quantization{}, optimize_module{}});
    mod.sort();

    cpp_generator local_gen;
    local_gen.always_return_tuple(always_return_tuple);
    // Prefix every generated function name with the migraphx namespace
    local_gen.fmap([](const std::string& fname) { return "migraphx::" + fname; });
    local_gen.add_point_op("where", "${function:where}(${0}, ${1}, ${2})");
    local_gen.add_point_op("prelu", "${function:where}(${0} < 0, ${0} * ${1}, ${0})");
    local_gen.add_point_op("sign",
                           "${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))");
    local_gen.add_point_op("equal", "migraphx::abs(${0} == ${1})");
    local_gen.add_point_op("less", "migraphx::abs(${0} < ${1})");
    local_gen.add_point_op("greater", "migraphx::abs(${0} > ${1})");
    local_gen.add_point_op("not", "migraphx::abs(not ${0})");
    // Add explicit conversions
    local_gen.fresult(
        [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });

    auto fn = local_gen.generate_module(mod);
    fn.set_attributes({"__device__", "__attribute__((const))"})
        .set_generic_types(mod)
        .set_name(name);
    gg.create_function(fn);
}
|
||||||
|
std::string generate_pointwise(const module& pm, const std::string& name, bool always_return_tuple)
|
||||||
|
{
|
||||||
|
cpp_generator g;
|
||||||
|
generate_pointwise(g, pm, name, always_return_tuple);
|
||||||
|
return g.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string reduce_op::str() const
|
||||||
|
{
|
||||||
|
return write + "(r.reduce(" + reduction + ", " + init + ", " + read + ")(" +
|
||||||
|
join_strings(inputs, ", ") + "))";
|
||||||
|
}
|
||||||
|
void reduce_op::set(const std::string& name, const shape& input, const shape& output)
|
||||||
|
{
|
||||||
|
assert(input.type() != shape::tuple_type);
|
||||||
|
assert(output.type() != shape::tuple_type);
|
||||||
|
if(name == "reduce_sum")
|
||||||
|
{
|
||||||
|
reduction = "op::sum{}";
|
||||||
|
}
|
||||||
|
else if(name == "reduce_mean")
|
||||||
|
{
|
||||||
|
auto reduce_elements = input.elements() / output.elements();
|
||||||
|
auto reduce_type = input.type();
|
||||||
|
reduction = "op::sum{}";
|
||||||
|
std::string mean = "op::mean<" + std::to_string(reduce_elements) + ">{}";
|
||||||
|
// Use float accumulator when reduction size is too large for half
|
||||||
|
if(reduce_type == shape::half_type and reduce_elements > 16384)
|
||||||
|
read = "compose(" + mean + ", op::convert_to<float>{})";
|
||||||
|
else if(contains({shape::float_type, shape::half_type, shape::double_type}, reduce_type))
|
||||||
|
read = mean;
|
||||||
|
else
|
||||||
|
write = mean;
|
||||||
|
}
|
||||||
|
else if(name == "reduce_max")
|
||||||
|
{
|
||||||
|
reduction = "op::max{}";
|
||||||
|
init = "lowest{}";
|
||||||
|
}
|
||||||
|
else if(name == "reduce_min")
|
||||||
|
{
|
||||||
|
reduction = "op::min{}";
|
||||||
|
init = "highest{}";
|
||||||
|
}
|
||||||
|
else if(name == "reduce_prod")
|
||||||
|
{
|
||||||
|
reduction = "op::product{}";
|
||||||
|
init = "1";
|
||||||
|
}
|
||||||
|
else if(name == "reduce_any")
|
||||||
|
{
|
||||||
|
reduction = "op::logical_or{}";
|
||||||
|
init = "bool{false}";
|
||||||
|
}
|
||||||
|
else if(name == "reduce_all")
|
||||||
|
{
|
||||||
|
reduction = "op::logical_and{}";
|
||||||
|
init = "bool{true}";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW("Unsupported reduce");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configures the reduction from an instruction and its operator.
// For "gpu::parallel_reduce" the wrapped operator is read from the "op"
// field, the output shape is the first sub-shape of the instruction's shape,
// and the read fragment is additionally wrapped with array_apply/make_array.
// Every other operator is configured directly from its own name and shapes.
void reduce_op::set(instruction_ref ins, const operation& op)
{
    if(op.name() != "gpu::parallel_reduce")
    {
        set(op.name(), ins->inputs().front()->get_shape(), ins->get_shape());
        return;
    }

    auto rop    = from_value<operation>(op.to_value().at("op"));
    auto input  = ins->inputs().front()->get_shape();
    auto output = ins->get_shape().sub_shapes().front();
    set(rop.name(), input, output);
    read = "compose(array_apply(" + read + "), MIGRAPHX_LIFT(make_array))";
}
|
||||||
|
std::string reduce_op::generate(instruction_ref ins, const std::vector<std::string>& x)
|
||||||
|
{
|
||||||
|
reduce_op r{x};
|
||||||
|
r.set(ins, ins->get_operator());
|
||||||
|
return r.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decides whether a pointwise instruction can be emitted with `lazy_inner`.
// Requires exactly one consumer, no broadcasted inputs, and a consumer that
// is either a reduce (its name contains "reduce") or the module's "@return".
static bool use_lazy_inner(instruction_ref ins)
{
    const auto& consumers = ins->outputs();
    if(consumers.size() != 1)
        return false;
    // When the inputs are broadcasted, it means the lambda will capture SGPRs
    // when doing block/wave reduction. This can cause register spilling in
    // the compiler when the lambda is evaluated at a later time although it
    // shouldn't. Instead, use `inner` to workaround this issue in the
    // compiler.
    const bool has_broadcasted_input =
        std::any_of(ins->inputs().begin(), ins->inputs().end(), [](instruction_ref input) {
            return input->get_shape().broadcasted();
        });
    if(has_broadcasted_input)
        return false;
    auto consumer = consumers.front();
    return contains(consumer->name(), "reduce") or consumer->name() == "@return";
}
|
||||||
|
|
||||||
|
// For every "@param" instruction that has more than one consumer, inserts an
// "identity" instruction right after it and redirects the parameter's
// consumers to that identity.
void preload_params(module& m)
{
    for(auto ins : iterator_for(m))
    {
        // Only parameters that are used more than once are rewritten
        if(ins->name() != "@param" or ins->outputs().size() <= 1)
            continue;
        auto identity = m.insert_instruction(std::next(ins), make_op("identity"), ins);
        m.replace_instruction(ins, identity);
    }
}
|
||||||
|
|
||||||
|
std::string generate_reduce(module m, const std::string& name)
|
||||||
|
{
|
||||||
|
preload_params(m);
|
||||||
|
run_passes(m, {optimize_module{}, prepare_reduce{}, optimize_module{}});
|
||||||
|
m.sort();
|
||||||
|
cpp_generator g;
|
||||||
|
g.always_return_tuple();
|
||||||
|
auto param_shapes = m.get_parameter_shapes();
|
||||||
|
auto max_shape =
|
||||||
|
std::max_element(param_shapes.begin(),
|
||||||
|
param_shapes.end(),
|
||||||
|
by(std::less<>{}, [](const auto& p) { return p.second.elements(); }));
|
||||||
|
auto ilens = max_shape->second.lens();
|
||||||
|
std::size_t i = 0;
|
||||||
|
auto f = g.generate_module(m, [&](instruction_ref ins, const auto& names) {
|
||||||
|
if(contains(ins->name(), "reduce"))
|
||||||
|
{
|
||||||
|
return reduce_op::generate(ins, cpp_generator::to_args(ins->inputs(), names));
|
||||||
|
}
|
||||||
|
if(ins->name() == "pointwise")
|
||||||
|
{
|
||||||
|
auto pointwise_name = "pointwise" + std::to_string(i);
|
||||||
|
i++;
|
||||||
|
generate_pointwise(g, *ins->module_inputs().front(), pointwise_name);
|
||||||
|
std::vector<instruction_ref> tensors;
|
||||||
|
std::copy_if(ins->inputs().begin(),
|
||||||
|
ins->inputs().end(),
|
||||||
|
std::back_inserter(tensors),
|
||||||
|
[&](auto input) {
|
||||||
|
return input->get_shape().lens() == ilens and
|
||||||
|
not input->get_shape().broadcasted();
|
||||||
|
});
|
||||||
|
auto inner_names = names;
|
||||||
|
for(auto input : ins->inputs())
|
||||||
|
{
|
||||||
|
if(input->name() != "@param")
|
||||||
|
continue;
|
||||||
|
if(contains(tensors, input))
|
||||||
|
continue;
|
||||||
|
inner_names[input] += "[out_idx]";
|
||||||
|
}
|
||||||
|
for(auto input : tensors)
|
||||||
|
inner_names[input] += "_lambda_param";
|
||||||
|
auto call_function =
|
||||||
|
pointwise_name + "(" +
|
||||||
|
join_strings(cpp_generator::to_args(ins->inputs(), inner_names), ", ") + ")";
|
||||||
|
if(tensors.empty())
|
||||||
|
return call_function;
|
||||||
|
const std::string inner_template =
|
||||||
|
"r.${inner}([=](${params}) { return ${call}; })(${args})";
|
||||||
|
std::string inner_name = use_lazy_inner(ins) ? "lazy_inner" : "inner";
|
||||||
|
auto args = cpp_generator::to_args(tensors, names);
|
||||||
|
auto params = cpp_generator::to_args(tensors, inner_names);
|
||||||
|
std::transform(
|
||||||
|
params.begin(), params.end(), params.begin(), [](auto s) { return "auto " + s; });
|
||||||
|
return interpolate_string(inner_template,
|
||||||
|
{{"inner", inner_name},
|
||||||
|
{"params", join_strings(params, ", ")},
|
||||||
|
{"args", join_strings(args, ", ")},
|
||||||
|
{"call", call_function}});
|
||||||
|
}
|
||||||
|
if(ins->name() == "multibroadcast")
|
||||||
|
{
|
||||||
|
return names.at(ins->inputs().front());
|
||||||
|
}
|
||||||
|
if(ins->name() == "get_tuple_elem")
|
||||||
|
{
|
||||||
|
const auto& x = names.at(ins->inputs().front());
|
||||||
|
auto index = ins->get_operator().to_value()["index"].to<std::size_t>();
|
||||||
|
return interpolate_string("${x}[${index}]",
|
||||||
|
{{"x", x}, {"index", std::to_string(index)}});
|
||||||
|
}
|
||||||
|
if(ins->name() == "identity")
|
||||||
|
{
|
||||||
|
const auto& x = names.at(ins->inputs().front());
|
||||||
|
return "r.inner(op::id{})(" + x + ")";
|
||||||
|
}
|
||||||
|
MIGRAPHX_THROW("Unknown operator: " + ins->name());
|
||||||
|
});
|
||||||
|
f.set_attributes({"__device__", "__attribute__((const))"}).set_generic_types(m).set_name(name);
|
||||||
|
f.add_generic_param("r");
|
||||||
|
f.add_generic_param("out_idx");
|
||||||
|
f.unused_param("out_idx");
|
||||||
|
g.create_function(f);
|
||||||
|
return g.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<std::string> get_op_names(const module& m)
|
||||||
|
{
|
||||||
|
std::vector<std::string> result;
|
||||||
|
for(auto& ins : m)
|
||||||
|
{
|
||||||
|
if(starts_with(ins.name(), "@"))
|
||||||
|
continue;
|
||||||
|
if(contains({"multibroadcast", "contiguous", "identity"}, ins.name()))
|
||||||
|
continue;
|
||||||
|
if(ins.name() == "pointwise")
|
||||||
|
{
|
||||||
|
auto names = get_op_names(*ins.module_inputs().front());
|
||||||
|
result.insert(result.end(), names.begin(), names.end());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result.push_back(ins.name());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string generate_name_from_ops(const module& m, const std::string& postname)
|
||||||
|
{
|
||||||
|
auto op_names = get_op_names(m);
|
||||||
|
if(not postname.empty())
|
||||||
|
op_names.push_back(postname);
|
||||||
|
if(op_names.empty())
|
||||||
|
return "noop";
|
||||||
|
return join_strings(op_names, "_");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gen
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
406
docker/rocm/migraphx/targets/gpu/compile_hip.cpp
Normal file
406
docker/rocm/migraphx/targets/gpu/compile_hip.cpp
Normal file
@ -0,0 +1,406 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compile_hip.hpp>
|
||||||
|
#include <migraphx/errors.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
#include <migraphx/env.hpp>
|
||||||
|
#include <migraphx/fileutils.hpp>
|
||||||
|
#include <cassert>
|
||||||
|
#include <iostream>
|
||||||
|
#include <deque>
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_USE_HIPRTC
|
||||||
|
#include <hip/hiprtc.h>
|
||||||
|
#include <migraphx/manage_ptr.hpp>
|
||||||
|
#include <migraphx/value.hpp>
|
||||||
|
#include <migraphx/tmp_dir.hpp>
|
||||||
|
#include <migraphx/dynamic_loader.hpp>
|
||||||
|
#include <migraphx/process.hpp>
|
||||||
|
#include <migraphx/msgpack.hpp>
|
||||||
|
#include <migraphx/serialize.hpp>
|
||||||
|
#include <migraphx/file_buffer.hpp>
|
||||||
|
#else
|
||||||
|
#include <migraphx/compile_src.hpp>
|
||||||
|
#include <migraphx/process.hpp>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG_SYM);
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_USE_HIPRTC
|
||||||
|
|
||||||
|
std::string hiprtc_error(hiprtcResult err, const std::string& msg)
|
||||||
|
{
|
||||||
|
return "hiprtc: " + (hiprtcGetErrorString(err) + (": " + msg));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Throw an exception built from `ctx` and the formatted hiprtc error unless
// `err` is HIPRTC_SUCCESS.
void hiprtc_check_error(hiprtcResult err, const std::string& msg, const std::string& ctx)
{
    if(err == HIPRTC_SUCCESS)
        return;
    throw make_exception(ctx, hiprtc_error(err, msg));
}
|
||||||
|
|
||||||
|
// NOLINTNEXTLINE
|
||||||
|
#define MIGRAPHX_HIPRTC(...) \
|
||||||
|
hiprtc_check_error(__VA_ARGS__, #__VA_ARGS__, MIGRAPHX_MAKE_SOURCE_CTX())
|
||||||
|
|
||||||
|
#define MIGRAPHX_HIPRTC_THROW(error, msg) MIGRAPHX_THROW(hiprtc_error(error, msg))
|
||||||
|
|
||||||
|
// Workaround hiprtc's broken API
|
||||||
|
void hiprtc_program_destroy(hiprtcProgram prog)
{
    // hiprtcDestroyProgram wants a pointer to the handle, so hand it the
    // address of our by-value copy. The returned status is ignored.
    hiprtcDestroyProgram(&prog);
}
|
||||||
|
using hiprtc_program_ptr = MIGRAPHX_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy);
|
||||||
|
|
||||||
|
template <class... Ts>
|
||||||
|
hiprtc_program_ptr hiprtc_program_create(Ts... xs)
|
||||||
|
{
|
||||||
|
hiprtcProgram prog = nullptr;
|
||||||
|
auto result = hiprtcCreateProgram(&prog, xs...);
|
||||||
|
hiprtc_program_ptr p{prog};
|
||||||
|
if(result != HIPRTC_SUCCESS)
|
||||||
|
MIGRAPHX_HIPRTC_THROW(result, "Create program failed.");
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct hiprtc_program
|
||||||
|
{
|
||||||
|
struct string_array
|
||||||
|
{
|
||||||
|
std::deque<std::string> strings{};
|
||||||
|
std::vector<const char*> c_strs{};
|
||||||
|
|
||||||
|
string_array() {}
|
||||||
|
string_array(const string_array&) = delete;
|
||||||
|
|
||||||
|
std::size_t size() const { return strings.size(); }
|
||||||
|
|
||||||
|
const char** data() { return c_strs.data(); }
|
||||||
|
|
||||||
|
void push_back(std::string s)
|
||||||
|
{
|
||||||
|
strings.push_back(std::move(s));
|
||||||
|
c_strs.push_back(strings.back().c_str());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
hiprtc_program_ptr prog = nullptr;
|
||||||
|
string_array headers{};
|
||||||
|
string_array include_names{};
|
||||||
|
std::string cpp_src = "";
|
||||||
|
std::string cpp_name = "";
|
||||||
|
|
||||||
|
hiprtc_program(const std::string& src, const std::string& name = "main.cpp")
|
||||||
|
: cpp_src(src), cpp_name(name)
|
||||||
|
{
|
||||||
|
create_program();
|
||||||
|
}
|
||||||
|
|
||||||
|
hiprtc_program(std::vector<hiprtc_src_file> srcs)
|
||||||
|
{
|
||||||
|
for(auto&& src : srcs)
|
||||||
|
{
|
||||||
|
if(ends_with(src.path, ".cpp"))
|
||||||
|
{
|
||||||
|
cpp_src = std::move(src.content);
|
||||||
|
cpp_name = std::move(src.path);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
headers.push_back(std::move(src.content));
|
||||||
|
include_names.push_back(std::move(src.path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
create_program();
|
||||||
|
}
|
||||||
|
|
||||||
|
void create_program()
|
||||||
|
{
|
||||||
|
assert(not cpp_src.empty());
|
||||||
|
assert(not cpp_name.empty());
|
||||||
|
assert(headers.size() == include_names.size());
|
||||||
|
prog = hiprtc_program_create(cpp_src.c_str(),
|
||||||
|
cpp_name.c_str(),
|
||||||
|
headers.size(),
|
||||||
|
headers.data(),
|
||||||
|
include_names.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile(const std::vector<std::string>& options, bool quiet = false) const
|
||||||
|
{
|
||||||
|
if(enabled(MIGRAPHX_TRACE_HIPRTC{}))
|
||||||
|
std::cout << "hiprtc " << join_strings(options, " ") << " " << cpp_name << std::endl;
|
||||||
|
std::vector<const char*> c_options;
|
||||||
|
std::transform(options.begin(),
|
||||||
|
options.end(),
|
||||||
|
std::back_inserter(c_options),
|
||||||
|
[](const std::string& s) { return s.c_str(); });
|
||||||
|
auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data());
|
||||||
|
auto prog_log = log();
|
||||||
|
if(not prog_log.empty() and not quiet)
|
||||||
|
{
|
||||||
|
std::cerr << prog_log << std::endl;
|
||||||
|
}
|
||||||
|
if(result != HIPRTC_SUCCESS)
|
||||||
|
MIGRAPHX_HIPRTC_THROW(result, "Compilation failed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string log() const
|
||||||
|
{
|
||||||
|
std::size_t n = 0;
|
||||||
|
MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n));
|
||||||
|
if(n == 0)
|
||||||
|
return {};
|
||||||
|
std::string buffer(n, '\0');
|
||||||
|
MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
|
||||||
|
assert(buffer.back() != 0);
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<char> get_code_obj() const
|
||||||
|
{
|
||||||
|
std::size_t n = 0;
|
||||||
|
MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n));
|
||||||
|
std::vector<char> buffer(n);
|
||||||
|
MIGRAPHX_HIPRTC(hiprtcGetCode(prog.get(), buffer.data()));
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<std::vector<char>> compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file> srcs,
|
||||||
|
const std::vector<std::string>& params,
|
||||||
|
const std::string& arch)
|
||||||
|
{
|
||||||
|
hiprtc_program prog(std::move(srcs));
|
||||||
|
auto options = params;
|
||||||
|
options.push_back("-DMIGRAPHX_USE_HIPRTC=1");
|
||||||
|
if(enabled(MIGRAPHX_GPU_DEBUG{}))
|
||||||
|
options.push_back("-DMIGRAPHX_DEBUG");
|
||||||
|
if(std::none_of(options.begin(), options.end(), [](const std::string& s) {
|
||||||
|
return starts_with(s, "--std=") or starts_with(s, "-std=");
|
||||||
|
}))
|
||||||
|
options.push_back("-std=c++17");
|
||||||
|
options.push_back("-fno-gpu-rdc");
|
||||||
|
options.push_back("-O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
|
||||||
|
options.push_back("-Wno-cuda-compat");
|
||||||
|
options.push_back("--offload-arch=" + arch);
|
||||||
|
prog.compile(options);
|
||||||
|
return {prog.get_code_obj()};
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hip_has_flags(const std::vector<std::string>& flags)
|
||||||
|
{
|
||||||
|
hiprtc_program prog{" "};
|
||||||
|
|
||||||
|
std::string src = " ";
|
||||||
|
src_file input{"main.cpp", src};
|
||||||
|
std::vector<src_file> srcs = {input};
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::string arch = "gfx900";
|
||||||
|
compile_hip_src(srcs, flags, arch);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch(...)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::vector<char>> compile_hip_src(const std::vector<src_file>& srcs,
|
||||||
|
const std::vector<std::string>& params,
|
||||||
|
const std::string& arch)
|
||||||
|
{
|
||||||
|
std::vector<hiprtc_src_file> hsrcs{srcs.begin(), srcs.end()};
|
||||||
|
if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
|
||||||
|
{
|
||||||
|
for(const auto& src : srcs)
|
||||||
|
{
|
||||||
|
if(src.path.extension() != ".cpp")
|
||||||
|
continue;
|
||||||
|
std::cout << std::string(src.content) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto fname = make_executable_filename("migraphx-hiprtc-driver");
|
||||||
|
auto p = dynamic_loader::path(&compile_hip_src_with_hiprtc);
|
||||||
|
auto driver = p.parent_path() / fname;
|
||||||
|
|
||||||
|
bool found = fs::exists(driver);
|
||||||
|
if(not found)
|
||||||
|
{
|
||||||
|
driver = p.parent_path().parent_path() / "bin" / fname;
|
||||||
|
found = fs::exists(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(found)
|
||||||
|
{
|
||||||
|
value v;
|
||||||
|
v["srcs"] = to_value(hsrcs);
|
||||||
|
v["params"] = to_value(params);
|
||||||
|
v["arch"] = to_value(arch);
|
||||||
|
|
||||||
|
tmp_dir td{};
|
||||||
|
auto out = td.path / "output";
|
||||||
|
|
||||||
|
process(driver, {quote_string(out.string())}).write([&](auto writer) {
|
||||||
|
to_msgpack(v, writer);
|
||||||
|
});
|
||||||
|
if(fs::exists(out))
|
||||||
|
return {read_buffer(out)};
|
||||||
|
}
|
||||||
|
return compile_hip_src_with_hiprtc(std::move(hsrcs), params, arch);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // MIGRAPHX_USE_HIPRTC
|
||||||
|
|
||||||
|
std::vector<std::vector<char>>
|
||||||
|
compile_hip_src_with_hiprtc(std::vector<hiprtc_src_file>, // NOLINT
|
||||||
|
const std::vector<std::string>&, // NOLINT
|
||||||
|
const std::string&)
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW("Not using hiprtc");
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_hip_clang_compiler()
|
||||||
|
{
|
||||||
|
static const auto result = fs::path{MIGRAPHX_HIP_COMPILER}.stem() == "clang++";
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
|
||||||
|
|
||||||
|
bool has_compiler_launcher()
|
||||||
|
{
|
||||||
|
static const auto result = fs::exists(MIGRAPHX_HIP_COMPILER_LAUNCHER);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Return a copy of `compiler` configured to emit assembly: the output
// extension becomes ".S" and every "-c" flag is rewritten to "-S".
src_compiler assemble(src_compiler compiler)
{
    compiler.out_ext = ".S";
    for(auto& flag : compiler.flags)
    {
        if(flag == "-c")
            flag = "-S";
    }
    return compiler;
}
|
||||||
|
|
||||||
|
std::vector<std::vector<char>> compile_hip_src(const std::vector<src_file>& srcs,
|
||||||
|
const std::vector<std::string>& params,
|
||||||
|
const std::string& arch)
|
||||||
|
{
|
||||||
|
assert(not srcs.empty());
|
||||||
|
|
||||||
|
if(not is_hip_clang_compiler())
|
||||||
|
MIGRAPHX_THROW("Unknown hip compiler: " MIGRAPHX_HIP_COMPILER);
|
||||||
|
|
||||||
|
src_compiler compiler;
|
||||||
|
compiler.flags = params;
|
||||||
|
compiler.compiler = MIGRAPHX_HIP_COMPILER;
|
||||||
|
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
|
||||||
|
if(has_compiler_launcher())
|
||||||
|
compiler.launcher = MIGRAPHX_HIP_COMPILER_LAUNCHER;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(std::none_of(params.begin(), params.end(), [](const std::string& s) {
|
||||||
|
return starts_with(s, "--std=") or starts_with(s, "-std=");
|
||||||
|
}))
|
||||||
|
compiler.flags.emplace_back("--std=c++17");
|
||||||
|
compiler.flags.emplace_back(" -fno-gpu-rdc");
|
||||||
|
if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
|
||||||
|
compiler.flags.emplace_back("-g");
|
||||||
|
compiler.flags.emplace_back("-c");
|
||||||
|
compiler.flags.emplace_back("--offload-arch=" + arch);
|
||||||
|
compiler.flags.emplace_back("--cuda-device-only");
|
||||||
|
compiler.flags.emplace_back("-O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3") + " ");
|
||||||
|
|
||||||
|
if(enabled(MIGRAPHX_GPU_DEBUG{}))
|
||||||
|
compiler.flags.emplace_back("-DMIGRAPHX_DEBUG");
|
||||||
|
|
||||||
|
compiler.flags.emplace_back("-Wno-unused-command-line-argument");
|
||||||
|
compiler.flags.emplace_back("-Wno-cuda-compat");
|
||||||
|
compiler.flags.emplace_back(MIGRAPHX_HIP_COMPILER_FLAGS);
|
||||||
|
|
||||||
|
if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
|
||||||
|
{
|
||||||
|
for(const auto& src : srcs)
|
||||||
|
{
|
||||||
|
if(src.path.extension() != ".cpp")
|
||||||
|
continue;
|
||||||
|
std::cout << std::string(src.content) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(enabled(MIGRAPHX_GPU_DUMP_ASM{}))
|
||||||
|
{
|
||||||
|
|
||||||
|
std::cout << assemble(compiler).compile(srcs).data() << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {compiler.compile(srcs)};
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hip_has_flags(const std::vector<std::string>& flags)
|
||||||
|
{
|
||||||
|
src_compiler compiler;
|
||||||
|
compiler.compiler = MIGRAPHX_HIP_COMPILER;
|
||||||
|
compiler.flags = flags;
|
||||||
|
compiler.flags.emplace_back("-x hip");
|
||||||
|
compiler.flags.emplace_back("-c");
|
||||||
|
compiler.flags.emplace_back("--offload-arch=gfx900");
|
||||||
|
compiler.flags.emplace_back("--cuda-device-only");
|
||||||
|
|
||||||
|
std::string src;
|
||||||
|
src_file input{"main.cpp", src};
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
compiler.compile({input});
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch(...)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_USE_HIPRTC
|
||||||
|
|
||||||
|
std::string enum_params(std::size_t count, std::string param)
|
||||||
|
{
|
||||||
|
std::vector<std::string> items(count);
|
||||||
|
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
|
||||||
|
return join_strings(items, ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
215
docker/rocm/migraphx/targets/gpu/compile_hip_code_object.cpp
Normal file
215
docker/rocm/migraphx/targets/gpu/compile_hip_code_object.cpp
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compile_hip_code_object.hpp>
|
||||||
|
#include <migraphx/gpu/compile_hip.hpp>
|
||||||
|
#include <migraphx/gpu/code_object_op.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/gpu/device_name.hpp>
|
||||||
|
#include <migraphx/context.hpp>
|
||||||
|
#include <migraphx_kernels.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
std::string generate_make_shape(const shape& s)
|
||||||
|
{
|
||||||
|
return "make_shape(" + generate_index_ints(s.lens()) + ", " + generate_index_ints(s.strides()) +
|
||||||
|
")";
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char* const make_tensor_template = R"__migraphx__(
|
||||||
|
template<>
|
||||||
|
struct make_tensor<${n}>
|
||||||
|
{
|
||||||
|
static __device__ auto apply(void* __restrict__ p)
|
||||||
|
{
|
||||||
|
return make_tensor_view(reinterpret_cast<${type}* __restrict__>(p), make_shape(${lens}, ${strides}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
)__migraphx__";
|
||||||
|
|
||||||
|
std::string generate_make_tensor(std::size_t n, const shape& s)
|
||||||
|
{
|
||||||
|
return interpolate_string(make_tensor_template,
|
||||||
|
{{"n", std::to_string(n)},
|
||||||
|
{"type", shape::cpp_type(s.type())},
|
||||||
|
{"lens", generate_index_ints(s.lens())},
|
||||||
|
{"strides", generate_index_ints(s.strides())}});
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string generate_args_hpp(const std::vector<shape>& inputs)
|
||||||
|
{
|
||||||
|
std::string inner;
|
||||||
|
for(std::size_t i = 0; i < inputs.size(); i++)
|
||||||
|
{
|
||||||
|
inner += generate_make_tensor(i, inputs[i]);
|
||||||
|
}
|
||||||
|
const std::string args_hpp = R"__migraphx__(
|
||||||
|
#ifndef MIGRAPHX_GUARD_AUTO_ARGS_HPP
|
||||||
|
#define MIGRAPHX_GUARD_AUTO_ARGS_HPP
|
||||||
|
|
||||||
|
#include <migraphx/kernels/args.hpp>
|
||||||
|
#include <migraphx/kernels/tensor_view.hpp>
|
||||||
|
#include <migraphx/kernels/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
|
||||||
|
__content__
|
||||||
|
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif
|
||||||
|
)__migraphx__";
|
||||||
|
return replace_string(args_hpp, "__content__", inner);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<std::string> get_compiler_warnings()
|
||||||
|
{
|
||||||
|
std::vector<std::string> warnings = {
|
||||||
|
"-Weverything",
|
||||||
|
"-Wno-c++98-compat",
|
||||||
|
"-Wno-c++98-compat-pedantic",
|
||||||
|
"-Wno-conversion",
|
||||||
|
"-Wno-double-promotion",
|
||||||
|
"-Wno-exit-time-destructors",
|
||||||
|
"-Wno-extra-semi",
|
||||||
|
"-Wno-extra-semi-stmt",
|
||||||
|
"-Wno-float-conversion",
|
||||||
|
"-Wno-gnu-anonymous-struct",
|
||||||
|
"-Wno-gnu-zero-variadic-macro-arguments",
|
||||||
|
"-Wno-missing-prototypes",
|
||||||
|
"-Wno-nested-anon-types",
|
||||||
|
"-Wno-padded",
|
||||||
|
"-Wno-shorten-64-to-32",
|
||||||
|
"-Wno-sign-conversion",
|
||||||
|
"-Wno-sign-compare",
|
||||||
|
"-Wno-unused-command-line-argument",
|
||||||
|
"-Wno-weak-vtables",
|
||||||
|
"-Wno-c99-extensions",
|
||||||
|
};
|
||||||
|
|
||||||
|
if(hip_has_flags({"-Werror", "-Wunsafe-buffer-usage"}))
|
||||||
|
warnings.push_back("-Wno-unsafe-buffer-usage");
|
||||||
|
return warnings;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<std::string>& compiler_warnings()
|
||||||
|
{
|
||||||
|
static std::vector<std::string> warnings = get_compiler_warnings();
|
||||||
|
return warnings;
|
||||||
|
}
|
||||||
|
|
||||||
|
void hip_compile_options::set_launch_params(
|
||||||
|
const value& v,
|
||||||
|
const std::function<std::size_t(std::size_t local)>& compute_global,
|
||||||
|
std::size_t default_local)
|
||||||
|
{
|
||||||
|
local = v.get("local", default_local);
|
||||||
|
if(v.contains("global"))
|
||||||
|
global = v.at("global").to<std::size_t>();
|
||||||
|
else
|
||||||
|
global = compute_global(local);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool hip_accept_non_uniform_wg()
|
||||||
|
{
|
||||||
|
static bool non_uniform_wg = hip_has_flags({"-fno-offload-uniform-block"});
|
||||||
|
return non_uniform_wg;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::function<std::size_t(std::size_t local)>
|
||||||
|
compute_global_for(context& ctx, std::size_t n, std::size_t over)
|
||||||
|
{
|
||||||
|
assert(over > 0);
|
||||||
|
std::size_t max_global = ctx.get_current_device().get_cu_count() *
|
||||||
|
ctx.get_current_device().get_max_workitems_per_cu();
|
||||||
|
return [n, over, max_global](std::size_t local) {
|
||||||
|
std::size_t num_elements = n;
|
||||||
|
if(not hip_accept_non_uniform_wg())
|
||||||
|
{
|
||||||
|
num_elements = (1 + (n - 1) / local) * local;
|
||||||
|
}
|
||||||
|
std::size_t groups = 1 + (num_elements - 1) / local;
|
||||||
|
std::size_t max_blocks = max_global / local;
|
||||||
|
std::size_t nglobal = std::min(max_blocks * over, groups) * local;
|
||||||
|
return std::min(nglobal, num_elements);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick a block size for `n` items: `n` rounded up to a multiple of the
// device wavefront size, at least one wavefront, capped at `max_block_size`.
std::size_t compute_block_size(context& ctx, std::size_t n, std::size_t max_block_size)
{
    const std::size_t wavefront = ctx.get_current_device().get_wavefront_size();
    const std::size_t rounded   = ((n - 1) / wavefront + 1) * wavefront;
    const std::size_t at_least_one_wave = std::max(wavefront, rounded);
    return std::min(at_least_one_wave, max_block_size);
}
|
||||||
|
|
||||||
|
// Compile the kernel source `content` into a code_object_op.
//
// The compilation unit is made of the caller's additional source files, the
// bundled kernel sources from migraphx_kernels(), `content` as "main.cpp",
// and a generated "args.hpp" describing the (virtual) input shapes. Launch
// dimensions, the wavefront size, the warning flags and -Werror are appended
// to the caller's compile parameters.
// Throws when compile_hip_src does not return exactly one code object.
operation
compile_hip_code_object(context& ctx, const std::string& content, hip_compile_options options)
{
    assert(options.global > 0);
    assert(options.local > 0);
    assert(not options.inputs.empty());
    assert(options.inputs.size() == options.virtual_inputs.size() or
           options.virtual_inputs.empty());

    std::vector<src_file> srcs = options.additional_src_files;
    // The bundled kernel sources are fetched once and reused across calls.
    static auto kernels{::migraphx_kernels()};
    std::transform(
        kernels.begin(),
        kernels.end(),
        std::back_inserter(srcs),
        [](const std::pair<std::string_view, std::string_view>& entry) { return src_file{entry}; });
    srcs.emplace_back("main.cpp", content);

    // Virtual inputs, when provided, take the place of the real input shapes
    // in the generated header.
    const auto& arg_shapes = options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs;
    auto args_hpp          = generate_args_hpp(arg_shapes);
    srcs.emplace_back("args.hpp", args_hpp);

    const bool uneven_launch = (options.global % options.local != 0);
    if(uneven_launch and hip_accept_non_uniform_wg())
        options.emplace_param("-fno-offload-uniform-block");
    else
        assert(options.global % options.local == 0);

    const std::string wavefront_size =
        std::to_string(ctx.get_current_device().get_wavefront_size());
    options.emplace_param("-DMIGRAPHX_NGLOBAL=" + std::to_string(options.global));
    options.emplace_param("-DMIGRAPHX_NLOCAL=" + std::to_string(options.local));
    options.emplace_param("-DMIGRAPHX_WAVEFRONTSIZE=" + wavefront_size);

    const auto& warnings = compiler_warnings();
    options.params.insert(options.params.end(), warnings.begin(), warnings.end());
    options.emplace_param("-ftemplate-backtrace-limit=0");
    options.emplace_param("-Werror");

    auto code_objects = compile_hip_src(srcs, options.params, get_device_name());
    if(code_objects.size() != 1)
        MIGRAPHX_THROW("No code object");
    return code_object_op{value::binary{code_objects.front()},
                          options.kernel_name,
                          options.global,
                          options.local,
                          options.inputs,
                          options.output,
                          options.output_arg};
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
78
docker/rocm/migraphx/targets/gpu/compile_hipblaslt.cpp
Normal file
78
docker/rocm/migraphx/targets/gpu/compile_hipblaslt.cpp
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if MIGRAPHX_USE_HIPBLASLT
|
||||||
|
#include <migraphx/gpu/compile_hipblaslt.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Run the operation's compile step for `ins` and return the "workspace"
// entry of the resulting value, or 0 when that entry is absent.
static size_t compile(migraphx::context& ctx, operation& op, instruction_ref ins)
{
    const auto input_shapes = to_shapes(ins->inputs());
    auto compiled           = op.compile(ctx, ins->get_shape(), input_shapes);
    return compiled.get<std::size_t>("workspace", 0);
}
|
||||||
|
|
||||||
|
// Replace every "gpu::hipblaslt_op" instruction in `m` with the operation it
// wraps, adding a workspace allocation as the second-to-last input.
//
// Each instruction is rewritten twice: first with a workspace of
// hipblaslt_workspace_size bytes so the operation can be compiled, then with
// a workspace of the size reported by that compile step.
void compile_hipblaslt::apply(module& m) const
{
    assert(ctx);
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "gpu::hipblaslt_op")
            continue;
        auto op     = any_cast<hipblaslt_op>(ins->get_operator()).op;
        auto inputs = ins->inputs();

        // Insert a uint8 allocation of `bytes` bytes in front of `ins`.
        const auto insert_workspace = [&](std::size_t bytes) {
            return m.insert_instruction(
                ins, make_op("allocate", {{"shape", to_value(shape{shape::uint8_type, {bytes}})}}));
        };

        // First pass: provisional workspace, placed just before the last input.
        std::size_t ws         = hipblaslt_workspace_size;
        auto provisional_alloc = insert_workspace(ws);
        inputs.insert(std::prev(inputs.end()), provisional_alloc);
        m.replace_instruction(ins, op, inputs);

        // Compile to learn the workspace size that is actually required.
        ws               = compile(*ctx, op, ins);
        auto final_alloc = insert_workspace(ws);

        // Second pass: swap the provisional workspace for the actual one.
        auto slot = std::find(inputs.begin(), inputs.end(), provisional_alloc);
        if(slot != inputs.end())
        {
            *slot = final_alloc;
        }
        m.replace_instruction(ins, op, inputs);
    }
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_USE_HIPBLASLT
|
||||||
89
docker/rocm/migraphx/targets/gpu/compile_miopen.cpp
Normal file
89
docker/rocm/migraphx/targets/gpu/compile_miopen.cpp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compile_miopen.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/op/identity.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
struct miopen_op
|
||||||
|
{
|
||||||
|
operation op = op::identity{};
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack(f(self.op, "op"));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "gpu::miopen_op"; }
|
||||||
|
|
||||||
|
shape compute_shape(std::vector<shape> inputs) const
|
||||||
|
{
|
||||||
|
inputs.push_back(inputs.back());
|
||||||
|
return op.compute_shape(inputs);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
|
||||||
|
{
|
||||||
|
return shapes.size() - 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
MIGRAPHX_REGISTER_OP(miopen_op);
|
||||||
|
|
||||||
|
// Run the operation's compile step for `ins` and return the "workspace"
// entry of the resulting value, or 0 when that entry is absent.
std::size_t compile_miopen::compile(operation& op, instruction_ref ins) const
{
    const auto input_shapes = to_shapes(ins->inputs());
    auto compiled           = op.compile(*ctx, ins->get_shape(), input_shapes);
    return compiled.get<std::size_t>("workspace", 0);
}
|
||||||
|
|
||||||
|
// Replace every "gpu::miopen_op" instruction in `m` with the operation it
// wraps. The operation is compiled first, and an int8 allocation of the
// reported workspace size is inserted as the second-to-last input.
void compile_miopen::apply(module& m) const
{
    assert(ctx);
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "gpu::miopen_op")
            continue;
        auto op              = any_cast<miopen_op>(ins->get_operator()).op;
        const std::size_t ws = compile(op, ins);

        auto inputs    = ins->inputs();
        auto workspace = m.insert_instruction(
            ins, make_op("allocate", {{"shape", to_value(shape{shape::int8_type, {ws}})}}));
        // Keep the original last input last; the workspace goes just before it.
        inputs.insert(std::prev(inputs.end()), workspace);

        m.replace_instruction(ins, op, inputs);
    }
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
332
docker/rocm/migraphx/targets/gpu/compile_ops.cpp
Normal file
332
docker/rocm/migraphx/targets/gpu/compile_ops.cpp
Normal file
@ -0,0 +1,332 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/iterator_for.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/par_for.hpp>
|
||||||
|
#include <migraphx/register_op.hpp>
|
||||||
|
#include <migraphx/algorithm.hpp>
|
||||||
|
#include <migraphx/op/identity.hpp>
|
||||||
|
#include <migraphx/gpu/compiler.hpp>
|
||||||
|
#include <migraphx/gpu/compile_ops.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/gpu/time_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Read by par_compile() to derive the grain size handed to par_for; a value of 0
// is replaced by the number of jobs.
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
|
||||||
|
// Trace level for compilation/benchmarking output: the code in this file checks
// it against the thresholds > 0, > 1 and > 2 to print progressively more detail.
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING);
|
||||||
|
|
||||||
|
struct precompile_op
|
||||||
|
{
|
||||||
|
operation op = op::identity{};
|
||||||
|
std::size_t additional_args = 1;
|
||||||
|
bool ignore_modules = false;
|
||||||
|
std::optional<shape> output_shape = nullopt;
|
||||||
|
|
||||||
|
template <class Self, class F>
|
||||||
|
static auto reflect(Self& self, F f)
|
||||||
|
{
|
||||||
|
return pack(f(self.op, "op"),
|
||||||
|
f(self.additional_args, "additional_args"),
|
||||||
|
f(self.ignore_modules, "ignore_modules"),
|
||||||
|
f(self.output_shape, "output_shape"));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name() const { return "gpu::precompile_op"; }
|
||||||
|
|
||||||
|
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
|
||||||
|
{
|
||||||
|
// Pop off additional args
|
||||||
|
inputs.resize(inputs.size() - additional_args);
|
||||||
|
if(output_shape.has_value())
|
||||||
|
return output_shape.value();
|
||||||
|
if(ignore_modules)
|
||||||
|
return op.compute_shape(inputs);
|
||||||
|
return op.compute_shape(inputs, mods);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
|
||||||
|
{
|
||||||
|
return shapes.size() - 1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
MIGRAPHX_REGISTER_OP(precompile_op);
|
||||||
|
|
||||||
|
struct compiled_result
|
||||||
|
{
|
||||||
|
compiler_replace replace;
|
||||||
|
instruction_ref ins;
|
||||||
|
|
||||||
|
friend std::ostream& operator<<(std::ostream& os, const compiled_result& cr)
|
||||||
|
{
|
||||||
|
cr.replace.trace(os, cr.ins);
|
||||||
|
return os;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct compile_plan
|
||||||
|
{
|
||||||
|
context* ctx;
|
||||||
|
operation preop;
|
||||||
|
instruction_ref ins;
|
||||||
|
optional<tuning_config> config = nullopt;
|
||||||
|
std::vector<optional<compiled_result>> results = {};
|
||||||
|
void update_config(bool exhaustive)
|
||||||
|
{
|
||||||
|
config = get_tuning_config(*ctx, ins, preop, exhaustive);
|
||||||
|
}
|
||||||
|
template <class Vector>
|
||||||
|
void insert_compiles(Vector& compiles, const value& solution, std::size_t i)
|
||||||
|
{
|
||||||
|
compiles.emplace_back([=] {
|
||||||
|
try
|
||||||
|
{
|
||||||
|
results[i] = compiled_result{compile(*ctx, ins, preop, solution), ins};
|
||||||
|
}
|
||||||
|
catch(const std::exception& e)
|
||||||
|
{
|
||||||
|
const auto trace_level = value_of(MIGRAPHX_TRACE_BENCHMARKING{});
|
||||||
|
if(trace_level > 0)
|
||||||
|
std::cerr << "Exception in " + preop.name() + ": " + e.what() << std::endl;
|
||||||
|
results[i] = nullopt;
|
||||||
|
}
|
||||||
|
catch(...)
|
||||||
|
{
|
||||||
|
results[i] = nullopt;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Vector>
|
||||||
|
void add_compiles(Vector& compiles)
|
||||||
|
{
|
||||||
|
if(config.has_value())
|
||||||
|
{
|
||||||
|
const auto& problem = config->problem;
|
||||||
|
if(auto sol = ctx->get_problem_cache().get(preop.name(), problem))
|
||||||
|
{
|
||||||
|
auto solution = sol.value();
|
||||||
|
// No solution yet until benchmarked so skip for now
|
||||||
|
if(solution.is_null())
|
||||||
|
return;
|
||||||
|
results.resize(1);
|
||||||
|
insert_compiles(compiles, solution, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ctx->get_problem_cache().mark(preop.name(), problem);
|
||||||
|
const auto& solutions = config->solutions;
|
||||||
|
if(solutions.empty())
|
||||||
|
MIGRAPHX_THROW("No solutions provided for " + preop.name() + " with " +
|
||||||
|
to_string(problem));
|
||||||
|
results.resize(solutions.size());
|
||||||
|
for(auto i : range(solutions.size()))
|
||||||
|
{
|
||||||
|
auto solution = solutions[i];
|
||||||
|
insert_compiles(compiles, solution, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
results.resize(1);
|
||||||
|
insert_compiles(compiles, value{}, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::string problem_string() const
|
||||||
|
{
|
||||||
|
if(config)
|
||||||
|
return to_string(config->problem);
|
||||||
|
return "<no problem key>";
|
||||||
|
}
|
||||||
|
|
||||||
|
const compiled_result& benchmark() const
|
||||||
|
{
|
||||||
|
const auto trace_level = value_of(MIGRAPHX_TRACE_BENCHMARKING{});
|
||||||
|
if(trace_level > 0 and not results.empty())
|
||||||
|
{
|
||||||
|
std::cout << "Benchmarking " << preop.name() << ": " << results.size() << " configs"
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
if(results.empty())
|
||||||
|
MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " +
|
||||||
|
problem_string());
|
||||||
|
if(results.size() == 1)
|
||||||
|
{
|
||||||
|
if(not results.front().has_value())
|
||||||
|
MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " +
|
||||||
|
problem_string());
|
||||||
|
return *results.front();
|
||||||
|
}
|
||||||
|
if(not config)
|
||||||
|
MIGRAPHX_THROW("Multiple kernels without config for " + preop.name());
|
||||||
|
if(trace_level > 1)
|
||||||
|
std::cout << "Problem: " << config->problem << std::endl;
|
||||||
|
std::vector<double> times;
|
||||||
|
times.reserve(results.size());
|
||||||
|
std::transform(results.begin(),
|
||||||
|
results.end(),
|
||||||
|
config->solutions.begin(),
|
||||||
|
std::back_inserter(times),
|
||||||
|
[&](const auto& cr, const auto& solution) {
|
||||||
|
if(trace_level > 1)
|
||||||
|
std::cout << "Benchmarking solution: " << solution << std::endl;
|
||||||
|
if(not cr.has_value())
|
||||||
|
{
|
||||||
|
if(trace_level > 1)
|
||||||
|
std::cout << "No binary" << std::endl;
|
||||||
|
return std::numeric_limits<double>::max();
|
||||||
|
}
|
||||||
|
if(trace_level > 2)
|
||||||
|
std::cout << *cr << std::endl;
|
||||||
|
/*
|
||||||
|
create a small program with insturction being compiled and call "replace"
|
||||||
|
on that which would insert all the compiled code objects, prefills etc.
|
||||||
|
necessary to run candidate code object
|
||||||
|
*/
|
||||||
|
program bench_prog;
|
||||||
|
auto* bench_mm = bench_prog.get_main_module();
|
||||||
|
std::vector<instruction_ref> bench_ins_inputs;
|
||||||
|
|
||||||
|
std::transform(cr->ins->inputs().begin(),
|
||||||
|
cr->ins->inputs().end(),
|
||||||
|
std::back_inserter(bench_ins_inputs),
|
||||||
|
[&](const auto& arg) {
|
||||||
|
return bench_mm->add_parameter(
|
||||||
|
std::to_string(bench_ins_inputs.size()),
|
||||||
|
arg->get_shape());
|
||||||
|
});
|
||||||
|
auto bench_ins = bench_mm->add_instruction(
|
||||||
|
cr->ins->get_operator(), bench_ins_inputs, cr->ins->module_inputs());
|
||||||
|
cr->replace.replace(*bench_mm, bench_ins);
|
||||||
|
// do dead code elimination by directly removing instruction
|
||||||
|
bench_mm->remove_instruction(bench_ins);
|
||||||
|
auto t = time_program(*ctx, bench_prog, 20);
|
||||||
|
if(trace_level > 1)
|
||||||
|
std::cout << t << "ms" << std::endl;
|
||||||
|
return t;
|
||||||
|
});
|
||||||
|
std::this_thread::sleep_for(std::chrono::milliseconds{50});
|
||||||
|
auto i = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
|
||||||
|
if(trace_level > 0)
|
||||||
|
std::cout << "Fastest solution: " << config->solutions.at(i) << std::endl;
|
||||||
|
ctx->get_problem_cache().insert(preop.name(), config->problem, config->solutions.at(i));
|
||||||
|
if(not results[i].has_value())
|
||||||
|
MIGRAPHX_THROW("No valid tuned compilation for " + preop.name() + " with " +
|
||||||
|
problem_string());
|
||||||
|
auto skipped = std::count_if(
|
||||||
|
results.begin(), results.end(), [](const auto& cr) { return not cr.has_value(); });
|
||||||
|
if(skipped > 0)
|
||||||
|
std::cout << "Skipped " << skipped << " configs for " << preop.name() << std::endl;
|
||||||
|
|
||||||
|
return *results[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
void replace(module& m) const
|
||||||
|
{
|
||||||
|
const auto& cr = benchmark();
|
||||||
|
cr.replace.replace(m, cr.ins);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
void par_compile(std::size_t n, F f)
|
||||||
|
{
|
||||||
|
if(n == 0)
|
||||||
|
return;
|
||||||
|
auto d = value_of(MIGRAPHX_GPU_COMPILE_PARALLEL{});
|
||||||
|
if(d == 0)
|
||||||
|
d = n;
|
||||||
|
par_for(n, n / d, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct compile_manager
|
||||||
|
{
|
||||||
|
std::vector<compile_plan> cps;
|
||||||
|
bool exhaustive = false;
|
||||||
|
|
||||||
|
template <class... Ts>
|
||||||
|
void add_plan(Ts&&... xs)
|
||||||
|
{
|
||||||
|
cps.push_back({std::forward<Ts>(xs)...});
|
||||||
|
}
|
||||||
|
|
||||||
|
void update_configs()
|
||||||
|
{
|
||||||
|
par_compile(cps.size(), [&](auto i) { cps[i].update_config(exhaustive); });
|
||||||
|
}
|
||||||
|
|
||||||
|
void compile(module& m)
|
||||||
|
{
|
||||||
|
std::vector<std::function<void()>> compiles;
|
||||||
|
for(auto& cp : cps)
|
||||||
|
{
|
||||||
|
cp.add_compiles(compiles);
|
||||||
|
}
|
||||||
|
par_compile(compiles.size(), [&](auto i) { compiles[i](); });
|
||||||
|
|
||||||
|
// Replace and/or benchmark
|
||||||
|
for(const auto& cp : cps)
|
||||||
|
{
|
||||||
|
if(cp.results.empty())
|
||||||
|
continue;
|
||||||
|
cp.replace(m);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove compile_plan already executed
|
||||||
|
cps.erase(std::remove_if(cps.begin(),
|
||||||
|
cps.end(),
|
||||||
|
[](const auto& cp) { return not cp.results.empty(); }),
|
||||||
|
cps.end());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates a compile plan for every "gpu::precompile_op" instruction in `m`,
// then compiles and replaces them. Compilation is run twice; the second run
// handles the plans that were left pending by the first one.
void compile_ops::apply(module& m) const
{
    compile_manager manager;
    manager.exhaustive = exhaustive_tune;
    // Collect a plan for each precompile op
    for(auto ins : iterator_for(m))
    {
        if(ins->name() == "gpu::precompile_op")
        {
            operation wrapped = any_cast<precompile_op>(ins->get_operator()).op;
            manager.add_plan(ctx, wrapped, ins);
        }
    }
    manager.update_configs();
    manager.compile(m);
    // Compile already tuned configs
    manager.compile(m);
    assert(manager.cps.empty());
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
50
docker/rocm/migraphx/targets/gpu/compile_pointwise.cpp
Normal file
50
docker/rocm/migraphx/targets/gpu/compile_pointwise.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compile_pointwise.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/gpu/compile_gen.hpp>
|
||||||
|
#include <migraphx/gpu/compiler.hpp>
|
||||||
|
#include <migraphx/module.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
// Generates the source for the pointwise module `pm` and compiles it with the
// "pointwise" compiler for the given input shapes.
operation
compile_pointwise(context& ctx, const std::vector<migraphx::shape>& in_shapes, const_module_ref pm)
{
    // Generated function for the module, passed to the compiler as the preamble
    auto preamble = gen::generate_pointwise(*pm, "inner_pointwise", true);
    std::string lifted = "MIGRAPHX_LIFT(inner_pointwise)";
    // Kernel name derived from the operators in the module
    auto name = gen::generate_name_from_ops(*pm, "kernel");
    return gpu::compile_op(
        "pointwise", ctx, in_shapes, {{"lambda", lifted}, {"preamble", preamble}, {"kernel", name}});
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
74
docker/rocm/migraphx/targets/gpu/compiler.cpp
Normal file
74
docker/rocm/migraphx/targets/gpu/compiler.cpp
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/compiler.hpp>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
// Set of callbacks registered for one compiler name; stored in compiler_map().
struct compiler_handle
|
||||||
|
{
|
||||||
|
// Invoked by compile() for an instruction and a solution
compiler_compile compile;
|
||||||
|
// Invoked by compile_op() for a list of input shapes and a value
compiler_compile_op compile_op;
|
||||||
|
// Invoked by get_tuning_config()
compiler_tuning_config get_tuning_config;
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
auto& compiler_map()
|
||||||
|
{
|
||||||
|
static std::unordered_map<std::string, compiler_handle> m; // NOLINT
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
void register_compiler(const std::string& name,
|
||||||
|
compiler_compile c,
|
||||||
|
compiler_compile_op cop,
|
||||||
|
compiler_tuning_config ctg)
|
||||||
|
{
|
||||||
|
compiler_map()[name] = {std::move(c), std::move(cop), std::move(ctg)};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tells whether a compiler has been registered under `name`.
bool has_compiler_for(const std::string& name)
{
    const auto& registry = compiler_map();
    return registry.find(name) != registry.end();
}
|
||||||
|
// Looks up the compiler registered under the operation's name and invokes its
// compile callback. The lookup uses at(), so an unregistered name throws.
compiler_replace
compile(context& ctx, instruction_ref ins, const operation& op, const value& solution)
{
    auto& handle = compiler_map().at(op.name());
    return handle.compile(ctx, ins, op, solution);
}
|
||||||
|
// Looks up the compiler registered under `name` and invokes its compile_op
// callback. The lookup uses at(), so an unregistered name throws.
operation
compile_op(const std::string& name, context& ctx, const std::vector<shape>& inputs, const value& v)
{
    auto& handle = compiler_map().at(name);
    return handle.compile_op(ctx, inputs, v);
}
|
||||||
|
|
||||||
|
// Looks up the compiler registered under the operation's name and asks it for
// a tuning configuration. The lookup uses at(), so an unregistered name throws.
optional<tuning_config>
get_tuning_config(context& ctx, instruction_ref ins, const operation& op, bool exhaustive)
{
    auto& handle = compiler_map().at(op.name());
    return handle.get_tuning_config(ctx, ins, op, exhaustive);
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
52
docker/rocm/migraphx/targets/gpu/device/argmax.cpp
Normal file
52
docker/rocm/migraphx/targets/gpu/device/argmax.cpp
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/gpu/device/argmax.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
#include <migraphx/gpu/device/arg_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Dispatches to arg_op with the argmax functor selected by `select_last_index`:
// argmax_op_last_index when true, argmax_op_first_index otherwise.
void argmax(hipStream_t stream,
            const argument& result,
            const argument& arg,
            int64_t axis,
            bool select_last_index)
{
    if(not select_last_index)
    {
        arg_op(argmax_op_first_index{}, stream, result, arg, axis);
        return;
    }
    arg_op(argmax_op_last_index{}, stream, result, arg, axis);
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
52
docker/rocm/migraphx/targets/gpu/device/argmin.cpp
Normal file
52
docker/rocm/migraphx/targets/gpu/device/argmin.cpp
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/gpu/device/argmin.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
#include <migraphx/gpu/device/arg_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Dispatches to arg_op with the argmin functor selected by `select_last_index`:
// argmin_op_last_index when true, argmin_op_first_index otherwise.
void argmin(hipStream_t stream,
            const argument& result,
            const argument& arg,
            int64_t axis,
            bool select_last_index)
{
    if(not select_last_index)
    {
        arg_op(argmin_op_first_index{}, stream, result, arg, axis);
        return;
    }
    arg_op(argmin_op_last_index{}, stream, result, arg, axis);
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
65
docker/rocm/migraphx/targets/gpu/device/contiguous.cpp
Normal file
65
docker/rocm/migraphx/targets/gpu/device/contiguous.cpp
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/contiguous.hpp>
|
||||||
|
#include <migraphx/gpu/device/nary.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Copies `arg` into `result` element by element through views, iterating over a
// shape built from only the type and lens of the result.
void contiguous_nonstandard(hipStream_t stream, const argument& result, const argument& arg)
{
    const auto& result_shape = result.get_shape();
    shape launch_shape{result_shape.type(), result_shape.lens()};
    visit_all(result, arg)([&](auto dst_view, auto src_view) {
        hip_visit_views(dst_view, src_view, launch_shape)([&](auto dst, auto src, auto iter_shape) {
            // Same multi-index is used to address both views
            auto launch = mi_gs_launch(stream, iter_shape);
            launch([=](auto idx) __device__ { dst[idx] = src[idx]; });
        });
    });
}
|
||||||
|
|
||||||
|
// Copies `arg` into `result` by linear index over the raw data pointers, for
// as many elements as the result shape reports.
void contiguous_packed(hipStream_t stream, const argument& result, const argument& arg)
{
    index_int count = result.get_shape().elements();
    visit_all(result, arg)([&](auto dst_view, auto src_view) {
        const auto* src = device_cast(src_view.data());
        auto* dst       = device_cast(dst_view.data());
        gs_launch(stream, count)([=](auto i) __device__ { dst[i] = src[i]; });
    });
}
|
||||||
|
|
||||||
|
// Chooses the copy routine: the linear copy when both shapes are equal and the
// result shape is packed, the view-based copy otherwise.
void contiguous(hipStream_t stream, const argument& result, const argument& arg)
{
    const bool linear_copy =
        result.get_shape() == arg.get_shape() and result.get_shape().packed();
    if(not linear_copy)
    {
        contiguous_nonstandard(stream, result, arg);
        return;
    }
    contiguous_packed(stream, result, arg);
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
40
docker/rocm/migraphx/targets/gpu/device/fill.cpp
Normal file
40
docker/rocm/migraphx/targets/gpu/device/fill.cpp
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/device/fill.hpp>
|
||||||
|
#include <migraphx/gpu/device/nary.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Launches nary on `result` with a device functor that returns `val`.
void fill(hipStream_t stream, const argument& result, unsigned long val)
{
    // Only `val` is needed inside the device lambda, so capture it explicitly
    nary(stream, result)([val]() __device__ { return val; });
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
@ -0,0 +1,185 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Generates, for use inside hip_array, the element-wise compound assignment
// `op` (array and scalar right-hand sides) and the matching binary operator
// `binary_op` for array/array, array/scalar and scalar/array operands.
// The scalar/array overload evaluates `y binary_op x[i]` per element so that
// the operand order is preserved for non-commutative operators such as / and %.
// NOLINTNEXTLINE
#define MIGRAPHX_DEVICE_ARRAY_OP(op, binary_op)                                                    \
    MIGRAPHX_DEVICE_CONSTEXPR hip_array& operator op(const hip_array& x)                           \
    {                                                                                              \
        for(index_int i = 0; i < N; i++)                                                           \
            d[i] op x[i];                                                                          \
        return *this;                                                                              \
    }                                                                                              \
    MIGRAPHX_DEVICE_CONSTEXPR hip_array& operator op(const T& x)                                   \
    {                                                                                              \
        for(index_int i = 0; i < N; i++)                                                           \
            d[i] op x;                                                                             \
        return *this;                                                                              \
    }                                                                                              \
    friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(hip_array x, const hip_array& y) \
    {                                                                                              \
        return x op y;                                                                             \
    }                                                                                              \
    friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(hip_array x, const T& y)         \
    {                                                                                              \
        return x op y;                                                                             \
    }                                                                                              \
    friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator binary_op(const T& y, hip_array x)         \
    {                                                                                              \
        for(index_int i = 0; i < N; i++)                                                           \
            x[i] = y binary_op x[i];                                                               \
        return x;                                                                                  \
    }
|
||||||
|
|
||||||
|
template <class T, index_int N>
|
||||||
|
struct hip_array
|
||||||
|
{
|
||||||
|
T d[N];
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& operator[](index_int i) { return d[i]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](index_int i) const { return d[i]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& front() { return d[0]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& front() const { return d[0]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& back() { return d[N - 1]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& back() const { return d[N - 1]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR std::integral_constant<index_int, N> size() const { return {}; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T dot(const hip_array& x) const
|
||||||
|
{
|
||||||
|
T result = 0;
|
||||||
|
for(index_int i = 0; i < N; i++)
|
||||||
|
result += x[i] * d[i];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T product() const
|
||||||
|
{
|
||||||
|
T result = 1;
|
||||||
|
for(index_int i = 0; i < N; i++)
|
||||||
|
result *= d[i];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T single(index_int width = 100) const
|
||||||
|
{
|
||||||
|
T result = 0;
|
||||||
|
T a = 1;
|
||||||
|
for(index_int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
result += d[N - i - 1] * a;
|
||||||
|
a *= width;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(+=, +)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(*=, *)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(/=, /)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(%=, %)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(&=, &)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(|=, |)
|
||||||
|
MIGRAPHX_DEVICE_ARRAY_OP(^=, ^)
|
||||||
|
|
||||||
|
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator==(const hip_array& x, const hip_array& y)
|
||||||
|
{
|
||||||
|
for(index_int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
if(x[i] != y[i])
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator!=(const hip_array& x, const hip_array& y)
|
||||||
|
{
|
||||||
|
return not(x == y);
|
||||||
|
}
|
||||||
|
// This uses the product order rather than lexical order
|
||||||
|
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator<(const hip_array& x, const hip_array& y)
|
||||||
|
{
|
||||||
|
for(index_int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
if(not(x[i] < y[i]))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// x > y is defined as y < x, so it follows the same product order as operator<.
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator>(const hip_array& x, const hip_array& y)
{
    const bool y_below_x = (y < x);
    return y_below_x;
}
|
||||||
|
// True when x < y (product order) or when the two arrays are equal.
// Equality is only tested if the strict comparison fails.
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator<=(const hip_array& x, const hip_array& y)
{
    if(x < y)
        return true;
    return x == y;
}
|
||||||
|
// True when y < x (product order) or when the two arrays are equal.
// Equality is only tested if the strict comparison fails.
friend MIGRAPHX_DEVICE_CONSTEXPR bool operator>=(const hip_array& x, const hip_array& y)
{
    if(y < x)
        return true;
    return x == y;
}
|
||||||
|
|
||||||
|
// Normalises `result` against the per-position bounds stored in this array.
// Going from the last position down to position 1, each entry (plus whatever
// was carried in) is reduced below this array's entry at the same position by
// repeated subtraction; the number of subtractions is carried into the next
// lower position. Position 0 receives the final carry and is not reduced.
//
// NOTE(review): the subtraction loop cannot terminate normally if a bound is
// zero and the element type is unsigned - presumably callers only pass
// non-zero bounds; confirm.
MIGRAPHX_DEVICE_CONSTEXPR hip_array carry(hip_array result) const
{
    uint32_t carried   = 0;
    std::ptrdiff_t pos = result.size() - 1;
    while(pos > 0)
    {
        auto digit = result[pos] + carried;
        // The incoming carry has been consumed; start counting afresh.
        carried = 0;
        // Repeated subtraction is used here instead of a division/modulo.
        for(; digit >= d[pos]; carried += 1)
            digit -= d[pos];
        result[pos] = digit;
        pos--;
    }
    result[0] += carried;
    return result;
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,70 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FAST_DIV_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FAST_DIV_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Number of fractional bits in the fixed-point reciprocal used by fast_div.
constexpr uint64_t fast_div_shift = 42;

// Encodes `divisor` as ceil(2^fast_div_shift / divisor), the multiplier that
// fast_div() combines with a right shift by fast_div_shift.
//
// A divisor of 0 encodes to 0, so a later fast_div() with it yields 0.
//
// The function is constexpr (written as a single return so it is valid under
// C++11 rules as well), matching is_divisor_encodable(), so an encoding can be
// computed at compile time. Run-time callers are unaffected.
inline constexpr uint64_t encode_divisor(uint64_t divisor)
{
    return divisor == 0 ? 0 : ((uint64_t{1} << fast_div_shift) + divisor - 1) / divisor;
}
|
||||||
|
|
||||||
|
inline constexpr bool is_divisor_encodable(uint64_t i)
|
||||||
|
{
|
||||||
|
return i < (uint64_t{1} << (fast_div_shift / 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Division by a pre-encoded divisor: multiplies by the value produced by
// encode_divisor() and shifts right by fast_div_shift.
// The multiplication is a plain 64-bit unsigned multiply with no overflow check.
MIGRAPHX_DEVICE_CONSTEXPR uint64_t fast_div(uint64_t dividend, uint64_t encoded_divisor)
{
    const uint64_t scaled = dividend * encoded_divisor;
    return scaled >> fast_div_shift;
}
|
||||||
|
|
||||||
|
// Recovers the remainder from an already-computed quotient:
// dividend - divisor * result.
MIGRAPHX_DEVICE_CONSTEXPR uint64_t remainder(uint64_t result, uint64_t dividend, uint64_t divisor)
{
    const uint64_t whole_part = divisor * result;
    return dividend - whole_part;
}
|
||||||
|
|
||||||
|
// Modulo built from fast_div(): takes the quotient from the encoded divisor,
// then derives the remainder from the plain divisor via remainder().
MIGRAPHX_DEVICE_CONSTEXPR uint64_t fast_mod(uint64_t dividend,
                                            uint64_t divisor,
                                            uint64_t encoded_divisor)
{
    const uint64_t quotient = fast_div(dividend, encoded_divisor);
    return remainder(quotient, dividend, divisor);
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_FLOAT_EQUAL_HPP
|
||||||
|
|
||||||
|
#include <migraphx/requires.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Shorthand for the type member of std::common_type over the given types.
template <class... Ts>
using common_type = std::common_type_t<Ts...>;
|
||||||
|
|
||||||
|
// Floating-point overload: x and y are considered equal when both are finite
// and y lies between the representable values immediately below and above x
// (bounds inclusive).
template <class T, MIGRAPHX_REQUIRES(is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    if(not std::isfinite(x))
        return false;
    if(not std::isfinite(y))
        return false;
    const auto just_below = std::nextafter(x, std::numeric_limits<T>::lowest());
    const auto just_above = std::nextafter(x, std::numeric_limits<T>::max());
    return just_below <= y and just_above >= y;
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
__device__ bool float_equal_device(__bf16 x, __bf16 y) // NOLINT(misc-definitions-in-headers)
|
||||||
|
{
|
||||||
|
float xf = x;
|
||||||
|
float yf = y;
|
||||||
|
return std::isfinite(xf) and std::isfinite(yf) and
|
||||||
|
std::nextafter(xf, std::numeric_limits<float>::lowest()) <= yf and
|
||||||
|
std::nextafter(xf, std::numeric_limits<float>::max()) >= yf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overload for types that are not floating point: plain operator== comparison.
template <class T, MIGRAPHX_REQUIRES(not is_floating_point<T>{})>
__device__ bool float_equal_device(T x, T y)
{
    const bool same = (x == y);
    return same;
}
|
||||||
|
|
||||||
|
template <class T, class U>
|
||||||
|
__device__ bool float_equal(T x, U y)
|
||||||
|
{
|
||||||
|
return float_equal_device<common_type<T, U>>(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,146 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_LAUNCH_HPP
|
||||||
|
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
#include <migraphx/gpu/device/targets.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
struct index
|
||||||
|
{
|
||||||
|
index_int global = 0;
|
||||||
|
index_int local = 0;
|
||||||
|
index_int group = 0;
|
||||||
|
|
||||||
|
__device__ index_int nglobal() const { return blockDim.x * gridDim.x; } // NOLINT
|
||||||
|
|
||||||
|
__device__ index_int nlocal() const { return blockDim.x; } // NOLINT
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
__device__ void global_stride(index_int n, F f) const
|
||||||
|
{
|
||||||
|
const auto stride = nglobal();
|
||||||
|
for(index_int i = global; i < n; i += stride)
|
||||||
|
{
|
||||||
|
f(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
__device__ void local_stride(index_int n, F f) const
|
||||||
|
{
|
||||||
|
const auto stride = nlocal();
|
||||||
|
for(index_int i = local; i < n; i += stride)
|
||||||
|
{
|
||||||
|
f(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
__global__ void launcher(F f)
|
||||||
|
{
|
||||||
|
index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
|
||||||
|
f(idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline auto launch(hipStream_t stream, index_int global, index_int local)
|
||||||
|
{
|
||||||
|
return [=](auto f) {
|
||||||
|
assert(local > 0);
|
||||||
|
assert(global > 0);
|
||||||
|
using f_type = decltype(f);
|
||||||
|
dim3 nblocks(global / local);
|
||||||
|
dim3 nthreads(local);
|
||||||
|
/*
|
||||||
|
hipGetLastError() returns error for the first failed HIP call that happened previously.
|
||||||
|
MIGraphX calls into various backend libraries and failed HIP calls can also happen there.
|
||||||
|
Calling hipGetLastError() would reset error code to hipSuccess, so that inside MIGraphX
|
||||||
|
failed call to hipLaunchKernelGGL() can be captured.
|
||||||
|
*/
|
||||||
|
hipError_t flush_call = hipGetLastError();
|
||||||
|
(void)(flush_call);
|
||||||
|
// cppcheck-suppress migraphx-UseDeviceLaunch
|
||||||
|
hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f);
|
||||||
|
hipError_t kernel_launch_status = hipGetLastError();
|
||||||
|
if(kernel_launch_status != hipSuccess)
|
||||||
|
{
|
||||||
|
std::string message = hipGetErrorString(kernel_launch_status);
|
||||||
|
if(not contains(get_targets(), get_device_name()))
|
||||||
|
{
|
||||||
|
message += ". Trying to run a kernel for " + get_device_name() +
|
||||||
|
" but MIGraphX was built for targets " + get_targets_as_string() +
|
||||||
|
". Please rebuild MIGraphX with -DGPU_TARGETS='" + get_device_name() +
|
||||||
|
"'.";
|
||||||
|
}
|
||||||
|
MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " + message);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto gs_invoke(F&& f, index_int i, index idx) -> decltype(f(i, idx))
|
||||||
|
{
|
||||||
|
return f(i, idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto gs_invoke(F&& f, index_int i, index) -> decltype(f(i))
|
||||||
|
{
|
||||||
|
return f(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline auto gs_launch(hipStream_t stream, index_int n, index_int local = 1024)
|
||||||
|
{
|
||||||
|
index_int groups = (n + local - 1) / local;
|
||||||
|
// max possible number of blocks is set to 1B (1,073,741,824)
|
||||||
|
index_int nglobal = std::min<index_int>(1073741824, groups) * local;
|
||||||
|
|
||||||
|
return [=](auto f) {
|
||||||
|
launch(stream, nglobal, local)([=](auto idx) __device__ {
|
||||||
|
idx.global_stride(n, [&](auto i) { gs_invoke(f, i, idx); });
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage qualifier for block-shared buffers: expands to __shared__ in normal
// builds and to nothing when MIGRAPHX_USE_CLANG_TIDY is defined.
#ifndef MIGRAPHX_USE_CLANG_TIDY
#define MIGRAPHX_DEVICE_SHARED __shared__
#else
#define MIGRAPHX_DEVICE_SHARED
#endif
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,164 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_MULTI_INDEX_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_MULTI_INDEX_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/shape.hpp>
|
||||||
|
#include <migraphx/functional.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
struct multi_index
|
||||||
|
{
|
||||||
|
using hip_index = hip_array<index_int, N>;
|
||||||
|
hip_index id{};
|
||||||
|
hip_index stride{};
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto for_stride(hip_index n) const
|
||||||
|
{
|
||||||
|
// f should return void, but this helps with type deduction
|
||||||
|
return [=](auto f) -> decltype(f(hip_index{})) {
|
||||||
|
for(hip_index i = id; i < n; i = n.carry(i + stride))
|
||||||
|
{
|
||||||
|
f(i);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class ForStride>
|
||||||
|
__device__ __host__ auto deduce_for_stride(ForStride fs) -> decltype(fs(id{}));
|
||||||
|
|
||||||
|
// One-dimensional case: start at `i`, step by `n`.
MIGRAPHX_DEVICE_CONSTEXPR multi_index<1> make_multi_index(index_int i, index_int n)
{
    return multi_index<1>{{i}, {n}};
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR multi_index<N>
|
||||||
|
make_multi_index(const hip_shape<N>& s, index_int i, index_int n)
|
||||||
|
{
|
||||||
|
return {s.multi(i), s.multi(n)};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR multi_index<N>
|
||||||
|
make_multi_index(const hip_shape<N>& s, index_int i, const hip_array<index_int, N>& n)
|
||||||
|
{
|
||||||
|
return {s.multi(i), n};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
inline auto mi_nglobal(const hip_shape<N>& s, index_int nlocal)
|
||||||
|
{
|
||||||
|
assert(s.standard);
|
||||||
|
assert(s.elements() > 0);
|
||||||
|
index_int n = s.elements();
|
||||||
|
index_int groups = (n + nlocal - 1) / nlocal;
|
||||||
|
// max possible number of blocks is set to 1B (1,073,741,824)
|
||||||
|
index_int nglobal = std::min<index_int>(1073741824, groups) * nlocal;
|
||||||
|
|
||||||
|
assert(groups > 0);
|
||||||
|
assert(nglobal > 0);
|
||||||
|
auto nglobal_multi = s.multi(nglobal);
|
||||||
|
|
||||||
|
// Skip checking this, since this will cause metadata to not be generated
|
||||||
|
// for some unknown reason.
|
||||||
|
//
|
||||||
|
// assert(std::any_of(nglobal_multi.begin(), nglobal_multi.end(), [](auto x){return x>0;}));
|
||||||
|
|
||||||
|
// cppcheck-suppress migraphx-RedundantLocalVariable
|
||||||
|
return nglobal_multi;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
inline auto mi_nlocal(const hip_shape<N>& s, index_int local)
|
||||||
|
{
|
||||||
|
assert(s.standard);
|
||||||
|
assert(s.elements() > 0);
|
||||||
|
auto nlocal_multi = s.multi(local);
|
||||||
|
|
||||||
|
// Skip checking this, since this will cause metadata to not be generated
|
||||||
|
// for some unknown reason.
|
||||||
|
//
|
||||||
|
// assert(std::any_of(nlocal_multi.begin(), nlocal_multi.end(), [](auto x){return x>0;}));
|
||||||
|
|
||||||
|
// cppcheck-suppress migraphx-RedundantLocalVariable
|
||||||
|
return nlocal_multi;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
inline auto mi_launch(hipStream_t stream, const hip_shape<N>& global, index_int nlocal = 1024)
|
||||||
|
{
|
||||||
|
auto nglobal_multi = mi_nglobal(global, nlocal);
|
||||||
|
auto nglobal = global.index(nglobal_multi);
|
||||||
|
|
||||||
|
return [=](auto f) {
|
||||||
|
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
|
||||||
|
auto midx = make_multi_index(global, idx.global, nglobal_multi);
|
||||||
|
f(idx, midx.for_stride(global.lens));
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
inline auto mi_launch(hipStream_t stream,
|
||||||
|
const hip_shape<N>& global,
|
||||||
|
const hip_shape<N>& local,
|
||||||
|
index_int nlocal = 1024)
|
||||||
|
{
|
||||||
|
auto nglobal_multi = mi_nglobal(global, 1);
|
||||||
|
auto nglobal = global.index(nglobal_multi);
|
||||||
|
auto nlocal_multi = mi_nlocal(local, nlocal);
|
||||||
|
|
||||||
|
return [=](auto f) {
|
||||||
|
launch(stream, nglobal * nlocal, nlocal)([=](auto idx) {
|
||||||
|
// TODO: Use fast div for nlocal
|
||||||
|
auto midx = make_multi_index(global, idx.global / nlocal, nglobal_multi);
|
||||||
|
auto lidx = make_multi_index(local, idx.local, nlocal_multi);
|
||||||
|
f(idx, midx.for_stride(global.lens), lidx.for_stride(local.lens));
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
inline auto mi_gs_launch(hipStream_t stream, const hip_shape<N>& global, index_int nlocal = 1024)
|
||||||
|
{
|
||||||
|
return [=](auto f) {
|
||||||
|
mi_launch(stream, global, nlocal)([=](auto, auto g) { g([&](auto i) { f(i); }); });
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,473 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_NARY_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/multi_index.hpp>
|
||||||
|
#include <migraphx/gpu/device/visit.hpp>
|
||||||
|
#include <migraphx/functional.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
#include <migraphx/array.hpp>
|
||||||
|
#include <migraphx/env.hpp>
|
||||||
|
#include <migraphx/permutation.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_NARY);
|
||||||
|
|
||||||
|
// Prints the enclosing function's signature to std::cout when the
// MIGRAPHX_TRACE_NARY environment flag is enabled. The macro is written as a
// complete statement; call sites use it without a trailing semicolon.
// NOLINTNEXTLINE
#define MIGRAPHX_TRACE_NARY_FUNCTION                                               \
    if(enabled(MIGRAPHX_TRACE_NARY{}))                                             \
    {                                                                              \
        std::cout << "nary device function: " << __PRETTY_FUNCTION__ << std::endl; \
    }
|
||||||
|
|
||||||
|
// Captures the arguments by value and returns a callable that, given a
// functor, invokes it with those captured arguments and returns its result.
template <class... Ts>
constexpr auto pack(Ts... xs)
{
    return [=](auto consumer) {
        return consumer(xs...);
    };
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
auto nary_nonstandard_nonpacked_impl(hipStream_t stream, F f, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
shape s{result.get_shape().type(), result.get_shape().lens()};
|
||||||
|
hip_visit_all(s, result, args...)([&](auto standard_shape, auto output, auto... inputs) {
|
||||||
|
mi_gs_launch(stream,
|
||||||
|
standard_shape)([=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
inline auto create_broadcast_index(index_int len, index_int stride)
|
||||||
|
{
|
||||||
|
auto next_stride = stride * len;
|
||||||
|
auto e_next_stride = encode_divisor(next_stride);
|
||||||
|
auto e_stride = encode_divisor(stride);
|
||||||
|
return [=](auto i) __device__ {
|
||||||
|
// ( i % next_stride) / stride
|
||||||
|
return fast_div(i, e_stride) - len * fast_div(i, e_next_stride);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
auto nary_nonstandard_packed_impl(hipStream_t stream,
|
||||||
|
F f,
|
||||||
|
const argument& result,
|
||||||
|
Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
auto arg_shape = make_array(args...).front().get_shape();
|
||||||
|
auto perm = find_permutation(arg_shape);
|
||||||
|
auto s = reorder_shape(arg_shape, perm);
|
||||||
|
hip_visit_all(s, result.reshape(reorder_shape(result.get_shape(), perm)), args.reshape(s)...)(
|
||||||
|
[&](auto standard_shape, auto output, auto... inputs) {
|
||||||
|
mi_gs_launch(stream, standard_shape)(
|
||||||
|
[=](auto idx) __device__ { output[idx] = f(inputs[idx]...); });
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_broadcast_vec_impl(
|
||||||
|
hipStream_t stream, F f, argument result, argument barg, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
const auto& output_shape = result.get_shape();
|
||||||
|
const auto& b_shape = barg.get_shape();
|
||||||
|
auto bdim =
|
||||||
|
std::distance(b_shape.strides().begin(),
|
||||||
|
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
|
||||||
|
return x != 0;
|
||||||
|
}));
|
||||||
|
auto bdim_len = output_shape.lens()[bdim];
|
||||||
|
auto bdim_stride = output_shape.strides()[bdim];
|
||||||
|
auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride);
|
||||||
|
|
||||||
|
const index_int vec_size = 4;
|
||||||
|
const index_int nlocal = 1024;
|
||||||
|
const index_int nglobal = 256 * nlocal;
|
||||||
|
const index_int bdim_vec_len = bdim_len / vec_size;
|
||||||
|
hip_vec_visit_all<vec_size>(result, barg, args...)(
|
||||||
|
[&](auto output, auto binput, auto... inputs) {
|
||||||
|
using type = typename decltype(output)::value_type;
|
||||||
|
const index_int nelements = output.size() / vec_size;
|
||||||
|
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
|
||||||
|
// Load bias into LDS
|
||||||
|
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i] = binput.data()[i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
const auto* bp = as_pointer(buffer);
|
||||||
|
// Process the data
|
||||||
|
for(size_t i = idx.global; i < nelements; i += nglobal)
|
||||||
|
{
|
||||||
|
auto bidx = broadcast_idx(i * vec_size);
|
||||||
|
auto b = bp[bidx];
|
||||||
|
auto out = output.data()[i];
|
||||||
|
for(index_int j = 0; j < vec_size; j++)
|
||||||
|
{
|
||||||
|
out[j] = f(inputs.data()[i][j]..., b);
|
||||||
|
}
|
||||||
|
output.data()[i] = out;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_broadcast_impl(hipStream_t stream, F f, argument result, argument barg, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
const auto& output_shape = result.get_shape();
|
||||||
|
const auto& b_shape = barg.get_shape();
|
||||||
|
auto bdim =
|
||||||
|
std::distance(b_shape.strides().begin(),
|
||||||
|
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
|
||||||
|
return x != 0;
|
||||||
|
}));
|
||||||
|
auto bdim_len = output_shape.lens()[bdim];
|
||||||
|
auto bdim_stride = output_shape.strides()[bdim];
|
||||||
|
auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride);
|
||||||
|
|
||||||
|
const index_int nlocal = 1024;
|
||||||
|
const index_int nglobal = 256 * nlocal;
|
||||||
|
index_int nelements = result.get_shape().elements();
|
||||||
|
hip_visit_all(result, barg, args...)([&](auto output, auto binput, auto... inputs) {
|
||||||
|
using type = typename decltype(output)::value_type;
|
||||||
|
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[2048];
|
||||||
|
// Load bias into LDS
|
||||||
|
for(size_t i = idx.local; i < bdim_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i] = binput.data()[i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
// Process the data
|
||||||
|
for(size_t i = idx.global; i < nelements; i += nglobal)
|
||||||
|
{
|
||||||
|
auto bidx = broadcast_idx(i);
|
||||||
|
auto b = buffer[bidx];
|
||||||
|
output.data()[i] = f(inputs.data()[i]..., b);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_double_broadcast_vec_impl(
|
||||||
|
hipStream_t stream, F f, argument result, argument barg1, argument barg2, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
assert(barg1.get_shape().broadcasted());
|
||||||
|
assert(barg2.get_shape().broadcasted());
|
||||||
|
assert(barg1.get_shape() == barg2.get_shape());
|
||||||
|
const auto& output_shape = result.get_shape();
|
||||||
|
const auto& b_shape = barg1.get_shape();
|
||||||
|
auto bdim =
|
||||||
|
std::distance(b_shape.strides().begin(),
|
||||||
|
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
|
||||||
|
return x != 0;
|
||||||
|
}));
|
||||||
|
auto bdim_len = output_shape.lens()[bdim];
|
||||||
|
auto bdim_stride = output_shape.strides()[bdim];
|
||||||
|
auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride);
|
||||||
|
|
||||||
|
const index_int vec_size = 4;
|
||||||
|
const index_int nlocal = 1024;
|
||||||
|
const index_int nglobal = 256 * nlocal;
|
||||||
|
const index_int bdim_vec_len = bdim_len / vec_size;
|
||||||
|
hip_vec_visit_all<vec_size>(result, barg1, barg2, args...)(
|
||||||
|
[&](auto output, auto binput1, auto binput2, auto... inputs) {
|
||||||
|
using type = typename decltype(output)::value_type;
|
||||||
|
const index_int nelements = output.size() / vec_size;
|
||||||
|
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
|
||||||
|
// Load bias into LDS
|
||||||
|
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i] = binput1.data()[i];
|
||||||
|
}
|
||||||
|
for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i + bdim_vec_len] = binput2.data()[i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
const auto* bp = as_pointer(buffer);
|
||||||
|
// Process the data
|
||||||
|
for(size_t i = idx.global; i < nelements; i += nglobal)
|
||||||
|
{
|
||||||
|
auto bidx = broadcast_idx(i * vec_size);
|
||||||
|
auto b1 = bp[bidx];
|
||||||
|
auto b2 = bp[bidx + bdim_len];
|
||||||
|
auto out = output.data()[i];
|
||||||
|
for(index_int j = 0; j < vec_size; j++)
|
||||||
|
{
|
||||||
|
out[j] = f(inputs.data()[i][j]..., b2, b1);
|
||||||
|
}
|
||||||
|
output.data()[i] = out;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_double_broadcast_impl(
|
||||||
|
hipStream_t stream, F f, argument result, argument barg1, argument barg2, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
assert(barg1.get_shape().broadcasted());
|
||||||
|
assert(barg2.get_shape().broadcasted());
|
||||||
|
assert(barg1.get_shape() == barg2.get_shape());
|
||||||
|
const auto& output_shape = result.get_shape();
|
||||||
|
const auto& b_shape = barg1.get_shape();
|
||||||
|
auto bdim =
|
||||||
|
std::distance(b_shape.strides().begin(),
|
||||||
|
std::find_if(b_shape.strides().begin(), b_shape.strides().end(), [](auto x) {
|
||||||
|
return x != 0;
|
||||||
|
}));
|
||||||
|
auto bdim_len = output_shape.lens()[bdim];
|
||||||
|
auto bdim_stride = output_shape.strides()[bdim];
|
||||||
|
auto broadcast_idx = create_broadcast_index(bdim_len, bdim_stride);
|
||||||
|
|
||||||
|
const index_int nlocal = 1024;
|
||||||
|
const index_int nglobal = 256 * nlocal;
|
||||||
|
index_int nelements = result.get_shape().elements();
|
||||||
|
hip_visit_all(result, barg1, barg2, args...)(
|
||||||
|
[&](auto output, auto binput1, auto binput2, auto... inputs) {
|
||||||
|
using type = typename decltype(output)::value_type;
|
||||||
|
launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[2048];
|
||||||
|
// Load bias into LDS
|
||||||
|
for(size_t i = idx.local; i < bdim_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i] = binput1.data()[i];
|
||||||
|
}
|
||||||
|
for(size_t i = idx.local; i < bdim_len; i += nlocal)
|
||||||
|
{
|
||||||
|
buffer[i + bdim_len] = binput2.data()[i];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
// Process the data
|
||||||
|
for(size_t i = idx.global; i < nelements; i += nglobal)
|
||||||
|
{
|
||||||
|
auto bidx = broadcast_idx(i);
|
||||||
|
auto b1 = buffer[bidx];
|
||||||
|
auto b2 = buffer[bidx + bdim_len];
|
||||||
|
output.data()[i] = f(inputs.data()[i]..., b2, b1);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
const auto& output_shape = result.get_shape();
|
||||||
|
visit_all(result, args...)([&](auto output, auto... inputs) {
|
||||||
|
using type = device_type<std::remove_cv_t<typename decltype(output)::value_type>>;
|
||||||
|
const index_int vec_size = 4;
|
||||||
|
auto data = pack_vec<4>(device_cast(inputs.data())...);
|
||||||
|
auto* outp = as_vec<4>(device_cast(output.data()));
|
||||||
|
gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) __device__ {
|
||||||
|
vec<type, 4> out = outp[i];
|
||||||
|
data(
|
||||||
|
[&](auto... xs) {
|
||||||
|
for(index_int j = 0; j < vec_size; j++)
|
||||||
|
{
|
||||||
|
out[j] = f(xs[j]...);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
i);
|
||||||
|
outp[i] = out;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
index_int nelements = result.get_shape().elements();
|
||||||
|
hip_pointer_visit_all(result, args...)([&](auto output, auto... inputs) {
|
||||||
|
gs_launch(stream, nelements)([=](auto i) __device__ { output[i] = f(inputs[i]...); });
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F, class... Arguments>
|
||||||
|
void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
MIGRAPHX_TRACE_NARY_FUNCTION
|
||||||
|
const auto shapes = make_array(args.get_shape()...);
|
||||||
|
const bool standard = all_of(shapes, [](const shape& s) { return s.standard(); });
|
||||||
|
const bool packed =
|
||||||
|
all_of(shapes, [](const shape& s) { return s.packed() and not s.broadcasted(); });
|
||||||
|
const bool same_shapes =
|
||||||
|
all_of(shapes, [&](const shape& s) { return s == result.get_shape(); });
|
||||||
|
const bool same_input_shapes = all_of(shapes, [&](const shape& s) { return s == shapes[0]; });
|
||||||
|
if((result.get_shape().standard() and standard) or (packed and same_shapes))
|
||||||
|
nary_standard_impl(stream, f, result, args...);
|
||||||
|
else if(packed and same_input_shapes)
|
||||||
|
nary_nonstandard_packed_impl(stream, f, result, args...);
|
||||||
|
else
|
||||||
|
nary_nonstandard_nonpacked_impl(stream, f, result, args...);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class... Arguments>
|
||||||
|
auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
return [=](auto f) { nary_nonstandard_nonpacked_impl(stream, f, result, args...); };
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class... Arguments>
|
||||||
|
auto nary_standard(hipStream_t stream, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
return [=](auto f) { nary_standard_impl(stream, f, result, args...); };
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class... Arguments>
|
||||||
|
bool broadcastable(bool& divisible_by_4,
|
||||||
|
index_int max_size,
|
||||||
|
const argument& result,
|
||||||
|
const argument& barg,
|
||||||
|
const Arguments&... args)
|
||||||
|
{
|
||||||
|
divisible_by_4 = false;
|
||||||
|
auto bshape = barg.get_shape();
|
||||||
|
const bool standard =
|
||||||
|
all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); });
|
||||||
|
const bool same_shapes =
|
||||||
|
all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); });
|
||||||
|
// TODO: Check result and args shape is the same
|
||||||
|
if(standard and same_shapes and bshape.broadcasted() and not bshape.scalar())
|
||||||
|
{
|
||||||
|
auto not_zero = [](auto x) { return x != 0; };
|
||||||
|
const auto& strides = bshape.strides();
|
||||||
|
auto b_it = std::find_if(strides.begin(), strides.end(), not_zero);
|
||||||
|
auto b_idx = std::distance(strides.begin(), b_it);
|
||||||
|
auto b_len = result.get_shape().lens()[b_idx];
|
||||||
|
auto b_stride = result.get_shape().strides()[b_idx];
|
||||||
|
assert(bshape.lens()[b_idx] == b_len);
|
||||||
|
if(b_len <= max_size and std::none_of(std::next(b_it), strides.end(), not_zero) and
|
||||||
|
is_divisor_encodable(b_stride * b_len))
|
||||||
|
{
|
||||||
|
|
||||||
|
divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
|
||||||
|
(front_args(args...).get_shape().elements() % 4 == 0);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overload selected when there are no further arguments besides the broadcast
// candidate: the broadcast kernels are never applicable, so both the flag and the
// return value are false.
inline bool broadcastable(bool& divisible_by_4, index_int, const argument&, const argument&)
{
    divisible_by_4 = false;
    return false;
}
|
||||||
|
|
||||||
|
// Nullary
|
||||||
|
inline auto nary(hipStream_t stream, argument result)
|
||||||
|
{
|
||||||
|
return [=](auto f) { nary_standard_impl(stream, f, result); };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unary
|
||||||
|
inline auto nary(hipStream_t stream, argument result, argument arg)
|
||||||
|
{
|
||||||
|
return [=](auto f) { nary_impl(stream, f, result, arg); };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Binary
|
||||||
|
inline auto nary(hipStream_t stream, argument result, argument arg, argument barg)
|
||||||
|
{
|
||||||
|
return [=](auto f) {
|
||||||
|
bool divisible_by_4 = false;
|
||||||
|
if(broadcastable(divisible_by_4, 2048, result, barg, arg))
|
||||||
|
{
|
||||||
|
if(divisible_by_4)
|
||||||
|
nary_broadcast_vec_impl(stream, f, result, barg, arg);
|
||||||
|
else
|
||||||
|
nary_broadcast_impl(stream, f, result, barg, arg);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
nary_impl(stream, f, result, arg, barg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class... Arguments>
|
||||||
|
auto nary(hipStream_t stream, argument result, Arguments... args)
|
||||||
|
{
|
||||||
|
static_assert(sizeof...(args) > 2, "Args needs to be greater than 2");
|
||||||
|
return [=](auto f) {
|
||||||
|
auto barg1 = back_args(args...);
|
||||||
|
bool fallback1 = pop_back_args(args...)([&](auto&&... args2) {
|
||||||
|
auto barg2 = back_args(args2...);
|
||||||
|
bool fallback2 =
|
||||||
|
barg2.get_shape() != barg1.get_shape() or not barg2.get_shape().broadcasted() or
|
||||||
|
pop_back_args(args2...)([&](auto&&... args3) {
|
||||||
|
bool divisible_by_4 = false;
|
||||||
|
if(broadcastable(divisible_by_4, 1024, result, barg2, args3...))
|
||||||
|
{
|
||||||
|
if(divisible_by_4)
|
||||||
|
nary_double_broadcast_vec_impl(
|
||||||
|
stream, f, result, barg1, barg2, args3...);
|
||||||
|
else
|
||||||
|
nary_double_broadcast_impl(stream, f, result, barg1, barg2, args3...);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
if(not fallback2)
|
||||||
|
return false;
|
||||||
|
bool divisible_by_4 = false;
|
||||||
|
if(broadcastable(divisible_by_4, 2048, result, barg1, args2...))
|
||||||
|
{
|
||||||
|
if(divisible_by_4)
|
||||||
|
nary_broadcast_vec_impl(stream, f, result, barg1, args2...);
|
||||||
|
else
|
||||||
|
nary_broadcast_impl(stream, f, result, barg1, args2...);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
if(fallback1)
|
||||||
|
nary_impl(stream, f, result, args...);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,311 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/visit.hpp>
|
||||||
|
#include <migraphx/gpu/device/multi_index.hpp>
|
||||||
|
#include <migraphx/gpu/device/reduce_ops.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
#ifdef MIGRAPHX_NO_DPP
|
||||||
|
|
||||||
|
template <index_int N,
|
||||||
|
class Op,
|
||||||
|
class T,
|
||||||
|
class ForStride,
|
||||||
|
class F,
|
||||||
|
MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
|
||||||
|
__device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
|
||||||
|
{
|
||||||
|
using type = decltype(f(deduce_for_stride(fs)));
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[N];
|
||||||
|
type x = init;
|
||||||
|
fs([&](auto i) { x = op(x, f(i)); });
|
||||||
|
buffer[idx.local] = x;
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
for(index_int s = 1; s < idx.nlocal(); s *= 2)
|
||||||
|
{
|
||||||
|
const index_int index = 2 * s * idx.local;
|
||||||
|
if(index + s < idx.nlocal())
|
||||||
|
{
|
||||||
|
buffer[index] = op(buffer[index], buffer[index + s]);
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
return buffer[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
// Builds the DPP control value for a row shift right: 0x110 combined with `x`.
constexpr unsigned int dpp_row_shr(unsigned int x)
{
    return x | 0x110u;
}
|
||||||
|
|
||||||
|
// Maps a row-broadcast selector to its DPP control value: 15 -> 0x142, 31 -> 0x143.
// Any other selector throws std::runtime_error("Unknown bcast").
constexpr unsigned int dpp_row_bcast(unsigned int x)
{
    if(x == 15)
        return 0x142;
    if(x == 31)
        return 0x143;
    throw std::runtime_error("Unknown bcast");
}
|
||||||
|
|
||||||
|
template <unsigned int DppCtrl,
|
||||||
|
unsigned int RowMask = 0xf,
|
||||||
|
unsigned int BankMask = 0xf,
|
||||||
|
bool BoundCtrl = false,
|
||||||
|
class T>
|
||||||
|
__device__ T dpp_mov(T& x)
|
||||||
|
{
|
||||||
|
static const index_int n = sizeof(T) < 4 ? 1 : sizeof(T) / 4;
|
||||||
|
union type
|
||||||
|
{
|
||||||
|
uint32_t reg[n];
|
||||||
|
T data;
|
||||||
|
};
|
||||||
|
type output{};
|
||||||
|
type input{};
|
||||||
|
// cppcheck-suppress unreadVariable
|
||||||
|
input.data = x;
|
||||||
|
for(index_int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
|
||||||
|
}
|
||||||
|
return output.data;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class Op>
|
||||||
|
__device__ void dpp_reduce(T& in, Op op)
|
||||||
|
{
|
||||||
|
T out{};
|
||||||
|
out = dpp_mov<dpp_row_shr(1)>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
out = dpp_mov<dpp_row_shr(2)>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
out = dpp_mov<dpp_row_shr(4), 0xf, 0xe>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
out = dpp_mov<dpp_row_shr(8), 0xf, 0xc>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
#if __AMDGCN_WAVEFRONT_SIZE == 64
|
||||||
|
out = dpp_mov<dpp_row_bcast(15), 0xa>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
out = dpp_mov<dpp_row_bcast(31), 0xc>(in);
|
||||||
|
in = op(in, out);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// float + sum specialization of the DPP reduction, written as inline assembly: a
// chain of v_add_f32 instructions with the same row_shr / row_bcast modifiers and
// masks as the generic version, separated by s_nop instructions.
// When building under clang-tidy or cppcheck the assembly is replaced by a dummy
// assignment so the tools can process the file.
__device__ inline void dpp_reduce(float& x, sum)
{
#if !defined(MIGRAPHX_USE_CLANG_TIDY) && !defined(CPPCHECK)
    __asm__ volatile("s_nop 4\n"
                     "v_add_f32 %0 %0 %0 row_shr:1\n"
                     "s_nop 1\n"
                     "v_add_f32 %0 %0 %0 row_shr:2\n"
                     "s_nop 1\n"
                     "v_add_f32 %0 %0 %0 row_shr:4 bank_mask:0xe\n"
                     "s_nop 1\n"
                     "v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc\n"
                     "s_nop 1\n"
#if __AMDGCN_WAVEFRONT_SIZE == 64
                     "v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa\n"
                     "s_nop 1\n"
                     "v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc\n"
#endif
                     "s_nop 1\n"
                     : "=v"(x)
                     : "0"(x));
#else
    x = 1;
#endif
}
|
||||||
|
|
||||||
|
template <index_int N,
|
||||||
|
class Op,
|
||||||
|
class T,
|
||||||
|
class ForStride,
|
||||||
|
class F,
|
||||||
|
MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
|
||||||
|
__device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
|
||||||
|
{
|
||||||
|
|
||||||
|
#if __AMDGCN_WAVEFRONT_SIZE == 32
|
||||||
|
constexpr index_int nthreads = 16;
|
||||||
|
#else
|
||||||
|
constexpr index_int nthreads = 64;
|
||||||
|
#endif
|
||||||
|
using type = decltype(f(deduce_for_stride(fs)));
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[N / nthreads];
|
||||||
|
type x = init;
|
||||||
|
fs([&](auto i) { x = op(x, f(i)); });
|
||||||
|
dpp_reduce(x, op);
|
||||||
|
|
||||||
|
const auto ldsidx = idx.local / nthreads;
|
||||||
|
if((idx.local % nthreads) == nthreads - 1)
|
||||||
|
{
|
||||||
|
buffer[ldsidx] = x;
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
type y = init;
|
||||||
|
for(index_int i = 0; i < idx.nlocal() / nthreads; i++)
|
||||||
|
{
|
||||||
|
y = op(y, buffer[i]);
|
||||||
|
}
|
||||||
|
return y;
|
||||||
|
}
|
||||||
|
#endif // MIGRAPHX_NO_DPP
|
||||||
|
template <index_int N, class Op, class T, class F>
|
||||||
|
__device__ auto block_reduce(index idx, Op op, T init, index_int n, F f)
|
||||||
|
{
|
||||||
|
auto midx = make_multi_index(idx.local, idx.nlocal());
|
||||||
|
// Workaround hcc, create a local array
|
||||||
|
auto fs = midx.id;
|
||||||
|
fs[0] = n;
|
||||||
|
return block_reduce<N>(
|
||||||
|
idx, op, init, midx.for_stride(fs), [&](auto mi) __device__ { return f(mi[0]); });
|
||||||
|
}
|
||||||
|
// Picks a block size for reducing `n` elements: starts at 64 and doubles while the
// size is below both `max_block_size` and `n`.
constexpr index_int compute_block_size(index_int n, index_int max_block_size)
{
    size_t size = 64;
    while(size < max_block_size and size < n)
    {
        size = size * 2;
    }
    return size;
}
|
||||||
|
|
||||||
|
inline std::vector<index_int> get_reduce_lens(const std::vector<size_t>& input_lens,
|
||||||
|
const std::vector<size_t>& output_lens)
|
||||||
|
{
|
||||||
|
std::vector<index_int> reduce_lens;
|
||||||
|
std::transform(output_lens.begin(),
|
||||||
|
output_lens.end(),
|
||||||
|
input_lens.begin(),
|
||||||
|
std::back_inserter(reduce_lens),
|
||||||
|
[](auto x, auto y) -> index_int {
|
||||||
|
if(x == y)
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return y;
|
||||||
|
});
|
||||||
|
return reduce_lens;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Op, class T, class Input, class Output>
|
||||||
|
void reduce_multi_impl(hipStream_t stream,
|
||||||
|
const argument& result,
|
||||||
|
const argument& arg,
|
||||||
|
Op op,
|
||||||
|
T init,
|
||||||
|
Input read_input,
|
||||||
|
Output read_output,
|
||||||
|
const shape& reduce_slice)
|
||||||
|
{
|
||||||
|
hip_visit_all(result, arg, reduce_slice)([&](auto output, auto input, auto reduce_shape) {
|
||||||
|
auto relements = reduce_slice.elements();
|
||||||
|
|
||||||
|
const index_int max_block_size = 256;
|
||||||
|
const index_int block_size = compute_block_size(relements, max_block_size);
|
||||||
|
mi_launch(stream, output.get_shape(), reduce_shape, block_size)(
|
||||||
|
[=](auto idx, auto global, auto local) __device__ {
|
||||||
|
global([&](auto i) __device__ {
|
||||||
|
auto r =
|
||||||
|
block_reduce<max_block_size>(idx, op, init, local, [&](auto j) __device__ {
|
||||||
|
return read_input(input[i + j]);
|
||||||
|
});
|
||||||
|
if(idx.local == 0)
|
||||||
|
output[i] = read_output(r);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Op, class T, class Input, class Output>
|
||||||
|
void reduce_standard_impl(hipStream_t stream,
|
||||||
|
const argument& result,
|
||||||
|
const argument& arg,
|
||||||
|
Op op,
|
||||||
|
T init,
|
||||||
|
Input read_input,
|
||||||
|
Output read_output,
|
||||||
|
index_int relements)
|
||||||
|
{
|
||||||
|
hip_visit_all(result, arg)([&](auto output, auto input) {
|
||||||
|
auto nelements = result.get_shape().elements();
|
||||||
|
|
||||||
|
const index_int max_block_size = 256;
|
||||||
|
const index_int block_size = compute_block_size(relements, max_block_size);
|
||||||
|
gs_launch(stream, nelements * block_size, block_size)([=](auto i, auto idx) __device__ {
|
||||||
|
const auto out_idx = i / block_size;
|
||||||
|
const auto base_idx = out_idx * relements;
|
||||||
|
auto r = block_reduce<max_block_size>(idx, op, init, relements, [&](auto j) __device__ {
|
||||||
|
return read_input(input.data()[base_idx + j]);
|
||||||
|
});
|
||||||
|
if(idx.local == 0)
|
||||||
|
output.data()[out_idx] = read_output(r);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Op, class T, class Input, class Output>
|
||||||
|
void reduce(hipStream_t stream,
|
||||||
|
const argument& result,
|
||||||
|
const argument& arg,
|
||||||
|
Op op,
|
||||||
|
T init,
|
||||||
|
Input read_input,
|
||||||
|
Output read_output)
|
||||||
|
{
|
||||||
|
auto&& output_shape = result.get_shape();
|
||||||
|
auto&& input_shape = arg.get_shape();
|
||||||
|
auto input_lens = input_shape.lens();
|
||||||
|
auto output_lens = output_shape.lens();
|
||||||
|
assert(output_lens.size() == input_lens.size());
|
||||||
|
if(input_shape.standard() and output_shape.standard() and
|
||||||
|
output_lens.back() != input_lens.back() and
|
||||||
|
std::equal(output_lens.begin(), std::prev(output_lens.end()), input_lens.begin()))
|
||||||
|
{
|
||||||
|
reduce_standard_impl(
|
||||||
|
stream, result, arg, op, init, read_input, read_output, input_lens.back());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
std::vector<index_int> reduce_lens = get_reduce_lens(input_lens, output_lens);
|
||||||
|
shape reduce_slice{output_shape.type(), reduce_lens};
|
||||||
|
reduce_multi_impl(stream, result, arg, op, init, read_input, read_output, reduce_slice);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif // MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP
|
||||||
@ -0,0 +1,111 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
|
||||||
|
#define MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
struct sum
|
||||||
|
{
|
||||||
|
template <class T, class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
|
||||||
|
{
|
||||||
|
return x + y;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct product
|
||||||
|
{
|
||||||
|
template <class T, class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
|
||||||
|
{
|
||||||
|
return x * y;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct id
|
||||||
|
{
|
||||||
|
template <class T>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct mean
|
||||||
|
{
|
||||||
|
size_t item_num = 1;
|
||||||
|
template <class T>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x) const
|
||||||
|
{
|
||||||
|
return x / static_cast<T>(item_num);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct max
|
||||||
|
{
|
||||||
|
template <class T, class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
|
||||||
|
{
|
||||||
|
return (x > y) ? x : y;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct min
|
||||||
|
{
|
||||||
|
template <class T, class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR auto operator()(T x, U y) const
|
||||||
|
{
|
||||||
|
return (x < y) ? x : y;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct lowest
|
||||||
|
{
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ operator T() const
|
||||||
|
{
|
||||||
|
return device_cast(std::numeric_limits<host_type<T>>::lowest());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct highest
|
||||||
|
{
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ operator T() const
|
||||||
|
{
|
||||||
|
return device_cast(std::numeric_limits<host_type<T>>::max());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_DEVICE_REDUCE_OPS_HPP
|
||||||
@ -0,0 +1,97 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_DEVICE_SCAN_HPP
|
||||||
|
#define MIGRAPHX_GUARD_DEVICE_SCAN_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/visit.hpp>
|
||||||
|
#include <migraphx/gpu/device/multi_index.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <index_int N,
|
||||||
|
class Op,
|
||||||
|
class T,
|
||||||
|
class ForStride,
|
||||||
|
class Input,
|
||||||
|
class Output,
|
||||||
|
MIGRAPHX_REQUIRES(not std::is_integral<ForStride>{})>
|
||||||
|
__device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output)
|
||||||
|
{
|
||||||
|
using type = decltype(input(deduce_for_stride(fs)));
|
||||||
|
MIGRAPHX_DEVICE_SHARED type buffer[2][N];
|
||||||
|
type x = init;
|
||||||
|
fs([&](auto i) {
|
||||||
|
index_int iout = 0;
|
||||||
|
index_int iin = 1;
|
||||||
|
if(idx.local == 0)
|
||||||
|
buffer[iout][idx.local] = op(input(i), x);
|
||||||
|
else
|
||||||
|
buffer[iout][idx.local] = input(i);
|
||||||
|
__syncthreads();
|
||||||
|
for(index_int s = 1; s < idx.nlocal(); s *= 2)
|
||||||
|
{
|
||||||
|
iout = 1 - iout;
|
||||||
|
iin = 1 - iin;
|
||||||
|
if(idx.local >= s)
|
||||||
|
{
|
||||||
|
buffer[iout][idx.local] = op(buffer[iin][idx.local], buffer[iin][idx.local - s]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buffer[iout][idx.local] = buffer[iin][idx.local];
|
||||||
|
}
|
||||||
|
__syncthreads();
|
||||||
|
}
|
||||||
|
x = buffer[iout][idx.nlocal() - 1];
|
||||||
|
output(i, buffer[iout][idx.local]);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N, class Op, class T, class Input, class Output>
|
||||||
|
__device__ void block_scan(index idx, Op op, T init, index_int n, Input input, Output output)
|
||||||
|
{
|
||||||
|
block_scan<N>(
|
||||||
|
idx,
|
||||||
|
op,
|
||||||
|
init,
|
||||||
|
[&](auto f) -> decltype(f(index_int{})) { return idx.local_stride(n, f); },
|
||||||
|
input,
|
||||||
|
output);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
constexpr auto reverse_scan(index_int n, F f)
|
||||||
|
{
|
||||||
|
return [=](auto i, auto&&... xs) { return f(n - i - 1, xs...); };
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_DEVICE_SCAN_HPP
|
||||||
@ -0,0 +1,120 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/array.hpp>
|
||||||
|
#include <migraphx/gpu/device/fast_div.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
struct hip_shape
|
||||||
|
{
|
||||||
|
using hip_index = hip_array<index_int, N>;
|
||||||
|
hip_index lens = {};
|
||||||
|
hip_index strides = {};
|
||||||
|
hip_array<std::uint64_t, N> divs = {};
|
||||||
|
bool standard = false;
|
||||||
|
|
||||||
|
__device__ __host__ hip_shape() = default;
|
||||||
|
|
||||||
|
hip_shape(const shape& s) : standard(s.standard())
|
||||||
|
{
|
||||||
|
assert(s.lens().size() == N);
|
||||||
|
assert(s.strides().size() == N);
|
||||||
|
std::copy(s.lens().begin(), s.lens().end(), lens.begin());
|
||||||
|
std::copy(s.strides().begin(), s.strides().end(), strides.begin());
|
||||||
|
assert(std::all_of(s.lens().begin(), s.lens().end(), &is_divisor_encodable));
|
||||||
|
std::transform(s.lens().begin(), s.lens().end(), divs.begin(), &encode_divisor);
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int elements() const { return lens.product(); }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int index(hip_index x) const { return x.dot(strides); }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int index(std::initializer_list<index_int> x) const
|
||||||
|
{
|
||||||
|
index_int idx = 0;
|
||||||
|
for(index_int i = 0; i < x.size(); i++)
|
||||||
|
idx += *(x.begin() + i) * strides[i];
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int index(index_int i) const
|
||||||
|
{
|
||||||
|
if(this->standard)
|
||||||
|
return i;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const index_int rank = this->lens.size();
|
||||||
|
index_int s = 1;
|
||||||
|
index_int result = 0;
|
||||||
|
for(index_int j = 0; j < this->lens.size(); j++)
|
||||||
|
{
|
||||||
|
const index_int k = rank - j - 1;
|
||||||
|
const index_int stride = this->strides[k];
|
||||||
|
const index_int len = this->lens[k];
|
||||||
|
const index_int slen = s * len;
|
||||||
|
const index_int idx = (i % slen) / s;
|
||||||
|
result += stride * idx;
|
||||||
|
s = slen;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR hip_index multi(index_int idx) const
|
||||||
|
{
|
||||||
|
hip_index result;
|
||||||
|
index_int tidx = idx;
|
||||||
|
for(std::ptrdiff_t is = result.size() - 1; is > 0; is--)
|
||||||
|
{
|
||||||
|
// result[is] = tidx % lens[is];
|
||||||
|
// tidx = tidx / lens[is];
|
||||||
|
auto q = fast_div(tidx, divs[is]);
|
||||||
|
result[is] = remainder(q, tidx, lens[is]);
|
||||||
|
tidx = q;
|
||||||
|
}
|
||||||
|
result[0] = tidx;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <index_int N>
|
||||||
|
hip_shape<N> make_hip_shape(const shape& x)
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/visit.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Multi-dimensional index: one index_int per dimension, stored in a hip_array.
template <index_int NDim>
|
||||||
|
using hip_tensor_index = hip_array<index_int, NDim>;
|
||||||
|
|
||||||
|
template <index_int NDim>
|
||||||
|
struct hip_tensor_descriptor
|
||||||
|
{
|
||||||
|
__device__ __host__ hip_tensor_descriptor() = default;
|
||||||
|
|
||||||
|
hip_tensor_descriptor(const shape& s)
|
||||||
|
{
|
||||||
|
std::copy(s.lens().begin(), s.lens().end(), lens);
|
||||||
|
std::copy(s.strides().begin(), s.strides().end(), strides);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ hip_tensor_index<NDim> multi(index_int idx) const
|
||||||
|
{
|
||||||
|
hip_tensor_index<NDim> result{};
|
||||||
|
index_int tidx = idx;
|
||||||
|
for(index_int is = 0; is < NDim; is++)
|
||||||
|
{
|
||||||
|
result[is] = tidx / strides[is];
|
||||||
|
tidx = tidx % strides[is];
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
__device__ __host__ index_int linear(hip_tensor_index<NDim> s) const
|
||||||
|
{
|
||||||
|
index_int idx = 0;
|
||||||
|
for(index_int i = 0; i < NDim; i++)
|
||||||
|
idx += s[i] * strides[i];
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
index_int lens[NDim] = {};
|
||||||
|
index_int strides[NDim] = {};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,82 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/shape.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <class T, index_int N>
|
||||||
|
struct hip_tensor_view
|
||||||
|
{
|
||||||
|
using value_type = T;
|
||||||
|
using hip_index = typename hip_shape<N>::hip_index;
|
||||||
|
__device__ __host__ hip_tensor_view() = default;
|
||||||
|
__host__ hip_tensor_view(tensor_view<T> x) : d(x.data()), s(x.get_shape()) {}
|
||||||
|
__host__ hip_tensor_view(T* x, const shape& ss) : d(x), s(ss) {}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const hip_shape<N>& get_shape() const { return s; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int size() const { return s.elements(); }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR value_type* data() const { return d; }
|
||||||
|
|
||||||
|
template <class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR value_type& operator[](U i) const
|
||||||
|
{
|
||||||
|
return d[s.index(i)];
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR value_type* begin() const { return d; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR value_type* end() const { return d + size(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
value_type* d = nullptr;
|
||||||
|
hip_shape<N> s{};
|
||||||
|
};
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
hip_tensor_view<T, N> make_hip_view(const shape& s, T* x)
|
||||||
|
{
|
||||||
|
return {x, s};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
hip_tensor_view<T, N> make_hip_view(tensor_view<T> x)
|
||||||
|
{
|
||||||
|
return {x};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,213 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
|
||||||
|
|
||||||
|
#include <hip/hip_runtime.h>
|
||||||
|
#include <migraphx/half.hpp>
|
||||||
|
#include <migraphx/bf16.hpp>
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/tensor_view.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
using index_int = std::uint32_t;
|
||||||
|
|
||||||
|
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
|
||||||
|
|
||||||
|
// N-element vector of T, declared through the compiler's ext_vector_type attribute.
template <class T, index_int N>
|
||||||
|
using vec = T __attribute__((ext_vector_type(N)));
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
__device__ __host__ T* as_pointer(vec<T, N>* x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<T*>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
__device__ __host__ vec<T, N>* as_vec(T* x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<vec<T, N>*>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
tensor_view<vec<T, N>> as_vec(tensor_view<T> x)
|
||||||
|
{
|
||||||
|
return {x.get_shape(), as_vec<N>(x.data())};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <index_int N, class... Ts>
|
||||||
|
auto pack_vec(Ts... xs)
|
||||||
|
{
|
||||||
|
return [=](auto f, index_int n) { return f(as_vec<N>(xs)[n]...); };
|
||||||
|
}
|
||||||
|
|
||||||
|
using gpu_half = __fp16;
|
||||||
|
using gpu_bf16 = __bf16;
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
|
template <class T>
|
||||||
|
struct device_type
|
||||||
|
{
|
||||||
|
using type = T;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T, index_int N>
|
||||||
|
struct device_type<vec<T, N>>
|
||||||
|
{
|
||||||
|
using type = vec<typename device_type<T>::type, N>;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct device_type<half>
|
||||||
|
{
|
||||||
|
using type = gpu_half;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct device_type<bf16>
|
||||||
|
{
|
||||||
|
using type = gpu_bf16;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
struct host_type
|
||||||
|
{
|
||||||
|
using type = T;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct host_type<gpu_half>
|
||||||
|
{
|
||||||
|
using type = half;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct host_type<gpu_bf16>
|
||||||
|
{
|
||||||
|
using type = bf16;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
|
// Shorthand for the host-side type that detail::host_type maps T to.
template <class T>
|
||||||
|
using host_type = typename detail::host_type<T>::type;
|
||||||
|
|
||||||
|
// Shorthand for the device-side type that detail::device_type maps T to.
template <class T>
|
||||||
|
using device_type = typename detail::device_type<T>::type;
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
host_type<T> host_cast(T x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<const host_type<T>&>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
host_type<T>* host_cast(T* x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<host_type<T>*>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ device_type<T> device_cast(const T& x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<const device_type<T>&>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ device_type<T>* device_cast(T* x)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<device_type<T>*>(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ tensor_view<device_type<T>> device_cast(tensor_view<T> x)
|
||||||
|
{
|
||||||
|
return {x.get_shape(), reinterpret_cast<device_type<T>*>(x.data())};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
__device__ __host__ T to_hip_type(T x)
|
||||||
|
{
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hip doens't support __fp16 and __bf16
|
||||||
|
inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }
|
||||||
|
inline __device__ __host__ float to_hip_type(gpu_bf16 x) { return x; }
|
||||||
|
|
||||||
|
template <class X>
|
||||||
|
struct is_floating_point : std::is_floating_point<X>
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_floating_point<__fp16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class X>
|
||||||
|
struct is_signed : std::is_signed<X>
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_signed<__fp16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class X>
|
||||||
|
struct is_arithmetic : std::is_arithmetic<X>
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_arithmetic<__fp16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
// Redo for __bf16
|
||||||
|
template <>
|
||||||
|
struct is_floating_point<__bf16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_signed<__bf16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_arithmetic<__bf16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,99 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
#include <cassert>
#include <vector>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <class T, index_int N>
|
||||||
|
struct hip_vector
|
||||||
|
{
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR hip_vector() = default;
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR hip_vector(index_int s) : len(s) {}
|
||||||
|
template <class Iterator>
|
||||||
|
__device__ __host__ hip_vector(Iterator start, Iterator last)
|
||||||
|
{
|
||||||
|
auto it = std::copy(start, last, d);
|
||||||
|
len = std::distance(d, it);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ __host__ hip_vector(std::initializer_list<T> x)
|
||||||
|
{
|
||||||
|
std::copy(x.begin(), x.end(), d);
|
||||||
|
len = x.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& operator[](index_int i) { return d[i]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](index_int i) const { return d[i]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& front() { return d[0]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& front() const { return d[0]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T& back() { return d[size() - 1]; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T& back() const { return d[size() - 1]; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR index_int size() const { return len; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); }
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); }
|
||||||
|
|
||||||
|
template <class U>
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR void push_back(U&& x)
|
||||||
|
{
|
||||||
|
d[len] = static_cast<U&&>(x);
|
||||||
|
len++;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
T d[N] = {};
|
||||||
|
index_int len = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <index_int N, class T>
|
||||||
|
hip_vector<T, N> to_hip_vector(const std::vector<T>& x)
|
||||||
|
{
|
||||||
|
hip_vector<T, N> result(x.size());
|
||||||
|
std::copy(x.begin(), x.end(), result.begin());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
@ -0,0 +1,245 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
|
||||||
|
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/tensor_view.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
constexpr void visit_tensor_size(index_int n, F f)
|
||||||
|
{
|
||||||
|
switch(n)
|
||||||
|
{
|
||||||
|
case 1: {
|
||||||
|
f(std::integral_constant<index_int, 1>{});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 2: {
|
||||||
|
f(std::integral_constant<index_int, 2>{});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 3: {
|
||||||
|
f(std::integral_constant<index_int, 3>{});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 4: {
|
||||||
|
f(std::integral_constant<index_int, 4>{});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case 5: {
|
||||||
|
f(std::integral_constant<index_int, 5>{});
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default: throw std::runtime_error("Tensor dims " + std::to_string(n) + " out of range");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A shape is its own shape.
inline shape get_shape(const shape& x)
{
    return x;
}

/// Anything exposing a get_shape() member: forward to it.
template <class T>
auto get_shape(const T& x) -> decltype(x.get_shape())
{
    return x.get_shape();
}
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
struct is_hip_type : std::false_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<float> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<half> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<bool> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<std::int8_t> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<std::uint8_t> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<std::int32_t> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct is_hip_type<bf16> : std::true_type
|
||||||
|
{
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T, class V, MIGRAPHX_REQUIRES(is_hip_type<typename T::type>{})>
|
||||||
|
void hip_visitor_invoke(T as, V&& v)
|
||||||
|
{
|
||||||
|
v(as);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class T, class V, MIGRAPHX_REQUIRES(not is_hip_type<typename T::type>{})>
|
||||||
|
void hip_visitor_invoke(T, V&&)
|
||||||
|
{
|
||||||
|
MIGRAPHX_THROW(std::string("Unsupported data type on GPU: ") + __PRETTY_FUNCTION__);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wrap `v` (captured by value) in a callable that routes each type tag
/// through hip_visitor_invoke.
template <class V>
auto hip_visitor(V v)
{
    return [=](auto as) {
        hip_visitor_invoke(as, v);
    };
}
|
||||||
|
|
||||||
|
template <class V, class F, class... Ts>
|
||||||
|
void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
|
||||||
|
{
|
||||||
|
std::initializer_list<migraphx::shape::type_t> types = {get_shape(xs).type()...};
|
||||||
|
if(not std::all_of(
|
||||||
|
types.begin(), types.end(), [&](migraphx::shape::type_t t) { return t == s.type(); }))
|
||||||
|
MIGRAPHX_THROW("Types must be the same");
|
||||||
|
std::initializer_list<index_int> ranks = {static_cast<index_int>(get_shape(xs).ndim())...};
|
||||||
|
if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); }))
|
||||||
|
MIGRAPHX_THROW("Ranks must be the same");
|
||||||
|
visit_tensor_size(s.ndim(), [&](auto ndim) {
|
||||||
|
s.visit_type(hip_visitor([&](auto as) { v(f(xs, ndim, as)...); }));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class V, class F, class... Ts>
|
||||||
|
void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs)
|
||||||
|
{
|
||||||
|
std::initializer_list<index_int> ranks = {static_cast<index_int>(get_shape(xs).ndim())...};
|
||||||
|
if(not std::all_of(ranks.begin(), ranks.end(), [&](index_int r) { return r == s.ndim(); }))
|
||||||
|
MIGRAPHX_THROW("Ranks must be the same");
|
||||||
|
visit_tensor_size(s.ndim(), [&](auto ndim) { v(f(xs, ndim)...); });
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
struct hip_convert
|
||||||
|
{
|
||||||
|
F f;
|
||||||
|
template <class RawData, class N, class As>
|
||||||
|
auto operator()(RawData x, N ndim, As as) const
|
||||||
|
-> decltype(make_hip_view<ndim>(x.get_shape(), f(as.from(x.data()))))
|
||||||
|
{
|
||||||
|
return make_hip_view<ndim>(x.get_shape(), f(as.from(x.data())));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class N, class As>
|
||||||
|
auto operator()(const shape& s, N ndim, As) const
|
||||||
|
{
|
||||||
|
return make_hip_shape<ndim>(s);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
hip_convert<F> make_hip_convert(F f)
|
||||||
|
{
|
||||||
|
return {f};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
struct hip_convert_view
|
||||||
|
{
|
||||||
|
F f;
|
||||||
|
template <class T, class N>
|
||||||
|
auto operator()(tensor_view<T> x, N ndim) const
|
||||||
|
{
|
||||||
|
return make_hip_view<ndim>(f(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class N>
|
||||||
|
auto operator()(const shape& s, N ndim) const
|
||||||
|
{
|
||||||
|
return make_hip_shape<ndim>(s);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class F>
|
||||||
|
hip_convert_view<F> make_hip_convert_view(F f)
|
||||||
|
{
|
||||||
|
return {f};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a callable taking a visitor `f`. When invoked it runs
/// hip_visit_all_impl with the shape of `x`, converting every argument's data
/// pointer with device_cast. The arguments are captured by reference, so the
/// returned callable must be used while they are still alive.
template <class T, class... Ts>
auto hip_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        auto to_device = make_hip_convert([](auto* p) { return device_cast(p); });
        hip_visit_all_impl(get_shape(x), to_device, f, x, xs...);
    };
}
|
||||||
|
|
||||||
|
template <index_int N, class T, class... Ts>
|
||||||
|
auto hip_vec_visit_all(T&& x, Ts&&... xs)
|
||||||
|
{
|
||||||
|
return [&](auto f) {
|
||||||
|
auto sx = get_shape(x);
|
||||||
|
auto lens = sx.lens();
|
||||||
|
assert(lens.back() % N == 0);
|
||||||
|
assert(sx.strides().back() == 1);
|
||||||
|
lens.back() /= N;
|
||||||
|
shape vec_sx{sx.type(), lens};
|
||||||
|
hip_visit_all_impl(vec_sx,
|
||||||
|
make_hip_convert([](auto* p) { return as_vec<N>(device_cast(p)); }),
|
||||||
|
f,
|
||||||
|
x,
|
||||||
|
xs...);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a callable taking a visitor `f`. When invoked it runs visit_all over
/// the arguments and calls `f` with each view's data pointer passed through
/// device_cast. Arguments are captured by reference.
template <class T, class... Ts>
auto hip_pointer_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        visit_all(x, xs...)([&](auto... vs) {
            f(device_cast(vs.data())...);
        });
    };
}
|
||||||
|
|
||||||
|
/// Return a callable taking a visitor `f`. When invoked it runs
/// hip_visit_views_impl with the shape of `x`, converting each tensor_view
/// argument with device_cast. Arguments are captured by reference.
template <class T, class... Ts>
auto hip_visit_views(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        auto to_device = make_hip_convert_view([](auto v) { return device_cast(v); });
        hip_visit_views_impl(get_shape(x), to_device, f, x, xs...);
    };
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
|
||||||
|
#endif
|
||||||
80
docker/rocm/migraphx/targets/gpu/device/logsoftmax.cpp
Normal file
80
docker/rocm/migraphx/targets/gpu/device/logsoftmax.cpp
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/gpu/device/logsoftmax.hpp>
|
||||||
|
#include <migraphx/gpu/device/reduce.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/// Compute log-softmax of `arg` along `axis` and write it to `result`.
///
/// For every position outside `axis` the kernel reduces over the axis twice
/// (maximum, then sum of exp(x - max)) and finally writes
/// x - (log(sum) + max) for each element along the axis.
/// `axis` is used directly as an index into the lengths, so it is presumably
/// expected to be already normalized to [0, rank) by the caller.
void logsoftmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
{
    const auto& out_shape = result.get_shape();

    // Shape of the "batch": the output shape with the reduced axis set to 1.
    auto outer_lens    = out_shape.lens();
    index_int axis_len = outer_lens[axis];
    outer_lens[axis]   = 1;
    migraphx::shape batch_shape{out_shape.type(), outer_lens};

    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
        const index_int max_block_size = 256;
        const index_int block_size     = compute_block_size(axis_len, max_block_size);

        // block_size work items are launched per batch element.
        auto launcher = gs_launch(stream, batch_shape.elements() * block_size, block_size);
        launcher([=](auto i, auto idx) __device__ {
            // Multi-index of this batch element; its `axis` entry is overwritten
            // inside each of the loops below.
            auto pos   = batch.multi(i / block_size);
            using type = device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
            type floor_value = lowest();

            // Pass 1: maximum along the axis.
            auto row_max = block_reduce<max_block_size>(
                idx, max{}, floor_value, axis_len, [&](auto j) __device__ {
                    pos[axis] = j;
                    return input[pos];
                });

            // Pass 2: sum of exp(x - max) along the axis.
            auto exp_sum =
                block_reduce<max_block_size>(idx, sum{}, 0, axis_len, [&](auto j) __device__ {
                    pos[axis]    = j;
                    auto shifted = input[pos] - row_max;
                    return ::exp(to_hip_type(shifted));
                });

            auto log_norm = ::log(to_hip_type(exp_sum)) + row_max;

            // Pass 3: write the result for every element along the axis.
            idx.local_stride(axis_len, [&](auto j) __device__ {
                pos[axis]   = j;
                output[pos] = input[pos] - log_norm;
            });
        });
    });
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
90
docker/rocm/migraphx/targets/gpu/device/multinomial.cpp
Normal file
90
docker/rocm/migraphx/targets/gpu/device/multinomial.cpp
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/dfor.hpp>
|
||||||
|
#include <migraphx/gpu/device/multinomial.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/**
 * Binary search over the range [first, last) for the first element that is
 * strictly greater than `value` (the element for which `value < element`).
 *
 * The range is expected to be partitioned with respect to `value < element`,
 * as it is for a sorted range.
 *
 * @return Iterator to the first such element, or `last` if there is none.
 */
template <class Iterator, class T>
constexpr Iterator upper_bound(Iterator first, Iterator last, const T& value)
{
    using difference = typename std::iterator_traits<Iterator>::difference_type;

    // Number of candidates still under consideration, starting at `first`.
    difference remaining = std::distance(first, last);
    while(remaining > 0)
    {
        const difference half = remaining / 2;
        Iterator mid          = first;
        std::advance(mid, half);

        if(value < *mid)
        {
            // The answer is at or before mid: keep the lower half.
            remaining = half;
        }
        else
        {
            // mid is not greater than value: continue just past it.
            first = ++mid;
            remaining -= half + 1;
        }
    }
    return first;
}
|
||||||
|
|
||||||
|
/**
 * Draws class indices by searching each row of `arg0` with `upper_bound`.
 *
 * For output element i the search key is `dist[i]` scaled by the last entry of
 * the row of `arg0` selected by the leading output index; the stored result is
 * the offset of the first row entry greater than that key.
 *
 * @param stream HIP stream the kernel is launched on.
 * @param result Output argument receiving the sampled indices.
 * @param arg0   Row-wise values searched by upper_bound (named cdf below).
 *               NOTE(review): presumably a cumulative distribution per batch row - confirm.
 * @param arg1   Values indexed per output element (named dist below).
 *               NOTE(review): presumably uniform samples in [0, 1) - confirm.
 */
void multinomial(hipStream_t stream,
                 const argument& result,
                 const argument& arg0,
                 const argument& arg1)
{
    const size_t batch_size  = arg0.get_shape().lens().front();
    const size_t class_size  = arg0.get_shape().lens().back();
    const size_t sample_size = result.get_shape().lens().back();

    visit_all(arg0, arg1)([&](auto cdf_host, auto dist_host) {
        result.visit([&](auto output_host) {
            hip_visit_views(cdf_host, dist_host, output_host)(
                [&](auto cdf, auto dist, auto output) {
                    gs_launch(stream, batch_size * sample_size)([=](auto i) __device__ {
                        // Leading output index selects the row of cdf to search.
                        auto out_idx   = output.get_shape().multi(i);
                        auto row_begin = cdf.begin() + (out_idx.front() * class_size);
                        auto row_end   = row_begin + class_size;
                        // Scale the sample by the row's final entry before searching.
                        auto* hit =
                            upper_bound(row_begin, row_end, dist[i] * *(std::prev(row_end)));
                        output[i] = std::distance(row_begin, hit);
                    });
                });
        });
    });
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
77
docker/rocm/migraphx/targets/gpu/device/nonzero.cpp
Normal file
77
docker/rocm/migraphx/targets/gpu/device/nonzero.cpp
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/device/nonzero.hpp>
|
||||||
|
#include <migraphx/gpu/device/float_equal.hpp>
|
||||||
|
#include <migraphx/gpu/device/scan.hpp>
|
||||||
|
#include <migraphx/gpu/device/reduce_ops.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/**
 * Writes the multi-indices of the non-zero elements of `arg_data` into `result`.
 *
 * The output buffer is first zero-filled. A prefix sum over "element is
 * non-zero" flags then gives each non-zero element its output column, and
 * coordinate k of that element is stored at `k * elem_num + column`.
 *
 * @param stream   HIP stream the kernel is launched on.
 * @param result   Output argument, accessed as int64_t.
 * @param arg_data Input argument to scan for non-zero values.
 * @return `result`.
 */
argument nonzero(hipStream_t stream, const argument& result, const argument& arg_data)
{
    auto in_shape     = arg_data.get_shape();
    auto elem_num     = in_shape.elements();
    auto out_elem_num = result.get_shape().elements();

    // A single prefix sum is computed, so only one block is launched:
    // the global size equals the block size.
    const index_int block_size = 256;
    hip_visit_all(arg_data, in_shape)([&](auto input, auto si) {
        const auto* in_ptr = device_cast(input.data());
        auto* out_ptr      = result.cast<int64_t>();
        gs_launch(stream, block_size, block_size)([=](auto, auto idx) __device__ {
            // Clear the whole output before any index is written.
            idx.local_stride(out_elem_num, [&](auto j) { out_ptr[j] = 0; });

            block_scan<block_size>(
                idx,
                sum{},
                0,
                elem_num,
                // Scan input: 1 for a non-zero element, 0 otherwise.
                [&](auto j) { return (float_equal(in_ptr[j], 0)) ? 0 : 1; },
                // Scan output: x is the inclusive count of non-zeros up to j.
                [&](auto j, auto x) {
                    if(float_equal(in_ptr[j], 0))
                        return;

                    auto column = x - 1;
                    auto coords = si.multi(j);
                    for(size_t k = 0; k < coords.size(); ++k)
                    {
                        out_ptr[k * elem_num + column] = coords[k];
                    }
                });
        });
    });

    return result;
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
143
docker/rocm/migraphx/targets/gpu/device/prefix_scan_sum.cpp
Normal file
143
docker/rocm/migraphx/targets/gpu/device/prefix_scan_sum.cpp
Normal file
@ -0,0 +1,143 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/device/prefix_scan_sum.hpp>
|
||||||
|
#include <migraphx/gpu/device/scan.hpp>
|
||||||
|
#include <migraphx/gpu/device/reduce_ops.hpp>
|
||||||
|
#include <migraphx/gpu/device/reduce.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/**
 * Prefix sum of `arg` along `axis`, written to `result`.
 *
 * @param stream    HIP stream the kernels are launched on.
 * @param result    Output argument.
 * @param arg       Input argument.
 * @param axis      Axis that is scanned (used directly as an index into lens()).
 * @param exclusive When true each output excludes its own input element: the
 *                  first written position gets 0 and every scan value is
 *                  stored one position further along the scan direction.
 * @param reverse   When true the scan callbacks are wrapped in reverse_scan.
 */
void prefix_scan_sum(hipStream_t stream,
                     const argument& result,
                     const argument& arg,
                     int32_t axis,
                     bool exclusive,
                     bool reverse)
{
    const index_int max_block_size = 256;
    const index_int n              = arg.get_shape().lens()[axis];

    // Result shape with the scanned axis collapsed: one element per independent scan.
    auto scan_lens  = result.get_shape().lens();
    scan_lens[axis] = 1;

    hip_visit_all(result, arg, result.get_shape().with_lens(scan_lens))(
        [=](auto output, auto input, auto rshape) {
            // NOTE(review): the block size is derived from the number of independent
            // scans (rshape.elements()), not from the scan length n - confirm intended.
            const index_int block_size = compute_block_size(rshape.elements(), max_block_size);
            if(reverse)
            {
                if(exclusive)
                {
                    // Reverse + exclusive.
                    gs_launch(stream, rshape.elements() * block_size, block_size)(
                        [=](auto i, auto idx) __device__ {
                            const auto base = rshape.multi(i / block_size);
                            // Multi-index of element j along the scanned axis.
                            auto at = [&](auto j) {
                                auto pos  = base;
                                pos[axis] = j;
                                return pos;
                            };
                            block_scan<max_block_size>(
                                idx,
                                sum{},
                                0,
                                n,
                                reverse_scan(n, [&](auto j) { return input[at(j)]; }),
                                reverse_scan(n, [&](auto j, auto x) {
                                    // Last position receives 0; every value is
                                    // stored one position earlier.
                                    if(j == n - 1)
                                        output[at(j)] = 0;
                                    if(j > 0)
                                        output[at(j - 1)] = x;
                                }));
                        });
                }
                else
                {
                    // Reverse + inclusive.
                    gs_launch(stream, rshape.elements() * block_size, block_size)(
                        [=](auto i, auto idx) __device__ {
                            const auto base = rshape.multi(i / block_size);
                            auto at         = [&](auto j) {
                                auto pos  = base;
                                pos[axis] = j;
                                return pos;
                            };
                            block_scan<max_block_size>(
                                idx,
                                sum{},
                                0,
                                n,
                                reverse_scan(n, [&](auto j) { return input[at(j)]; }),
                                reverse_scan(n, [&](auto j, auto x) { output[at(j)] = x; }));
                        });
                }
            }
            else
            {
                if(exclusive)
                {
                    // Forward + exclusive.
                    gs_launch(stream, rshape.elements() * block_size, block_size)(
                        [=](auto i, auto idx) __device__ {
                            const auto base = rshape.multi(i / block_size);
                            auto at         = [&](auto j) {
                                auto pos  = base;
                                pos[axis] = j;
                                return pos;
                            };
                            block_scan<max_block_size>(
                                idx,
                                sum{},
                                0,
                                n,
                                [&](auto j) { return input[at(j)]; },
                                [&](auto j, auto x) {
                                    // First position receives 0; every value is
                                    // stored one position later.
                                    auto next = j + 1;
                                    if(j == 0)
                                        output[at(0)] = 0;
                                    if(next < n)
                                        output[at(next)] = x;
                                });
                        });
                }
                else
                {
                    // Forward + inclusive.
                    gs_launch(stream, rshape.elements() * block_size, block_size)(
                        [=](auto i, auto idx) __device__ {
                            const auto base = rshape.multi(i / block_size);
                            auto at         = [&](auto j) {
                                auto pos  = base;
                                pos[axis] = j;
                                return pos;
                            };
                            block_scan<max_block_size>(
                                idx,
                                sum{},
                                0,
                                n,
                                [&](auto j) { return input[at(j)]; },
                                [&](auto j, auto x) { output[at(j)] = x; });
                        });
                }
            }
        });
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
66
docker/rocm/migraphx/targets/gpu/device/reverse.cpp
Normal file
66
docker/rocm/migraphx/targets/gpu/device/reverse.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include "migraphx/gpu/device/visit.hpp"
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/gpu/device/reverse.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/**
 * Copies `arg1` into `result` with the order of elements flipped along every
 * axis listed in `axes`.
 *
 * @param stream HIP stream the kernel is launched on.
 * @param result Output argument.
 * @param arg1   Input argument; its shape drives the indexing.
 * @param axes   Axes to flip; each value is used directly as an index.
 * @return `result`.
 */
argument
reverse(hipStream_t stream, argument result, argument arg1, const std::vector<int64_t>& axes)
{
    auto in_shape = arg1.get_shape();

    // The axis numbers are stored as the lens of a throwaway shape so that
    // hip_visit_views can hand them to the kernel; the shape's type is irrelevant.
    std::vector<std::size_t> axes_as_lens(axes.begin(), axes.end());
    shape axes_carrier{shape::float_type, axes_as_lens};

    std::size_t total = in_shape.elements();
    visit_all(result, arg1)([&](auto output1, auto input1) {
        hip_visit_views(output1, input1, in_shape)([&](auto output, auto input, auto hs) {
            hip_visit_views(axes_carrier)([&](auto daxes) {
                auto dims = hs.lens;
                gs_launch(stream, total)([=](auto i) __device__ {
                    auto out_idx = hs.multi(i);
                    auto src_idx = out_idx;
                    // Mirror the coordinate on each requested axis.
                    for(auto axis : daxes.lens)
                        src_idx[axis] = dims[axis] - 1 - out_idx[axis];
                    output[out_idx] = input[src_idx];
                });
            });
        });
    });

    return result;
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/device/rnn_variable_seq_lens.hpp>
|
||||||
|
#include <migraphx/gpu/device/nary.hpp>
|
||||||
|
#include <migraphx/gpu/device/shape.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
/**
 * Shifts each batch entry's sequence towards the end of the time axis.
 *
 * With max_len = lens()[0] of the output and l = arg_sl[b], output time step t
 * is zero when t < max_len - l and otherwise copies the input at time step
 * t - (max_len - l).
 *
 * @param stream HIP stream the kernel is launched on.
 * @param result Output argument (indexed as a 3-d shape; dims 0 and 1 are
 *               used as time and batch).
 * @param arg_hs Input argument, indexed with the output shape.
 * @param arg_sl Per-batch sequence lengths.
 */
void rnn_var_sl_shift_sequence(hipStream_t stream,
                               const argument& result,
                               const argument& arg_hs,
                               const argument& arg_sl)
{
    auto output_shape = result.get_shape();
    int64_t max_len   = output_shape.lens()[0];
    visit_all(result, arg_hs)([&](auto output, auto input) {
        const auto* src = device_cast(input.data());
        auto* dst       = device_cast(output.data());
        auto out_s      = make_hip_shape<3>(output_shape);
        arg_sl.visit([&](auto sl) {
            const auto* seq_lens = device_cast(sl.data());
            gs_launch(stream, output_shape.elements(), 256)([=](auto i) __device__ {
                auto idx = out_s.multi(i);
                auto t   = idx[0];
                auto b   = idx[1];
                auto l   = seq_lens[b];
                // Number of leading time steps that are zero-filled for this batch entry.
                auto pad = max_len - l;

                // Read one element only to obtain a value of the element type, then zero it.
                auto val = src[0];
                val      = 0;
                if(t >= pad)
                {
                    auto in_idx = idx;
                    in_idx[0] -= pad;
                    val = src[out_s.index(in_idx)];
                }
                dst[i] = val;
            });
        });
    });
}
|
||||||
|
|
||||||
|
/**
 * Produces the output hidden states for variable sequence lengths.
 *
 * With l = arg_sl[b], output time steps t >= l are zero. For t < l the value
 * is copied from the input: at the same time step for direction index 0 when
 * `is_reverse` is false, and at time step t + (max_len - l) when the direction
 * index is 1 or `is_reverse` is true.
 *
 * @param stream     HIP stream the kernel is launched on.
 * @param result     Output argument (indexed as a 4-d shape; dims 0, 1, 2 are
 *                   used as time, direction and batch).
 * @param arg_hs     Input argument, indexed with the output shape.
 * @param arg_sl     Per-batch sequence lengths.
 * @param is_reverse Apply the time offset for every direction index.
 */
void rnn_var_sl_shift_output(hipStream_t stream,
                             const argument& result,
                             const argument& arg_hs,
                             const argument& arg_sl,
                             bool is_reverse)
{
    auto output_shape = result.get_shape();
    int64_t max_len   = output_shape.lens()[0];
    visit_all(result, arg_hs)([&](auto output, auto input) {
        const auto* src = device_cast(input.data());
        auto* dst       = device_cast(output.data());
        auto out_s      = make_hip_shape<4>(output_shape);
        arg_sl.visit([&](auto sl) {
            const auto* seq_lens = device_cast(sl.data());
            gs_launch(stream, output_shape.elements(), 256)([=](auto i) __device__ {
                auto idx = out_s.multi(i);
                auto t   = idx[0];
                auto d   = idx[1];
                auto b   = idx[2];
                auto l   = seq_lens[b];

                // Read one element only to obtain a value of the element type, then zero it.
                auto val = src[0];
                val      = 0;
                if(t < l)
                {
                    // 1 selects the shifted source position, 0 the unshifted one.
                    int offset  = (d == 1 or is_reverse) ? 1 : 0;
                    auto in_idx = idx;
                    in_idx[0] += offset * (max_len - l);
                    val = src[out_s.index(in_idx)];
                }
                dst[i] = val;
            });
        });
    });
}
|
||||||
|
|
||||||
|
/**
 * Extracts one time step per (direction, batch) entry from the hidden states.
 *
 * Time step 0 is taken when `is_reverse` is true or the direction index is 1;
 * otherwise time step arg_sl[b] - 1 is taken.
 *
 * @param stream     HIP stream the kernel is launched on.
 * @param result     Output argument; its element count sizes the launch.
 * @param arg_hs     Input hidden states (indexed as a 4-d shape; dims 0, 1, 2
 *                   are used as time, direction and batch).
 * @param arg_sl     Per-batch sequence lengths.
 * @param is_reverse Always read time step 0.
 */
void rnn_var_sl_last_output(hipStream_t stream,
                            const argument& result,
                            const argument& arg_hs,
                            const argument& arg_sl,
                            bool is_reverse)
{
    auto input_shape = arg_hs.get_shape();

    // Input shape with the time dimension collapsed to 1; used to turn the
    // flat output position into a multi-index.
    auto single_step_lens = input_shape.lens();
    single_step_lens[0]   = 1;
    shape single_step_shape{input_shape.type(), single_step_lens};

    visit_all(result, arg_hs)([&](auto output, auto input) {
        const auto* src = device_cast(input.data());
        auto* dst       = device_cast(output.data());
        arg_sl.visit([&](auto sl) {
            const auto* seq_lens = device_cast(sl.data());
            auto in_s            = make_hip_shape<4>(input_shape);
            auto out_s           = make_hip_shape<4>(single_step_shape);
            gs_launch(stream, result.get_shape().elements(), 256)([=](auto i) __device__ {
                auto idx = out_s.multi(i);
                auto d   = idx[1];
                auto b   = idx[2];
                auto l   = seq_lens[b];
                // Choose the source time step for this entry.
                if(not(is_reverse or d == 1))
                {
                    idx[0] = l - 1;
                }
                else
                {
                    idx[0] = 0;
                }
                dst[i] = src[in_s.index(idx)];
            });
        });
    });
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
66
docker/rocm/migraphx/targets/gpu/device/targets.cpp
Normal file
66
docker/rocm/migraphx/targets/gpu/device/targets.cpp
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/targets.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <migraphx/errors.hpp>
|
||||||
|
#include <hip/hip_runtime_api.h>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
// Splits the MIGRAPHX_GPU_TARGETS string on ';' into individual target names.
static std::vector<std::string> parse_targets()
{
    return split_string(MIGRAPHX_GPU_TARGETS, ';');
}
|
||||||
|
|
||||||
|
const std::vector<std::string>& get_targets()
|
||||||
|
{
|
||||||
|
static auto result = parse_targets();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the GPU targets joined into one string, separated by ", ".
std::string get_targets_as_string()
{
    return join_strings(get_targets(), ", ");
}
|
||||||
|
|
||||||
|
static int get_device_id()
|
||||||
|
{
|
||||||
|
int device;
|
||||||
|
auto status = hipGetDevice(&device);
|
||||||
|
if(status != hipSuccess)
|
||||||
|
MIGRAPHX_THROW("No device");
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_device_name()
|
||||||
|
{
|
||||||
|
hipDeviceProp_t props{};
|
||||||
|
auto status = hipGetDeviceProperties(&props, get_device_id());
|
||||||
|
if(status != hipSuccess)
|
||||||
|
MIGRAPHX_THROW("Failed to get device properties");
|
||||||
|
return props.gcnArchName;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
52
docker/rocm/migraphx/targets/gpu/device/targets.hpp.in
Normal file
52
docker/rocm/migraphx/targets/gpu/device/targets.hpp.in
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
|
||||||
|
#define MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
|
||||||
|
|
||||||
|
#include <migraphx/gpu/device/config.hpp>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
#define MIGRAPHX_GPU_TARGETS "@GPU_TARGETS@" // NOLINT
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_EXPORT
|
||||||
|
const std::vector<std::string>& get_targets();
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_EXPORT
|
||||||
|
std::string get_targets_as_string();
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_EXPORT
|
||||||
|
std::string get_device_name();
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
|
||||||
|
|
||||||
|
|
||||||
239
docker/rocm/migraphx/targets/gpu/device/topk.cpp
Normal file
239
docker/rocm/migraphx/targets/gpu/device/topk.cpp
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
#include <migraphx/argument.hpp>
|
||||||
|
#include <migraphx/gpu/device/topk.hpp>
|
||||||
|
#include <migraphx/gpu/device/tensor.hpp>
|
||||||
|
#include <migraphx/gpu/device/launch.hpp>
|
||||||
|
#include <migraphx/gpu/device/types.hpp>
|
||||||
|
#include <migraphx/gpu/device/visit.hpp>
|
||||||
|
#include <migraphx/ranges.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace device {
|
||||||
|
|
||||||
|
template <class T, class Index, class Compare>
|
||||||
|
struct hip_heap_vector
|
||||||
|
{
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR hip_heap_vector(T* val, index_int n, Index v_idx, Compare comp)
|
||||||
|
: data(val), size(n), data_index(v_idx), compare(comp)
|
||||||
|
{
|
||||||
|
make_heap(size);
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR void try_push(const T val)
|
||||||
|
{
|
||||||
|
if(compare(val, data[data_index(0)]))
|
||||||
|
return;
|
||||||
|
|
||||||
|
pop_heap(size - 1);
|
||||||
|
data[data_index(size - 1)] = val;
|
||||||
|
push_heap(size - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR void sort() { sort_heap(size); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline static void swap(T& v1, T& v2) noexcept
|
||||||
|
{
|
||||||
|
T v = v1;
|
||||||
|
v1 = v2;
|
||||||
|
v2 = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void heapify_down(index_int n, index_int index)
|
||||||
|
{
|
||||||
|
while(index < n)
|
||||||
|
{
|
||||||
|
auto pre_index = index;
|
||||||
|
index_int l = 2 * index + 1;
|
||||||
|
index_int r = 2 * index + 2;
|
||||||
|
|
||||||
|
if(l < n and compare(data[data_index(l)], data[data_index(index)]))
|
||||||
|
{
|
||||||
|
index = l;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(r < n and compare(data[data_index(r)], data[data_index(index)]))
|
||||||
|
{
|
||||||
|
index = r;
|
||||||
|
if(compare(data[data_index(l)], data[data_index(r)]))
|
||||||
|
{
|
||||||
|
index = l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(index == pre_index)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
swap(data[data_index(index)], data[data_index(pre_index)]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void heapify_up(index_int index)
|
||||||
|
{
|
||||||
|
while(index > 0)
|
||||||
|
{
|
||||||
|
auto parent_idx = (index - 1) / 2;
|
||||||
|
|
||||||
|
if(not compare(data[data_index(index)], data[data_index(parent_idx)]))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
swap(data[data_index(index)], data[data_index(parent_idx)]);
|
||||||
|
index = parent_idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void make_heap(index_int n)
|
||||||
|
{
|
||||||
|
for(int j = n / 2 - 1; j >= 0; --j)
|
||||||
|
{
|
||||||
|
heapify_down(n, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void push_heap(index_int loc) { heapify_up(loc); }
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void pop_heap(index_int loc)
|
||||||
|
{
|
||||||
|
swap(data[data_index(0)], data[data_index(loc)]);
|
||||||
|
heapify_down(loc, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
MIGRAPHX_DEVICE_CONSTEXPR inline void sort_heap(index_int n)
|
||||||
|
{
|
||||||
|
for(int j = n - 1; j > 0; --j)
|
||||||
|
{
|
||||||
|
swap(data[data_index(0)], data[data_index(j)]);
|
||||||
|
heapify_down(j, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
T* data = nullptr;
|
||||||
|
index_int size;
|
||||||
|
Index data_index;
|
||||||
|
Compare compare;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T, class Index, class Compare>
|
||||||
|
__device__ hip_heap_vector<T, Index, Compare>
|
||||||
|
make_heap(T* data, index_int n, Index idx, Compare compare)
|
||||||
|
{
|
||||||
|
return {data, n, idx, compare};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Compare>
|
||||||
|
std::vector<argument> topk(hipStream_t stream,
|
||||||
|
const argument& val_res,
|
||||||
|
const argument& ind_res,
|
||||||
|
const argument& arg,
|
||||||
|
int64_t k,
|
||||||
|
int64_t axis,
|
||||||
|
Compare compare)
|
||||||
|
{
|
||||||
|
auto in_s = arg.get_shape();
|
||||||
|
auto in_lens = in_s.lens();
|
||||||
|
auto out_s = val_res.get_shape();
|
||||||
|
auto axis_dim = in_s.lens()[axis];
|
||||||
|
auto comp_lens = in_lens;
|
||||||
|
comp_lens[axis] = 1;
|
||||||
|
shape comp_s{in_s.type(), comp_lens};
|
||||||
|
std::size_t elem_num = comp_s.elements();
|
||||||
|
|
||||||
|
hip_visit_all(val_res, arg, out_s, in_s, comp_s)(
|
||||||
|
[&](auto out_val, auto input, auto oss, auto iss, auto css) {
|
||||||
|
auto* data = device_cast(input.data());
|
||||||
|
auto* out = device_cast(out_val.data());
|
||||||
|
auto* const ind = ind_res.cast<int64_t>();
|
||||||
|
gs_launch(stream, elem_num)([=](auto i) __device__ {
|
||||||
|
auto idx = css.multi(i);
|
||||||
|
|
||||||
|
auto in_idx = [&](int ii) {
|
||||||
|
auto iidx = idx;
|
||||||
|
iidx[axis] = ii;
|
||||||
|
return iss.index(iidx);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto out_idx = [&](int ii) {
|
||||||
|
auto iidx = idx;
|
||||||
|
iidx[axis] = ii;
|
||||||
|
return oss.index(iidx);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto data_compare = [=](auto ii, auto jj) {
|
||||||
|
return compare(data[in_idx(ii)], data[in_idx(jj)]);
|
||||||
|
};
|
||||||
|
|
||||||
|
for(int j = 0; j < k; ++j)
|
||||||
|
{
|
||||||
|
ind[out_idx(j)] = j;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto hp = make_heap(ind, k, out_idx, data_compare);
|
||||||
|
for(int j = k; j < axis_dim; ++j)
|
||||||
|
{
|
||||||
|
hp.try_push(j);
|
||||||
|
}
|
||||||
|
hp.sort();
|
||||||
|
|
||||||
|
for(int j = 0; j < k; ++j)
|
||||||
|
{
|
||||||
|
out[out_idx(j)] = data[in_idx(ind[out_idx(j)])];
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return {val_res, ind_res};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs topk with std::less as the ordering and wraps the resulting
// {values, indices} pair into a single argument.
argument topk_largest(hipStream_t stream,
                      const argument& val_res,
                      const argument& ind_res,
                      const argument& arg,
                      int64_t k,
                      int64_t axis)
{
    auto results = topk(stream, val_res, ind_res, arg, k, axis, std::less<>{});
    return {results};
}
|
||||||
|
|
||||||
|
// Runs topk with std::greater as the ordering and wraps the resulting
// {values, indices} pair into a single argument.
argument topk_smallest(hipStream_t stream,
                       const argument& val_res,
                       const argument& ind_res,
                       const argument& arg,
                       int64_t k,
                       int64_t axis)
{
    auto results = topk(stream, val_res, ind_res, arg, k, axis, std::greater<>{});
    return {results};
}
|
||||||
|
|
||||||
|
} // namespace device
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
68
docker/rocm/migraphx/targets/gpu/device_name.cpp
Normal file
68
docker/rocm/migraphx/targets/gpu/device_name.cpp
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/device_name.hpp>
|
||||||
|
#include <migraphx/errors.hpp>
|
||||||
|
#include <migraphx/rank.hpp>
|
||||||
|
#include <migraphx/stringutils.hpp>
|
||||||
|
#include <hip/hip_runtime_api.h>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
|
||||||
|
int get_device_id()
|
||||||
|
{
|
||||||
|
int device;
|
||||||
|
auto status = hipGetDevice(&device);
|
||||||
|
if(status != hipSuccess)
|
||||||
|
MIGRAPHX_THROW("No device");
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string get_device_name()
|
||||||
|
{
|
||||||
|
hipDeviceProp_t props{};
|
||||||
|
auto status = hipGetDeviceProperties(&props, get_device_id());
|
||||||
|
if(status != hipSuccess)
|
||||||
|
MIGRAPHX_THROW("Failed to get device properties");
|
||||||
|
return props.gcnArchName;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gfx_has_fp8fnuz_intrinsics()
|
||||||
|
{
|
||||||
|
const auto device_name = trim(split_string(get_device_name(), ':').front());
|
||||||
|
return (starts_with(device_name, "gfx94"));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gfx_has_fp8ocp_intrinsics()
|
||||||
|
{
|
||||||
|
const auto device_name = trim(split_string(get_device_name(), ':').front());
|
||||||
|
bool is_navi_with_fp8ocp = starts_with(device_name, "gfx12") and device_name >= "gfx1200";
|
||||||
|
bool is_mi_with_fp8ocp = starts_with(device_name, "gfx9") and device_name >= "gfx950";
|
||||||
|
return (is_navi_with_fp8ocp or is_mi_with_fp8ocp);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
31
docker/rocm/migraphx/targets/gpu/driver/CMakeLists.txt
Normal file
31
docker/rocm/migraphx/targets/gpu/driver/CMakeLists.txt
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
#####################################################################################
|
||||||
|
# The MIT License (MIT)
|
||||||
|
#
|
||||||
|
# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in
|
||||||
|
# all copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
# THE SOFTWARE.
|
||||||
|
#####################################################################################
|
||||||
|
|
||||||
|
# gpu-driver: standalone executable built from every .cpp file in this directory.
# CONFIGURE_DEPENDS makes the build re-check the glob so new files are picked up.
file(GLOB GPU_DRIVER_SRCS CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_executable(gpu-driver ${GPU_DRIVER_SRCS})
rocm_clang_tidy_check(gpu-driver)

# Headers private to the driver live in ./include next to this file
target_include_directories(gpu-driver PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
target_link_libraries(gpu-driver PRIVATE migraphx_gpu)
|
||||||
50
docker/rocm/migraphx/targets/gpu/driver/action.cpp
Normal file
50
docker/rocm/migraphx/targets/gpu/driver/action.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/action.hpp>
|
||||||
|
#include <migraphx/errors.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
auto& action_map()
|
||||||
|
{
|
||||||
|
static std::unordered_map<std::string, action_function> m;
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Looks up the handler registered under `name`.
// Throws (MIGRAPHX_THROW) with "Missing action: <name>" when none is registered.
action_function get_action(const std::string& name)
{
    auto& registry   = action_map();
    const auto entry = registry.find(name);
    if(entry == registry.end())
        MIGRAPHX_THROW("Missing action: " + name);
    return entry->second;
}
|
||||||
|
|
||||||
|
// Registers `a` under `name`, replacing any handler already stored for that name.
void register_action(const std::string& name, const action_function& a)
{
    auto& registry = action_map();
    registry[name] = a;
}
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
50
docker/rocm/migraphx/targets/gpu/driver/compile_op.cpp
Normal file
50
docker/rocm/migraphx/targets/gpu/driver/compile_op.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/action.hpp>
|
||||||
|
#include <migraphx/gpu/time_op.hpp>
|
||||||
|
#include <migraphx/gpu/compiler.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
// Driver action: compiles the operator described by a JSON entry with
// gpu::compile_op, times it, and prints "<op> -> <output shape>: <t>ms".
struct compile_op : action<compile_op>
{
    // p - parser supplying shape parsing and default settings
    // v - JSON entry; reads "inputs", "name" and optionally "iterations" (default 100)
    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto inputs        = p.parse_shapes(v.at("inputs"));
        const auto op_name = v.at("name").to<std::string>();
        auto op            = gpu::compile_op(op_name, ctx, inputs, v);
        const auto iters   = p.get(v, "iterations", 100);
        auto t             = time_op(ctx, op, inputs, iters);
        // Result line followed by one blank line
        std::cout << op << " -> " << op.compute_shape(inputs) << ": " << t << "ms" << std::endl
                  << std::endl;
    }
};
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
@ -0,0 +1,60 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
|
||||||
|
#define MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
|
||||||
|
|
||||||
|
#include <migraphx/config.hpp>
|
||||||
|
#include <migraphx/auto_register.hpp>
|
||||||
|
#include <migraphx/type_name.hpp>
|
||||||
|
#include <migraphx/gpu/driver/parser.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
using action_function = std::function<void(const parser&, const value&)>;
|
||||||
|
|
||||||
|
action_function get_action(const std::string& name);
|
||||||
|
void register_action(const std::string& name, const action_function& a);
|
||||||
|
|
||||||
|
struct auto_register_action
|
||||||
|
{
|
||||||
|
template <class T>
|
||||||
|
static void apply()
|
||||||
|
{
|
||||||
|
const auto& name = get_type_name<T>();
|
||||||
|
register_action(name.substr(name.rfind("::") + 2),
|
||||||
|
[](auto&&... xs) { T::apply(std::forward<decltype(xs)>(xs)...); });
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
using action = auto_register<auto_register_action, T>;
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_GPU_DRIVER_ACTION_HPP
|
||||||
@ -0,0 +1,68 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#ifndef MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
|
||||||
|
#define MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
|
||||||
|
|
||||||
|
#include <migraphx/value.hpp>
|
||||||
|
#include <migraphx/shape.hpp>
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <functional>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
[[noreturn]] void error(const std::string& msg);
|
||||||
|
|
||||||
|
struct parser
|
||||||
|
{
|
||||||
|
parser() = default;
|
||||||
|
|
||||||
|
template <class T>
|
||||||
|
T get(const value& v, const std::string& key, const T& default_value) const
|
||||||
|
{
|
||||||
|
return v.get(key, settings.get(key, default_value));
|
||||||
|
}
|
||||||
|
|
||||||
|
shape parse_shape(const value& v) const;
|
||||||
|
|
||||||
|
std::vector<shape> parse_shapes(const value& v) const;
|
||||||
|
|
||||||
|
void load_settings(const value& v);
|
||||||
|
|
||||||
|
static void process(const value& v);
|
||||||
|
|
||||||
|
private:
|
||||||
|
value settings = value::object{};
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
|
#endif // MIGRAPHX_GUARD_GPU_DRIVER_PARSER_HPP
|
||||||
44
docker/rocm/migraphx/targets/gpu/driver/main.cpp
Normal file
44
docker/rocm/migraphx/targets/gpu/driver/main.cpp
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/parser.hpp>
|
||||||
|
#include <migraphx/json.hpp>
|
||||||
|
#include <migraphx/convert_to_json.hpp>
|
||||||
|
#include <migraphx/file_buffer.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
using namespace migraphx; // NOLINT
|
||||||
|
using namespace migraphx::gpu; // NOLINT
|
||||||
|
using namespace migraphx::gpu::driver; // NOLINT
|
||||||
|
|
||||||
|
int main(int argc, char const* argv[])
|
||||||
|
{
|
||||||
|
std::vector<std::string> args(argv, argv + argc);
|
||||||
|
if(args.size() < 2)
|
||||||
|
{
|
||||||
|
std::cout << "Usage: gpu-driver <input-file>" << std::endl;
|
||||||
|
std::abort();
|
||||||
|
}
|
||||||
|
auto v = from_json_string(convert_to_json(read_string(args[1])));
|
||||||
|
parser::process(v);
|
||||||
|
}
|
||||||
81
docker/rocm/migraphx/targets/gpu/driver/parser.cpp
Normal file
81
docker/rocm/migraphx/targets/gpu/driver/parser.cpp
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/parser.hpp>
|
||||||
|
#include <migraphx/gpu/driver/action.hpp>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
// Prints `msg` on its own line to standard output, flushes, and aborts the process.
[[noreturn]] void error(const std::string& msg)
{
    std::cout << msg << '\n' << std::flush;
    std::abort();
}
|
||||||
|
|
||||||
|
// Builds a shape from the "lens", "strides" and "type" keys of `v` (each
// resolved through get(), so global settings act as defaults; the type falls
// back to "float"). Strides are only passed on when non-empty.
shape parser::parse_shape(const value& v) const
{
    auto lens      = get(v, "lens", std::vector<std::size_t>{});
    auto strides   = get(v, "strides", std::vector<std::size_t>{});
    auto type_name = get<std::string>(v, "type", "float");
    auto type      = shape::parse_type(type_name);
    return strides.empty() ? shape{type, lens} : shape{type, lens, strides};
}
|
||||||
|
|
||||||
|
std::vector<shape> parser::parse_shapes(const value& v) const
|
||||||
|
{
|
||||||
|
std::vector<shape> result;
|
||||||
|
std::transform(
|
||||||
|
v.begin(), v.end(), std::back_inserter(result), [&](auto&& x) { return parse_shape(x); });
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
void parser::load_settings(const value& v)
|
||||||
|
{
|
||||||
|
if(v.contains("settings"))
|
||||||
|
settings = v.at("settings");
|
||||||
|
}
|
||||||
|
|
||||||
|
void parser::process(const value& v)
|
||||||
|
{
|
||||||
|
if(not v.is_object())
|
||||||
|
error("Input is not an object");
|
||||||
|
parser p{};
|
||||||
|
p.load_settings(v);
|
||||||
|
for(auto&& pp : v)
|
||||||
|
{
|
||||||
|
if(pp.get_key() == "settings")
|
||||||
|
continue;
|
||||||
|
get_action(pp.get_key())(p, pp.without_key());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
84
docker/rocm/migraphx/targets/gpu/driver/precompile_op.cpp
Normal file
84
docker/rocm/migraphx/targets/gpu/driver/precompile_op.cpp
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/action.hpp>
|
||||||
|
#include <migraphx/gpu/time_op.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/gpu/lowering.hpp>
|
||||||
|
#include <migraphx/gpu/compile_ops.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
#include <migraphx/pass_manager.hpp>
|
||||||
|
#include <migraphx/program.hpp>
|
||||||
|
#include <migraphx/instruction.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
// Driver action: wraps a single operator in a program, lowers and compiles it,
// then times the resulting "gpu::code_object" instruction.
struct precompile_op : action<precompile_op>
{
    // Builds a program whose main module takes one parameter ("x0", "x1", ...)
    // per input shape except the last, and applies `preop` to those parameters.
    // NOTE(review): the last shape is presumably the output buffer - confirm.
    // Throws (MIGRAPHX_THROW) when `inputs` is empty.
    static program create_preop_program(const operation& preop, std::vector<shape> inputs)
    {
        // pop_back() on an empty vector is undefined behaviour; reject it explicitly
        if(inputs.empty())
            MIGRAPHX_THROW("precompile_op requires at least one input shape");

        program p;
        auto* mm = p.get_main_module();
        std::vector<instruction_ref> args;
        inputs.pop_back();
        transform(inputs, range(inputs.size()), std::back_inserter(args), [&](auto input, auto i) {
            return mm->add_parameter("x" + std::to_string(i), input);
        });
        mm->add_instruction(preop, args);
        return p;
    }

    // Returns the operator of the first "gpu::code_object" instruction in the
    // main module of `p`. Throws (MIGRAPHX_THROW) when there is none.
    static operation get_code_object(const program& p)
    {
        MIGRAPHX_TIDY_CONST auto* mm = p.get_main_module();
        auto it                      = std::find_if(mm->begin(), mm->end(), [](const auto& ins) {
            return (ins.name() == "gpu::code_object");
        });
        if(it == mm->end())
            MIGRAPHX_THROW("Failed to create code object");
        return it->get_operator();
    }

    // p - parser supplying shape parsing and default settings
    // v - JSON entry; reads "inputs", "name" and optionally "fields",
    //     "exhaustive" (default false) and "iterations" (default 100)
    // Prints "<preop>: <t>ms".
    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto inputs = p.parse_shapes(v.at("inputs"));
        auto name   = v.at("name").to<std::string>();
        auto preop  = make_op(name);
        if(v.contains("fields"))
            preop.from_value(v.at("fields"));
        bool exhaustive = v.get("exhaustive", false);
        auto prog       = create_preop_program(preop, inputs);
        run_passes(prog, {lowering{}, compile_ops{&ctx, exhaustive}});
        auto op = get_code_object(prog);
        auto t  = time_op(ctx, op, inputs, p.get(v, "iterations", 100));
        std::cout << preop << ": " << t << "ms" << std::endl;
    }
};
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
54
docker/rocm/migraphx/targets/gpu/driver/run_op.cpp
Normal file
54
docker/rocm/migraphx/targets/gpu/driver/run_op.cpp
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
/*
|
||||||
|
* The MIT License (MIT)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
* THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
#include <migraphx/gpu/driver/action.hpp>
|
||||||
|
#include <migraphx/gpu/time_op.hpp>
|
||||||
|
#include <migraphx/gpu/context.hpp>
|
||||||
|
#include <migraphx/make_op.hpp>
|
||||||
|
|
||||||
|
namespace migraphx {
|
||||||
|
inline namespace MIGRAPHX_INLINE_NS {
|
||||||
|
namespace gpu {
|
||||||
|
namespace driver {
|
||||||
|
|
||||||
|
// Driver action: instantiates an operator by name, times it, and prints
// "<op> -> <output shape>: <t>ms".
struct run_op : action<run_op>
{
    // p - parser supplying shape parsing and default settings
    // v - JSON entry; reads "inputs", "name" and optionally "fields" and
    //     "iterations" (default 100). Names without "::" get a "gpu::" prefix.
    static void apply(const parser& p, const value& v)
    {
        context ctx;
        auto inputs          = p.parse_shapes(v.at("inputs"));
        const auto requested = v.at("name").to<std::string>();
        const auto qualified = contains(requested, "::") ? requested : "gpu::" + requested;

        auto op = make_op(qualified);
        if(v.contains("fields"))
        {
            op.from_value(v.at("fields"));
        }

        const auto iters = p.get(v, "iterations", 100);
        auto t           = time_op(ctx, op, inputs, iters);
        std::cout << op << " -> " << op.compute_shape(inputs) << ": " << t << "ms" << std::endl;
    }
};
|
||||||
|
|
||||||
|
} // namespace driver
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace MIGRAPHX_INLINE_NS
|
||||||
|
} // namespace migraphx
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user