/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
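
// Parses the ONNX MultiHeadAttention operator (from the com.microsoft contrib
// domain) into plain MIGraphX instructions: the Q, K and V inputs are brought
// to (batch, num_heads, sequence_length, head_size) tensors and the attention
// is lowered to softmax((Q x K^T) * scale) x V, then reshaped back to
// (batch, q_sequence_length, hidden_size_v).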

#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/ranges.hpp>

#include <cmath>
#include <string>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace onnx {
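
// The operator accepts its Q, K and V inputs in four layouts; the ranks and
// packing of the inputs select the format (see check_inputs):
//   q_k_v       - separate 3-D Q, K, V of shape (batch, seq, hidden)
//   q_k_v_cross - separate Q with K and V already shaped
//                 (batch, num_heads, kv_seq, head_size)
//   kv_packed   - K and V packed into one 5-D tensor
//                 (batch, kv_seq, num_heads, 2, head_size)
//   qkv_packed  - Q, K and V packed into one 5-D tensor
//                 (batch, q_seq, num_heads, 3, head_size)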
enum class qkv_format_t
{
    q_k_v = 0,
    q_k_v_cross = 1,
    kv_packed = 2,
    qkv_packed = 3
};
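
// Shape parameters deduced from the inputs during validation. The *_v fields
// track the value tensor separately, since its head and hidden sizes may
// differ from those of the query and key.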
struct multi_head_attention_parameters
{
    int64_t batch_size;
    int64_t q_sequence_length;
    int64_t kv_sequence_length;
    int64_t hidden_size;
    int64_t hidden_size_v;
    int64_t head_size;
    int64_t head_size_v;
    qkv_format_t qkv_format;
};

struct parse_multi_head_attention : op_parser<parse_multi_head_attention>
{
    std::vector<op_desc> operators() const { return {{"MultiHeadAttention"}}; }
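
    // Splits a packed QKV tensor into separate Q, K and V: the packing axis is
    // moved to the front so each component can be sliced off and squeezed away,
    // e.g. (2, 8, 12, 3, 64) -> (3, 2, 8, 12, 64) -> three (2, 8, 12, 64) tensors.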
    void unpack_qkv(const onnx_parser::node_info& info,
                    instruction_ref& query,
                    instruction_ref& key,
                    instruction_ref& value) const
    {
        // (batch_size, q_sequence_length, num_heads, 3, head_size) ->
        // (3, batch_size, q_sequence_length, num_heads, head_size)
        auto qkv_packed =
            info.add_instruction(make_op("transpose", {{"permutation", {3, 0, 1, 2, 4}}}), query);
        query = info.add_instruction(
            make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), qkv_packed);
        query = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), query);
        key = info.add_instruction(
            make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {2}}}), qkv_packed);
        key = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), key);
        value = info.add_instruction(
            make_op("slice", {{"axes", {0}}, {"starts", {2}}, {"ends", {3}}}), qkv_packed);
        value = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), value);
    }
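
    // Same strategy as unpack_qkv, for the two-way packed KV input: move the
    // packing axis to the front, then slice and squeeze out K and V.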
    void unpack_kv(const onnx_parser::node_info& info,
                   instruction_ref& key,
                   instruction_ref& value) const
    {
        // (batch_size, kv_sequence_length, num_heads, 2, head_size) ->
        // (2, batch_size, kv_sequence_length, num_heads, head_size)
        auto kv_packed =
            info.add_instruction(make_op("transpose", {{"permutation", {3, 0, 1, 2, 4}}}), key);
        key = info.add_instruction(
            make_op("slice", {{"axes", {0}}, {"starts", {0}}, {"ends", {1}}}), kv_packed);
        key = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), key);
        value = info.add_instruction(
            make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {2}}}), kv_packed);
        value = info.add_instruction(make_op("squeeze", {{"axes", {0}}}), value);
    }
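
    // Validates input ranks and dimensions, fills in params and selects the
    // qkv_format:
    //   5-D query                 -> qkv_packed (K and V live inside 'query')
    //   3-D query + 5-D key       -> kv_packed  (V lives inside 'key')
    //   3-D query + 4-D key/value -> q_k_v_cross (K/V pre-shaped per head)
    //   3-D query + 3-D key/value -> q_k_v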
    void check_inputs(const std::vector<instruction_ref>& args,
                      const int64_t num_heads,
                      multi_head_attention_parameters& params) const
    {
        if(args.empty() or args.size() > 3)
            MIGRAPHX_THROW("MultiHeadAttention: Wrong number of inputs. Only 'query', 'key' and "
                           "'value' inputs are supported.");

        auto query_dim = args[0]->get_shape().ndim();
        auto query_lens = args[0]->get_shape().lens();

        params.batch_size = query_lens[0];
        params.q_sequence_length = query_lens[1];

        if(query_dim != 3 and query_dim != 5)
            MIGRAPHX_THROW("MultiHeadAttention: Input 'query' rank needs to be 3 or 5, current: " +
                           std::to_string(query_dim));

        if(query_dim == 5)
        {
            if(query_lens[2] != num_heads or query_lens[3] != 3)
                MIGRAPHX_THROW("MultiHeadAttention: Input 'query' shape needs to be (batch_size, "
                               "q_sequence_length, num_heads, 3, head_size) for packed input.");

            params.kv_sequence_length = query_lens[1];
            params.head_size = query_lens[4];
            params.head_size_v = query_lens[4];
            params.hidden_size = num_heads * query_lens[4];
            params.hidden_size_v = num_heads * query_lens[4];
            params.qkv_format = qkv_format_t::qkv_packed;
        }
        else // query_dim == 3
        {
            if(args.size() < 2)
                MIGRAPHX_THROW("MultiHeadAttention: Wrong number of inputs, 'key' is missing.");

            params.hidden_size = query_lens[2];
            params.head_size = query_lens[2] / num_heads;

            auto key_dim = args[1]->get_shape().ndim();
            auto key_lens = args[1]->get_shape().lens();

            if(key_dim < 3 or key_dim > 5)
                MIGRAPHX_THROW(
                    "MultiHeadAttention: Input 'key' rank needs to be 3, 4 or 5, current: " +
                    std::to_string(key_dim));

            if(key_dim == 5)
            {
                if(key_lens[0] != params.batch_size or key_lens[2] != num_heads or
                   key_lens[3] != 2 or key_lens[4] != params.head_size)
                    MIGRAPHX_THROW("MultiHeadAttention: Input 'key' shape needs to be (batch_size, "
                                   "kv_sequence_length, num_heads, 2, head_size)");

                params.kv_sequence_length = key_lens[1];
                params.hidden_size_v = params.hidden_size;
                params.head_size_v = key_lens[4];
                params.qkv_format = qkv_format_t::kv_packed;
            }
            else
            {
                if(args.size() < 3)
                    MIGRAPHX_THROW(
                        "MultiHeadAttention: Wrong number of inputs, 'value' is missing.");

                auto value_dim = args[2]->get_shape().ndim();
                auto value_lens = args[2]->get_shape().lens();

                if(key_dim != value_dim)
                    MIGRAPHX_THROW(
                        "MultiHeadAttention: Input 'key' and 'value' ranks need to be equal.");

                if(key_dim == 3)
                {
                    if(key_lens[0] != params.batch_size or key_lens[2] != params.hidden_size)
                        MIGRAPHX_THROW("MultiHeadAttention: Input 'key' shape needs to be "
                                       "(batch_size, kv_sequence_length, hidden_size)");

                    if(value_lens[0] != params.batch_size or value_lens[1] != key_lens[1])
                        MIGRAPHX_THROW("MultiHeadAttention: Input 'value' shape needs to be "
                                       "(batch_size, kv_sequence_length, hidden_size_v)");

                    params.kv_sequence_length = key_lens[1];
                    params.hidden_size_v = value_lens[2];
                    params.head_size_v = value_lens[2] / num_heads;
                    params.qkv_format = qkv_format_t::q_k_v;
                }
                else // key_dim == 4
                {
                    if(key_lens[0] != params.batch_size or key_lens[1] != num_heads or
                       key_lens[3] != params.head_size)
                        MIGRAPHX_THROW("MultiHeadAttention: Input 'key' shape needs to be "
                                       "(batch_size, num_heads, kv_sequence_length, head_size)");

                    if(value_lens[0] != params.batch_size or value_lens[1] != num_heads or
                       value_lens[2] != key_lens[2])
                        MIGRAPHX_THROW("MultiHeadAttention: Input 'value' shape needs to be "
                                       "(batch_size, num_heads, kv_sequence_length, head_size_v)");

                    params.kv_sequence_length = key_lens[2];
                    params.hidden_size_v = value_lens[3] * num_heads;
                    params.head_size_v = value_lens[3];
                    params.qkv_format = qkv_format_t::q_k_v_cross;
                }
            }
        }
    }
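
    // Emits the attention subgraph. All formats are first normalized to 4-D
    // Q, K, V tensors, then softmax((Q x K^T) * scale) x V is computed per
    // head and the heads are merged back into (batch, q_seq, hidden_size_v).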
    instruction_ref parse(const op_desc& /*opd*/,
                          const onnx_parser& parser,
                          const onnx_parser::node_info& info,
                          const std::vector<instruction_ref>& args) const
    {
        if(not contains(info.attributes, "num_heads"))
            MIGRAPHX_THROW("MultiHeadAttention: num_heads attribute is required");

        int64_t num_heads = parser.parse_value(info.attributes.at("num_heads")).at<int>();

        multi_head_attention_parameters params;
        check_inputs(args, num_heads, params);

        auto query = args[0];
        instruction_ref key;
        instruction_ref value;
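
        // Bring Q, K and V to 4-D tensors; every layout except q_k_v_cross
        // ends up as (batch, seq, num_heads, head_size) here and is moved to
        // the head-major layout by the transpose below.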
        if(params.qkv_format == qkv_format_t::qkv_packed)
        {
            // Packed QKV: (batch_size, q_sequence_length, num_heads, 3, head_size)
            unpack_qkv(info, query, key, value);
        }
        else
        {
            // Query: (batch_size, q_sequence_length, hidden_size)
            std::vector<int64_t> q_dims{
                params.batch_size, params.q_sequence_length, num_heads, params.head_size};
            query = info.add_instruction(make_op("reshape", {{"dims", q_dims}}), query);

            key = args[1];

            if(params.qkv_format == qkv_format_t::kv_packed)
            {
                // Packed KV: (batch_size, kv_sequence_length, num_heads, 2, head_size)
                unpack_kv(info, key, value);
            }
            else
            {
                value = args[2];
                if(params.qkv_format == qkv_format_t::q_k_v)
                {
                    // Key: (batch_size, kv_sequence_length, hidden_size)
                    // Value: (batch_size, kv_sequence_length, hidden_size_v)
                    std::vector<int64_t> k_dims{
                        params.batch_size, params.kv_sequence_length, num_heads, params.head_size};
                    std::vector<int64_t> v_dims{params.batch_size,
                                                params.kv_sequence_length,
                                                num_heads,
                                                params.head_size_v};
                    key = info.add_instruction(make_op("reshape", {{"dims", k_dims}}), key);
                    value = info.add_instruction(make_op("reshape", {{"dims", v_dims}}), value);
                }
            }
        }

        // Target shape: (batch_size, num_heads, sequence_length, head_size)
        std::vector<int64_t> perm{0, 2, 1, 3};
        query = info.add_instruction(make_op("transpose", {{"permutation", perm}}), query);
        if(params.qkv_format != qkv_format_t::q_k_v_cross)
        {
            key = info.add_instruction(make_op("transpose", {{"permutation", perm}}), key);
            value = info.add_instruction(make_op("transpose", {{"permutation", perm}}), value);
        }
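
        // Scaled dot-product attention: softmax((Q x K^T) * scale) x V, with
        // the scale defaulting to 1/sqrt(head_size) unless the 'scale'
        // attribute overrides it.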
        float scale = 1 / std::sqrt(params.head_size);
        if(contains(info.attributes, "scale"))
            scale = parser.parse_value(info.attributes.at("scale")).at<float>();

        auto scale_literal = info.add_literal(
            migraphx::literal{migraphx::shape{query->get_shape().type()}, {scale}});

        auto key_transposed =
            info.add_instruction(make_op("transpose", {{"permutation", {0, 1, 3, 2}}}), key);

        auto result = info.add_instruction(make_op("dot"), query, key_transposed);
        result = info.add_common_op("mul", result, scale_literal);
        result = info.add_instruction(make_op("softmax", {{"axis", -1}}), result);
        result = info.add_instruction(make_op("dot"), result, value);
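
        // Merge the heads back:
        // (batch, heads, q_seq, head_size_v) -> (batch, q_seq, heads, head_size_v)
        // -> (batch, q_seq, hidden_size_v)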
result = info.add_instruction(make_op("transpose", {{"permutation", perm}}), result);
|
|
result = info.add_instruction(
|
|
make_op(
|
|
"reshape",
|
|
{{"dims", {params.batch_size, params.q_sequence_length, params.hidden_size_v}}}),
|
|
result);
|
|
|
|
return result;
|
|
}
|
|
};
|
|
|
|
} // namespace onnx
|
|
} // namespace MIGRAPHX_INLINE_NS
|
|
} // namespace migraphx
|