#pragma once // @generated by torchgen/gen.py from NativeFunction.h #include #include #include #include #include #include #include #include #include #include namespace at { namespace native { TORCH_API ::std::tuple _native_decoder_only_multi_head_attention_out(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional & mask, const c10::optional & incr_key, const c10::optional & incr_value, bool need_weights, bool average_attn_weights, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, at::Tensor & out3); TORCH_API ::std::tuple native_decoder_only_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const c10::optional & mask={}, const c10::optional & incr_key={}, const c10::optional & incr_value={}, bool need_weights=true, bool average_attn_weights=true); } // namespace native } // namespace at