/* Copyright 1993-2021 NVIDIA Corporation. All rights reserved. * * NOTICE TO LICENSEE: * * The source code and/or documentation ("Licensed Deliverables") are * subject to NVIDIA intellectual property rights under U.S. and * international Copyright laws. * * The Licensed Deliverables contained herein are PROPRIETARY and * CONFIDENTIAL to NVIDIA and are being provided under the terms and * conditions of a form of NVIDIA software license agreement by and * between NVIDIA and Licensee ("License Agreement") or electronically * accepted by Licensee. Notwithstanding any terms or conditions to * the contrary in the License Agreement, reproduction or disclosure * of the Licensed Deliverables to any third party without the express * written consent of NVIDIA is prohibited. * * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THESE LICENSED DELIVERABLES. * * U.S. Government End Users. These Licensed Deliverables are a * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT * 1995), consisting of "commercial computer software" and "commercial * computer software documentation" as such terms are used in 48 * C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government * only as a commercial end item. Consistent with 48 C.F.R.12.212 and * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all * U.S. Government End Users acquire the Licensed Deliverables with * only those rights set forth herein. * * Any use of the Licensed Deliverables in individual and commercial * software must include, in the user documentation and internal * comments to the code, the above Disclaimer and U.S. Government End * Users Notice. */ #ifndef _COOPERATIVE_GROUPS_HELPERS_H_ # define _COOPERATIVE_GROUPS_HELPERS_H_ #include "info.h" #include "sync.h" _CG_BEGIN_NAMESPACE namespace details { #ifdef _CG_CPP11_FEATURES template struct _is_float_or_half : public _CG_STL_NAMESPACE::is_floating_point {}; # ifdef _CG_HAS_FP16_COLLECTIVE template <> struct _is_float_or_half<__half> : public _CG_STL_NAMESPACE::true_type {}; template <> struct _is_float_or_half<__half2> : public _CG_STL_NAMESPACE::true_type {}; # endif template using is_float_or_half = _is_float_or_half::type>; // Non-STL utility templates template using remove_qual = typename _CG_STL_NAMESPACE::remove_cv::type>::type; template using is_op_type_same = _CG_STL_NAMESPACE::is_same, remove_qual >; #endif template _CG_STATIC_QUALIFIER TyTrunc vec3_to_linear(dim3 index, dim3 nIndex) { return ((TyTrunc)index.z * nIndex.y * nIndex.x) + ((TyTrunc)index.y * nIndex.x) + (TyTrunc)index.x; } namespace cta { _CG_STATIC_QUALIFIER void sync() { __barrier_sync(0); } _CG_STATIC_QUALIFIER unsigned int num_threads() { return static_cast(blockDim.x * blockDim.y * blockDim.z); } _CG_STATIC_QUALIFIER unsigned int thread_rank() { return vec3_to_linear(threadIdx, blockDim); } _CG_STATIC_QUALIFIER dim3 group_index() { return dim3(blockIdx.x, blockIdx.y, blockIdx.z); } _CG_STATIC_QUALIFIER dim3 thread_index() { return dim3(threadIdx.x, threadIdx.y, threadIdx.z); } _CG_STATIC_QUALIFIER dim3 dim_threads() { return dim3(blockDim.x, blockDim.y, blockDim.z); } // Legacy aliases _CG_STATIC_QUALIFIER unsigned int size() { return num_threads(); } _CG_STATIC_QUALIFIER dim3 block_dim() { return dim_threads(); } }; class _coalesced_group_data_access { public: // Retrieve mask of coalesced groups template _CG_STATIC_QUALIFIER unsigned int get_mask(const TyGroup &group) { return group.get_mask(); } // Retrieve mask of tiles template