#pragma once

#include <ATen/Config.h>
#include <c10/core/ScalarType.h>
#include <c10/util/Half.h>
#include <c10/util/BFloat16.h>

// Defines the accumulation type for a scalar type.
// Example:
//   using accscalar_t = acc_type<scalar_t, /*is_cuda=*/true>;
//
// Accumulation types are an important concept in numeric computing
// because you frequently want to perform intermediate computations
// at a higher precision than the input and output precision, to avoid
// compounding internal rounding errors. Accumulation is the most
// well-known intermediate computation (it is of great importance for
// sum reduction and matrix multiply, for example), but in PyTorch
// acc_type ends up getting used for all sorts of other intermediate
// computations, so it perhaps would be more accurately (ahem) called an
// "accurate" type. acc_type is especially important for reduced
// precision operations like float16 and bfloat16, where relatively
// benign-looking inputs can easily end up overflowing/underflowing.
//
// acc_type is parametrized by whether you are running on CUDA or not,
// because on CUDA double precision operations are expensive, and so by
// default we don't actually want to use double as an acc_type on CUDA.
// A lot of things are typed out below, but basically the table is
// generated by a few rules:
//
//   If bool:
//       Use 'bool' as acc_type.
//   If floating point:
//       If CUDA, use 'float' as acc_type (unless scalar_t is double),
//       otherwise (CPU) use 'double'
//   If integral:
//       Use 'int64_t' as acc_type
//
// You're not forced to use this template; if you happen to know
// something specific about your use case, you can specify your own
// desired behavior. This template, however, will give you a reasonable
// default that will work for all dtypes supported in PyTorch.
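
// As a minimal usage sketch (sum_impl below is a hypothetical helper, not
// part of this header): a reduction accumulates in acc_type and rounds
// back down to scalar_t only once, at the end.
//
//   template <typename scalar_t>
//   scalar_t sum_impl(const scalar_t* data, int64_t n) {
//     // accumulate at higher precision than scalar_t
//     using accscalar_t = at::acc_type<scalar_t, /*is_cuda=*/false>;
//     accscalar_t sum = accscalar_t(0);
//     for (int64_t i = 0; i < n; ++i) {
//       sum += static_cast<accscalar_t>(data[i]);
//     }
//     // convert back to the output precision exactly once
//     return static_cast<scalar_t>(sum);
//   }
//
// With scalar_t = at::Half this accumulates in float, and with
// scalar_t = float it accumulates in double (on CPU), per the table below.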

#if defined(__CUDACC__)
#include <cuda.h>
#include <cuda_fp16.h>
#elif defined(__HIPCC__)
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
#endif

namespace at {

template <typename T, bool is_cuda>
struct AccumulateType {};

#if defined(__CUDACC__) || defined(__HIPCC__)
template <>
struct AccumulateType<half, true> {
  using type = float;
};
#endif
template <>
struct AccumulateType<BFloat16, true> {
  using type = float;
};
template <>
struct AccumulateType<Half, true> {
  using type = float;
};
template <>
struct AccumulateType<float, true> {
  using type = float;
};
template <>
struct AccumulateType<double, true> {
  using type = double;
};
template <>
struct AccumulateType<int8_t, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<uint8_t, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<char, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<int16_t, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<int32_t, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<int64_t, true> {
  using type = int64_t;
};
template <>
struct AccumulateType<bool, true> {
  using type = bool;
};
template <>
struct AccumulateType<Half, false> {
  using type = float;
};
template <>
struct AccumulateType<BFloat16, false> {
  using type = float;
};
template <>
struct AccumulateType<c10::complex<Half>, false> {
  using type = c10::complex<float>;
};
template <>
struct AccumulateType<c10::complex<float>, false> {
  using type = c10::complex<double>;
};
template <>
struct AccumulateType<c10::complex<double>, false> {
  using type = c10::complex<double>;
};
template <>
struct AccumulateType<c10::complex<Half>, true> {
  using type = c10::complex<float>;
};
template <>
struct AccumulateType<c10::complex<float>, true> {
  using type = c10::complex<float>;
};
template <>
struct AccumulateType<c10::complex<double>, true> {
  using type = c10::complex<double>;
};
template <>
struct AccumulateType<float, false> {
  using type = double;
};
template <>
struct AccumulateType<double, false> {
  using type = double;
};
template <>
struct AccumulateType<int8_t, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<uint8_t, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<char, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<int16_t, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<int32_t, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<int64_t, false> {
  using type = int64_t;
};
template <>
struct AccumulateType<bool, false> {
  using type = bool;
};

template <typename T, bool is_cuda>
using acc_type = typename AccumulateType<T, is_cuda>::type;

TORCH_API c10::ScalarType toAccumulateType(c10::ScalarType type, bool is_cuda);

} // namespace at
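
// Illustrative examples (not part of this header). The compile-time table
// above can be sanity-checked with, e.g.:
//
//   static_assert(std::is_same<at::acc_type<at::Half, true>, float>::value, "");
//   static_assert(std::is_same<at::acc_type<float, false>, double>::value, "");
//
// toAccumulateType is the runtime, ScalarType-level counterpart; the expected
// mapping mirrors the table, e.g. Half with is_cuda=true maps to Float.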