#include #include #include #include #include #include #include #include namespace at { // Using DistAccumType in accumulate types for distributions. // Note: Ideally we'd be using ATen/AccumulateType.h but looks // like the there is some inconsistency in how accumulate types // are mapped currently, e.g. for the cpu side, float is mapped // to double. template struct DistAccumType { }; #if defined(__CUDACC__) || defined(__HIPCC__) template <> struct DistAccumType { using type = float; }; #endif template <> struct DistAccumType { using type = float; }; template <> struct DistAccumType { using type = float; }; template <> struct DistAccumType { using type = float; }; template <> struct DistAccumType { using type = double; }; template using dist_acctype = typename DistAccumType::type; namespace transformation { /** * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified. * `range` is `to - from` * `base` is `from` */ template C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) { return static_cast(static_cast((val % range) + base)); } /** * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and to=None */ template C10_HOST_DEVICE inline T uniform_int_full_range(V val) { return static_cast(static_cast(val)); } /** * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`. * In order to prevent compiler warnings reported in GitHub issue 46391, T can't be float or double * in this overloaded version */ template C10_HOST_DEVICE inline typename std::enable_if::value), T>::type uniform_int(V val) { if (std::is_same::value) { return static_cast(val & 1); } else if (std::is_same::value) { return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); } else if (std::is_same::value || std::is_same::value) { return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); } else if (std::is_integral::value) { return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); } else { assert(false); return 0; } } /** * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`, * added to fix compiler warnings reported in GitHub issue 46391. T is either float or double in this version. */ template C10_HOST_DEVICE inline typename std::enable_if::value, T>::type uniform_int(V val) { return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); } template C10_HOST_DEVICE inline dist_acctype uniform_real(V val, T from, T to) { constexpr auto MASK = static_cast((static_cast(1) << std::numeric_limits::digits) - 1); constexpr auto DIVISOR = static_cast>(1) / (static_cast(1) << std::numeric_limits::digits); dist_acctype x = (val & MASK) * DIVISOR; return (x * (to - from) + from); } /** * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to * normally distributed with `mean` and standard deviation `std`. */ template C10_HOST_DEVICE inline T normal(T val, T mean, T std) { return val * std + mean; } /** * Transforms uniformly distributed `val` between 0.0 and 1.0 to * Cauchy distribution with location parameter `median` and scale parameter `sigma`. */ template C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) { // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps), // thus we clip those values. constexpr T eps = std::numeric_limits::epsilon(); constexpr T one_minus_eps = 1 - eps; constexpr T zero_plus_eps = 0 + eps; val = (val > one_minus_eps ? one_minus_eps : val); val = (val < zero_plus_eps ? zero_plus_eps : val); return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); } template <> C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) { // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); } /** * Transforms uniformly distributed `val` between 0.0 and 1.0 to * exponentialy distributed with `lambda` parameter of the distribution. */ template C10_HOST_DEVICE __ubsan_ignore_float_divide_by_zero__ inline T exponential(T val, T lambda) { // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates // Different implementations for CUDA and CPU to preserve original logic // TODO: must be investigated and unified!!! // https://github.com/pytorch/pytorch/issues/38662 #if defined(__CUDACC__) || defined(__HIPCC__) // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706 // curand_uniform has (0,1] bounds. log(1) is 0 and exponential excludes 0. // we need log to be not 0, and not underflow when converted to half // fast __logf approximation can underflow, so set log to -epsilon/2 for 1 or close to 1 args auto log = val >= static_cast(1.) - std::numeric_limits::epsilon() / 2 ? -std::numeric_limits::epsilon() / 2 : at::log(val); return static_cast(-1.0) / lambda * log; #else return static_cast(-1.0) / lambda * at::log(static_cast(1.0) - val); #endif } /** * Transforms uniformly distributed `val` between 0.0 and 1.0 to * geometricaly distributed with success probability `p`. */ template C10_HOST_DEVICE inline T geometric(T val, T p) { // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions return static_cast(::ceil(at::log(val) / at::log(static_cast(1.0) - p))); } /** * Transforms normally distributed `val` to log-normally distributed. */ template C10_HOST_DEVICE inline T log_normal(T val) { // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles return at::exp(val); } /** * Transforms uniformly distributed `val` between 0.0 and 1.0 to * bernoulli distributed with success probability `p`. */ template C10_HOST_DEVICE inline T bernoulli(T val, T p) { return val < p; } }} // namespace at::transformation