#pragma once

// This file contains boxing (not unboxing) logic,
// i.e. how to make a vector<IValue> from a set of concrete arguments.

#include <ATen/core/ivalue.h>
#include <ATen/core/stack.h>
#include <ATen/core/boxing/BoxedKernel.h>

#include <c10/core/TensorOptions.h>
#include <c10/util/Metaprogramming.h>

namespace c10 {
namespace impl {

//
// utils
//

// is_mutable_tensor_ref
template <class T> struct is_mutable_tensor_ref : std::false_type {};
template <> struct is_mutable_tensor_ref<at::Tensor&> : std::true_type {};

// is_tuple_of_mutable_tensor_refs
//
template <class T, class Enable = void>
struct is_tuple_of_mutable_tensor_refs : std::false_type {};

template <class T>
struct is_tuple_of_mutable_tensor_refs<T, std::enable_if_t<guts::is_instantiation_of<std::tuple, T>::value, void>>
: guts::typelist::all<is_mutable_tensor_ref, guts::typelist::from_tuple_t<T>>
{};

// has_ivalue_to<T> tests the presence/absence of instance method IValue::to<T>()
//
template <class T, class Enable = void>
struct has_ivalue_to : std::false_type {};

template <class T>
struct has_ivalue_to<T, std::void_t<decltype(std::declval<IValue>().to<T>())>>
: std::true_type
{};

//
// boxing predicates
//

// A boxable arg type is one that IValue has a constructor for.
template <typename T>
using can_box =
  guts::disjunction<
    std::is_constructible<IValue, std::decay_t<T>>,
    // TensorOptions are not directly constructible into IValue,
    // but torch::jit::push knows how to handle them
    std::is_same<TensorOptions, std::decay_t<T>>
  >;

template <typename... Ts>
using can_box_all = guts::conjunction<can_box<Ts>...>;

// an unboxable result is one that can be extracted from an IValue
template <typename T>
using can_unbox =
  guts::conjunction<
    guts::disjunction<
      has_ivalue_to<T>,
      // void returns are ok
      std::is_same<void, T>
    >,
    guts::negation<std::is_lvalue_reference<T>>
  >;

//
// boxArgs - utility for pushing unboxed args onto IValue stack
//
template <class... Args>
torch::jit::Stack boxArgs(Args... args) {
  // TODO Reuse stack vector instead of allocating?
  torch::jit::Stack stack;
  stack.reserve(sizeof...(Args));
  torch::jit::push(stack, std::forward<Args>(args)...);
  return stack;
}

template <class T>
static inline constexpr size_t boxed_size_one() {
  static_assert(!std::is_same<std::decay_t<T>, c10::TensorOptions>::value,
    "need to patch this path to support TensorOptions passed by reference");
  return 1;
}

// torch::jit::push pushes 4 values for a TensorOptions; this needs to
// be kept in sync.
template <>
inline constexpr size_t boxed_size_one<c10::TensorOptions>() {
  return 4;
}

// NOTE: this could probably be simplified with C++17 fold expressions.
template <typename...>
struct BoxedSize : std::integral_constant<size_t, 0> {};

template <class T, class... Args>
struct BoxedSize<T, Args...>
: std::integral_constant<size_t, boxed_size_one<T>() + BoxedSize<Args...>::value> {};

template <class... Args>
static inline constexpr size_t boxed_size() {
  return BoxedSize<Args...>::value;
}

using IValueAlignedStorage = std::aligned_storage_t<sizeof(IValue), alignof(IValue)>;

template <typename T>
C10_ALWAYS_INLINE_UNLESS_MOBILE void boxToStack(IValueAlignedStorage* dest, T& arg, int& lastIdx) {
  new (&dest[lastIdx]) IValue(arg);
  lastIdx++;
}

C10_ALWAYS_INLINE_UNLESS_MOBILE void boxToStack(
    IValueAlignedStorage* dest,
    c10::TensorOptions options,
    int& lastIdx) {
  new (&dest[lastIdx++]) IValue(c10::typeMetaToScalarType(options.dtype()));
  new (&dest[lastIdx++]) IValue(options.layout());
  new (&dest[lastIdx++]) IValue(options.device());
  new (&dest[lastIdx++]) IValue(options.pinned_memory());
}

inline void boxArgsToStack(IValueAlignedStorage*, int&) {}

template <typename T, typename... Args>
C10_ALWAYS_INLINE_UNLESS_MOBILE void boxArgsToStack(IValueAlignedStorage* dest, int& lastIdx, T& arg, Args&... args) {
  boxToStack(dest, arg, lastIdx);
  boxArgsToStack(dest, lastIdx, args...);
}
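// Illustrative sketch, not part of the API surface: the snippet below shows
// what the predicates and size helpers above compute. The argument types and
// variables (`self`, `options`) are hypothetical but representative.
//
//   static_assert(can_box<at::Tensor>::value, "");
//   static_assert(can_box<c10::TensorOptions>::value, "");
//   static_assert(can_unbox<at::Tensor>::value, "");
//   static_assert(!can_unbox<at::Tensor&>::value, "");  // lvalue refs can't be unboxed
//
//   // boxArgs produces the IValue stack a boxed kernel consumes. A
//   // TensorOptions argument expands to four IValues (dtype, layout, device,
//   // pinned_memory), which is exactly what boxed_size() accounts for:
//   static_assert(boxed_size<at::Tensor, double, c10::TensorOptions>() == 6, "");
//   torch::jit::Stack stack = boxArgs(self, 2.0, options);  // stack.size() == 6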
//
// PopResult is a helper class whose specializations handle popping single and
// multiple return values, respectively.
//
template <class Result>
struct PopResult final {
  static Result call(Stack& stack) {
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == 1,
      "Boxed kernel was expected to return one value on the stack, ",
      "but instead pushed ", stack.size(), " values."
    );
    return std::move(stack[0]).to<Result>();
  }
};

template <class... Types>
struct PopResult<std::tuple<Types...>> final {
  using Result = std::tuple<Types...>;

  static Result call(Stack& stack) {
    // for tuple return types, boxed kernel has pushed multiple values onto the stack
    constexpr int RetCount = sizeof...(Types);
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == RetCount,
      "Boxed kernel was expected to return ", RetCount, " values on the stack, ",
      "but instead pushed ", stack.size(), " values."
    );
    return pop_to_tuple_impl(stack, std::make_index_sequence<RetCount>());
  }
private:
  // note: this has been moved into its own helper only to avoid a parse error
  // on `indices` otherwise. I'm sure there's an incantation that slips it past
  // the parser but eh
  template <size_t... indices>
  static Result pop_to_tuple_impl(Stack& stack, std::index_sequence<indices...>) {
    return std::make_tuple((std::move(stack[indices]).to<Types>())...);
  }
};

//
// BoxedKernelWrapper
//
// For a given function type FT, BoxedKernelWrapper<FT> implements
// a `call` method that
// - takes a boxed kernel and unboxed arguments as specified by FT,
// - calls `boxArgs` to box the arguments,
// - calls the boxed kernel, and
// - unboxes and returns the result.
//
// The partial specializations below handle various cases: in
// particular, not all types appearing in op signatures are supported,
// and ops returning references have nonstandard wrapper implementations.
//

// 1. The base specialization of BoxedKernelWrapper should never be instantiated.
// A "no call method defined on BoxedKernelWrapper" compile error means that
// an op signature has failed to trigger any of the partial specializations
// that follow this one.
//
template <class FuncType, class Enable = void>
struct BoxedKernelWrapper {
  // The reason we're not just doing straight up static_assert(false, ...) here:
  // the way to make sure a static_assert only fires if a template is actually
  // instantiated (rather than every time the file is parsed) is to use
  // template parameters in the expression, e.g. FuncType here. And since
  // `sizeof(FuncType) != sizeof(FuncType)` is always false, this has the same
  // effect.
  static_assert(sizeof(FuncType) != sizeof(FuncType),
    "Function signature contains one or more unsupported parameter and/or return types. "
    "Look for a nearby error like "
    "\"'call' is not a member of 'c10::impl::BoxedKernelWrapper<(your function type), void>'\" "
    "- (your function type) is the unsupported signature.");
};

//
// 2. Supported signatures, other than those involving non-const Tensor refs -
// i.e., "functional" ops.
//
template <class Result, class... Args>
struct BoxedKernelWrapper<
  Result(Args...),
  std::enable_if_t<
    can_box_all<Args...>::value && can_unbox<Result>::value && !is_tuple_of_mutable_tensor_refs<Result>::value,
    void
  >
> {
  static Result call(
    const BoxedKernel& boxed_kernel_func,
    const OperatorHandle& opHandle,
    DispatchKeySet dispatchKeySet,
    Args... args
  ) {
    torch::jit::Stack stack = boxArgs<Args...>(std::forward<Args>(args)...);
    boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);

    return guts::if_constexpr<!std::is_same<void, Result>::value>(
      [&] (auto delay_check) {
        // op has pushed one or more values onto the stack.
        return delay_check(PopResult<Result>::call(stack));
      },
      [&] {
        // op returns void, boxed kernel has pushed nothing onto stack.
        TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
          stack.size() == 0,
          "Boxed kernel was expected to return no values on the stack, ",
          "but instead returned ", stack.size(), " values."
        );
      }
    );
  }
};
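// Illustrative sketch, with hypothetical handles and arguments: for a
// functional signature such as `at::Tensor(const at::Tensor&, const at::Tensor&)`,
// the wrapper above boxes both arguments, invokes the boxed kernel, and pops
// the single result off the stack:
//
//   using FnType = at::Tensor(const at::Tensor&, const at::Tensor&);
//   at::Tensor result = BoxedKernelWrapper<FnType>::call(
//       boxed_kernel_func, opHandle, dispatchKeySet, self, other);
//
// Here `boxed_kernel_func`, `opHandle`, and `dispatchKeySet` stand in for
// whatever the dispatcher threads through; the wrapper itself is agnostic to
// which op is being called.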
//
// 3. in-place ops take a single non-const Tensor reference
// as their first argument, and return it.
//
// Note: all signatures matching this pattern are assumed to be for such ops.
// Because of this, the generated BoxedKernelWrapper specializations simply
// return the in-place argument.
//
template <class... OtherArgs>
struct BoxedKernelWrapper<
  at::Tensor&(at::Tensor&, OtherArgs...),
  std::enable_if_t<can_box_all<OtherArgs...>::value, void>
> {
  static at::Tensor& call(
    const BoxedKernel& boxed_kernel_func,
    const OperatorHandle& opHandle,
    DispatchKeySet dispatchKeySet,
    at::Tensor& outArg, OtherArgs... otherArgs
  ) {
    torch::jit::Stack stack = boxArgs<at::Tensor&, OtherArgs...>(outArg, std::forward<OtherArgs>(otherArgs)...);
    boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == 1,
      "Boxed kernel was expected to return a single value on the stack, ",
      "but instead returned ", stack.size(), " values."
    );

    return outArg;
  }
};

//
// 3.5. In-process migration to make in-place ops take and return
// const references instead.
//
template <class... OtherArgs>
struct BoxedKernelWrapper<
  const at::Tensor&(const at::Tensor&, OtherArgs...),
  std::enable_if_t<can_box_all<OtherArgs...>::value, void>
> {
  static const at::Tensor& call(
    const BoxedKernel& boxed_kernel_func,
    const OperatorHandle& opHandle,
    DispatchKeySet dispatchKeySet,
    const at::Tensor& outArg, OtherArgs... otherArgs
  ) {
    torch::jit::Stack stack = boxArgs(outArg, otherArgs...);
    boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == 1,
      "Boxed kernel was expected to return a single value on the stack, ",
      "but instead returned ", stack.size(), " values."
    );

    return outArg;
  }
};

//
// 4. out of place ops that take a single non-const Tensor reference as their
// final argument, and also return it.
//
// Note: all signatures matching this pattern are assumed to be for such ops.
// This assumption permits the generated BoxedKernelWrapper specializations to
// simply return the out argument.
//
template <class FirstArg, class... RestArgs>
struct BoxedKernelWrapper<
  at::Tensor&(FirstArg, RestArgs...),
  std::enable_if_t<
    can_box_all<FirstArg, RestArgs...>::value
    // this skips over in-place kernels with a non-const Tensor
    // arg at the front, so those can unambiguously trigger the preceding specialization.
    && !is_mutable_tensor_ref<FirstArg>::value,
    void
  >
> {
  static at::Tensor& call(
    const BoxedKernel& boxed_kernel_func,
    const OperatorHandle& opHandle,
    DispatchKeySet dispatchKeySet,
    FirstArg firstArg, RestArgs... restArgs
  ) {
    torch::jit::Stack stack = boxArgs<FirstArg, RestArgs...>(
      std::forward<FirstArg>(firstArg), std::forward<RestArgs>(restArgs)...);
    boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == 1,
      "Boxed kernel was expected to return a single value on the stack, ",
      "but instead returned ", stack.size(), " values."
    );

    // reusing restArgs after it has been forwarded here is ok because we know
    // that the last element is of type `Tensor&`.
    return std::get<sizeof...(RestArgs) - 1>(std::tuple<RestArgs...>{restArgs...});
  }
};
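// Illustrative sketch (hypothetical signatures): specialization 3 matches
// in-place-style signatures and returns the first argument, while
// specialization 4 matches out-style signatures and returns the last one.
//
//   // in-place style, e.g. something like add_: handled by 3, returns `self`
//   using InplaceFn = at::Tensor&(at::Tensor&, const at::Tensor&);
//   // out style, e.g. something like add.out: handled by 4, returns `out`
//   using OutFn = at::Tensor&(const at::Tensor&, const at::Tensor&, at::Tensor&);
//
// The `!is_mutable_tensor_ref<FirstArg>` guard on 4 is what keeps the two
// specializations from colliding when the first argument is `at::Tensor&`.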
//
// 5. out of place ops that take multiple non-const Tensor references as their
// final arguments, and return them in a std::tuple.
//
// Note: all signatures matching this pattern are assumed to be for such ops.
// This assumption permits the generated BoxedKernelWrapper specializations to
// simply return the out arguments.
//
template <class Result, class... Args>
struct BoxedKernelWrapper<
  Result(Args...),
  std::enable_if_t<
    can_box_all<Args...>::value && is_tuple_of_mutable_tensor_refs<Result>::value,
    void
  >
> {
  static Result call(
    const BoxedKernel& boxed_kernel_func,
    const OperatorHandle& opHandle,
    DispatchKeySet dispatchKeySet,
    Args... args
  ) {
    using ArgTuple = std::tuple<Args...>;
    constexpr int RetCount = std::tuple_size<Result>();

    torch::jit::Stack stack = boxArgs<Args...>(std::forward<Args>(args)...);
    boxed_kernel_func.callBoxed(opHandle, dispatchKeySet, &stack);
    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      stack.size() == RetCount,
      "Boxed kernel was expected to return ", RetCount, " values on the stack, ",
      "but instead returned ", stack.size(), " values."
    );

    // reusing args after it has been forwarded here is ok because we know
    // that the last RetCount elements are of type `Tensor&`.
    auto result = guts::tuple_take<ArgTuple, -RetCount>(ArgTuple{std::forward<Args>(args)...});
    static_assert(
      std::is_same<Result, decltype(result)>::value,
      "The parameter list of an op returning a tuple of Tensor references "
      "must end with an equal number of Tensor reference parameters."
    );
    return result;
  }
};

} // impl
} // c10
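// Illustrative sketch (hypothetical signature): under specialization 5, a
// signature like
//
//   std::tuple<at::Tensor&, at::Tensor&>(const at::Tensor&, at::Tensor&, at::Tensor&)
//
// has RetCount == 2, so `guts::tuple_take<ArgTuple, -RetCount>` keeps the
// trailing two `at::Tensor&` parameters and returns them as the result tuple;
// the static_assert fires if the trailing parameters don't line up with the
// returned tuple type.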