#pragma once #include #include #include #include // TODO move this to c10 namespace namespace torch { namespace jit { using c10::IValue; using Stack = std::vector; class Operation { template using accepts = std::is_constructible, F&&>; public: template ::value, int> = 0> C10_DEPRECATED_MESSAGE("Please use void(Stack&) to register operator instead.") Operation(F&& raw): op_([raw = std::forward(raw)](Stack& stack) { raw(&stack); }) {} template ::value && !std::is_same, Operation>::value, int> = 0> Operation(F&& op): op_(std::forward(op)) {} Operation(std::nullptr_t) noexcept {} explicit operator bool() const noexcept { return op_ ? true : false; } void operator()(Stack& stack) { op_(stack); } template T* target() noexcept { return op_.target(); } private: std::function op_; }; // An operation with N inputs and M outputs pops the last N inputs off // the stack and pushes its M inputs onto the stack // before: I0, I1, ... IN <- stack.back() // after: O0, O1, ... OM // operations are defined this way so that ownership of inputs can be // transferred to the operation and it can incrementally drop ownership of // tensors when they become unneeded. For large operations, like 'run an entire // subgraph', this functionality is very important for minimizing gpu memory // usage return value is the relative 'offset' to jump to for the next // operation: // pc += 1 + offset // so a return value of 0 goes to the next instruction // treat the last N elements of the stack as a list, looking up // element i static inline IValue& peek(Stack& stack, size_t i, size_t N) { return *(stack.end() - N + i); } static inline IValue& peek(Stack* stack, size_t i, size_t N) { return peek(*stack, i, N); } static inline const IValue& peek(const Stack& stack, size_t i, size_t N) { return *(stack.end() - N + i); } static inline const IValue& peek(const Stack* stack, size_t i, size_t N) { return peek(*stack, i, N); } // treat the last N elements of the stack as a list, looking up the // slice starting at index i and having length len static inline at::ArrayRef peekSlice( const Stack& stack, size_t i, size_t len, size_t N) { return at::ArrayRef(stack).slice(stack.size() - N + i, len); } static inline at::ArrayRef last(const Stack& stack, size_t N) { return peekSlice(stack, 0, N, N); } static inline at::ArrayRef last(const Stack* stack, size_t N) { return last(*stack, N); } static inline void drop(Stack& stack, size_t n) { stack.erase(stack.end() - n, stack.end()); } static inline void drop(Stack* stack, size_t n) { drop(*stack, n); } static inline IValue pop(Stack& stack) { auto r = std::move(stack.back()); stack.pop_back(); return r; } static inline IValue pop(Stack* stack) { return pop(*stack); } static inline std::vector pop(Stack& stack, size_t n) { std::vector result; result.reserve(n); for (const auto i : c10::irange(n)) { result.push_back(std::move(peek(stack, i, n))); } drop(stack, n); return result; } // variadic pop: // int64_t a; at::Tensor b; // pop(stack, a, b); // equivalent to: // b = pop(stack).toTensor(); // a = pop(stack).toInt(); template static inline void pop(Stack& stack, Types&... args) { size_t i = 0; constexpr size_t N = sizeof...(args); (void)std::initializer_list{ (args = std::move(peek(stack, i++, N)).template to(), 0)...}; drop(stack, N); } template static inline void pop(Stack* stack, Types&... args) { pop(*stack, args...); } template static inline void push_one(Stack& stack, Type&& arg) { stack.emplace_back(std::forward(arg)); } static inline void push_one(Stack& stack, c10::TensorOptions options) { stack.emplace_back(c10::typeMetaToScalarType(options.dtype())); stack.emplace_back(options.layout()); stack.emplace_back(options.device()); stack.emplace_back(options.pinned_memory()); } template static inline void push(Stack& stack, Types&&... args) { (void)std::initializer_list{(push_one(stack, std::forward(args)), 0)...}; } template static inline void push(Stack* stack, Types&&... args) { return push(*stack, std::forward(args)...); } template static inline void push_list_elements(Stack& stack, const c10::List& elements) { for (T elem : elements) { stack.push_back(std::move(elem)); } } // The packer here is carefully written not to make any unnecessary // copies. // pack takes the return values of aten functions pushes them onto the stack template inline void pack(Stack& stack, T&& v) { stack.emplace_back(std::forward(v)); } template inline void pack(Stack* stack, T&& v) { pack(*stack, std::forward(v)); } template struct TuplePacker { // NB: *Not* a universal reference. static void execute(Stack& stack, std::tuple&& t) { // NB: The move here does not "destroy" the entire tuple, that is // not what std::move does; only the particular tuple index // processed here gets stolen. pack(stack, std::get(std::move(t))); TuplePacker::execute(stack, std::move(t)); } }; template struct TuplePacker<0, Args...> { static void execute(Stack& /*stack*/, std::tuple&& /*t*/){}; }; template inline void pack(Stack& stack, std::tuple&& t) { TuplePacker::execute(stack, std::move(t)); } } // namespace jit } // namespace torch