diff --git a/source/nvidia/cuda/nvidia-cuda-context-stack.cpp b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp new file mode 100644 index 0000000..a9c9f20 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp @@ -0,0 +1,48 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda-context-stack.hpp" +#include + +nvidia::cuda::context_stack::context_stack(std::shared_ptr<::nvidia::cuda::cuda> cuda, + std::shared_ptr<::nvidia::cuda::context> context) + : _cuda(cuda), _ctx(context) +{ + using namespace ::nvidia::cuda; + + if (!cuda) + throw std::invalid_argument("cuda"); + if (!context) + throw std::invalid_argument("context"); + + if (cu_result res = _cuda->cuCtxPushCurrent(_ctx->get()); res != cu_result::SUCCESS) { + throw std::runtime_error("Failed to push context."); + } +} + +nvidia::cuda::context_stack::~context_stack() +{ + using namespace ::nvidia::cuda; + + cu_context_t ctx; + _cuda->cuCtxGetCurrent(&ctx); + if (ctx == _ctx->get()) { + _cuda->cuCtxPopCurrent(&ctx); + } +} diff --git a/source/nvidia/cuda/nvidia-cuda-context-stack.hpp b/source/nvidia/cuda/nvidia-cuda-context-stack.hpp new file mode 100644 index 0000000..736527f --- /dev/null +++ 
b/source/nvidia/cuda/nvidia-cuda-context-stack.hpp @@ -0,0 +1,34 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once +#include +#include "nvidia-cuda-context.hpp" +#include "nvidia-cuda.hpp" + +namespace nvidia::cuda { + class context_stack { + std::shared_ptr<::nvidia::cuda::cuda> _cuda; + std::shared_ptr<::nvidia::cuda::context> _ctx; + + public: + context_stack(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::shared_ptr<::nvidia::cuda::context> context); + ~context_stack(); + }; +} // namespace nvidia::cuda diff --git a/source/nvidia/cuda/nvidia-cuda-context.cpp b/source/nvidia/cuda/nvidia-cuda-context.cpp new file mode 100644 index 0000000..e593087 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-context.cpp @@ -0,0 +1,80 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda-context.hpp" +#include + +#ifdef WIN32 +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable: 4191 4365 4777 5039) +#endif +#include +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#endif + +nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda) + : _cuda(cuda), _ctx(), _has_device(false), _device() +{ + if (!cuda) + throw std::invalid_argument("cuda"); +} + +nvidia::cuda::context::~context() +{ + if (_has_device) { + _cuda->cuDevicePrimaryCtxRelease(_device); + } + _cuda->cuCtxDestroy(_ctx); +} + +#ifdef WIN32 +nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device) : context(cuda) +{ + using namespace nvidia::cuda; + + if (!device) + throw std::invalid_argument("device"); + // Get DXGI Device + IDXGIDevice* dxgi_device; // Don't use ATL::CComPtr + device->QueryInterface(__uuidof(IDXGIDevice), (void**)&dxgi_device); + + // Get DXGI Adapter + ATL::CComPtr dxgi_adapter; + dxgi_device->GetAdapter(&dxgi_adapter); + + // Get Device Index + if (cu_result res = _cuda->cuD3D11GetDevice(&_device, dxgi_adapter); res != cu_result::SUCCESS) { + throw std::runtime_error("Failed to get device index for device."); + } + + // Acquire Context + if (cu_result res = _cuda->cuDevicePrimaryCtxRetain(&_ctx, _device); res != cu_result::SUCCESS) { + throw std::runtime_error("Failed to acquire primary device context."); + } + _has_device = true; +} +#endif + +::nvidia::cuda::cu_context_t nvidia::cuda::context::get() +{ + 
return _ctx; +} diff --git a/source/nvidia/cuda/nvidia-cuda-context.hpp b/source/nvidia/cuda/nvidia-cuda-context.hpp new file mode 100644 index 0000000..3cabc9a --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-context.hpp @@ -0,0 +1,45 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once +#include +#include "nvidia-cuda.hpp" + +namespace nvidia::cuda { + class context { + std::shared_ptr<::nvidia::cuda::cuda> _cuda; + ::nvidia::cuda::cu_context_t _ctx; + + // Primary Device Context + bool _has_device; + std::int32_t _device; + + private: + context(std::shared_ptr<::nvidia::cuda::cuda> cuda); + + public: + ~context(); + +#ifdef WIN32 + context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device); +#endif + + ::nvidia::cuda::cu_context_t get(); + }; +} // namespace nvidia::cuda diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp new file mode 100644 index 0000000..a94f5e9 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp @@ -0,0 +1,116 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * 
it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda-gs-texture.hpp" +#include "obs/gs/gs-helper.hpp" + +nvidia::cuda::gstexture::gstexture(std::shared_ptr cuda, std::shared_ptr texture) + : _cuda(cuda), _texture(texture), _resource(), _is_mapped(false), _pointer() +{ + if (!texture) + throw std::invalid_argument("texture"); + if (!cuda) + throw std::invalid_argument("cuda"); + + auto gtc = gs::context{}; + int dev_type = gs_get_device_type(); + + if (dev_type == GS_DEVICE_OPENGL) { + // ToDo + } +#ifdef WIN32 + if (dev_type == GS_DEVICE_DIRECT3D_11) { + ID3D11Resource* resource = nullptr; + switch (_texture->get_type()) { + case gs::texture::type::Cube: + case gs::texture::type::Normal: { + resource = static_cast(gs_texture_get_obj(_texture->get_object())); + break; + } + case gs::texture::type::Volume: { + resource = static_cast(gs_texture_get_obj(_texture->get_object())); + break; + } + } + + if (!resource) { + throw std::runtime_error("nvidia::cuda::gstexture: Failed to get resource from gs::texture."); + } + + switch (_cuda->cuGraphicsD3D11RegisterResource(&_resource, resource, 0)) { + case nvidia::cuda::cu_result::SUCCESS: + break; + default: + throw std::runtime_error("nvidia::cuda::gstexture: Failed to register resource."); + } + } +#endif +} + +nvidia::cuda::gstexture::~gstexture() +{ + unmap(); + 
_cuda->cuGraphicsUnregisterResource(_resource); +} + +nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr stream) +{ + if (_is_mapped) { + return _pointer; + } + + cu_graphics_resource_t resources[] = {_resource}; + switch (_cuda->cuGraphicsMapResources(1, resources, stream->get())) { + case nvidia::cuda::cu_result::SUCCESS: + break; + default: + throw std::runtime_error("nvidia::cuda::gstexture: Mapping failed."); + } + + _stream = stream; + _is_mapped = true; + + switch (_cuda->cuGraphicsSubResourceGetMappedArray(&_pointer, _resource, 0, 0)) { + case nvidia::cuda::cu_result::SUCCESS: + break; + default: + unmap(); + throw std::runtime_error("nvidia::cuda::gstexture: Mapping pointer failed."); + } + + return _pointer; +} + +void nvidia::cuda::gstexture::unmap() +{ + if (!_is_mapped) + return; + + cu_graphics_resource_t resources[] = {_resource}; + switch (_cuda->cuGraphicsUnmapResources(1, resources, _stream->get())) { + case nvidia::cuda::cu_result::SUCCESS: + break; + default: + throw std::runtime_error("nvidia::cuda::gstexture: Unmapping failed."); + } + + _is_mapped = false; + _pointer = nullptr; + _stream.reset(); +} diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp new file mode 100644 index 0000000..f5ec127 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp @@ -0,0 +1,44 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once +#include +#include +#include "nvidia-cuda-stream.hpp" +#include "nvidia-cuda.hpp" +#include "obs/gs/gs-texture.hpp" + +namespace nvidia::cuda { + class gstexture { + std::shared_ptr<::nvidia::cuda::cuda> _cuda; + std::shared_ptr _texture; + cu_graphics_resource_t _resource; + + bool _is_mapped; + cu_array_t _pointer; + std::shared_ptr _stream; + + public: + gstexture(std::shared_ptr cuda, std::shared_ptr texture); + ~gstexture(); + + cu_array_t map(std::shared_ptr stream); + void unmap(); + }; +} // namespace nvidia::cuda diff --git a/source/nvidia/cuda/nvidia-cuda-memory.cpp b/source/nvidia/cuda/nvidia-cuda-memory.cpp new file mode 100644 index 0000000..e84b586 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-memory.cpp @@ -0,0 +1,47 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda-memory.hpp" +#include + +nvidia::cuda::memory::memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size) : _cuda(cuda), _pointer(), _size(size) +{ + ::nvidia::cuda::cu_result res = _cuda->cuMemAlloc(&_pointer, size); + switch (res) { + case ::nvidia::cuda::cu_result::SUCCESS: + break; + default: + throw std::runtime_error("nvidia::cuda::memory: cuMemAlloc failed."); + } +} + +nvidia::cuda::memory::~memory() +{ + _cuda->cuMemFree(_pointer); +} + +nvidia::cuda::cu_device_ptr_t nvidia::cuda::memory::get() +{ + return _pointer; +} + +std::size_t nvidia::cuda::memory::size() +{ + return _size; +} diff --git a/source/nvidia/cuda/nvidia-cuda-memory.hpp b/source/nvidia/cuda/nvidia-cuda-memory.hpp new file mode 100644 index 0000000..bc0bdd4 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-memory.hpp @@ -0,0 +1,39 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once +#include +#include +#include "nvidia-cuda.hpp" + +namespace nvidia::cuda { + class memory { + std::shared_ptr<::nvidia::cuda::cuda> _cuda; + cu_device_ptr_t _pointer; + size_t _size; + + public: + memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size); + ~memory(); + + cu_device_ptr_t get(); + + std::size_t size(); + }; +} // namespace nvidia::cuda diff --git a/source/nvidia/cuda/nvidia-cuda-stream.cpp b/source/nvidia/cuda/nvidia-cuda-stream.cpp new file mode 100644 index 0000000..8aac943 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-stream.cpp @@ -0,0 +1,42 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda-stream.hpp" +#include + +nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda) : _cuda(cuda) +{ + nvidia::cuda::cu_result res = _cuda->cuStreamCreate(&_stream, 0); + switch (res) { + case nvidia::cuda::cu_result::SUCCESS: + break; + default: + throw std::runtime_error("Failed to create CUstream object."); + } +} + +nvidia::cuda::stream::~stream() +{ + _cuda->cuStreamDestroy(_stream); +} + +::nvidia::cuda::cu_stream_t nvidia::cuda::stream::get() +{ + return _stream; +} diff --git a/source/nvidia/cuda/nvidia-cuda-stream.hpp b/source/nvidia/cuda/nvidia-cuda-stream.hpp new file mode 100644 index 0000000..6bf94b7 --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda-stream.hpp @@ -0,0 +1,35 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once +#include "nvidia-cuda.hpp" +#include + +namespace nvidia::cuda { + class stream { + std::shared_ptr<::nvidia::cuda::cuda> _cuda; + ::nvidia::cuda::cu_stream_t _stream; + + public: + stream(std::shared_ptr<::nvidia::cuda::cuda> cuda); + ~stream(); + + ::nvidia::cuda::cu_stream_t get(); + }; +} // namespace nvidia::cuda diff --git a/source/nvidia/cuda/nvidia-cuda.cpp b/source/nvidia/cuda/nvidia-cuda.cpp new file mode 100644 index 0000000..c95fc3a --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda.cpp @@ -0,0 +1,118 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "nvidia-cuda.hpp" +#include +#include + +#if defined(_WIN32) || defined(_WIN64) +#define CUDA_NAME "nvcuda.dll" +#else +#define CUDA_NAME "libcuda.so.1" +#endif + +#define CUDA_LOAD_SYMBOL(NAME) \ + { \ + NAME = static_cast(os_dlsym(_library, #NAME)); \ + if (!NAME) \ + throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ + } +#define CUDA_LOAD_SYMBOL_V2(NAME) \ + { \ + NAME = static_cast(os_dlsym(_library, #NAME "_v2")); \ + if (!NAME) \ + throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ + } +#define CUDA_LOAD_SYMBOL_EX(NAME, OVERRIDE) \ + { \ + NAME = static_cast(os_dlsym(_library, #OVERRIDE)); \ + if (!NAME) \ + throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \ + } + +nvidia::cuda::cuda::cuda() +{ + _library = os_dlopen(CUDA_NAME); + if (!_library) + throw std::runtime_error("Failed to load '" CUDA_NAME "'."); + + // Initialization + CUDA_LOAD_SYMBOL(cuInit); + + // Version Management + CUDA_LOAD_SYMBOL(cuDriverGetVersion); + + // Primary Context Management + CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain); + CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease); + + // Context Management + CUDA_LOAD_SYMBOL_V2(cuCtxDestroy); + CUDA_LOAD_SYMBOL(cuCtxGetCurrent); + CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent); + CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent); + CUDA_LOAD_SYMBOL(cuCtxSetCurrent); + + // Memory Management + CUDA_LOAD_SYMBOL_V2(cuArrayGetDescriptor); + CUDA_LOAD_SYMBOL_V2(cuMemAlloc); + CUDA_LOAD_SYMBOL_V2(cuMemAllocPitch); + CUDA_LOAD_SYMBOL_V2(cuMemFree); + CUDA_LOAD_SYMBOL_V2(cuMemHostGetDevicePointer); + CUDA_LOAD_SYMBOL(cuMemcpy); + CUDA_LOAD_SYMBOL_V2(cuMemcpy2D); + CUDA_LOAD_SYMBOL_V2(cuMemcpy2DAsync); + CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoA); + 
CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoD); + CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoH); + CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoHAsync); + CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoA); + CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoD); + CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoH); + CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoHAsync); + CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoA); + CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoAAsync); + CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoD); + CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoDAsync); + + // Stream Managment + CUDA_LOAD_SYMBOL(cuStreamCreate); + CUDA_LOAD_SYMBOL_V2(cuStreamDestroy); + CUDA_LOAD_SYMBOL(cuStreamSynchronize); + + // Graphics Interoperability + CUDA_LOAD_SYMBOL(cuGraphicsMapResources); + CUDA_LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray); + CUDA_LOAD_SYMBOL(cuGraphicsUnmapResources); + CUDA_LOAD_SYMBOL(cuGraphicsUnregisterResource); + +#ifdef WIN32 + // Direct3D11 Interopability + CUDA_LOAD_SYMBOL(cuD3D11GetDevice); + CUDA_LOAD_SYMBOL(cuGraphicsD3D11RegisterResource); +#endif + + // Initialize CUDA + cuInit(0); +} + +nvidia::cuda::cuda::~cuda() +{ + os_dlclose(_library); +} diff --git a/source/nvidia/cuda/nvidia-cuda.hpp b/source/nvidia/cuda/nvidia-cuda.hpp new file mode 100644 index 0000000..fb6acdc --- /dev/null +++ b/source/nvidia/cuda/nvidia-cuda.hpp @@ -0,0 +1,386 @@ +/* + * Modern effects for a modern Streamer + * Copyright (C) 2020 Michael Fabian Dirks + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
// File: source/nvidia/cuda/nvidia-cuda.hpp
// Modern effects for a modern Streamer - Copyright (C) 2020 Michael Fabian Dirks
// Licensed under the GNU General Public License, version 2 or (at your option) any later version.

#pragma once
#include <cstddef>
#include <cstdint>
#include <memory>

#ifdef WIN32
#pragma warning(push)
#pragma warning(disable : 4365)
// NOTE(review): both header names were lost in extraction; these two provide
// ID3D11Device/ID3D11Resource and IDXGIAdapter used below - confirm.
#include <d3d11.h>
#include <dxgi.h>
#pragma warning(pop)
#endif

// Declares, inside class cuda, a private function-pointer type `t<name>`
// returning cu_result and a public member `name` of that type. The member
// starts out null and is filled in by the constructor. Note that the access
// level is `public` after every use of the macro.
// NOTE(review): the driver API uses __stdcall on Windows (CUDAAPI); that only
// matters for 32-bit builds - confirm whether those are supported.
#define CUDA_DEFINE_FUNCTION(name, ...)                        \
	private:                                                   \
	typedef ::nvidia::cuda::cu_result (*t##name)(__VA_ARGS__); \
                                                               \
	public:                                                    \
	t##name name = nullptr;

namespace nvidia::cuda {
	// Mirrors the driver's CUresult, which is a C enum (32 bits wide). The
	// underlying type must have the same width: with a 64-bit type the upper
	// half of the return register, which the callee does not define, would
	// become part of the compared value.
	enum class cu_result : std::uint32_t {
		SUCCESS                  = 0,
		INVALID_VALUE            = 1,
		OUT_OF_MEMORY            = 2,
		NOT_INITIALIZED          = 3,
		DEINITIALIZED            = 4,
		NO_DEVICE                = 100,
		INVALID_DEVICE           = 101,
		INVALID_CONTEXT          = 201,
		MAP_FAILED               = 205,
		UNMAP_FAILED             = 206,
		ARRAY_IS_MAPPED          = 207,
		ALREADY_MAPPED           = 208,
		NOT_MAPPED               = 211,
		INVALID_GRAPHICS_CONTEXT = 219,
		// Still missing some.
	};

	enum class cu_memory_type : std::uint32_t {
		HOST    = 1,
		DEVICE  = 2,
		ARRAY   = 3,
		UNIFIED = 4,
	};

	enum class cu_array_format : std::uint32_t {
		UNSIGNED_INT8  = 0b00000001,
		UNSIGNED_INT16 = 0b00000010,
		UNSIGNED_INT32 = 0b00000011,
		SIGNED_INT8    = 0b00001000,
		SIGNED_INT16   = 0b00001001,
		SIGNED_INT32   = 0b00001010,
		HALF           = 0b00010000,
		FLOAT          = 0b00100000,
	};

	// Opaque driver handles.
	typedef void*         cu_array_t;
	typedef void*         cu_context_t;
	typedef std::uint64_t cu_device_ptr_t;
	typedef void*         cu_graphics_resource_t;
	typedef void*         cu_stream_t;

	// Parameter block for cuMemcpy2D / cuMemcpy2DAsync. Field order is part of
	// the binary interface with the driver and must not be changed.
	struct cu_memcpy2d_t {
		std::size_t src_x_in_bytes;
		std::size_t src_y;

		cu_memory_type  src_memory_type;
		const void*     src_host;
		cu_device_ptr_t src_device;
		cu_array_t      src_array;
		std::size_t     src_pitch;

		std::size_t dst_x_in_bytes;
		std::size_t dst_y;

		cu_memory_type  dst_memory_type;
		const void*     dst_host;
		cu_device_ptr_t dst_device;
		cu_array_t      dst_array;
		std::size_t     dst_pitch;

		std::size_t width_in_bytes;
		std::size_t height;
	};

	// Filled in by cuArrayGetDescriptor. Field order follows the driver's
	// CUDA_ARRAY_DESCRIPTOR (Width, Height, Format, NumChannels); with the two
	// trailing fields swapped the values would be read from the wrong slots.
	struct cu_array_descriptor_t {
		std::size_t     width;
		std::size_t     height;
		cu_array_format format;
		std::uint32_t   num_channels;
	};

	/** Function table for the CUDA driver API.
	 *
	 * The constructor (defined in the matching .cpp) loads the driver library
	 * and resolves every member declared with CUDA_DEFINE_FUNCTION; the
	 * commented-out names list driver functions that are not wrapped (yet).
	 */
	class cuda {
		private:
		void* _library;

		public:
		cuda();
		~cuda();

		public:
		// Initialization
		CUDA_DEFINE_FUNCTION(cuInit, std::int32_t flags);

		// Version Management
		CUDA_DEFINE_FUNCTION(cuDriverGetVersion, std::int32_t* driverVersion);

		// Device Management
		// cuDeviceGet
		// cuDeviceGetAttribute
		// cuDeviceGetCount
		// cuDeviceGetLuid
		// cuDeviceGetName
		// cuDeviceGetNvSciSyncAttributes
		// cuDeviceGetUuid
		// cuDeviceTotalMem_v2

		// Primary Context Management
		// cuDevicePrimaryCtxGetState
		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, std::int32_t device);
		// cuDevicePrimaryCtxReset_v2
		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, std::int32_t device);
		// cuDevicePrimaryCtxSetFlags_v2

		// Context Management
		// cuCtxCreate_v2
		CUDA_DEFINE_FUNCTION(cuCtxDestroy, cu_context_t ctx);
		// cuCtxGetApiVersion
		// cuCtxGetCacheConfig
		CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, cu_context_t* ctx);
		// cuCtxGetDevice
		// cuCtxGetFlags
		// cuCtxGetLimit
		// cuCtxGetSharedMemConfig
		// cuCtxGetStreamPriorityRange
		CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx);
		CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx);
		// cuCtxSetCacheConfig
		CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, cu_context_t ctx);
		// cuCtxSetLimit
		// cuCtxSetSharedMemConfig
		// cuCtxSynchronize
		// UNDOCUMENTED? cuCtxResetPersistingL2Cache

		// Module Management
		// cuLinkAddData
		// cuLinkAddFile
		// cuLinkComplete
		// cuLinkCreate
		// cuLinkDestroy
		// cuModuleGetFunction
		// cuModuleGetGlobal
		// cuModuleGetSurfRef
		// cuModuleGetTexRef
		// cuModuleLoad
		// cuModuleLoadData
		// cuModuleLoadDataEx
		// cuModuleLoadFatBinary
		// cuModuleUnload

		// Memory Management
		// cuArray3DCreate_v2
		// cuArray3DGetDescriptor_v2
		// cuArrayCreate_v2
		// cuArrayDestroy
		CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, cu_array_descriptor_t* pArrayDescriptor, cu_array_t array);
		// cuArrayGetDescriptor_v2
		// cuDeviceGetByPCIBusId
		// cuDeviceGetPCIBusId
		// cuIpcCloseMemHandle
		// cuIpcGetEventHandle
		// cuIpcGetMemHandle
		// cuIpcOpenEventHandle
		// cuIpcOpenMemHandle
		CUDA_DEFINE_FUNCTION(cuMemAlloc, cu_device_ptr_t* ptr, std::size_t bytes);
		// cuMemAllocHost_v2
		// cuMemAllocManaged
		CUDA_DEFINE_FUNCTION(cuMemAllocPitch, cu_device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
							 std::size_t height, std::uint32_t element_size_bytes);
		CUDA_DEFINE_FUNCTION(cuMemFree, cu_device_ptr_t ptr);
		// cuMemFreeHost
		// cuMemGetAddressRange_v2
		// cuMemGetInfo_v2
		// cuMemHostAlloc
		CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, cu_device_ptr_t* devptr, void* ptr, std::uint32_t flags);
		// cuMemHostGetFlags
		// cuMemHostRegister_v2
		// cuMemHostUnregister
		CUDA_DEFINE_FUNCTION(cuMemcpy, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t bytes);
		CUDA_DEFINE_FUNCTION(cuMemcpy2D, const cu_memcpy2d_t* copy);
		CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const cu_memcpy2d_t* copy, cu_stream_t stream);
		// cuMemcpy2DUnaligned_v2 / _v2_ptds
		// cuMemcpy3D_v2 / _v2_ptds
		// cuMemcpy3DAsync_v2 / _v2_ptsz
		// cuMemcpy3DPeer / _ptds
		// cuMemcpy3DPeerAsync_v2 / _v2_ptsz
		// cuMemcpyAsync / _ptsz
		CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, cu_array_t dst, std::size_t dstOffset, cu_array_t src, std::size_t srcOffset,
							 std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, cu_device_ptr_t dst, cu_array_t src, std::size_t srcOffset,
							 std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, cu_array_t src, std::size_t srcOffset, std::size_t byteCount);
		// NOTE(review): the driver's cuMemcpyAtoHAsync takes a trailing CUstream
		// argument that is missing here - verify against the Driver API before use.
		CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, cu_array_t src, std::size_t srcOffset,
							 std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, cu_array_t dst, std::size_t dstOffset, cu_device_ptr_t src,
							 std::size_t byteCount);
		// NOTE(review): the driver's cuMemcpyDtoD / cuMemcpyDtoH / cuMemcpyDtoHAsync
		// take a device pointer (CUdeviceptr) as source, not an array handle, and
		// cuMemcpyDtoHAsync additionally takes a trailing CUstream - verify before use.
		CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, cu_device_ptr_t dst, cu_array_t srcArray, std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, cu_array_t src, std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, cu_array_t src, std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, cu_array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
		// NOTE(review): the driver's cuMemcpyHtoAAsync / cuMemcpyHtoDAsync take a
		// trailing CUstream argument that is missing here - verify before use.
		CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, cu_array_t dst, std::size_t dstOffset, void* src,
							 std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, cu_device_ptr_t dst, void* src, std::size_t byteCount);
		CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, cu_device_ptr_t dst, void* src, std::size_t byteCount);
		// cuMemcpyPeer / _ptds
		// cuMemcpyPeerAsync / _ptsz
		// cuMemsetD16
		// cuMemsetD16Async
		// cuMemsetD2D16
		// cuMemsetD2D16Async
		// cuMemsetD2D32
		// cuMemsetD2D32Async
		// cuMemsetD2D8
		// cuMemsetD2D8Async
		// cuMemsetD32
		// cuMemsetD32Async
		// cuMemsetD8
		// cuMemsetD8Async
		// cuMipmappedArrayCreate
		// cuMipmappedArrayDestroy
		// cuMipmappedArrayGetLevel

		// Virtual Memory Management
		// cuMemAddressFree
		// cuMemAddressReserve
		// cuMemCreate
		// cuMemExportToShareableHandle
		// cuMemGetAccess
		// cuMemGetAllocationGranularity
		// cuMemGetAllocationPropertiesFromHandle
		// cuMemImportFromShareableHandle
		// cuMemMap
		// cuMemRelease
		// cuMemSetAccess
		// cuMemUnmap

		// Unified Addressing
		// cuMemAdvise
		// cuMemPrefetchAsync
		// cuMemRangeGetAttribute
		// cuMemRangeGetAttributes
		// cuPointerGetAttribute
		// cuPointerGetAttributes
		// cuPointerSetAttribute

		// Stream Management
		// cuStreamAddCallback
		// cuStreamAttachMemAsync
		// cuStreamBeginCapture_v2
		CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, std::uint32_t flags);
		// cuStreamCreateWithPriority
		CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream);
		// cuStreamEndCapture
		// cuStreamGetCaptureInfo
		// cuStreamGetCtx
		// cuStreamGetFlags
		// cuStreamGetPriority
		// cuStreamIsCapturing
		// cuStreamQuery
		CUDA_DEFINE_FUNCTION(cuStreamSynchronize, cu_stream_t stream);
		// cuStreamWaitEvent
		// cuThreadExchangeStreamCaptureMode

		// Event Management
		// cuEventCreate
		// cuEventDestroy_v2
		// cuEventElapsedTime
		// cuEventQuery
		// cuEventRecord
		// cuEventSynchronize

		// External Resource Interoperability
		// cuDestroyExternalMemory
		// cuDestroyExternalSemaphore
		// cuExternalMemoryGetMappedBuffer
		// cuExternalMemoryGetMappedMipmappedArray
		// cuImportExternalMemory
		// cuImportExternalSemaphore
		// cuSignalExternalSemaphoresAsync
		// cuWaitExternalSemaphoresAsync

		// Stream Memory Operations
		// cuStreamBatchMemOp
		// cuStreamWaitValue32
		// cuStreamWaitValue64
		// cuStreamWriteValue32
		// cuStreamWriteValue64

		// Execution Control
		// cuFuncGetAttribute
		// cuFuncSetAttribute
		// cuFuncSetCacheConfig
		// cuFuncSetSharedMemConfig
		// cuLaunchCooperativeKernel
		// cuLaunchCooperativeKernelMultiDevice
		// cuLaunchHostFunc
		// cuLaunchKernel

		// Graph Management
		// Todo!

		// Occupancy
		// Todo

		// Texture Object Management
		// Todo

		// Surface Object Management
		// Todo

		// Peer Context Memory Access
		// Todo

		// Graphics Interoperability
		CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, cu_graphics_resource_t* resources,
							 cu_stream_t stream);
		// cuGraphicsResourcesGetMappedMipmappedArray
		// cuGraphicsResourcesGetMappedPointer_v2
		// cuGraphicsResourcesSetMapFlags_v2
		CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, cu_array_t* array, cu_graphics_resource_t resource,
							 std::uint32_t index, std::uint32_t level);
		CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, cu_graphics_resource_t* resources,
							 cu_stream_t stream);
		CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, cu_graphics_resource_t resource);

		// Profile Control
		// Todo

		// OpenGL Interoperability
		// cuGLGetDevices
		// cuGraphicsGLRegisterBuffer
		// cuGraphicsGLRegisterImage
#ifdef WIN32
		// cuWGLGetDevice

		// Direct3D9 Interoperability
		// cuD3D9CtxCreate
		// cuD3D9CtxCreateOnDevice
		// cuD3D9CtxGetDevice
		// cuD3D9CtxGetDevices
		// cuD3D9GetDirect3DDevice
		// cuGraphicsD3D9RegisterResource

		// Direct3D10 Interoperability
		// cuD3D10GetDevice
		// cuD3D10GetDevices
		// cuGraphicsD3D10RegisterResource

		// Direct3D11 Interoperability
		CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, std::int32_t* device, IDXGIAdapter* adapter);
		// cuD3D11GetDevices
		CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, cu_graphics_resource_t* resource,
							 ID3D11Resource* d3dresource, std::uint32_t flags);
#endif
	};
} // namespace nvidia::cuda