From 88213e81f66f77b57c013ef396fe31d6da26ee08 Mon Sep 17 00:00:00 2001
From: Michael Fabian 'Xaymar' Dirks <info@xaymar.com>
Date: Fri, 20 Mar 2020 23:10:19 +0100
Subject: [PATCH] nvidia-cuda: GPL compatible CUDA library interface

Because the 'nvcuda' library is part of the driver, it falls under the clause of the GPL that allows us to load and interface with system drivers. Since we can't rely on NVIDIA's headers here (incompatible license), most of this was pulled from FFmpeg, and the rest was found out via testing.
---
 .../nvidia/cuda/nvidia-cuda-context-stack.cpp |  48 +++
 .../nvidia/cuda/nvidia-cuda-context-stack.hpp |  34 ++
 source/nvidia/cuda/nvidia-cuda-context.cpp    |  80 ++++
 source/nvidia/cuda/nvidia-cuda-context.hpp    |  45 ++
 source/nvidia/cuda/nvidia-cuda-gs-texture.cpp | 116 ++++++
 source/nvidia/cuda/nvidia-cuda-gs-texture.hpp |  44 ++
 source/nvidia/cuda/nvidia-cuda-memory.cpp     |  47 +++
 source/nvidia/cuda/nvidia-cuda-memory.hpp     |  39 ++
 source/nvidia/cuda/nvidia-cuda-stream.cpp     |  42 ++
 source/nvidia/cuda/nvidia-cuda-stream.hpp     |  35 ++
 source/nvidia/cuda/nvidia-cuda.cpp            | 118 ++++++
 source/nvidia/cuda/nvidia-cuda.hpp            | 386 ++++++++++++++++++
 12 files changed, 1034 insertions(+)
 create mode 100644 source/nvidia/cuda/nvidia-cuda-context-stack.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-context-stack.hpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-context.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-context.hpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-memory.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-memory.hpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-stream.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda-stream.hpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda.cpp
 create mode 100644 source/nvidia/cuda/nvidia-cuda.hpp
diff --git a/source/nvidia/cuda/nvidia-cuda-context-stack.cpp b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp
new file mode 100644
index 0000000..a9c9f20
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-context-stack.cpp
@@ -0,0 +1,48 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda-context-stack.hpp"
+#include <stdexcept>
+
+// Pushes `context` via cuCtxPushCurrent; throws on a null argument or when the push is rejected.
+nvidia::cuda::context_stack::context_stack(std::shared_ptr<::nvidia::cuda::cuda>    cuda,
+										   std::shared_ptr<::nvidia::cuda::context> context)
+	: _cuda(cuda), _ctx(context)
+{
+	if (!cuda) {
+		throw std::invalid_argument("cuda");
+	}
+	if (!context) {
+		throw std::invalid_argument("context");
+	}
+	const ::nvidia::cuda::cu_result status = _cuda->cuCtxPushCurrent(_ctx->get());
+	if (status != ::nvidia::cuda::cu_result::SUCCESS)
+		throw std::runtime_error("Failed to push context.");
+}
+
+// Pops the context pushed by the constructor, but only while it is still the current one.
+nvidia::cuda::context_stack::~context_stack()
+{
+	using namespace ::nvidia::cuda;
+	cu_context_t ctx = nullptr; // Initialized so a failed query can never leave garbage to compare against.
+	if (_cuda->cuCtxGetCurrent(&ctx) != cu_result::SUCCESS)
+		return; // Unknown current context: leave the stack untouched rather than guess.
+	if (ctx == _ctx->get())
+		_cuda->cuCtxPopCurrent(&ctx);
+}
diff --git a/source/nvidia/cuda/nvidia-cuda-context-stack.hpp b/source/nvidia/cuda/nvidia-cuda-context-stack.hpp
new file mode 100644
index 0000000..736527f
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-context-stack.hpp
@@ -0,0 +1,34 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include <memory>
+#include "nvidia-cuda-context.hpp"
+#include "nvidia-cuda.hpp"
+
+namespace nvidia::cuda {
+	class context_stack { // Scope guard: pushes a context when constructed and pops it again when destroyed.
+		std::shared_ptr<::nvidia::cuda::cuda>      _cuda; // Driver API object used for the push/pop calls.
+		std::shared_ptr<::nvidia::cuda::context> _ctx; // The context pushed by this guard.
+
+		public: // NOTE(review): the guard is copyable as declared; copying it is presumably never intended.
+		context_stack(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::shared_ptr<::nvidia::cuda::context> context);
+		~context_stack(); // Pops only if the pushed context is still the current one.
+	};
+} // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-context.cpp b/source/nvidia/cuda/nvidia-cuda-context.cpp
new file mode 100644
index 0000000..e593087
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-context.cpp
@@ -0,0 +1,80 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda-context.hpp"
+#include <stdexcept>
+
+#ifdef WIN32
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable: 4191 4365 4777 5039)
+#endif
+#include <atlutil.h>
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+#endif
+
+// Private base constructor: stores the API object, leaves the context empty and rejects a null `cuda`.
+nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda)
+	: _cuda(cuda), _ctx(nullptr), _has_device(false), _device(0)
+{
+	if (cuda == nullptr) { throw std::invalid_argument("cuda"); }
+}
+
+nvidia::cuda::context::~context()
+{
+	if (_has_device)
+		_cuda->cuDevicePrimaryCtxRelease(_device); // Balances cuDevicePrimaryCtxRetain; a retained context is not ours to destroy.
+	else if (_ctx)
+		_cuda->cuCtxDestroy(_ctx); // Only a non-null context that was not retained from a device gets destroyed.
+}
+
+#ifdef WIN32
+nvidia::cuda::context::context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device) : context(cuda)
+{
+	if (!device)
+		throw std::invalid_argument("device");
+
+	// Get DXGI Device. Held as a raw pointer, so the reference added here is released by hand below.
+	IDXGIDevice* dxgi_device = nullptr; // Don't use ATL::CComPtr
+	if (FAILED(device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast<void**>(&dxgi_device))) || !dxgi_device)
+		throw std::runtime_error("Failed to query DXGI device from device.");
+
+	// Get DXGI Adapter, then drop our DXGI device reference on every path so it cannot leak.
+	ATL::CComPtr<IDXGIAdapter> dxgi_adapter;
+	const HRESULT              adapter_result = dxgi_device->GetAdapter(&dxgi_adapter);
+	dxgi_device->Release();
+	if (FAILED(adapter_result) || !dxgi_adapter)
+		throw std::runtime_error("Failed to get DXGI adapter for device.");
+
+	// Get Device Index
+	if (_cuda->cuD3D11GetDevice(&_device, dxgi_adapter) != cu_result::SUCCESS)
+		throw std::runtime_error("Failed to get device index for device.");
+
+	// Acquire Context; _has_device is set only afterwards so the destructor releases exactly what was retained.
+	if (_cuda->cuDevicePrimaryCtxRetain(&_ctx, _device) != cu_result::SUCCESS)
+		throw std::runtime_error("Failed to acquire primary device context.");
+	_has_device = true;
+}
+#endif
+
+::nvidia::cuda::cu_context_t nvidia::cuda::context::get()
+{
+	return _ctx; // Raw context handle as stored by the constructor; this object keeps responsibility for it.
+}
diff --git a/source/nvidia/cuda/nvidia-cuda-context.hpp b/source/nvidia/cuda/nvidia-cuda-context.hpp
new file mode 100644
index 0000000..3cabc9a
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-context.hpp
@@ -0,0 +1,45 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include <memory>
+#include "nvidia-cuda.hpp"
+
+namespace nvidia::cuda {
+	class context { // Owns one CUDA context handle and gives it back to the driver in the destructor.
+		std::shared_ptr<::nvidia::cuda::cuda> _cuda; // Driver API object; kept so the destructor can still call it.
+		::nvidia::cuda::cu_context_t          _ctx; // Raw context handle, null until a constructor acquires one.
+
+		// Primary Device Context
+		bool         _has_device; // True once a primary context was retained for _device.
+		std::int32_t _device; // Device index filled in by cuD3D11GetDevice.
+
+		private:
+		context(std::shared_ptr<::nvidia::cuda::cuda> cuda); // Shared base constructor; throws on a null `cuda`.
+
+		public:
+		~context();
+
+#ifdef WIN32
+		context(std::shared_ptr<::nvidia::cuda::cuda> cuda, ID3D11Device* device); // Retains the primary context of the device's adapter.
+#endif
+
+		::nvidia::cuda::cu_context_t get(); // Returns the raw handle without transferring ownership.
+	};
+} // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
new file mode 100644
index 0000000..a94f5e9
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.cpp
@@ -0,0 +1,116 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda-gs-texture.hpp"
+#include "obs/gs/gs-helper.hpp"
+
+nvidia::cuda::gstexture::gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std::shared_ptr<gs::texture> texture)
+	: _cuda(cuda), _texture(texture), _resource(), _is_mapped(false), _pointer()
+{
+	// Validate the arguments in the established order: texture first, then cuda.
+	if (!texture) {
+		throw std::invalid_argument("texture");
+	}
+	if (!cuda) {
+		throw std::invalid_argument("cuda");
+	}
+
+	// Keep a gs::context alive for the rest of the constructor (presumably enters the graphics context).
+	auto      gtc         = gs::context{};
+	const int device_type = gs_get_device_type();
+
+	if (device_type == GS_DEVICE_OPENGL) {
+		// ToDo
+	}
+#ifdef WIN32
+	if (device_type == GS_DEVICE_DIRECT3D_11) {
+		// All handled texture types expose their ID3D11Resource the same way; anything else stays null.
+		ID3D11Resource* d3d_resource = nullptr;
+		switch (_texture->get_type()) {
+		case gs::texture::type::Cube:
+		case gs::texture::type::Normal:
+		case gs::texture::type::Volume:
+			d3d_resource = static_cast<ID3D11Resource*>(gs_texture_get_obj(_texture->get_object()));
+			break;
+		}
+
+		if (d3d_resource == nullptr) {
+			throw std::runtime_error("nvidia::cuda::gstexture: Failed to get resource from gs::texture.");
+		}
+
+		// Register the Direct3D11 resource with CUDA, passing 0 as the last argument.
+		const auto status = _cuda->cuGraphicsD3D11RegisterResource(&_resource, d3d_resource, 0);
+		if (status != nvidia::cuda::cu_result::SUCCESS) {
+			throw std::runtime_error("nvidia::cuda::gstexture: Failed to register resource.");
+		}
+	}
+#endif
+}
+
+nvidia::cuda::gstexture::~gstexture()
+{
+	try { unmap(); } catch (...) { /* unmap() throws on failure; escaping a destructor would terminate. */ }
+	if (_resource) { _cuda->cuGraphicsUnregisterResource(_resource); } // Null when nothing was registered.
+}
+
+// Maps the registered resource on `stream` and returns its CUDA array (array index 0, mip level 0).
+// Throws std::invalid_argument for a null stream and std::runtime_error when a driver call fails.
+nvidia::cuda::cu_array_t nvidia::cuda::gstexture::map(std::shared_ptr<nvidia::cuda::stream> stream)
+{
+	// Already mapped: hand back the cached array (the stream argument is ignored in that case).
+	if (_is_mapped) {
+		return _pointer;
+	}
+	if (!stream) // Guards the dereference below; unmap() later dereferences the stored stream as well.
+		throw std::invalid_argument("stream");
+
+	cu_graphics_resource_t resources[] = {_resource};
+	if (_cuda->cuGraphicsMapResources(1, resources, stream->get()) != cu_result::SUCCESS) {
+		throw std::runtime_error("nvidia::cuda::gstexture: Mapping failed.");
+	}
+
+	// Record the mapping before querying the array so that a failure below can be rolled back by unmap().
+	_stream    = stream;
+	_is_mapped = true;
+
+	if (_cuda->cuGraphicsSubResourceGetMappedArray(&_pointer, _resource, 0, 0) != cu_result::SUCCESS) {
+		try { unmap(); } catch (...) {} // Best-effort rollback; report the original failure, not the unmap one.
+		throw std::runtime_error("nvidia::cuda::gstexture: Mapping pointer failed.");
+	}
+
+	return _pointer;
+}
+
+// Reverses map(); does nothing when the resource is not currently mapped.
+// Throws std::runtime_error (leaving the mapped state untouched) if the driver refuses to unmap.
+void nvidia::cuda::gstexture::unmap()
+{
+	if (_is_mapped) {
+		cu_graphics_resource_t mapped[] = {_resource};
+		const auto             status   = _cuda->cuGraphicsUnmapResources(1, mapped, _stream->get());
+		if (status != nvidia::cuda::cu_result::SUCCESS) {
+			throw std::runtime_error("nvidia::cuda::gstexture: Unmapping failed.");
+		}
+
+		// Forget the mapping only after the driver confirmed the unmap.
+		_is_mapped = false;
+		_pointer   = nullptr;
+		_stream.reset();
+	}
+}
diff --git a/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
new file mode 100644
index 0000000..f5ec127
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-gs-texture.hpp
@@ -0,0 +1,44 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include <cstddef>
+#include <memory>
+#include "nvidia-cuda-stream.hpp"
+#include "nvidia-cuda.hpp"
+#include "obs/gs/gs-texture.hpp"
+
+namespace nvidia::cuda {
+	class gstexture { // Registers a gs::texture with CUDA and exposes it as a mappable CUDA array.
+		std::shared_ptr<::nvidia::cuda::cuda> _cuda; // Driver API object used for all calls.
+		std::shared_ptr<gs::texture>          _texture; // Keeps the wrapped texture alive.
+		cu_graphics_resource_t                _resource; // Registration handle; stays null if nothing was registered.
+
+		bool                                  _is_mapped; // True between a successful map() and unmap().
+		cu_array_t                            _pointer; // Array returned by the last successful map().
+		std::shared_ptr<nvidia::cuda::stream> _stream; // Stream used for mapping; reused by unmap().
+
+		public:
+		gstexture(std::shared_ptr<nvidia::cuda::cuda> cuda, std::shared_ptr<gs::texture> texture);
+		~gstexture();
+
+		cu_array_t map(std::shared_ptr<nvidia::cuda::stream> stream); // Returns the cached array if already mapped.
+		void       unmap(); // No-op when not mapped; throws std::runtime_error on failure.
+	};
+} // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-memory.cpp b/source/nvidia/cuda/nvidia-cuda-memory.cpp
new file mode 100644
index 0000000..e84b586
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-memory.cpp
@@ -0,0 +1,47 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda-memory.hpp"
+#include <stdexcept>
+
+nvidia::cuda::memory::memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size) : _cuda(cuda), _pointer(), _size(size)
+{
+	// Allocates `size` bytes through cuMemAlloc; a null API object is rejected like in the sibling wrappers.
+	if (!cuda)
+		throw std::invalid_argument("cuda");
+
+	if (_cuda->cuMemAlloc(&_pointer, size) != ::nvidia::cuda::cu_result::SUCCESS) {
+		throw std::runtime_error("nvidia::cuda::memory: cuMemAlloc failed.");
+	}
+}
+
+nvidia::cuda::memory::~memory()
+{
+	_cuda->cuMemFree(_pointer); // Frees the allocation made in the constructor; the result code is ignored.
+}
+
+nvidia::cuda::cu_device_ptr_t nvidia::cuda::memory::get()
+{
+	return _pointer; // Device pointer filled in by cuMemAlloc; still owned by this object.
+}
+
+std::size_t nvidia::cuda::memory::size()
+{
+	return _size; // The byte count that was requested from cuMemAlloc in the constructor.
+}
diff --git a/source/nvidia/cuda/nvidia-cuda-memory.hpp b/source/nvidia/cuda/nvidia-cuda-memory.hpp
new file mode 100644
index 0000000..bc0bdd4
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-memory.hpp
@@ -0,0 +1,39 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include <cstddef>
+#include <memory>
+#include "nvidia-cuda.hpp"
+
+namespace nvidia::cuda {
+	class memory { // Owns one allocation made with cuMemAlloc and frees it with cuMemFree on destruction.
+		std::shared_ptr<::nvidia::cuda::cuda> _cuda; // Driver API object used to allocate and free.
+		cu_device_ptr_t                       _pointer; // Device pointer returned by cuMemAlloc.
+		size_t                                _size; // Requested allocation size in bytes.
+
+		public:
+		memory(std::shared_ptr<::nvidia::cuda::cuda> cuda, std::size_t size); // Throws std::runtime_error if allocation fails.
+		~memory();
+
+		cu_device_ptr_t get(); // Device pointer; ownership is not transferred.
+
+		std::size_t size(); // Size passed to the constructor.
+	};
+} // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda-stream.cpp b/source/nvidia/cuda/nvidia-cuda-stream.cpp
new file mode 100644
index 0000000..8aac943
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-stream.cpp
@@ -0,0 +1,42 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda-stream.hpp"
+#include <stdexcept>
+
+nvidia::cuda::stream::stream(std::shared_ptr<::nvidia::cuda::cuda> cuda) : _cuda(cuda), _stream(nullptr)
+{
+	// Creates the stream through cuStreamCreate; a null API object is rejected like in the sibling wrappers.
+	if (!cuda)
+		throw std::invalid_argument("cuda");
+
+	if (_cuda->cuStreamCreate(&_stream, 0) != nvidia::cuda::cu_result::SUCCESS) {
+		throw std::runtime_error("Failed to create CUstream object.");
+	}
+}
+
+nvidia::cuda::stream::~stream()
+{
+	_cuda->cuStreamDestroy(_stream); // Destroys the stream created by the constructor; the result code is ignored.
+}
+
+::nvidia::cuda::cu_stream_t nvidia::cuda::stream::get()
+{
+	return _stream; // Raw stream handle; still owned (and later destroyed) by this object.
+}
diff --git a/source/nvidia/cuda/nvidia-cuda-stream.hpp b/source/nvidia/cuda/nvidia-cuda-stream.hpp
new file mode 100644
index 0000000..6bf94b7
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda-stream.hpp
@@ -0,0 +1,35 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include "nvidia-cuda.hpp"
+#include <memory>
+
+namespace nvidia::cuda {
+	class stream { // Owns one stream created with cuStreamCreate and destroys it on destruction.
+		std::shared_ptr<::nvidia::cuda::cuda> _cuda; // Driver API object used to create and destroy the stream.
+		::nvidia::cuda::cu_stream_t _stream; // Raw stream handle filled in by cuStreamCreate.
+
+		public:
+		stream(std::shared_ptr<::nvidia::cuda::cuda> cuda); // Throws std::runtime_error if creation fails.
+		~stream();
+
+		::nvidia::cuda::cu_stream_t get(); // Raw handle; ownership is not transferred.
+	};
+} // namespace nvidia::cuda
diff --git a/source/nvidia/cuda/nvidia-cuda.cpp b/source/nvidia/cuda/nvidia-cuda.cpp
new file mode 100644
index 0000000..c95fc3a
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda.cpp
@@ -0,0 +1,118 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#include "nvidia-cuda.hpp"
+#include <stdexcept>
+#include <util/platform.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#define CUDA_NAME "nvcuda.dll"
+#else
+#define CUDA_NAME "libcuda.so.1"
+#endif
+
+#define CUDA_LOAD_SYMBOL(NAME)                                                             \
+	{ /* void* -> function pointer needs reinterpret_cast; static_cast is ill-formed. */  \
+		NAME = reinterpret_cast<decltype(NAME)>(os_dlsym(_library, #NAME));               \
+		if (!NAME)                                                                        \
+			throw std::runtime_error("Failed to load '" #NAME "' from '" CUDA_NAME "'."); \
+	}
+#define CUDA_LOAD_SYMBOL_V2(NAME)                                                             \
+	{ /* Loads the "_v2" export into NAME; the error names the symbol actually looked up. */ \
+		NAME = reinterpret_cast<decltype(NAME)>(os_dlsym(_library, #NAME "_v2"));            \
+		if (!NAME)                                                                           \
+			throw std::runtime_error("Failed to load '" #NAME "_v2' from '" CUDA_NAME "'."); \
+	}
+#define CUDA_LOAD_SYMBOL_EX(NAME, OVERRIDE)                                                   \
+	{ /* Loads the export called OVERRIDE into NAME; the error names the symbol looked up. */ \
+		NAME = reinterpret_cast<decltype(NAME)>(os_dlsym(_library, #OVERRIDE));               \
+		if (!NAME)                                                                            \
+			throw std::runtime_error("Failed to load '" #OVERRIDE "' from '" CUDA_NAME "'."); \
+	}
+
+nvidia::cuda::cuda::cuda()
+{
+	_library = os_dlopen(CUDA_NAME);
+	if (!_library)
+		throw std::runtime_error("Failed to load '" CUDA_NAME "'.");
+	std::unique_ptr<void, decltype(&os_dlclose)> guard(_library, &os_dlclose); // Closes the library if we throw below.
+
+	// Initialization & Version Management
+	CUDA_LOAD_SYMBOL(cuInit);
+	CUDA_LOAD_SYMBOL(cuDriverGetVersion);
+
+	// Primary Context Management
+	CUDA_LOAD_SYMBOL(cuDevicePrimaryCtxRetain);
+	CUDA_LOAD_SYMBOL_V2(cuDevicePrimaryCtxRelease);
+
+	// Context Management
+	CUDA_LOAD_SYMBOL_V2(cuCtxDestroy);
+	CUDA_LOAD_SYMBOL(cuCtxGetCurrent);
+	CUDA_LOAD_SYMBOL_V2(cuCtxPopCurrent);
+	CUDA_LOAD_SYMBOL_V2(cuCtxPushCurrent);
+	CUDA_LOAD_SYMBOL(cuCtxSetCurrent);
+
+	// Memory Management
+	CUDA_LOAD_SYMBOL_V2(cuArrayGetDescriptor);
+	CUDA_LOAD_SYMBOL_V2(cuMemAlloc);
+	CUDA_LOAD_SYMBOL_V2(cuMemAllocPitch);
+	CUDA_LOAD_SYMBOL_V2(cuMemFree);
+	CUDA_LOAD_SYMBOL_V2(cuMemHostGetDevicePointer);
+	CUDA_LOAD_SYMBOL(cuMemcpy);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpy2D);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpy2DAsync);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoA);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoD);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoH);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyAtoHAsync);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoA);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoD);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoH);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyDtoHAsync);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoA);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoAAsync);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoD);
+	CUDA_LOAD_SYMBOL_V2(cuMemcpyHtoDAsync);
+
+	// Stream Management
+	CUDA_LOAD_SYMBOL(cuStreamCreate);
+	CUDA_LOAD_SYMBOL_V2(cuStreamDestroy);
+	CUDA_LOAD_SYMBOL(cuStreamSynchronize);
+
+	// Graphics Interoperability
+	CUDA_LOAD_SYMBOL(cuGraphicsMapResources);
+	CUDA_LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray);
+	CUDA_LOAD_SYMBOL(cuGraphicsUnmapResources);
+	CUDA_LOAD_SYMBOL(cuGraphicsUnregisterResource);
+#ifdef WIN32
+	// Direct3D11 Interoperability
+	CUDA_LOAD_SYMBOL(cuD3D11GetDevice);
+	CUDA_LOAD_SYMBOL(cuGraphicsD3D11RegisterResource);
+#endif
+
+	// Initialize CUDA; a failed cuInit is reported instead of leaving a half-usable object behind.
+	if (cuInit(0) != cu_result::SUCCESS)
+		throw std::runtime_error("Failed to initialize CUDA.");
+	guard.release(); // Success: from here on the destructor is responsible for closing _library.
+}
+
+nvidia::cuda::cuda::~cuda()
+{
+	os_dlclose(_library); // Closes the library handle that the constructor obtained from os_dlopen.
+}
diff --git a/source/nvidia/cuda/nvidia-cuda.hpp b/source/nvidia/cuda/nvidia-cuda.hpp
new file mode 100644
index 0000000..fb6acdc
--- /dev/null
+++ b/source/nvidia/cuda/nvidia-cuda.hpp
@@ -0,0 +1,386 @@
+/*
+ * Modern effects for a modern Streamer
+ * Copyright (C) 2020 Michael Fabian Dirks
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
+ */
+
+#pragma once
+#include <cstddef>
+#include <functional>
+#include <memory>
+
+#ifdef WIN32
+#pragma warning(push)
+#pragma warning(disable : 4365)
+#include <d3d11.h>
+#include <dxgi.h>
+#pragma warning(pop)
+#endif
+
+#define CUDA_DEFINE_FUNCTION(name, ...)                                   \
+	/* Declares the private type t<name> and the public member <name>. */ \
+	private:                                                              \
+	using t##name = ::nvidia::cuda::cu_result (*)(__VA_ARGS__);           \
+	public:                                                               \
+	t##name name;
+
+namespace nvidia::cuda {
+	enum class cu_result : std::uint32_t { // 32 bits wide like the driver's C result enum, so return values match the ABI.
+		SUCCESS                  = 0,
+		INVALID_VALUE            = 1,
+		OUT_OF_MEMORY            = 2,
+		NOT_INITIALIZED          = 3,
+		DEINITIALIZED            = 4,
+		NO_DEVICE                = 100,
+		INVALID_DEVICE           = 101,
+		INVALID_CONTEXT          = 201,
+		MAP_FAILED               = 205,
+		UNMAP_FAILED             = 206,
+		ARRAY_IS_MAPPED          = 207,
+		ALREADY_MAPPED           = 208,
+		NOT_MAPPED               = 211,
+		INVALID_GRAPHICS_CONTEXT = 219,
+		// Still missing some.
+	};
+
+	enum class cu_memory_type : std::uint32_t { // Kind of memory a copy endpoint in cu_memcpy2d_t refers to.
+		HOST    = 1,
+		DEVICE  = 2,
+		ARRAY   = 3,
+		UNIFIED = 4,
+	};
+
+	enum class cu_array_format : std::uint32_t { // Element format stored in cu_array_descriptor_t::format.
+		UNSIGNED_INT8  = 0x01,
+		UNSIGNED_INT16 = 0x02,
+		UNSIGNED_INT32 = 0x03,
+		SIGNED_INT8    = 0x08,
+		SIGNED_INT16   = 0x09,
+		SIGNED_INT32   = 0x0A,
+		HALF           = 0x10,
+		FLOAT          = 0x20,
+	};
+
+	using cu_array_t             = void*;         // Opaque array handle.
+	using cu_context_t           = void*;         // Opaque context handle.
+	using cu_device_ptr_t        = std::uint64_t; // Device address, stored as a 64-bit integer.
+	using cu_graphics_resource_t = void*;         // Opaque handle of a registered graphics resource.
+	using cu_stream_t            = void*;         // Opaque stream handle.
+
+	struct cu_memcpy2d_t { // Copy description handed to cuMemcpy2D / cuMemcpy2DAsync; field order presumably mirrors the driver struct.
+		size_t          src_x_in_bytes; // Source X offset, in bytes.
+		size_t          src_y; // Source Y offset (presumably in rows; TODO confirm).
+
+		cu_memory_type  src_memory_type; // Presumably selects which of the three source fields below is used.
+		const void*     src_host;
+		cu_device_ptr_t src_device;
+		cu_array_t      src_array;
+		std::size_t     src_pitch; // Presumably bytes per source row; TODO confirm.
+
+		size_t          dst_x_in_bytes; // Destination X offset, in bytes.
+		size_t          dst_y; // Destination Y offset (presumably in rows; TODO confirm).
+
+		cu_memory_type  dst_memory_type; // Presumably selects which of the three destination fields below is used.
+		const void*     dst_host; // NOTE(review): declared const although it names a copy destination; confirm intent.
+		cu_device_ptr_t dst_device;
+		cu_array_t      dst_array;
+		std::size_t     dst_pitch; // Presumably bytes per destination row; TODO confirm.
+
+		std::size_t     width_in_bytes; // Width of the copied region, in bytes.
+		std::size_t     height; // Height of the copied region (presumably in rows; TODO confirm).
+	};
+
+	struct cu_array_descriptor_t { // Filled by cuArrayGetDescriptor, so the layout must match the driver field for field.
+		std::size_t     width;
+		std::size_t     height;
+		cu_array_format format;       // The driver's descriptor stores the format before the channel count.
+		std::uint32_t   num_channels; // Channels per array element.
+	};
+
+	class cuda {
+		private:
+		void* _library;
+
+		public:
+		cuda();
+		~cuda();
+
+		public:
+		// Initialization
+		CUDA_DEFINE_FUNCTION(cuInit, std::int32_t flags);
+
+		// Version Management
+		CUDA_DEFINE_FUNCTION(cuDriverGetVersion, std::int32_t* driverVersion);
+
+		// Device Management
+		// cuDeviceGet
+		// cuDeviceGetAttribute
+		// cuDeviceGetCount
+		// cuDeviceGetLuid
+		// cuDeviceGetName
+		// cuDeviceGetNvSciSyncAttributes
+		// cuDeviceGetUuid
+		// cuDeviceTotalMem_v2
+
+		// Primary Context Management
+		// cuDevicePrimaryCtxGetState
+		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, std::int32_t device);
+		// cuDevicePrimaryCtxReset_v2
+		CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, cu_context_t* ctx, std::int32_t device);
+		// cuDevicePrimaryCtxSetFlags_v2
+
+		// Context Management
+		// cuCtxCreate_v2
+		CUDA_DEFINE_FUNCTION(cuCtxDestroy, cu_context_t ctx);
+		// cuCtxGetApiVersion
+		// cuCtxGetCacheConfig
+		CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, cu_context_t* ctx);
+		// cuCtxGetDevice
+		// cuCtxGetFlags
+		// cuCtxGetLimit
+		// cuCtxGetSharedMemConfig
+		// cuCtxGetStreamPriorityRange
+		CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, cu_context_t* ctx);
+		CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, cu_context_t ctx);
+		// cuCtxSetCacheConfig
+		CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, cu_context_t ctx);
+		// cuCtxSetLimit
+		// cuCtxSetSharedMemConfig
+		// cuCtxSynchronize
+		// UNDOCUMENTED? cuCtxResetPersistingL2Cache
+
+		// Module Management
+		// cuLinkAddData
+		// cuLinkAddFile
+		// cuLinkComplete
+		// cuLinkCreate
+		// cuLinkDestroy
+		// cuModuleGetFunction
+		// cuModuleGetGlobal
+		// cuModuleGetSurfRef
+		// cuModuleGetTexRef
+		// cuModuleLoad
+		// cuModuleLoadData
+		// cuModuleLoadDataEx
+		// cuModuleLoadFatBinary
+		// cuModuleUnload
+
+		// Memory Management
+		// cuArray3DCreate_v2
+		// cuArray3DGetDescriptor_v2
+		// cuArrayCreate_v2
+		// cuArrayDestroy
+		CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, cu_array_descriptor_t* pArrayDescripter, cu_array_t array);
+		// cuArrayGetDescriptor_v2
+		// cuDeviceGetByPCIBusId
+		// cuDeviceGetPCIBusId
+		// cuIpcCloseMemHandle
+		// cuIpcGetEventHandle
+		// cuIpcGetMemHandle
+		// cuIpcOpenEventHandle
+		// cuIpcOpenMemHandle
+		CUDA_DEFINE_FUNCTION(cuMemAlloc, cu_device_ptr_t* ptr, std::size_t bytes);
+		// cuMemAllocHost_v2
+		// cuMemAllocManaged
+		CUDA_DEFINE_FUNCTION(cuMemAllocPitch, cu_device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
+							 std::size_t height, std::uint32_t element_size_bytes);
+		CUDA_DEFINE_FUNCTION(cuMemFree, cu_device_ptr_t ptr);
+		// cuMemFreeHost
+		// cuMemGetAddressRange_v2
+		// cuMemGetInfo_v2
+		// cuMemHostAlloc
+		CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, cu_device_ptr_t* devptr, void* ptr, std::uint32_t flags);
+		// cuMemHostGetFlags
+		// cuMemHostRegister_v2
+		// cuMemHostUnregister
+		CUDA_DEFINE_FUNCTION(cuMemcpy, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t bytes);
+		CUDA_DEFINE_FUNCTION(cuMemcpy2D, const cu_memcpy2d_t* copy);
+		CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const cu_memcpy2d_t* copy, cu_stream_t stream);
+		// cuMemcpy2DUnaligned_v2 / _v2_ptds
+		// cuMemcpy3D_v2 / _v2_ptds
+		// cuMemcpy3DAsync_v2 / _v2_ptsz
+		// cuMemcpy3DPeer / _ptds
+		// cuMemcpy3DPeerAsync_v2 / _v2_ptsz
+		// cuMemcpyAsync / _ptsz
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, cu_array_t dst, std::size_t dstOffset, cu_array_t src, std::size_t srcOffset,
+							 std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, cu_device_ptr_t dst, cu_array_t src, std::size_t srcOffset,
+							 std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, cu_array_t src, std::size_t srcOffset, std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, cu_array_t src, std::size_t srcOffset,
+							 std::size_t byteCount, cu_stream_t stream); // Async variant: the driver expects the stream last.
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, cu_array_t dst, std::size_t dstOffset, cu_device_ptr_t src,
+							 std::size_t byteCount);
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, cu_device_ptr_t dst, cu_device_ptr_t src, std::size_t byteCount); // Both ends are device pointers.
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, cu_device_ptr_t src, std::size_t byteCount); // Source is a device pointer, not an array.
+		CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, cu_device_ptr_t src, std::size_t byteCount, cu_stream_t stream); // Device pointer source; stream last.
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, cu_array_t dst, std::size_t dstOffset, const void* src, std::size_t byteCount); // Host source is const in the driver API.
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, cu_array_t dst, std::size_t dstOffset, const void* src,
+							 std::size_t byteCount, cu_stream_t stream); // Async variant: the driver expects the stream last.
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, cu_device_ptr_t dst, const void* src, std::size_t byteCount); // Host source is const in the driver API.
+		CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, cu_device_ptr_t dst, const void* src, std::size_t byteCount, cu_stream_t stream); // Async variant: stream last.
+		// cuMemcpyPeer / _ptds
+		// cuMemcpyPeerAsync / _ptsz
+		// cuMemsetD16
+		// cuMemsetD16Async
+		// cuMemsetD2D16
+		// cuMemsetD2D16Async
+		// cuMemsetD2D32
+		// cuMemsetD2D32Async
+		// cuMemsetD2D8
+		// cuMemsetD2D8Async
+		// cuMemsetD32
+		// cuMemsetD32Async
+		// cuMemsetD8
+		// cuMemsetD8Async
+		// cuMipmappedArrayCreate
+		// cuMipmappedArrayDestroy
+		// cuMipmappedArrayGetLevel
+
+		// Virtual Memory Management
+		// cuMemAddressFree
+		// cuMemAddressReserve
+		// cuMemCreate
+		// cuMemExportToShareableHandle
+		// cuMemGetAccess
+		// cuMemGetAllocationGranularity
+		// cuMemGetAllocationPropertiesFromHandle
+		// cuMemImportFromShareableHandle
+		// cuMemMap
+		// cuMemRelease
+		// cuMemSetAccess
+		// cuMemUnmap
+
+		// Unified Addressing
+		// cuMemAdvise
+		// cuMemPrefetchAsync
+		// cuMemRangeGetAttribute
+		// cuMemRangeGetAttributes
+		// cuPointerGetAttribute
+		// cuPointerGetAttributes
+		// cuPointerSetAttribute
+
+		// Stream Management
+		// cuStreamAddCallback
+		// cuStreamAttachMemAsync
+		// cuStreamBeginCapture_v2
+		CUDA_DEFINE_FUNCTION(cuStreamCreate, cu_stream_t* stream, std::uint32_t flags);
+		// cuStreamCreateWithPriority
+		CUDA_DEFINE_FUNCTION(cuStreamDestroy, cu_stream_t stream);
+		// cuStreamEndCapture
+		// cuStreamGetCaptureInfo
+		// cuStreamGetCtx
+		// cuStreamGetFlags
+		// cuStreamGetPriority
+		// cuStreamIsCapturing
+		// cuStreamQuery
+		CUDA_DEFINE_FUNCTION(cuStreamSynchronize, cu_stream_t stream);
+		// cuStreamWaitEvent
+		// cuThreadExchangeStreamCaptureMode
+
+		// Event Management
+		// cuEventCreate
+		// cuEventDestroy_v2
+		// cuEventElapsedTime
+		// cuEventQuery
+		// cuEventRecord
+		// cuEventSynchronize
+
+		// External Resource Interoperability
+		// cuDestroyExternalMemory
+		// cuDestroyExternalSemaphore
+		// cuExternalMemoryGetMappedBuffer
+		// cuExternalMemoryGetMappedMipmappedArray
+		// cuImportExternalMemory
+		// cuImportExternalSemaphore
+		// cuSignalExternalSemaphoresAsync
+		// cuWaitExternalSemaphoresAsync
+
+		// Stream Memory Operations
+		// cuStreamBatchMemOp
+		// cuStreamWaitValue32
+		// cuStreamWaitValue64
+		// cuStreamWriteValue32
+		// cuStreamWriteValue64
+
+		// Execution Control
+		// cuFuncGetAttribute
+		// cuFuncSetAttribute
+		// cuFuncSetCacheConfig
+		// cuFuncSetSharedMemConfig
+		// cuLaunchCooperativeKernel
+		// cuLaunchCooperativeKernelMultiDevice
+		// cuLaunchHostFunc
+		// cuLaunchKernel
+
+		// Graph Management
+		// Todo!
+
+		// Occupancy
+		// Todo
+
+		// Texture Object Management
+		// Todo
+
+		// Surface Object Management
+		// Todo
+
+		// Peer Context Memory Access
+		// Todo
+
+		// Graphics Interoperability
+		CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, std::uint32_t count, cu_graphics_resource_t* resources,
+							 cu_stream_t stream);
+		// cuGraphicsResourceGetMappedMipmappedArray
+		// cuGraphicsResourceGetMappedPointer_v2
+		// cuGraphicsResourceSetMapFlags_v2
+		CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, cu_array_t* array, cu_graphics_resource_t resource,
+							 std::uint32_t index, std::uint32_t level);
+		CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, std::uint32_t count, cu_graphics_resource_t* resources,
+							 cu_stream_t stream);
+		CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, cu_graphics_resource_t resource);
+
+		// Profile Control
+		// Todo
+
+		// OpenGL Interoperability
+		// cuGLGetDevices
+		// cuGraphicsGLRegisterBuffer
+		// cuGraphicsGLRegisterImage
+#ifdef WIN32
+		// cuWGLGetDevice
+
+		// Direct3D9 Interoperability
+		// cuD3D9CtxCreate
+		// cuD3D9CtxCreateOnDevice
+		// cuD3D9CtxGetDevice
+		// cuD3D9CtxGetDevices
+		// cuD3D9GetDirect3DDevice
+		// cuGraphicsD3D9RegisterResource
+
+		// Direct3D10 Interoperability
+		// cuD3D10GetDevice
+		// cuD3D10GetDevices
+		// cuGraphicsD3D10RegisterResource
+
+		// Direct3D11 Interoperability
+		CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, std::int32_t* device, IDXGIAdapter* adapter);
+		// cuD3D11GetDevices
+		CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, cu_graphics_resource_t* resource,
+							 ID3D11Resource* d3dresource, std::uint32_t flags);
+#endif
+	};
+} // namespace nvidia::cuda