obs-streamFX/source/nvidia/cuda/nvidia-cuda.hpp

362 lines
12 KiB
C++

/*
* Modern effects for a modern Streamer
* Copyright (C) 2020 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include <cstddef>
#include <cstdint>
#include <exception>
#include <memory>
#include <tuple>
#include "util/util-bitmask.hpp"
#include "util/util-library.hpp"
#ifdef WIN32
#pragma warning(push)
#pragma warning(disable : 4365)
#pragma warning(disable : 5204)
#include <d3d11.h>
#include <dxgi.h>
#pragma warning(pop)
#endif
// Declares one dynamically resolved driver entry point inside a class body.
//
// For a given `name` the expansion produces:
//   - a private alias `t<name>`: pointer to a function taking the supplied
//     parameter list and returning ::streamfx::nvidia::cuda::result;
//   - a public data member `name` of that pointer type, initialised to nullptr.
//
// The expansion ends in `public:` access, so whatever follows an invocation in
// the enclosing class is public unless another access specifier intervenes.
#define P_CUDA_DEFINE_FUNCTION(name, ...) \
private: \
    using t##name = ::streamfx::nvidia::cuda::result (*)(__VA_ARGS__); \
\
public: \
    t##name name = nullptr;
namespace streamfx::nvidia::cuda {
/** Status code returned by every function pointer declared through
 * P_CUDA_DEFINE_FUNCTION.
 *
 * The underlying type is a 32-bit integer on purpose: this enum is used as the
 * return type of function pointers that are bound to C functions returning the
 * driver's `CUresult`, which is a plain C enum (4 bytes). Backing the enum with
 * std::size_t made the caller read 8 bytes of return value on 64-bit targets,
 * of which the callee only defines the lower 4.
 *
 * The list of enumerators is intentionally incomplete; values not listed here
 * can still be returned by the driver and compared numerically.
 */
enum class result : uint32_t {
    SUCCESS                  = 0,
    INVALID_VALUE            = 1,
    OUT_OF_MEMORY            = 2,
    NOT_INITIALIZED          = 3,
    DEINITIALIZED            = 4,
    NO_DEVICE                = 100,
    INVALID_DEVICE           = 101,
    INVALID_CONTEXT          = 201,
    MAP_FAILED               = 205,
    UNMAP_FAILED             = 206,
    ARRAY_IS_MAPPED          = 207,
    ALREADY_MAPPED           = 208,
    NOT_MAPPED               = 211,
    INVALID_GRAPHICS_CONTEXT = 219,
    // Still missing some.
};
// Kind of memory a copy endpoint refers to; stored in the
// src_memory_type / dst_memory_type fields of memcpy2d_v2_t.
enum class memory_type : uint32_t {
    HOST    = 0x01,
    DEVICE  = 0x02,
    ARRAY   = 0x03,
    UNIFIED = 0x04,
};
// Element format of an array; reported through array_descriptor_v2_t::format.
// The numeric values are fixed and must not be renumbered.
enum class array_format : uint32_t {
    UNSIGNED_INT8  = 0x01,
    UNSIGNED_INT16 = 0x02,
    UNSIGNED_INT32 = 0x03,
    SIGNED_INT8    = 0x08,
    SIGNED_INT16   = 0x09,
    SIGNED_INT32   = 0x0A,
    HALF           = 0x10,
    FLOAT          = 0x20,
};
// Flags accepted by cuCtxCreate and cuDevicePrimaryCtxSetFlags.
// Every non-zero enumerator occupies its own bit; bitwise operators for this
// enum are enabled at the end of this header.
enum class context_flags : uint32_t {
    SCHEDULER_AUTO                 = 0,
    SCHEDULER_SPIN                 = 1u << 0,
    SCHEDULER_YIELD                = 1u << 1,
    SCHEDULER_BLOCKING_SYNC        = 1u << 2,
    MAP_HOST                       = 1u << 3,
    LOCAL_MEMORY_RESIZE_TO_MAXIMUM = 1u << 4,
};
// Discriminator stored in external_memory_handle_info_v1_t::type.
// Values are consecutive starting at zero and are written out explicitly
// because they are part of a binary interface.
enum class external_memory_handle_type : uint32_t {
    INVALID                      = 0x0,
    FILE_DESCRIPTOR              = 0x1,
    WIN32_SHARED_HANDLE          = 0x2,
    WIN32_GLOBAL_SHARED_HANDLE   = 0x3,
    D3D12_HEAP                   = 0x4,
    D3D12_RESOURCE               = 0x5,
    D3D11_SHARED_RESOURCE        = 0x6,
    D3D11_GLOBAL_SHARED_RESOURCE = 0x7,
    NVSCIBUF                     = 0x8,
};
// Flags accepted by cuStreamCreate and cuStreamCreateWithPriority.
// Bitwise operators for this enum are enabled at the end of this header.
enum class stream_flags : uint32_t {
    DEFAULT      = 0,
    NON_BLOCKING = 1u << 0,
};
// Handle and scalar aliases used in the entry-point signatures below.
// Pointer-typed handles are opaque: this header never dereferences them.
using array_t             = void*;
using context_t           = void*;
using device_ptr_t        = uint64_t; // integer-typed, unlike the handles above
using external_memory_t   = void*;
using graphics_resource_t = void*;
using stream_t            = void*;
using device_t            = int32_t;
// Parameter block for the 2D copy entry points; passed by const pointer to
// cuMemcpy2D and cuMemcpy2DAsync.
//
// Field order and types form a binary layout shared with the driver, so do not
// reorder, insert or retype members.
//
// Source and destination are described symmetrically: an offset (x in bytes,
// y), a memory_type, one slot per kind of location (host pointer, device
// pointer, array) and a pitch.
// NOTE(review): presumably the *_memory_type field selects which of the three
// location slots is used and the others are ignored — confirm against the
// driver API documentation.
struct memcpy2d_v2_t {
// --- Source ---
std::size_t src_x_in_bytes;
std::size_t src_y;
memory_type src_memory_type;
const void* src_host;
device_ptr_t src_device;
array_t src_array;
std::size_t src_pitch;
// --- Destination ---
std::size_t dst_x_in_bytes;
std::size_t dst_y;
memory_type dst_memory_type;
// NOTE(review): declared const although it names the copy destination; the
// driver header presumably uses a non-const pointer here — confirm.
const void* dst_host;
device_ptr_t dst_device;
array_t dst_array;
std::size_t dst_pitch;
// --- Extent of the copied region ---
std::size_t width_in_bytes;
std::size_t height;
};
// Description of an array; cuArrayGetDescriptor takes a pointer to one of
// these together with the array handle.
// Field order and types form a binary layout shared with the driver.
struct array_descriptor_v2_t {
std::size_t width;
std::size_t height;
uint32_t num_channels;
// Element format, see array_format.
array_format format;
};
// Offset/size/flags record for a buffer inside an external memory object.
// No entry point visible in this header takes this type yet.
// NOTE(review): layout presumably mirrors the driver's external memory buffer
// descriptor — confirm against cuda.h before use.
struct external_memory_buffer_info_v1_t {
uint64_t offset;
uint64_t size;
uint32_t flags;
// Padding words that belong to the layout; not interpreted by this header.
uint32_t reserved[16];
};
// Describes an external memory handle: a type tag, a union holding the handle
// in one of three representations, followed by size, flags and reserved words.
// No entry point visible in this header takes this type yet.
// NOTE(review): presumably `type` determines which union member is valid —
// confirm against the driver API documentation.
struct external_memory_handle_info_v1_t {
external_memory_handle_type type;
union {
// Integer handle representation.
int32_t file;
// Handle/name pair. The unnamed struct inside the union is a widely
// supported compiler extension rather than ISO C++.
struct {
void* handle;
const void* name;
};
// Pointer handle representation.
const void* nvscibuf;
};
uint64_t size;
uint32_t flags;
// Padding words that belong to the layout; not interpreted by this header.
uint32_t reserved[16];
};
// 16-byte identifier; cuDeviceGetUuid takes a pointer to one of these.
// The union offers two views of the same storage: raw bytes, or a field
// breakdown whose members add up to 4+2+2+2+2+4 = 16 bytes.
struct uuid_t {
union {
char bytes[16];
struct {
uint32_t a;
uint16_t b;
uint16_t c;
uint16_t d;
uint16_t e;
uint32_t f;
} uuid;
};
};
// 8-byte identifier; cuDeviceGetLuid takes a pointer to one of these.
// The union offers three views of the same storage: raw bytes, a low/high
// pair of 32-bit halves (high is signed), or a single 64-bit integer.
struct luid_t {
union {
char bytes[8];
struct {
uint32_t low;
int32_t high;
} parts;
uint64_t luid;
};
};
/** Exception type that carries a ::streamfx::nvidia::cuda::result code.
 *
 * what() is not overridden, so it yields whatever std::exception provides;
 * use code() to find out which status was stored.
 */
class cuda_error : public std::exception {
    ::streamfx::nvidia::cuda::result _code;

    public:
    ~cuda_error() override = default;

    /** Stores the given status code.
     *
     * Deliberately left non-explicit so that existing code relying on the
     * implicit conversion from result keeps compiling.
     *
     * @param code Status code to carry.
     */
    cuda_error(::streamfx::nvidia::cuda::result code) : _code(code) {}

    /** Returns the status code passed to the constructor.
     *
     * Const-qualified so it can be called on an exception caught by
     * `const cuda_error&`.
     */
    ::streamfx::nvidia::cuda::result code() const noexcept
    {
        return _code;
    }
};
// Table of dynamically resolved CUDA driver entry points.
//
// Holds a shared handle to the loaded library plus one public function-pointer
// member per entry point. Each member is declared via P_CUDA_DEFINE_FUNCTION
// and therefore starts out as nullptr and returns ::streamfx::nvidia::cuda::result.
// NOTE(review): the constructor (defined elsewhere) presumably fills these in
// from _library; whether a missing symbol leaves the member at nullptr or
// aborts construction is not visible here — check before calling optional ones.
//
// Member declaration order determines object layout; do not reorder.
class cuda {
std::shared_ptr<streamfx::util::library> _library;
public:
~cuda();
cuda();
// NOTE(review): presumably the driver version as reported by
// cuDriverGetVersion — confirm in the implementation.
int32_t version();
public:
// Initialization
P_CUDA_DEFINE_FUNCTION(cuInit, int32_t flags);
// Version Management
P_CUDA_DEFINE_FUNCTION(cuDriverGetVersion, int32_t* driverVersion);
// Device Management
P_CUDA_DEFINE_FUNCTION(cuDeviceGetName, char* name, int32_t length, device_t device);
P_CUDA_DEFINE_FUNCTION(cuDeviceGetLuid, luid_t* luid, uint32_t* device_node_mask, device_t device);
P_CUDA_DEFINE_FUNCTION(cuDeviceGetUuid, uuid_t* uuid, device_t device);
// - Not yet needed.
// Primary Context Management
P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRelease, device_t device);
P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxRetain, context_t* ctx, device_t device);
P_CUDA_DEFINE_FUNCTION(cuDevicePrimaryCtxSetFlags, device_t device, context_flags flags);
// Context Management
P_CUDA_DEFINE_FUNCTION(cuCtxCreate, context_t* ctx, context_flags flags, device_t device);
P_CUDA_DEFINE_FUNCTION(cuCtxDestroy, context_t ctx);
P_CUDA_DEFINE_FUNCTION(cuCtxGetCurrent, context_t* ctx);
P_CUDA_DEFINE_FUNCTION(cuCtxGetStreamPriorityRange, int32_t* lowestPriority, int32_t* highestPriority);
P_CUDA_DEFINE_FUNCTION(cuCtxPopCurrent, context_t* ctx);
P_CUDA_DEFINE_FUNCTION(cuCtxPushCurrent, context_t ctx);
P_CUDA_DEFINE_FUNCTION(cuCtxSetCurrent, context_t ctx);
P_CUDA_DEFINE_FUNCTION(cuCtxSynchronize);
// Module Management
// - Not yet needed.
// Memory Management
P_CUDA_DEFINE_FUNCTION(cuArrayGetDescriptor, array_descriptor_v2_t* pArrayDescripter, array_t array);
P_CUDA_DEFINE_FUNCTION(cuMemAlloc, device_ptr_t* ptr, std::size_t bytes);
P_CUDA_DEFINE_FUNCTION(cuMemAllocPitch, device_ptr_t* ptr, std::size_t* pitch, std::size_t width_in_bytes,
std::size_t height, uint32_t element_size_bytes);
P_CUDA_DEFINE_FUNCTION(cuMemFree, device_ptr_t ptr);
P_CUDA_DEFINE_FUNCTION(cuMemHostGetDevicePointer, device_ptr_t* devptr, void* ptr, uint32_t flags);
P_CUDA_DEFINE_FUNCTION(cuMemcpy, device_ptr_t dst, device_ptr_t src, std::size_t bytes);
P_CUDA_DEFINE_FUNCTION(cuMemcpy2D, const memcpy2d_v2_t* copy);
P_CUDA_DEFINE_FUNCTION(cuMemcpy2DAsync, const memcpy2d_v2_t* copy, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoA, array_t dst, std::size_t dstOffset, array_t src, std::size_t srcOffset,
std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoD, device_ptr_t dst, array_t src, std::size_t srcOffset,
std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoH, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
// NOTE(review): the four cuMemcpy*Async declarations in this group
// (AtoHAsync, DtoHAsync, HtoAAsync, HtoDAsync) have no trailing stream_t
// parameter, unlike cuMemcpy2DAsync and the cuMemsetD*Async declarations in
// this class. The driver API documents a stream argument for them — confirm
// against cuda.h and correct the signatures before calling any of these.
P_CUDA_DEFINE_FUNCTION(cuMemcpyAtoHAsync, void* dst, array_t src, std::size_t srcOffset, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoA, array_t dst, std::size_t dstOffset, device_ptr_t src,
std::size_t byteCount);
// NOTE(review): the source of the three DtoD/DtoH/DtoHAsync declarations is
// typed array_t, whereas device memory is device_ptr_t everywhere else in this
// class (e.g. cuMemcpyDtoA above). Looks like a copy/paste slip — confirm
// against cuda.h.
P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoD, device_ptr_t dst, array_t srcArray, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoH, void* dst, array_t src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyDtoHAsync, void* dst, array_t src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoA, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoAAsync, array_t dst, std::size_t dstOffset, void* src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoD, device_ptr_t dst, void* src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemcpyHtoDAsync, device_ptr_t dst, void* src, std::size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemsetD8, device_ptr_t dst, uint8_t d, size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemsetD8Async, device_ptr_t dst, uint8_t d, size_t byteCount, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuMemsetD16, device_ptr_t dst, uint16_t d, size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemsetD16Async, device_ptr_t dst, uint16_t d, size_t byteCount, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuMemsetD32, device_ptr_t dst, uint32_t d, size_t byteCount);
P_CUDA_DEFINE_FUNCTION(cuMemsetD32Async, device_ptr_t dst, uint32_t d, size_t byteCount, stream_t stream);
// Virtual Memory Management
// - Not yet needed.
// Stream Ordered Memory Allocator
// - Not yet needed.
// Unified Addressing
// - Not yet needed.
// Stream Management
P_CUDA_DEFINE_FUNCTION(cuStreamCreate, stream_t* stream, stream_flags flags);
P_CUDA_DEFINE_FUNCTION(cuStreamCreateWithPriority, stream_t* stream, stream_flags flags, int32_t priority);
P_CUDA_DEFINE_FUNCTION(cuStreamDestroy, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuStreamSynchronize, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuStreamGetPriority, stream_t stream, int32_t* priority);
// Event Management
// - Not yet needed.
// External Resource Interoperability (CUDA 11.1+)
// - Not yet needed.
// Stream Memory Operations
// - Not yet needed.
// Execution Control
// - Not yet needed.
// Graph Management
// - Not yet needed.
// Occupancy
// - Not yet needed.
// Texture Object Management
// - Not yet needed.
// Surface Object Management
// - Not yet needed.
// Peer Context Memory Access
// - Not yet needed.
// Graphics Interoperability
P_CUDA_DEFINE_FUNCTION(cuGraphicsMapResources, uint32_t count, graphics_resource_t* resources, stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuGraphicsSubResourceGetMappedArray, array_t* array, graphics_resource_t resource,
uint32_t index, uint32_t level);
P_CUDA_DEFINE_FUNCTION(cuGraphicsUnmapResources, uint32_t count, graphics_resource_t* resources,
stream_t stream);
P_CUDA_DEFINE_FUNCTION(cuGraphicsUnregisterResource, graphics_resource_t resource);
// Driver Entry Point Access
// - Not yet needed.
// Profiler Control
// - Not yet needed.
// OpenGL Interoperability
// - Not yet needed.
// VDPAU Interoperability
// - Not yet needed.
// EGL Interoperability
// - Not yet needed.
#ifdef WIN32
// The Direct3D entry points are only declared when WIN32 is defined, matching
// the conditional <d3d11.h>/<dxgi.h> includes at the top of this header.
// Direct3D9 Interoperability
// - Not yet needed.
// Direct3D10 Interoperability
P_CUDA_DEFINE_FUNCTION(cuD3D10GetDevice, device_t* device, IDXGIAdapter* adapter);
P_CUDA_DEFINE_FUNCTION(cuGraphicsD3D10RegisterResource, graphics_resource_t* resource,
ID3D10Resource* d3dresource, uint32_t flags);
// Direct3D11 Interoperability
P_CUDA_DEFINE_FUNCTION(cuD3D11GetDevice, device_t* device, IDXGIAdapter* adapter);
P_CUDA_DEFINE_FUNCTION(cuGraphicsD3D11RegisterResource, graphics_resource_t* resource,
ID3D11Resource* d3dresource, uint32_t flags);
#endif
public:
// Returns a shared_ptr to a cuda instance.
// NOTE(review): presumably a process-wide shared instance that is created on
// first use — confirm in the implementation file.
static std::shared_ptr<::streamfx::nvidia::cuda::cuda> get();
};
} // namespace streamfx::nvidia::cuda
// Opt the two flag enums into bitwise operators. These invocations sit at
// global scope, after the namespace is closed, and name the enums by their
// fully qualified type.
// NOTE(review): the macro is presumably provided by util/util-bitmask.hpp.
P_ENABLE_BITMASK_OPERATORS(::streamfx::nvidia::cuda::context_flags)
P_ENABLE_BITMASK_OPERATORS(::streamfx::nvidia::cuda::stream_flags)