From 1ebb0beac4dfe95bf837bc84c7f5e75a13b7328c Mon Sep 17 00:00:00 2001 From: Michael Fabian 'Xaymar' Dirks Date: Thu, 18 Jan 2018 05:01:54 +0100 Subject: [PATCH] gs-vertexbuffer: Avoid relying on std for aligned types Changes the GS::VertexBuffer storage to properly aligned, contiguous buffers (one per vertex attribute) that GS::Vertex now points into directly. This halves the necessary memory, removes reallocation cost and removes the copy necessary to get things onto the GPU. Related: #9 --- source/filter-shape.cpp | 14 +-- source/filter-transform.cpp | 57 ++++----- source/gs-vertex.cpp | 48 +------- source/gs-vertex.h | 43 ++----- source/gs-vertexbuffer.cpp | 229 +++++++++++++++++++++--------------- source/gs-vertexbuffer.h | 64 ++++++---- 6 files changed, 217 insertions(+), 238 deletions(-) diff --git a/source/filter-shape.cpp b/source/filter-shape.cpp index f46cd7f..c88cb8a 100644 --- a/source/filter-shape.cpp +++ b/source/filter-shape.cpp @@ -273,12 +273,12 @@ void Filter::Shape::Instance::update(obs_data_t *data) { uint32_t points = (uint32_t)obs_data_get_int(data, P_SHAPE_POINTS); m_vertexHelper->resize(points); for (uint32_t point = 0; point < points; point++) { - GS::Vertex& v = m_vertexHelper->at(point); + GS::Vertex v = m_vertexHelper->at(point); { auto strings = cache.find(std::make_pair(point, P_SHAPE_POINT_X)); if (strings != cache.end()) { - v.position.x = (float)(obs_data_get_double(data, + v.position->x = (float)(obs_data_get_double(data, strings->second.first.c_str()) / 100.0); } } @@ -286,7 +286,7 @@ void Filter::Shape::Instance::update(obs_data_t *data) { auto strings = cache.find(std::make_pair(point, P_SHAPE_POINT_Y)); if (strings != cache.end()) { - v.position.y = (float)(obs_data_get_double(data, + v.position->y = (float)(obs_data_get_double(data, strings->second.first.c_str()) / 100.0); } } @@ -294,7 +294,7 @@ void Filter::Shape::Instance::update(obs_data_t *data) { auto strings = cache.find(std::make_pair(point, P_SHAPE_POINT_U)); if (strings != 
cache.end()) { - v.uv[0].x = (float)(obs_data_get_double(data, + v.uv[0]->x = (float)(obs_data_get_double(data, strings->second.first.c_str()) / 100.0); } } @@ -302,12 +302,12 @@ void Filter::Shape::Instance::update(obs_data_t *data) { auto strings = cache.find(std::make_pair(point, P_SHAPE_POINT_V)); if (strings != cache.end()) { - v.uv[0].y = (float)(obs_data_get_double(data, + v.uv[0]->y = (float)(obs_data_get_double(data, strings->second.first.c_str()) / 100.0); } } - v.color = 0xFFFFFFFF; - v.position.z = 0.0f; + *v.color = 0xFFFFFFFF; + v.position->z = 0.0f; } drawmode = (gs_draw_mode)obs_data_get_int(data, P_SHAPE_MODE); obs_enter_graphics(); diff --git a/source/filter-transform.cpp b/source/filter-transform.cpp index 01af8fe..2d4c4f9 100644 --- a/source/filter-transform.cpp +++ b/source/filter-transform.cpp @@ -465,41 +465,42 @@ void Filter::Transform::Instance::video_render(gs_effect_t *paramEffect) { /// Generate mesh { - GS::Vertex& v = m_vertexHelper->at(0); - v.uv[0].x = 0; v.uv[0].y = 0; - v.color = 0xFFFFFFFF; - v.position.x = -p_x + m_shear->x; - v.position.y = -p_y - m_shear->y; - v.position.z = 0.0f; - vec3_transform(&v.position, &v.position, &ident); + GS::Vertex vtx = m_vertexHelper->at(0); + *vtx.color = 0xFFFFFFFF; + vec4_set(vtx.uv[0], 0, 0, 0, 0); + vec3_set(vtx.position, + -p_x + m_shear->x, + -p_y - m_shear->y, 0); + vec3_transform(vtx.position, vtx.position, &ident); } { - GS::Vertex& v = m_vertexHelper->at(1); - v.uv[0].x = 1; v.uv[0].y = 0; - v.color = 0xFFFFFFFF; - v.position.x = p_x + m_shear->x; - v.position.y = -p_y + m_shear->y; - v.position.z = 0.0f; - vec3_transform(&v.position, &v.position, &ident); + GS::Vertex vtx = m_vertexHelper->at(1); + *vtx.color = 0xFFFFFFFF; + vec4_set(vtx.uv[0], 1, 0, 0, 0); + vec3_set(vtx.position, + p_x + m_shear->x, + -p_y + m_shear->y, 0); + vec3_transform(vtx.position, vtx.position, &ident); } { - GS::Vertex& v = m_vertexHelper->at(2); - v.uv[0].x = 0; v.uv[0].y = 1; - v.color = 0xFFFFFFFF; - 
v.position.x = -p_x - m_shear->x; - v.position.y = p_y - m_shear->y; - v.position.z = 0.0f; - vec3_transform(&v.position, &v.position, &ident); + GS::Vertex vtx = m_vertexHelper->at(2); + *vtx.color = 0xFFFFFFFF; + vec4_set(vtx.uv[0], 0, 1, 0, 0); + vec3_set(vtx.position, + -p_x - m_shear->x, + p_y - m_shear->y, 0); + vec3_transform(vtx.position, vtx.position, &ident); } { - GS::Vertex& v = m_vertexHelper->at(3); - v.uv[0].x = 1; v.uv[0].y = 1; - v.color = 0xFFFFFFFF; - v.position.x = p_x - m_shear->x; - v.position.y = p_y + m_shear->y; - v.position.z = 0.0f; - vec3_transform(&v.position, &v.position, &ident); + GS::Vertex vtx = m_vertexHelper->at(3); + *vtx.color = 0xFFFFFFFF; + vec4_set(vtx.uv[0], 1, 1, 0, 0); + vec3_set(vtx.position, + p_x - m_shear->x, + p_y + m_shear->y, 0); + vec3_transform(vtx.position, vtx.position, &ident); } + m_vertexBuffer = m_vertexHelper->get(); if (!m_vertexBuffer) { obs_source_skip_video_filter(m_sourceContext); diff --git a/source/gs-vertex.cpp b/source/gs-vertex.cpp index 2e423b7..271e79a 100644 --- a/source/gs-vertex.cpp +++ b/source/gs-vertex.cpp @@ -17,50 +17,4 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#include "gs-vertexbuffer.h" -#include "util-memory.h" -#include - -GS::Vertex& GS::Vertex::operator=(const Vertex& r) { - vec3_copy(&this->position, &r.position); - vec3_copy(&this->normal, &r.normal); - vec3_copy(&this->tangent, &r.tangent); - for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { - vec4_copy(&this->uv[n], &r.uv[n]); - } - return *this; -} - -GS::Vertex* GS::Vertex::operator=(const Vertex* r) { - vec3_copy(&this->position, &r->position); - vec3_copy(&this->normal, &r->normal); - vec3_copy(&this->tangent, &r->tangent); - for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { - vec4_copy(&this->uv[n], &r->uv[n]); - } - return this; -} - -void* GS::Vertex::operator new(size_t count) { - return _aligned_malloc(count, 16); -} - -void* GS::Vertex::operator new(size_t count, void* 
d){ - return d; -} - -void* GS::Vertex::operator new[](size_t count) { - return _aligned_malloc(count, 16); -} - -void* GS::Vertex::operator new[](size_t count, void* d) { - return d; -} - -void GS::Vertex::operator delete(void* p) { - return _aligned_free(p); -} - -void GS::Vertex::operator delete[](void* p) { - return _aligned_free(p); -} +#include "gs-vertex.h" diff --git a/source/gs-vertex.h b/source/gs-vertex.h index 7c0d172..88c7ea9 100644 --- a/source/gs-vertex.h +++ b/source/gs-vertex.h @@ -18,6 +18,7 @@ */ #pragma once +#include "gs-limits.h" #include #include extern "C" { @@ -28,41 +29,11 @@ extern "C" { } namespace GS { - const uint32_t MAXIMUM_UVW_LAYERS = 8u; - // ToDo: Optimize for use with GS::VertexBuffer so that it doesn't require in-memory copy. - __declspec(align(16)) struct Vertex { - union { - __m128 _positionM; - vec3 position; - }; - union { - __m128 _normalM; - vec3 normal; - }; - union { - __m128 _tangentM; - vec3 tangent; - }; - union { - __m128 _uvM[MAXIMUM_UVW_LAYERS]; - vec4 uv[MAXIMUM_UVW_LAYERS]; - }; - uint32_t color; - - // Operators - static void* Vertex::operator new(size_t count); - static void* Vertex::operator new[](size_t count); - static void* Vertex::operator new(size_t count, void* d); - static void* Vertex::operator new[](size_t count, void* d); - static void Vertex::operator delete(void* p); - static void Vertex::operator delete[](void* p); - - //Vertex& Vertex::operator =(Vertex r); - Vertex& Vertex::operator =(const Vertex& r); - Vertex* Vertex::operator =(const Vertex* r); - - private: - uint32_t padding[3]; + struct Vertex { + vec3* position; + vec3* normal; + vec3* tangent; + uint32_t* color; + vec4* uv[MAXIMUM_UVW_LAYERS]; }; - } diff --git a/source/gs-vertexbuffer.cpp b/source/gs-vertexbuffer.cpp index 0dfe7cf..e9e6bc5 100644 --- a/source/gs-vertexbuffer.cpp +++ b/source/gs-vertexbuffer.cpp @@ -18,6 +18,7 @@ */ #include "gs-vertexbuffer.h" +#include "util-memory.h" #include extern "C" { #pragma warning( push ) @@ 
-26,133 +27,171 @@ extern "C" { #pragma warning( pop ) } -const uint32_t defaultMaximumVertices = 65535u; - +#pragma region Constructor & Destructor GS::VertexBuffer::VertexBuffer(uint32_t maximumVertices) { - m_maximumVertices = maximumVertices; - m_uvwLayers = MAXIMUM_UVW_LAYERS; - - // Reserve Space - m_vertexbufferdata = gs_vbdata_create(); - m_vertexbufferdata->num = m_maximumVertices; - m_data.positions.resize(m_maximumVertices); - m_vertexbufferdata->points = m_data.positions.data(); - m_data.normals.resize(m_maximumVertices); - m_vertexbufferdata->normals = m_data.normals.data(); - m_data.tangents.resize(m_maximumVertices); - m_vertexbufferdata->tangents = m_data.tangents.data(); - m_data.colors.resize(m_maximumVertices); - m_vertexbufferdata->colors = m_data.colors.data(); - m_vertexbufferdata->num_tex = m_uvwLayers; - m_data.uvws.resize(m_uvwLayers); - m_data.uvwdata.resize(m_uvwLayers); - for (uint32_t n = 0; n < m_uvwLayers; n++) { - m_data.uvws[n].resize(m_maximumVertices); - m_data.uvwdata[n].width = 4; - m_data.uvwdata[n].array = m_data.uvws[n].data(); + if (maximumVertices > MAXIMUM_VERTICES) { + throw std::out_of_range("maximumVertices out of range"); + } + + // Assign limits. + m_capacity = maximumVertices; + m_layers = MAXIMUM_UVW_LAYERS; + + // Allocate memory for data. 
+ m_vertexbufferdata = gs_vbdata_create(); + m_vertexbufferdata->num = m_capacity; + m_vertexbufferdata->points = m_positions = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity); + m_vertexbufferdata->normals = m_normals = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity); + m_vertexbufferdata->tangents = m_tangents = (vec3*)util::malloc_aligned(16, sizeof(vec3) * m_capacity); + m_vertexbufferdata->colors = m_colors = (uint32_t*)util::malloc_aligned(16, sizeof(uint32_t) * m_capacity); + m_vertexbufferdata->num_tex = m_layers; + m_vertexbufferdata->tvarray = m_layerdata = (gs_tvertarray*)util::malloc_aligned(16, sizeof(gs_tvertarray)* m_layers); + for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { + m_layerdata[n].array = m_uvs[n] = (vec4*)util::malloc_aligned(16, sizeof(vec4) * m_capacity); + m_layerdata[n].width = 4; } - m_vertexbufferdata->tvarray = m_data.uvwdata.data(); // Allocate GPU obs_enter_graphics(); m_vertexbuffer = gs_vertexbuffer_create(m_vertexbufferdata, GS_DYNAMIC); std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); + m_vertexbufferdata->num = m_capacity; + m_vertexbufferdata->num_tex = m_layers; obs_leave_graphics(); if (!m_vertexbuffer) { throw std::runtime_error("Failed to create vertex buffer."); } } -GS::VertexBuffer::VertexBuffer(gs_vertbuffer_t* vb) { - m_vertexbuffer = vb; -} - -GS::VertexBuffer::VertexBuffer() : VertexBuffer(defaultMaximumVertices) {} - -GS::VertexBuffer::VertexBuffer(std::vector& other) : VertexBuffer((uint32_t)other.capacity()) { - std::copy(other.begin(), other.end(), this->end()); -} - -GS::VertexBuffer::VertexBuffer(VertexBuffer& other) : VertexBuffer(other.m_maximumVertices) { - std::copy(other.begin(), other.end(), this->end()); -} +GS::VertexBuffer::VertexBuffer() : VertexBuffer(MAXIMUM_VERTICES) {} GS::VertexBuffer::~VertexBuffer() { - if (m_vertexbuffer) { + if (m_positions) { + util::free_aligned(m_positions); + m_positions = nullptr; + } + if (m_normals) { + util::free_aligned(m_normals); 
+ m_normals = nullptr; + } + if (m_tangents) { + util::free_aligned(m_tangents); + m_tangents = nullptr; + } + if (m_colors) { + util::free_aligned(m_colors); + m_colors = nullptr; + } + for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { + if (m_uvs[n]) { + util::free_aligned(m_uvs[n]); + m_uvs[n] = nullptr; + } + } + if (m_vertexbufferdata) { std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); - + if (!m_vertexbuffer) { + gs_vbdata_destroy(m_vertexbufferdata); + m_vertexbufferdata = nullptr; + } + } + if (m_vertexbuffer) { obs_enter_graphics(); gs_vertexbuffer_destroy(m_vertexbuffer); obs_leave_graphics(); + m_vertexbuffer = nullptr; } - m_vertexbuffer = nullptr; +} +#pragma endregion Constructor & Destructor + +#pragma region Copy & Move Constructor +GS::VertexBuffer::VertexBuffer(VertexBuffer& other) : VertexBuffer(other.m_capacity) { + +} + +GS::VertexBuffer::VertexBuffer(gs_vertbuffer_t* vb) { + m_vertexbuffer = vb; +} +#pragma endregion Copy & Move Constructor + +void GS::VertexBuffer::resize(size_t new_size) { + if (new_size > m_capacity) { + throw std::out_of_range("new_size out of range"); + } + m_size = new_size; +} + +size_t GS::VertexBuffer::size() { + return m_size; +} + +bool GS::VertexBuffer::empty() { + return m_size == 0; +} + +const GS::Vertex GS::VertexBuffer::at(size_t idx) { + if ((idx < 0) || (idx >= m_size)) { + throw std::out_of_range("idx out of range"); + } + + GS::Vertex vtx; + vtx.position = &m_positions[idx]; + vtx.normal = &m_normals[idx]; + vtx.tangent = &m_tangents[idx]; + vtx.color = &m_colors[idx]; + for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { + vtx.uv[n] = &m_uvs[n][idx]; + } + return vtx; +} + +const GS::Vertex GS::VertexBuffer::operator[](const size_t pos) { + return at(pos); } void GS::VertexBuffer::set_uv_layers(uint32_t layers) { - m_uvwLayers = layers; + m_layers = layers; } uint32_t GS::VertexBuffer::uv_layers() { - return m_uvwLayers; + return m_layers; } gs_vertbuffer_t* GS::VertexBuffer::get(bool refreshGPU) { - 
if (refreshGPU) { - if (size() > m_maximumVertices) - throw std::runtime_error("Too many vertices in Vertex Buffer."); + if (!refreshGPU) + return m_vertexbuffer; - // Update data pointer from Graphics Subsystem. - m_vertexbufferdata = gs_vertexbuffer_get_data(m_vertexbuffer); - std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); - - // Resize buffers. - m_data.positions.resize(m_maximumVertices); - m_data.normals.resize(m_maximumVertices); - m_data.tangents.resize(m_maximumVertices); - m_data.colors.resize(m_maximumVertices); - m_data.uvws.resize(m_uvwLayers); - m_data.uvwdata.resize(m_uvwLayers); - - // Assign new data. - m_vertexbufferdata->num = m_maximumVertices; - m_vertexbufferdata->points = m_data.positions.data(); - m_vertexbufferdata->normals = m_data.normals.data(); - m_vertexbufferdata->tangents = m_data.tangents.data(); - m_vertexbufferdata->colors = m_data.colors.data(); - m_vertexbufferdata->num_tex = m_uvwLayers; - for (uint32_t n = 0; n < m_uvwLayers; n++) { - m_data.uvws[n].resize(m_maximumVertices); - m_data.uvwdata[n].width = 4; - m_data.uvwdata[n].array = m_data.uvws[n].data(); - } - m_vertexbufferdata->tvarray = m_data.uvwdata.data(); - - // Copy Data - for (size_t vertexIdx = 0; vertexIdx < size(); vertexIdx++) { - GS::Vertex& v = this->at(vertexIdx); - vec3_copy(&m_data.positions[vertexIdx], &(v.position)); - vec3_copy(&m_data.normals[vertexIdx], &(v.normal)); - vec3_copy(&m_data.tangents[vertexIdx], &(v.tangent)); - for (size_t texcoordIdx = 0; texcoordIdx < m_uvwLayers; texcoordIdx++) { - vec4_copy(&m_data.uvws[texcoordIdx][vertexIdx], &(v.uv[texcoordIdx])); - } - m_data.colors[vertexIdx] = v.color; - } - - // Update GPU - obs_enter_graphics(); - gs_vertexbuffer_flush(m_vertexbuffer); - obs_leave_graphics(); - - // WORKAROUND: OBS Studio 20.x and below incorrectly deletes data that it doesn't own. 
- std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); - m_vertexbufferdata->num = m_maximumVertices; - m_vertexbufferdata->num_tex = m_uvwLayers; - for (uint32_t n = 0; n < m_uvwLayers; n++) { - m_data.uvwdata[n].width = 4; - } + if (m_size > m_capacity) + throw std::out_of_range("size is larger than capacity"); + // Update VertexBuffer data. + m_vertexbufferdata = gs_vertexbuffer_get_data(m_vertexbuffer); + std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); + m_vertexbufferdata->num = m_capacity; + m_vertexbufferdata->points = m_positions; + m_vertexbufferdata->normals = m_normals; + m_vertexbufferdata->tangents = m_tangents; + m_vertexbufferdata->colors = m_colors; + m_vertexbufferdata->num_tex = m_layers; + m_vertexbufferdata->tvarray = m_layerdata; + for (size_t n = 0; n < MAXIMUM_UVW_LAYERS; n++) { + m_layerdata[n].array = m_uvs[n]; + m_layerdata[n].width = 4; } + + // Update GPU + obs_enter_graphics(); + gs_vertexbuffer_flush(m_vertexbuffer); + obs_leave_graphics(); + + // WORKAROUND: OBS Studio 20.x and below incorrectly deletes data that it doesn't own. + std::memset(m_vertexbufferdata, 0, sizeof(gs_vb_data)); + m_vertexbufferdata->num = m_capacity; + m_vertexbufferdata->num_tex = m_layers; + for (uint32_t n = 0; n < m_layers; n++) { + m_layerdata[n].width = 4; + } + return m_vertexbuffer; } diff --git a/source/gs-vertexbuffer.h b/source/gs-vertexbuffer.h index 140c581..9d6b4b2 100644 --- a/source/gs-vertexbuffer.h +++ b/source/gs-vertexbuffer.h @@ -18,11 +18,11 @@ */ #pragma once +#include "gs-limits.h" #include "gs-vertex.h" #include "util-math.h" #include "util-memory.h" #include -#include extern "C" { #pragma warning( push ) #pragma warning( disable: 4201 ) @@ -31,8 +31,9 @@ extern "C" { } namespace GS { - class VertexBuffer : public std::vector> { + class VertexBuffer { public: +#pragma region Constructor & Destructor /*! 
* \brief Create a Vertex Buffer with specific size * @@ -47,26 +48,36 @@ namespace GS { */ VertexBuffer(); + virtual ~VertexBuffer(); +#pragma endregion Constructor & Destructor + +#pragma region Copy & Move Constructor /*! - * \brief Create a copy of a Vertex Buffer - * Full Description below - * - * \param other The Vertex Buffer to copy - */ + * \brief Create a copy of a Vertex Buffer + * Full Description below + * + * \param other The Vertex Buffer to copy + */ VertexBuffer(VertexBuffer& other); /*! - * \brief Create a Vertex Buffer from a Vertex array + * \brief Create a Vertex Buffer from an existing gs_vertbuffer_t handle * Full Description below * - * \param other The Vertex array to use + * \param other The existing gs_vertbuffer_t handle to store */ - VertexBuffer(std::vector& other); + VertexBuffer(gs_vertbuffer_t* other); +#pragma endregion Copy & Move Constructor + + void resize(size_t new_size); + size_t size(); - VertexBuffer(gs_vertbuffer_t* vb); + bool empty(); - virtual ~VertexBuffer(); + const GS::Vertex at(size_t idx); + + const GS::Vertex operator[](const size_t pos); void set_uv_layers(uint32_t layers); @@ -76,20 +87,23 @@ namespace GS { gs_vertbuffer_t* get(bool refreshGPU); - protected: - uint32_t m_maximumVertices; - uint32_t m_uvwLayers; + private: + uint32_t m_size; + uint32_t m_capacity; + + + uint32_t m_layers; + + // Memory Storage + vec3 *m_positions; + vec3 *m_normals; + vec3 *m_tangents; + uint32_t *m_colors; + vec4 *m_uvs[MAXIMUM_UVW_LAYERS]; + + // OBS GS Data gs_vb_data* m_vertexbufferdata; gs_vertbuffer_t* m_vertexbuffer; - - // Data Storage - struct { - std::vector positions; - std::vector normals; - std::vector tangents; - std::vector colors; - std::vector> uvws; - std::vector uvwdata; - } m_data; + gs_tvertarray* m_layerdata; }; }