filter-blur: Switch to Kernel Array instead of Kernel Texture
This drastically speeds up Gaussian Blur and Linear Gaussian Blur: it reduces the time spent reading textures by using existing registers instead, maximizing the time spent reading the actual image texture. See also: #21 Blur Quality
This commit is contained in:
		
							parent
							
								
									92c4b54177
								
							
						
					
					
						commit
						a6f9451654
					
				|  | @ -10,9 +10,7 @@ uniform int u_diameter; | |||
| uniform float2 u_texelDelta; | ||||
| 
 | ||||
| // Kernel Settings | ||||
| //uniform float registerkernel[25]; | ||||
| uniform texture2d kernel; | ||||
| uniform float2 kernelTexel; | ||||
| uniform float4 kernel[8]; // max kernel radius 31+center. | ||||
| 
 | ||||
| // Bilateral Settings | ||||
| uniform float bilateralSmoothing; | ||||
|  | @ -53,6 +51,11 @@ VertDataOut VSDefault(VertDataIn vtx) | |||
| 	return vert_out; | ||||
| } | ||||
| 
 | ||||
| /// Utility: returns kernel entry i, i.e. component (i % 4) of the float4 stored at kernel[floor(i / 4)]. | ||||
| float GetKernelAt(int i) { | ||||
| 	return ((float[4])(kernel[floor(i/4)]))[i%4]; | ||||
| } | ||||
| 
 | ||||
| /// Blur: Box | ||||
| float4 PSBoxBlur(VertDataOut vtx) : TARGET { | ||||
| 	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0); | ||||
|  | @ -131,14 +134,13 @@ technique BoxLinear | |||
| } | ||||
| 
 | ||||
| /// Blur: Gaussian | ||||
| // ToDo: Switch to array Kernel instead of Texture kernel. | ||||
| float4 PSGaussianBlur(VertDataOut vtx) : TARGET { | ||||
| 	float2 uvOffset = float2(0, 0); | ||||
| 	float4 final = u_image.SampleLevel(pointSampler, vtx.uv, 0) | ||||
| 		* kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r; | ||||
| 		* GetKernelAt(0); | ||||
| 	for (int k = 1; k <= u_radius; k++) { | ||||
| 		uvOffset += u_texelDelta; | ||||
| 		float l_g = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r; | ||||
| 		float l_g = GetKernelAt(k); | ||||
| 		float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0); | ||||
| 		float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0); | ||||
| 		final += (l_p + l_n) * l_g; | ||||
|  | @ -180,15 +182,15 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET { | |||
| 	// [-2, -1,  0, +1, +2] | ||||
| 	//   ^-S-^   S   ^-S-^ | ||||
| 	// Total Samples: 3 (n+1) | ||||
| 	 | ||||
| 
 | ||||
| 	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0); | ||||
| 	float4 final = origin * kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r; | ||||
| 	float4 final = origin * GetKernelAt(0); | ||||
| 	float2 halfTexelDelta = u_texelDelta / 2.0; | ||||
| 
 | ||||
| 	for (int k = 1; k < u_radius; k+=2) { | ||||
| 		float2 offset = k * u_texelDelta + halfTexelDelta; | ||||
| 		float l_g0 = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r; | ||||
| 		float l_g1 = kernel.SampleLevel(pointSampler, (float2(k + 1, u_radius - 1) * kernelTexel), 0).r; | ||||
| 		float l_g0 = GetKernelAt(k); | ||||
| 		float l_g1 = GetKernelAt(k +1); | ||||
| 		float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0); | ||||
| 		float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0); | ||||
| 		final += (l_p + l_n) * l_g0; | ||||
|  | @ -199,7 +201,7 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET { | |||
| 		// Odd numbers require treatment of ends. | ||||
| 		float4 left = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0); | ||||
| 		float4 right = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0); | ||||
| 		float krn = kernel.SampleLevel(pointSampler, (float2(u_radius, u_radius - 1) * kernelTexel), 0).r; | ||||
| 		float krn = GetKernelAt(u_radius); | ||||
| 		final += (left + right) * krn; | ||||
| 	} | ||||
| 
 | ||||
|  |  | |||
|  | @ -120,18 +120,12 @@ bool filter::blur::blur_instance::apply_bilateral_param() | |||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| bool filter::blur::blur_instance::apply_gaussian_param() | ||||
| bool filter::blur::blur_instance::apply_gaussian_param(uint8_t width) | ||||
| { | ||||
| 	std::shared_ptr<gs::texture> kernel = filter::blur::blur_factory::get()->get_kernel(filter::blur::type::Gaussian); | ||||
| 	auto kernel = filter::blur::blur_factory::get()->get_gaussian_kernel(width); | ||||
| 
 | ||||
| 	if (blur_effect->has_parameter("kernel")) { | ||||
| 		blur_effect->get_parameter("kernel").set_texture(kernel); | ||||
| 	} | ||||
| 
 | ||||
| 	if (blur_effect->has_parameter("kernelTexel")) { | ||||
| 		float_t wb = 1.0f / kernel->get_width(); | ||||
| 		float_t hb = 1.0f / kernel->get_height(); | ||||
| 		blur_effect->get_parameter("kernelTexel").set_float2(wb, hb); | ||||
| 		blur_effect->get_parameter("kernel").set_float_array(&(kernel->front()), kernel->size()); | ||||
| 	} | ||||
| 
 | ||||
| 	return true; | ||||
|  | @ -604,7 +598,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect) | |||
| 	} | ||||
| #pragma endregion RGB->YUV | ||||
| 
 | ||||
| #pragma region blur | ||||
| #pragma region Blur | ||||
| 	// Set up camera stuff
 | ||||
| 	gs_set_cull_mode(GS_NEITHER); | ||||
| 	gs_reset_blend_state(); | ||||
|  | @ -630,7 +624,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect) | |||
| 
 | ||||
| 		if (!apply_shared_param(intermediate, xpel, ypel)) | ||||
| 			break; | ||||
| 		apply_gaussian_param(); | ||||
| 		apply_gaussian_param(this->size); | ||||
| 		apply_bilateral_param(); | ||||
| 
 | ||||
| 		gs_texrender_reset(rt); | ||||
|  | @ -853,11 +847,13 @@ void filter::blur::blur_factory::generate_gaussian_kernels() | |||
| 	// 2D texture, horizontal is value, vertical is kernel size.
 | ||||
| 	size_t size_power_of_two = size_t(pow(2, util::math::get_power_of_two_exponent_ceil(max_kernel_size))); | ||||
| 
 | ||||
| 	std::vector<float_t> texture_Data(size_power_of_two * size_power_of_two); | ||||
| 	std::vector<float_t> math_data(size_power_of_two); | ||||
| 	std::vector<float_t>                  texture_data(size_power_of_two * size_power_of_two); | ||||
| 	std::vector<float_t>                  math_data(size_power_of_two); | ||||
| 	std::shared_ptr<std::vector<float_t>> kernel_data; | ||||
| 
 | ||||
| 	for (size_t width = 1; width <= max_kernel_size; width++) { | ||||
| 		size_t v = (width - 1) * size_power_of_two; | ||||
| 		size_t v    = (width - 1) * size_power_of_two; | ||||
| 		kernel_data = std::make_shared<std::vector<float_t>>(size_power_of_two); | ||||
| 
 | ||||
| 		// Calculate and normalize
 | ||||
| 		float_t sum = 0; | ||||
|  | @ -869,13 +865,16 @@ void filter::blur::blur_factory::generate_gaussian_kernels() | |||
| 		// Normalize to Texture Buffer
 | ||||
| 		double_t inverse_sum = 1.0 / sum; | ||||
| 		for (size_t p = 0; p <= width; p++) { | ||||
| 			texture_Data[v + p] = float_t(math_data[p] * inverse_sum); | ||||
| 			texture_data[v + p] = float_t(math_data[p] * inverse_sum); | ||||
| 			kernel_data->at(p)  = texture_data[v + p]; | ||||
| 		} | ||||
| 
 | ||||
| 		gaussian_kernels.insert({uint8_t(width), kernel_data}); | ||||
| 	} | ||||
| 
 | ||||
| 	// Create Texture
 | ||||
| 	try { | ||||
| 		auto texture_buffer = reinterpret_cast<uint8_t*>(texture_Data.data()); | ||||
| 		auto texture_buffer = reinterpret_cast<uint8_t*>(texture_data.data()); | ||||
| 		auto unsafe_buffer  = const_cast<const uint8_t**>(&texture_buffer); | ||||
| 
 | ||||
| 		kernels.insert_or_assign(filter::blur::type::Gaussian, | ||||
|  | @ -1046,6 +1045,11 @@ std::shared_ptr<gs::texture> filter::blur::blur_factory::get_kernel(filter::blur | |||
| 	return kernels.at(type); | ||||
| } | ||||
| 
 | ||||
// Returns the kernel data stored in gaussian_kernels under the key `size`.
// The lookup uses map::at(), so a key that was never inserted raises std::out_of_range rather than yielding a null pointer.
| std::shared_ptr<std::vector<float_t>> filter::blur::blur_factory::get_gaussian_kernel(uint8_t size) | ||||
| { | ||||
| 	return gaussian_kernels.at(size); | ||||
| } | ||||
| 
 | ||||
| obs_scene_t* filter::blur::blur_factory::get_scene(std::string name) | ||||
| { | ||||
| 	auto kv = scenes.find(name); | ||||
|  |  | |||
|  | @ -111,7 +111,7 @@ namespace filter { | |||
| 
 | ||||
| 			bool apply_shared_param(gs_texture_t* input, float texelX, float texelY); | ||||
| 			bool apply_bilateral_param(); | ||||
| 			bool apply_gaussian_param(); | ||||
| 			bool apply_gaussian_param(uint8_t width); | ||||
| 			bool apply_mask_parameters(std::shared_ptr<gs::effect> effect, gs_texture_t* original_texture, | ||||
| 									   gs_texture_t* blurred_texture); | ||||
| 
 | ||||
|  | @ -143,6 +143,7 @@ namespace filter { | |||
| 
 | ||||
| 			std::shared_ptr<gs::effect>                                blur_effect; | ||||
| 			std::map<filter::blur::type, std::shared_ptr<gs::texture>> kernels; | ||||
| 			std::map<uint8_t, std::shared_ptr<std::vector<float_t>>>   gaussian_kernels; | ||||
| 
 | ||||
| 			std::map<std::string, obs_scene_t*> scenes; | ||||
| 
 | ||||
|  | @ -188,6 +189,8 @@ namespace filter { | |||
| 
 | ||||
| 			std::shared_ptr<gs::texture> get_kernel(filter::blur::type type); | ||||
| 
 | ||||
| 			std::shared_ptr<std::vector<float_t>> get_gaussian_kernel(uint8_t size); | ||||
| 
 | ||||
| 			obs_scene_t* get_scene(std::string name); | ||||
| 
 | ||||
| 			void enum_scenes(std::function<bool(obs_scene_t*)> fnc); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue