From bad6e1e102dfd3312b2e9112dacd7186af06d921 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Fri, 15 Mar 2024 19:25:07 +0100 Subject: [PATCH] gpu: Change the way we merge draw calls With potentially multiple ops per ShaderOp, we may encounter situations where 1 ShaderOp contains more ops than we want to merge. (With GSK_GPU_SKIP=merge, we don't want to merge at all.) So we still merge the ShaderOps (now unconditionally), but we then run a loop that potentially splits the merged ops again - exactly at the point we want to. This way we can merge ops inside of ShaderOps and merge ShaderOps, but still have the draw calls contain the exact number of ops we want. --- gsk/gpu/gskgpushaderop.c | 72 ++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/gsk/gpu/gskgpushaderop.c b/gsk/gpu/gskgpushaderop.c index aa9d746a7f..d074feadf8 100644 --- a/gsk/gpu/gskgpushaderop.c +++ b/gsk/gpu/gskgpushaderop.c @@ -68,15 +68,15 @@ gsk_gpu_shader_op_vk_command_n (GskGpuOp *op, GskGpuShaderOpClass *shader_op_class = (GskGpuShaderOpClass *) op->op_class; GskVulkanDescriptors *desc; GskGpuOp *next; - gsize i, n; + gsize i, n_ops, max_ops_per_draw; if (gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_MERGE) && gsk_vulkan_device_has_feature (GSK_VULKAN_DEVICE (gsk_gpu_frame_get_device (frame)), GDK_VULKAN_FEATURE_NONUNIFORM_INDEXING)) - n = MAX_MERGE_OPS; + max_ops_per_draw = MAX_MERGE_OPS; else - n = 1; - i = self->n_ops; + max_ops_per_draw = 1; + desc = GSK_VULKAN_DESCRIPTORS (self->desc); if (desc && state->desc != desc) { @@ -84,7 +84,8 @@ gsk_gpu_shader_op_vk_command_n (GskGpuOp *op, state->desc = desc; } - for (next = op->next; next && i < n; next = next->next) + n_ops = self->n_ops; + for (next = op->next; next; next = next->next) { GskGpuShaderOp *next_shader = (GskGpuShaderOp *) next; @@ -92,10 +93,10 @@ gsk_gpu_shader_op_vk_command_n (GskGpuOp *op, next_shader->desc != self->desc || next_shader->variation != self->variation || next_shader->clip != self->clip || - next_shader->vertex_offset != self->vertex_offset + i * shader_op_class->vertex_size) + next_shader->vertex_offset != self->vertex_offset + n_ops * shader_op_class->vertex_size) break; - i += next_shader->n_ops; + n_ops += next_shader->n_ops; } vkCmdBindPipeline (state->vk_command_buffer, @@ -109,10 +110,13 @@ gsk_gpu_shader_op_vk_command_n (GskGpuOp *op, state->vk_format, state->vk_render_pass)); - vkCmdDraw (state->vk_command_buffer, - 6 * instance_scale, i, - 0, self->vertex_offset / shader_op_class->vertex_size); - + for (i = 0; i < n_ops; i += max_ops_per_draw) + { + vkCmdDraw (state->vk_command_buffer, + 6 * instance_scale, MIN (max_ops_per_draw, n_ops - i), + 0, self->vertex_offset / shader_op_class->vertex_size + i); + } + return next; } @@ -135,7 +139,7 @@ gsk_gpu_shader_op_gl_command_n (GskGpuOp *op, GskGpuShaderOpClass *shader_op_class = (GskGpuShaderOpClass *) op->op_class; GskGLDescriptors *desc; GskGpuOp *next; - gsize i, n, n_external; + gsize i, n_ops, n_external, max_ops_per_draw; desc = GSK_GL_DESCRIPTORS (self->desc); if (desc) @@ -166,11 +170,12 @@ gsk_gpu_shader_op_gl_command_n (GskGpuOp *op, } if (gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_MERGE)) - n = MAX_MERGE_OPS; + max_ops_per_draw = MAX_MERGE_OPS; else - n = 1; - i = self->n_ops; - for (next = op->next; next && i < n; next = next->next) + max_ops_per_draw = 1; + + n_ops = self->n_ops; + for (next = op->next; next; next = next->next) { GskGpuShaderOp *next_shader = (GskGpuShaderOp *) next; @@ -178,28 +183,31 @@ gsk_gpu_shader_op_gl_command_n (GskGpuOp *op, next_shader->desc != self->desc || next_shader->variation != self->variation || next_shader->clip != self->clip || - next_shader->vertex_offset != self->vertex_offset + i * shader_op_class->vertex_size) + next_shader->vertex_offset != self->vertex_offset + n_ops * shader_op_class->vertex_size) break; - i += next_shader->n_ops; + n_ops += next_shader->n_ops; } - if (gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_GL_BASE_INSTANCE)) + for (i = 0; i < n_ops; i += max_ops_per_draw) { - glDrawArraysInstancedBaseInstance (GL_TRIANGLES, - 0, - 6 * instance_scale, - i, - self->vertex_offset / shader_op_class->vertex_size); - } - else - { - shader_op_class->setup_vao (self->vertex_offset); + if (gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_GL_BASE_INSTANCE)) + { + glDrawArraysInstancedBaseInstance (GL_TRIANGLES, + 0, + 6 * instance_scale, + MIN (max_ops_per_draw, n_ops - i), + self->vertex_offset / shader_op_class->vertex_size + i); + } + else + { + shader_op_class->setup_vao (self->vertex_offset + i * shader_op_class->vertex_size); - glDrawArraysInstanced (GL_TRIANGLES, - 0, - 6 * instance_scale, - i); + glDrawArraysInstanced (GL_TRIANGLES, + 0, + 6 * instance_scale, + MIN (max_ops_per_draw, n_ops - i)); + } } return next;