gpu: Merge multiple ops into one ShaderOp

When ops get allocated that use the same state as the last op, put them
into the same ShaderOp. This reduces the number of ShaderOps we need to
record, which has 3 benefits:

1. It's less work when iterating over all the ops.
   This isn't a big win, but it makes submit() and print() run a bit
   faster.
2. We don't need to manage data per-op.
   This is a large win because we don't need to ref/unref descriptors
   as much anymore, and refcounting is visible on profiles.
3. We save memory.
   This is a pretty big win because we iterate over ops a lot, and when
   the array is large enough (I've managed to write testcases that make
   it grow to over 4GB) it kills all the caches and that's bad.

The main beneficiaries of all this are glyphs, which used to emit 1 ShaderOp
per glyph and can now end up with 1 ShaderOp for multiple text nodes,
even if those text nodes use different fonts or colors - because they
can all share the same ColorizeOp.
This commit is contained in:
Benjamin Otte
2024-03-15 19:30:42 +01:00
parent d51912c0b4
commit 93cdcc5e88

View File

@@ -229,19 +229,39 @@ gsk_gpu_shader_op_alloc (GskGpuFrame *frame,
GskGpuDescriptors *desc,
gpointer out_vertex_data)
{
GskGpuShaderOp *self;
GskGpuOp *last;
GskGpuShaderOp *last_shader;
gsize vertex_offset;
self = (GskGpuShaderOp *) gsk_gpu_op_alloc (frame, &op_class->parent_class);
vertex_offset = gsk_gpu_frame_reserve_vertex_data (frame, op_class->vertex_size);
self->variation = variation;
self->clip = clip;
if (desc)
self->desc = g_object_ref (desc);
last = gsk_gpu_frame_get_last_op (frame);
/* careful: We're casting without checking, but the if() does the check */
last_shader = (GskGpuShaderOp *) last;
if (last &&
last->op_class == (const GskGpuOpClass *) op_class &&
last_shader->desc == desc &&
last_shader->variation == variation &&
last_shader->clip == clip &&
last_shader->vertex_offset + last_shader->n_ops * op_class->vertex_size == vertex_offset)
{
last_shader->n_ops++;
}
else
self->desc = NULL;
self->vertex_offset = gsk_gpu_frame_reserve_vertex_data (frame, op_class->vertex_size);
self->n_ops = 1;
{
GskGpuShaderOp *self;
self = (GskGpuShaderOp *) gsk_gpu_op_alloc (frame, &op_class->parent_class);
*((gpointer *) out_vertex_data) = gsk_gpu_frame_get_vertex_data (frame, self->vertex_offset);
self->variation = variation;
self->clip = clip;
self->vertex_offset = vertex_offset;
if (desc)
self->desc = g_object_ref (desc);
else
self->desc = NULL;
self->n_ops = 1;
}
*((gpointer *) out_vertex_data) = gsk_gpu_frame_get_vertex_data (frame, vertex_offset);
}