From b43950d0f76a04a2687a36881ddf823723337dae Mon Sep 17 00:00:00 2001
From: Benjamin Otte
Date: Thu, 14 Mar 2024 03:42:21 +0100
Subject: [PATCH 1/2] gpu: Don't reuse frames while they're in use

This copies the Vulkan idea of using a fence at the end of command
submission and waiting until it gets signaled before reusing the frame.

This frees up the GL driver from doing the work of making buffers etc
reusable and instead allocates new ones when they're still in use and is
a pretty massive performance win.

The fence is reset to NULL once it has been deleted, so that neither
is_busy() nor a repeated cleanup() is ever handed a stale sync object.
---
 gsk/gpu/gskglframe.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/gsk/gpu/gskglframe.c b/gsk/gpu/gskglframe.c
index a92c8333a4..fa200409ef 100644
--- a/gsk/gpu/gskglframe.c
+++ b/gsk/gpu/gskglframe.c
@@ -20,6 +20,7 @@ struct _GskGLFrame
 
   GLuint globals_buffer_id;
   guint next_texture_slot;
+  GLsync sync;
 
   GHashTable *vaos;
 };
@@ -34,7 +35,12 @@ G_DEFINE_TYPE (GskGLFrame, gsk_gl_frame, GSK_TYPE_GPU_FRAME)
 static gboolean
 gsk_gl_frame_is_busy (GskGpuFrame *frame)
 {
-  return FALSE;
+  GskGLFrame *self = GSK_GL_FRAME (frame);
+
+  if (!self->sync)
+    return FALSE;
+
+  return glClientWaitSync (self->sync, 0, 0) == GL_TIMEOUT_EXPIRED;
 }
 
 static void
@@ -50,6 +56,14 @@ gsk_gl_frame_cleanup (GskGpuFrame *frame)
 {
   GskGLFrame *self = GSK_GL_FRAME (frame);
 
+  if (self->sync)
+    {
+      glClientWaitSync (self->sync, 0, -1);
+      glDeleteSync (self->sync);
+      /* forget the deleted fence so it is never waited on or deleted again */
+      self->sync = NULL;
+    }
+
   self->next_texture_slot = 0;
 
   GSK_GPU_FRAME_CLASS (gsk_gl_frame_parent_class)->cleanup (frame);
@@ -160,6 +174,8 @@ gsk_gl_frame_submit (GskGpuFrame *frame,
     {
       op = gsk_gpu_op_gl_command (op, frame, &state);
     }
+
+  self->sync = glFenceSync (GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
 }
 
 static void

From 30dddf241267eb2b816044267a3ce5d4c0676186 Mon Sep 17 00:00:00 2001
From: Benjamin Otte
Date: Thu, 14 Mar 2024 06:06:33 +0100
Subject: [PATCH 2/2] gpu: Refactor waiting for frames

Instead of having renderer API to wait for any number of frames, just
have gsk_gpu_frame_wait() to wait for a single frame.
This unifies behavior on Vulkan and GL, because unlike Vulkan, GL does not allow waiting for multiple fences. To make up for it, we replace waiting for multiple frames with finding the frame with the earliest timestamp and waiting for that one. Also implement wait() for GL. --- gsk/gpu/gskglframe.c | 12 +++++++ gsk/gpu/gskgpuframe.c | 6 ++++ gsk/gpu/gskgpuframeprivate.h | 2 ++ gsk/gpu/gskgpurenderer.c | 60 ++++++++++++++++----------------- gsk/gpu/gskgpurendererprivate.h | 3 -- gsk/gpu/gsknglrenderer.c | 8 ----- gsk/gpu/gskvulkanframe.c | 16 +++++++++ gsk/gpu/gskvulkanrenderer.c | 26 -------------- 8 files changed, 65 insertions(+), 68 deletions(-) diff --git a/gsk/gpu/gskglframe.c b/gsk/gpu/gskglframe.c index fa200409ef..6e96e0dfc3 100644 --- a/gsk/gpu/gskglframe.c +++ b/gsk/gpu/gskglframe.c @@ -43,6 +43,17 @@ gsk_gl_frame_is_busy (GskGpuFrame *frame) return glClientWaitSync (self->sync, 0, 0) == GL_TIMEOUT_EXPIRED; } +static void +gsk_gl_frame_wait (GskGpuFrame *frame) +{ + GskGLFrame *self = GSK_GL_FRAME (frame); + + if (!self->sync) + return; + + glClientWaitSync (self->sync, 0, G_MAXINT64); +} + static void gsk_gl_frame_setup (GskGpuFrame *frame) { @@ -194,6 +205,7 @@ gsk_gl_frame_class_init (GskGLFrameClass *klass) GObjectClass *object_class = G_OBJECT_CLASS (klass); gpu_frame_class->is_busy = gsk_gl_frame_is_busy; + gpu_frame_class->wait = gsk_gl_frame_wait; gpu_frame_class->setup = gsk_gl_frame_setup; gpu_frame_class->cleanup = gsk_gl_frame_cleanup; gpu_frame_class->upload_texture = gsk_gl_frame_upload_texture; diff --git a/gsk/gpu/gskgpuframe.c b/gsk/gpu/gskgpuframe.c index 957a2095ae..7b60d5a32b 100644 --- a/gsk/gpu/gskgpuframe.c +++ b/gsk/gpu/gskgpuframe.c @@ -505,6 +505,12 @@ gsk_gpu_frame_is_busy (GskGpuFrame *self) return GSK_GPU_FRAME_GET_CLASS (self)->is_busy (self); } +void +gsk_gpu_frame_wait (GskGpuFrame *self) +{ + GSK_GPU_FRAME_GET_CLASS (self)->wait (self); +} + static void copy_texture (gpointer user_data, GdkTexture *texture) diff --git 
a/gsk/gpu/gskgpuframeprivate.h b/gsk/gpu/gskgpuframeprivate.h index f82276fab6..db0681a9cc 100644 --- a/gsk/gpu/gskgpuframeprivate.h +++ b/gsk/gpu/gskgpuframeprivate.h @@ -24,6 +24,7 @@ struct _GskGpuFrameClass GObjectClass parent_class; gboolean (* is_busy) (GskGpuFrame *self); + void (* wait) (GskGpuFrame *self); void (* setup) (GskGpuFrame *self); void (* cleanup) (GskGpuFrame *self); GskGpuImage * (* upload_texture) (GskGpuFrame *self, @@ -69,6 +70,7 @@ GskGpuBuffer * gsk_gpu_frame_write_storage_buffer (GskGpuF gsize *out_offset); gboolean gsk_gpu_frame_is_busy (GskGpuFrame *self); +void gsk_gpu_frame_wait (GskGpuFrame *self); void gsk_gpu_frame_render (GskGpuFrame *self, gint64 timestamp, diff --git a/gsk/gpu/gskgpurenderer.c b/gsk/gpu/gskgpurenderer.c index b6703f676a..986a07544c 100644 --- a/gsk/gpu/gskgpurenderer.c +++ b/gsk/gpu/gskgpurenderer.c @@ -166,24 +166,36 @@ static GskGpuFrame * gsk_gpu_renderer_get_frame (GskGpuRenderer *self) { GskGpuRendererPrivate *priv = gsk_gpu_renderer_get_instance_private (self); + GskGpuFrame *earliest_frame = NULL; + gint64 earliest_time = G_MAXINT64; guint i; - while (TRUE) + for (i = 0; i < G_N_ELEMENTS (priv->frames); i++) { - for (i = 0; i < G_N_ELEMENTS (priv->frames); i++) - { - if (priv->frames[i] == NULL) - { - priv->frames[i] = gsk_gpu_renderer_create_frame (self); - return priv->frames[i]; - } + gint64 timestamp; - if (!gsk_gpu_frame_is_busy (priv->frames[i])) - return priv->frames[i]; + if (priv->frames[i] == NULL) + { + priv->frames[i] = gsk_gpu_renderer_create_frame (self); + return priv->frames[i]; } - GSK_GPU_RENDERER_GET_CLASS (self)->wait (self, priv->frames, GSK_GPU_MAX_FRAMES); + if (!gsk_gpu_frame_is_busy (priv->frames[i])) + return priv->frames[i]; + + timestamp = gsk_gpu_frame_get_timestamp (priv->frames[i]); + if (timestamp < earliest_time) + { + earliest_time = timestamp; + earliest_frame = priv->frames[i]; + } } + + g_assert (earliest_frame); + + gsk_gpu_frame_wait (earliest_frame); + + return 
earliest_frame; } static gboolean @@ -217,31 +229,17 @@ gsk_gpu_renderer_unrealize (GskRenderer *renderer) { GskGpuRenderer *self = GSK_GPU_RENDERER (renderer); GskGpuRendererPrivate *priv = gsk_gpu_renderer_get_instance_private (self); - gsize i, j; + gsize i; gsk_gpu_renderer_make_current (self); - while (TRUE) + for (i = 0; i < G_N_ELEMENTS (priv->frames); i++) { - for (i = 0, j = 0; i < G_N_ELEMENTS (priv->frames); i++) - { - if (priv->frames[i] == NULL) - break; - if (gsk_gpu_frame_is_busy (priv->frames[i])) - { - if (i > j) - { - priv->frames[j] = priv->frames[i]; - priv->frames[i] = NULL; - } - j++; - continue; - } - g_clear_object (&priv->frames[i]); - } - if (j == 0) + if (priv->frames[i] == NULL) break; - GSK_GPU_RENDERER_GET_CLASS (self)->wait (self, priv->frames, j); + if (gsk_gpu_frame_is_busy (priv->frames[i])) + gsk_gpu_frame_wait (priv->frames[i]); + g_clear_object (&priv->frames[i]); } g_clear_object (&priv->context); diff --git a/gsk/gpu/gskgpurendererprivate.h b/gsk/gpu/gskgpurendererprivate.h index 4b2710d9da..8a6342c32a 100644 --- a/gsk/gpu/gskgpurendererprivate.h +++ b/gsk/gpu/gskgpurendererprivate.h @@ -35,9 +35,6 @@ struct _GskGpuRendererClass void (* make_current) (GskGpuRenderer *self); GskGpuImage * (* get_backbuffer) (GskGpuRenderer *self); - void (* wait) (GskGpuRenderer *self, - GskGpuFrame **frame, - gsize n_frames); double (* get_scale) (GskGpuRenderer *self); GdkDmabufFormats * (* get_dmabuf_formats) (GskGpuRenderer *self); diff --git a/gsk/gpu/gsknglrenderer.c b/gsk/gpu/gsknglrenderer.c index b08a0f06ce..5c2653d941 100644 --- a/gsk/gpu/gsknglrenderer.c +++ b/gsk/gpu/gsknglrenderer.c @@ -119,13 +119,6 @@ gsk_ngl_renderer_get_backbuffer (GskGpuRenderer *renderer) return self->backbuffer; } -static void -gsk_ngl_renderer_wait (GskGpuRenderer *self, - GskGpuFrame **frame, - gsize n_frames) -{ -} - static double gsk_ngl_renderer_get_scale (GskGpuRenderer *self) { @@ -164,7 +157,6 @@ gsk_ngl_renderer_class_init (GskNglRendererClass 
*klass) gpu_renderer_class->create_context = gsk_ngl_renderer_create_context; gpu_renderer_class->make_current = gsk_ngl_renderer_make_current; gpu_renderer_class->get_backbuffer = gsk_ngl_renderer_get_backbuffer; - gpu_renderer_class->wait = gsk_ngl_renderer_wait; gpu_renderer_class->get_scale = gsk_ngl_renderer_get_scale; gpu_renderer_class->get_dmabuf_formats = gsk_ngl_renderer_get_dmabuf_formats; diff --git a/gsk/gpu/gskvulkanframe.c b/gsk/gpu/gskvulkanframe.c index 9dad7d4e16..84832df7bd 100644 --- a/gsk/gpu/gskvulkanframe.c +++ b/gsk/gpu/gskvulkanframe.c @@ -74,6 +74,21 @@ gsk_vulkan_frame_is_busy (GskGpuFrame *frame) return vkGetFenceStatus (device, self->vk_fence) == VK_NOT_READY; } +static void +gsk_vulkan_frame_wait (GskGpuFrame *frame) +{ + GskVulkanFrame *self = GSK_VULKAN_FRAME (frame); + VkDevice vk_device; + + vk_device = gsk_vulkan_device_get_vk_device (GSK_VULKAN_DEVICE (gsk_gpu_frame_get_device (frame))); + + GSK_VK_CHECK (vkWaitForFences, vk_device, + 1, + &self->vk_fence, + VK_FALSE, + INT64_MAX); +} + static void gsk_vulkan_frame_setup (GskGpuFrame *frame) { @@ -387,6 +402,7 @@ gsk_vulkan_frame_class_init (GskVulkanFrameClass *klass) GObjectClass *object_class = G_OBJECT_CLASS (klass); gpu_frame_class->is_busy = gsk_vulkan_frame_is_busy; + gpu_frame_class->wait = gsk_vulkan_frame_wait; gpu_frame_class->setup = gsk_vulkan_frame_setup; gpu_frame_class->cleanup = gsk_vulkan_frame_cleanup; gpu_frame_class->upload_texture = gsk_vulkan_frame_upload_texture; diff --git a/gsk/gpu/gskvulkanrenderer.c b/gsk/gpu/gskvulkanrenderer.c index 997a6d9448..544902220e 100644 --- a/gsk/gpu/gskvulkanrenderer.c +++ b/gsk/gpu/gskvulkanrenderer.c @@ -129,31 +129,6 @@ gsk_vulkan_renderer_get_backbuffer (GskGpuRenderer *renderer) return self->targets[gdk_vulkan_context_get_draw_index (context)]; } -static void -gsk_vulkan_renderer_wait (GskGpuRenderer *renderer, - GskGpuFrame **frames, - gsize n_frames) -{ - VkFence *fences; - VkDevice vk_device; - gsize i; - - 
vk_device = gsk_vulkan_device_get_vk_device (GSK_VULKAN_DEVICE (gsk_gpu_renderer_get_device (renderer))); - - fences = g_alloca (sizeof (VkFence) * n_frames); - - for (i = 0; i < n_frames; i++) - { - fences[i] = gsk_vulkan_frame_get_vk_fence (GSK_VULKAN_FRAME (frames[i])); - } - - GSK_VK_CHECK (vkWaitForFences, vk_device, - n_frames, - fences, - VK_FALSE, - INT64_MAX); -} - static GdkDmabufFormats * gsk_vulkan_renderer_get_dmabuf_formats (GskGpuRenderer *renderer) { @@ -189,7 +164,6 @@ gsk_vulkan_renderer_class_init (GskVulkanRendererClass *klass) gpu_renderer_class->create_context = gsk_vulkan_renderer_create_context; gpu_renderer_class->make_current = gsk_vulkan_renderer_make_current; gpu_renderer_class->get_backbuffer = gsk_vulkan_renderer_get_backbuffer; - gpu_renderer_class->wait = gsk_vulkan_renderer_wait; gpu_renderer_class->get_dmabuf_formats = gsk_vulkan_renderer_get_dmabuf_formats; renderer_class->unrealize = gsk_vulkan_renderer_unrealize;