From 8b1fcb58e84341913c6fc0d2d8414688aeae26af Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Tue, 6 Apr 2021 21:55:50 -0400 Subject: [PATCH 1/7] ngl: Use fp16 for colors This reduces the size of our Vertex struct from 48 to 32 bytes. It would be nicer if we could store the colors in fp16 format in the rendernodes, and avoid conversion here. But this is still good. --- gsk/meson.build | 1 + gsk/ngl/fp16private.h | 111 ++++++++++++++++++++++++++++++++ gsk/ngl/gsknglcommandqueue.c | 4 +- gsk/ngl/gsknglrenderjob.c | 120 +++++++++++++++++++++-------------- gsk/ngl/gskngltypesprivate.h | 4 +- 5 files changed, 188 insertions(+), 52 deletions(-) create mode 100644 gsk/ngl/fp16private.h diff --git a/gsk/meson.build b/gsk/meson.build index 5c381b51c9..11be6a03f6 100644 --- a/gsk/meson.build +++ b/gsk/meson.build @@ -221,6 +221,7 @@ libgsk = static_library('gsk', '-DGTK_COMPILATION', '-DG_LOG_DOMAIN="Gsk"', '-DG_LOG_STRUCTURED=1', + '-mf16c' ] + common_cflags, link_with: libgdk, ) diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h new file mode 100644 index 0000000000..b285bf450c --- /dev/null +++ b/gsk/ngl/fp16private.h @@ -0,0 +1,111 @@ +/* fp16private.h + * + * Copyright 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#ifndef __FP16_PRIVATE_H__ +#define __FP16_PRIVATE_H__ + +#include <glib.h> +#include <graphene.h> + +#ifdef GRAPHENE_USE_SSE +#include <immintrin.h> +#endif + +G_BEGIN_DECLS + +#ifdef GRAPHENE_USE_SSE + +static inline void +float_to_half4 (const float f[4], + guint16 h[4]) +{ + __m128 s = _mm_loadu_ps (f); + __m128i i = _mm_cvtps_ph (s, 0); + _mm_storel_epi64 ((__m128i*)h, i); +} + +static inline void +half_to_float4 (const guint16 h[4], + float f[4]) +{ + __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h); + __m128 s = _mm_cvtph_ps (i); + _mm_storeu_ps (f, s); /* unaligned store: f carries no 16-byte alignment guarantee */ +} + +#else /* GRAPHENE_USE_SSE */ + +static inline guint +as_uint (const float x) +{ + return *(guint*)&x; +} + +static inline float +as_float (const guint x) +{ + return *(float*)&x; +} + +// IEEE-754 16-bit floating-point format (without infinity): 1-5-10 + +static inline float +half_to_float (const guint16 x) +{ + const guint e = (x&0x7C00)>>10; // exponent + const guint m = (x&0x03FF)<<13; // mantissa + const guint v = as_uint((float)m)>>23; + return as_float((guint)(x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000))); // sign is widened to guint first: shifting a promoted int into bit 31 is undefined +} + +static inline guint16 +float_to_half (const float x) +{ + const guint b = as_uint(x)+0x00001000; // round-to-nearest-even + const guint e = (b&0x7F800000)>>23; // exponent + const guint m = b&0x007FFFFF; // mantissa + return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate +} + +static inline void +float_to_half4 (const float f[4], + guint16 h[4]) +{ + h[0] = float_to_half (f[0]); + h[1] = float_to_half (f[1]); + h[2] = float_to_half (f[2]); + h[3] = float_to_half (f[3]); +} + +static inline void +half_to_float4 (const guint16 h[4], + float f[4]) +{ + f[0] = half_to_float (h[0]); + f[1] = half_to_float (h[1]); + f[2] = half_to_float (h[2]); + f[3] = half_to_float (h[3]); +} + +#endif /* 
GRAPHENE_USE_SSE */ + +G_END_DECLS + +#endif diff --git a/gsk/ngl/gsknglcommandqueue.c b/gsk/ngl/gsknglcommandqueue.c index e160076c57..bdfef2f35c 100644 --- a/gsk/ngl/gsknglcommandqueue.c +++ b/gsk/ngl/gsknglcommandqueue.c @@ -1003,13 +1003,13 @@ gsk_ngl_command_queue_execute (GskNglCommandQueue *self, /* 2 = color location */ glEnableVertexAttribArray (2); - glVertexAttribPointer (2, 4, GL_FLOAT, GL_FALSE, + glVertexAttribPointer (2, 4, GL_HALF_FLOAT, GL_FALSE, sizeof (GskNglDrawVertex), (void *) G_STRUCT_OFFSET (GskNglDrawVertex, color)); /* 3 = color2 location */ glEnableVertexAttribArray (3); - glVertexAttribPointer (3, 4, GL_FLOAT, GL_FALSE, + glVertexAttribPointer (3, 4, GL_HALF_FLOAT, GL_FALSE, sizeof (GskNglDrawVertex), (void *) G_STRUCT_OFFSET (GskNglDrawVertex, color2)); diff --git a/gsk/ngl/gsknglrenderjob.c b/gsk/ngl/gsknglrenderjob.c index 3e823dedab..ecaadc7458 100644 --- a/gsk/ngl/gsknglrenderjob.c +++ b/gsk/ngl/gsknglrenderjob.c @@ -43,6 +43,7 @@ #include "gsknglshadowlibraryprivate.h" #include "ninesliceprivate.h" +#include "fp16private.h" #define ORTHO_NEAR_PLANE -10000 #define ORTHO_FAR_PLANE 10000 @@ -884,6 +885,13 @@ gsk_ngl_render_job_update_clip (GskNglRenderJob *job, return TRUE; } +static inline void +rgba_to_half (const GdkRGBA *c, + guint16 h[4]) +{ + float_to_half4 ((const float *)c, h); +} + /* fill_vertex_data */ static void gsk_ngl_render_job_draw_coords (GskNglRenderJob *job, @@ -898,13 +906,16 @@ gsk_ngl_render_job_draw_coords (GskNglRenderJob *job, const GdkRGBA *color) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); + guint16 c[4]; - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { min_u, min_v }, { color->red, color->green, color->blue, color->alpha } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { color->red, color->green, color->blue, color->alpha } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { color->red, color->green, 
color->blue, color->alpha } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { max_u, max_v }, { color->red, color->green, color->blue, color->alpha } }; - vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { color->red, color->green, color->blue, color->alpha } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { color->red, color->green, color->blue, color->alpha } }; + rgba_to_half (color, c); + + vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { min_u, min_v }, { c[0], c[1], c[2],c[3] } }; + vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { c[0], c[1], c[2],c[3] } }; + vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { c[0], c[1], c[2],c[3] } }; + vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { max_u, max_v }, { c[0], c[1], c[2],c[3] } }; + vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { c[0], c[1], c[2],c[3] } }; + vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { c[0], c[1], c[2],c[3] } }; } /* load_vertex_data_with_region */ @@ -1658,7 +1669,7 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, const GskRenderNode *node) { const GskRoundedRect *rounded_outline = gsk_border_node_get_outline (node); - const GdkRGBA *c = gsk_border_node_get_colors (node); + const GdkRGBA *colors = gsk_border_node_get_colors (node); const float *widths = gsk_border_node_get_widths (node); struct { float w; @@ -1669,6 +1680,7 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, float max_x = min_x + node->bounds.size.width; float max_y = min_y + node->bounds.size.height; GskRoundedRect outline; + guint16 c[4]; memset (sizes, 0, sizeof sizes); @@ -1712,52 +1724,60 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0].red, c[0].green, 
c[0].blue, c[0].alpha } }; - vertices[1] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } }; + rgba_to_half (&colors[0], c); - vertices[3] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 1, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } }; - vertices[4] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } }; + vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + + vertices[3] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; } if (widths[1] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 0, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } }; - vertices[1] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } }; + rgba_to_half (&colors[1], c); - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[1].red, c[1].green, 
c[1].blue, c[1].alpha } }; - vertices[4] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } }; + vertices[0] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + + vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; } if (widths[2] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 0, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; - vertices[2] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; + rgba_to_half (&colors[2], c); - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; - vertices[4] = (GskNglDrawVertex) { { min_x , max_y }, { 0, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; - vertices[5] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } }; + vertices[0] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[1] = 
(GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + + vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { { min_x , max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; } if (widths[3] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; - vertices[2] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; + rgba_to_half (&colors[3], c); - vertices[3] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 1, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; - vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; - vertices[5] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } }; + vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + + vertices[3] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { { 
min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; } gsk_ngl_render_job_end_draw (job); @@ -1775,8 +1795,8 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, const GskRenderNode *node2) { const GskRenderNode *child = gsk_rounded_clip_node_get_child (node); - const GdkRGBA *c2 = gsk_color_node_get_color (child); - const GdkRGBA *c = gsk_border_node_get_colors (node2); + const GdkRGBA *color = gsk_border_node_get_colors (node2); + const GdkRGBA *color2 = gsk_color_node_get_color (child); const GskRoundedRect *rounded_outline = gsk_border_node_get_outline (node2); const float *widths = gsk_border_node_get_widths (node2); float min_x = job->offset_x + node2->bounds.origin.x; @@ -1785,10 +1805,14 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, float max_y = min_y + node2->bounds.size.height; GskRoundedRect outline; GskNglDrawVertex *vertices; + guint16 c[4], c2[4]; if (node_is_invisible (node2)) return; + rgba_to_half (&color[0], c); + rgba_to_half (color2, c2); + gsk_ngl_render_job_transform_rounded_rect (job, rounded_outline, &outline); gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, filled_border)); @@ -1803,12 +1827,12 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; - 
vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } }; + vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; gsk_ngl_render_job_end_draw (job); } @@ -2690,7 +2714,7 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, guint last_texture = 0; GskNglDrawVertex *vertices; guint used = 0; - GdkRGBA c; + guint16 c[4]; const PangoGlyphInfo *gi; guint i; int yshift; @@ -2703,9 +2727,9 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, * We tell the shader by setting the color to vec4(-1). 
*/ if (!force_color && gsk_text_node_has_color_glyphs (node)) - c = (GdkRGBA) { -1.f, -1.f, -1.f, -1.f }; + rgba_to_half (&(GdkRGBA){ -1.f, -1.f, -1.f, -1.f }, c); else - c = *color; + rgba_to_half (color, c); lookup.font = (PangoFont *)font; lookup.scale = (guint) (text_scale * 1024); @@ -2783,13 +2807,13 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, glyph_x2 = glyph_x + glyph->ink_rect.width; glyph_y2 = glyph_y + glyph->ink_rect.height; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y }, { tx, ty }, { c.red, c.green, c.blue, c.alpha } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c.red, c.green, c.blue, c.alpha } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c.red, c.green, c.blue, c.alpha } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y }, { tx, ty }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y2 }, { tx2, ty2 }, { c.red, c.green, c.blue, c.alpha } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c.red, c.green, c.blue, c.alpha } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c.red, c.green, c.blue, c.alpha } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y2 }, { tx2, ty2 }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c[0], c[1], c[2], c[3] } }; batch->draw.vbo_count += GSK_NGL_N_VERTICES; used++; diff --git a/gsk/ngl/gskngltypesprivate.h b/gsk/ngl/gskngltypesprivate.h index a65130c19f..dda4365271 100644 --- a/gsk/ngl/gskngltypesprivate.h +++ 
b/gsk/ngl/gskngltypesprivate.h @@ -55,8 +55,8 @@ struct _GskNglDrawVertex { float position[2]; float uv[2]; - float color[4]; - float color2[4]; + guint16 color[4]; + guint16 color2[4]; }; G_END_DECLS From ae7f52d3019957d4876d0e802446020368953ca9 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Tue, 6 Apr 2021 22:24:37 -0400 Subject: [PATCH 2/7] ngl: Reduce vertex storage We only have one shader that uses the color2 attribute, and it doesn't use the uv attribute, so save vertex memory by putting those in the same space. This reduces the per-vertex space from 32 to 24 bytes. --- gsk/ngl/gsknglrenderjob.c | 84 ++++++++++++++++++------------------ gsk/ngl/gskngltypesprivate.h | 6 ++- 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/gsk/ngl/gsknglrenderjob.c b/gsk/ngl/gsknglrenderjob.c index ecaadc7458..3f03148a84 100644 --- a/gsk/ngl/gsknglrenderjob.c +++ b/gsk/ngl/gsknglrenderjob.c @@ -910,12 +910,12 @@ gsk_ngl_render_job_draw_coords (GskNglRenderJob *job, rgba_to_half (color, c); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { min_u, min_v }, { c[0], c[1], c[2],c[3] } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { c[0], c[1], c[2],c[3] } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { c[0], c[1], c[2],c[3] } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { max_u, max_v }, { c[0], c[1], c[2],c[3] } }; - vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { c[0], c[1], c[2],c[3] } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { c[0], c[1], c[2],c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { min_u, min_v }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { min_u, max_v }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { max_u, min_v }, .color = { c[0], c[1], 
c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { max_u, max_v }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { min_u, max_v }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { max_u, min_v }, .color = { c[0], c[1], c[2], c[3] } }; } /* load_vertex_data_with_region */ @@ -1726,13 +1726,13 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, rgba_to_half (&colors[0], c); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[3] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], 
c[2], c[3] } }; } if (widths[1] > 0) @@ -1741,13 +1741,13 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, rgba_to_half (&colors[1], c); - vertices[0] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; } if (widths[2] > 0) @@ -1756,13 +1756,13 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, rgba_to_half (&colors[2], c); - vertices[0] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { { 
max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { { min_x , max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x , max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; } if (widths[3] > 0) @@ -1771,13 +1771,13 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, rgba_to_half (&colors[3], c); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 
1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[3] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 1, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; } gsk_ngl_render_job_end_draw (job); @@ -1827,12 +1827,12 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; - vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; - vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; - vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; - vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; - vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0], c[1], c[2], c[3] }, { c2[0], c2[1], c2[2], c2[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { c[0], c[1], c[2], 
c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; gsk_ngl_render_job_end_draw (job); } @@ -2807,13 +2807,13 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, glyph_x2 = glyph_x + glyph->ink_rect.width; glyph_y2 = glyph_y + glyph->ink_rect.height; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y }, { tx, ty }, { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y }, .uv = { tx, ty }, .color = { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y2 }, .uv = { tx, ty2 }, .color = { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y }, .uv = { tx2, ty }, .color = { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y2 }, { tx2, ty2 }, { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c[0], c[1], c[2], c[3] } }; - *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y2 }, .uv = { tx2, ty2 }, .color = { c[0], c[1], c[2], 
c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y2 }, .uv = { tx, ty2 }, .color = { c[0], c[1], c[2], c[3] } }; + *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y }, .uv = { tx2, ty }, .color = { c[0], c[1], c[2], c[3] } }; batch->draw.vbo_count += GSK_NGL_N_VERTICES; used++; diff --git a/gsk/ngl/gskngltypesprivate.h b/gsk/ngl/gskngltypesprivate.h index dda4365271..ce2319ef05 100644 --- a/gsk/ngl/gskngltypesprivate.h +++ b/gsk/ngl/gskngltypesprivate.h @@ -54,9 +54,11 @@ typedef struct _GskNglDriver GskNglDriver; struct _GskNglDrawVertex { float position[2]; - float uv[2]; + union { + float uv[2]; + guint16 color2[4]; + }; guint16 color[4]; - guint16 color2[4]; }; G_END_DECLS From 7fe7b7ac7d260fedd7bb4485047be6cf2a738d83 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Tue, 6 Apr 2021 23:11:27 -0400 Subject: [PATCH 3/7] ngl: Use a constant for (fp16)-1 No need to convert that again for every glyph. --- gsk/ngl/fp16private.h | 2 ++ gsk/ngl/gsknglrenderjob.c | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h index b285bf450c..3f74605288 100644 --- a/gsk/ngl/fp16private.h +++ b/gsk/ngl/fp16private.h @@ -30,6 +30,8 @@ G_BEGIN_DECLS +#define FP16_MINUS_ONE ((guint16)48128) + #ifdef GRAPHENE_USE_SSE static inline void diff --git a/gsk/ngl/gsknglrenderjob.c b/gsk/ngl/gsknglrenderjob.c index 3f03148a84..56832efeea 100644 --- a/gsk/ngl/gsknglrenderjob.c +++ b/gsk/ngl/gsknglrenderjob.c @@ -2714,7 +2714,7 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, guint last_texture = 0; GskNglDrawVertex *vertices; guint used = 0; - guint16 c[4]; + guint16 c[4] = { FP16_MINUS_ONE, FP16_MINUS_ONE, FP16_MINUS_ONE, FP16_MINUS_ONE }; const PangoGlyphInfo *gi; guint i; int yshift; @@ -2726,9 +2726,7 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job, /* If the font has color glyphs, we don't need to recolor anything. 
* We tell the shader by setting the color to vec4(-1). */ - if (!force_color && gsk_text_node_has_color_glyphs (node)) - rgba_to_half (&(GdkRGBA){ -1.f, -1.f, -1.f, -1.f }, c); - else + if (force_color || !gsk_text_node_has_color_glyphs (node)) rgba_to_half (color, c); lookup.font = (PangoFont *)font; From 38c6fe2ce427375eb9ff7af43ceb5c003de65643 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Tue, 6 Apr 2021 23:52:33 -0400 Subject: [PATCH 4/7] Move color conversion up Doing the color conversion early lets us reuse the results whenever we do more than one draw per node. --- gsk/ngl/fp16private.h | 2 + gsk/ngl/gsknglrenderjob.c | 184 +++++++++++++++++++++----------------- 2 files changed, 105 insertions(+), 81 deletions(-) diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h index 3f74605288..f54f7307a6 100644 --- a/gsk/ngl/fp16private.h +++ b/gsk/ngl/fp16private.h @@ -30,6 +30,8 @@ G_BEGIN_DECLS +#define FP16_ZERO ((guint16)0) +#define FP16_ONE ((guint16)15360) #define FP16_MINUS_ONE ((guint16)48128) #ifdef GRAPHENE_USE_SSE diff --git a/gsk/ngl/gsknglrenderjob.c b/gsk/ngl/gsknglrenderjob.c index 56832efeea..b1bda44790 100644 --- a/gsk/ngl/gsknglrenderjob.c +++ b/gsk/ngl/gsknglrenderjob.c @@ -886,10 +886,10 @@ gsk_ngl_render_job_update_clip (GskNglRenderJob *job, } static inline void -rgba_to_half (const GdkRGBA *c, +rgba_to_half (const GdkRGBA *rgba, guint16 h[4]) { - float_to_half4 ((const float *)c, h); + float_to_half4 ((const float *)rgba, h); } /* fill_vertex_data */ @@ -903,12 +903,9 @@ gsk_ngl_render_job_draw_coords (GskNglRenderJob *job, float min_v, float max_u, float max_v, - const GdkRGBA *color) + guint16 c[4]) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - guint16 c[4]; - - rgba_to_half (color, c); vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { min_u, min_v }, .color = { c[0], c[1], c[2], c[3] } }; vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 
min_u, max_v }, .color = { c[0], c[1], c[2], c[3] } }; @@ -923,7 +920,7 @@ static inline void gsk_ngl_render_job_draw_offscreen_with_color (GskNglRenderJob *job, const graphene_rect_t *bounds, const GskNglRenderOffscreen *offscreen, - const GdkRGBA *color) + guint16 color[4]) { float min_x = job->offset_x + bounds->origin.x; float min_y = job->offset_y + bounds->origin.y; @@ -943,7 +940,8 @@ gsk_ngl_render_job_draw_offscreen (GskNglRenderJob *job, const graphene_rect_t *bounds, const GskNglRenderOffscreen *offscreen) { - gsk_ngl_render_job_draw_offscreen_with_color (job, bounds, offscreen, &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f }); + gsk_ngl_render_job_draw_offscreen_with_color (job, bounds, offscreen, + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }); } /* load_float_vertex_data */ @@ -953,7 +951,7 @@ gsk_ngl_render_job_draw_with_color (GskNglRenderJob *job, float y, float width, float height, - const GdkRGBA *color) + guint16 color[4]) { float min_x = job->offset_x + x; float min_y = job->offset_y + y; @@ -970,14 +968,15 @@ gsk_ngl_render_job_draw (GskNglRenderJob *job, float width, float height) { - gsk_ngl_render_job_draw_with_color (job, x, y, width, height, &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f }); + gsk_ngl_render_job_draw_with_color (job, x, y, width, height, + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }); } /* load_vertex_data */ static inline void gsk_ngl_render_job_draw_rect_with_color (GskNglRenderJob *job, const graphene_rect_t *bounds, - const GdkRGBA *color) + guint16 color[4]) { gsk_ngl_render_job_draw_with_color (job, bounds->origin.x, @@ -1006,11 +1005,12 @@ gsk_ngl_render_job_draw_offscreen_rect (GskNglRenderJob *job, float min_y = job->offset_y + bounds->origin.y; float max_x = min_x + bounds->size.width; float max_y = min_y + bounds->size.height; + guint16 color[4] = { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }; gsk_ngl_render_job_draw_coords (job, min_x, min_y, max_x, max_y, 0, 1, 1, 0, - &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f } ); + color);
} static inline void @@ -1264,7 +1264,7 @@ blur_offscreen (GskNglRenderJob *job, gsk_ngl_render_job_draw_coords (job, 0, 0, texture_to_blur_width, texture_to_blur_height, 0, 1, 1, 0, - &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f }); + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }); gsk_ngl_render_job_end_draw (job); /* Bind second pass framebuffer and clear it */ @@ -1291,7 +1291,7 @@ blur_offscreen (GskNglRenderJob *job, gsk_ngl_render_job_draw_coords (job, 0, 0, texture_to_blur_width, texture_to_blur_height, 0, 1, 1, 0, - &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f }); + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }); gsk_ngl_render_job_end_draw (job); gsk_ngl_render_job_pop_modelview (job); @@ -1364,10 +1364,11 @@ static inline void gsk_ngl_render_job_visit_color_node (GskNglRenderJob *job, const GskRenderNode *node) { + guint16 color[4]; + + rgba_to_half (gsk_color_node_get_color (node), color); gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color)); - gsk_ngl_render_job_draw_rect_with_color (job, - &node->bounds, - gsk_color_node_get_color (node)); + gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, color); gsk_ngl_render_job_end_draw (job); } @@ -1638,28 +1639,41 @@ gsk_ngl_render_job_visit_rect_border_node (GskNglRenderJob *job, const float *widths = gsk_border_node_get_widths (node); const graphene_point_t *origin = &node->bounds.origin; const graphene_size_t *size = &node->bounds.size; + guint16 color[4]; gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color)); if (widths[0] > 0) - gsk_ngl_render_job_draw_rect_with_color (job, - &GRAPHENE_RECT_INIT (origin->x, origin->y, size->width - widths[1], widths[0]), - &colors[0]); + { + rgba_to_half (&colors[0], color); + gsk_ngl_render_job_draw_rect_with_color (job, + &GRAPHENE_RECT_INIT (origin->x, origin->y, size->width - widths[1], widths[0]), + color); + } if (widths[1] > 0) - gsk_ngl_render_job_draw_rect_with_color (job, - &GRAPHENE_RECT_INIT (origin->x + size->width - 
widths[1], origin->y, widths[1], size->height - widths[2]), - &colors[0]); + { + rgba_to_half (&colors[1], color); + gsk_ngl_render_job_draw_rect_with_color (job, + &GRAPHENE_RECT_INIT (origin->x + size->width - widths[1], origin->y, widths[1], size->height - widths[2]), + color); + } if (widths[2] > 0) - gsk_ngl_render_job_draw_rect_with_color (job, - &GRAPHENE_RECT_INIT (origin->x + widths[3], origin->y + size->height - widths[2], size->width - widths[1], widths[2]), - &colors[0]); + { + rgba_to_half (&colors[2], color); + gsk_ngl_render_job_draw_rect_with_color (job, + &GRAPHENE_RECT_INIT (origin->x + widths[3], origin->y + size->height - widths[2], size->width - widths[1], widths[2]), + color); + } if (widths[3] > 0) - gsk_ngl_render_job_draw_rect_with_color (job, - &GRAPHENE_RECT_INIT (origin->x, origin->y + widths[0], widths[3], size->height - widths[0]), - &colors[0]); + { + rgba_to_half (&colors[3], color); + gsk_ngl_render_job_draw_rect_with_color (job, + &GRAPHENE_RECT_INIT (origin->x, origin->y + widths[0], widths[3], size->height - widths[0]), + color); + } gsk_ngl_render_job_end_draw (job); } @@ -1680,7 +1694,7 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, float max_x = min_x + node->bounds.size.width; float max_y = min_y + node->bounds.size.height; GskRoundedRect outline; - guint16 c[4]; + guint16 color[4]; memset (sizes, 0, sizeof sizes); @@ -1724,60 +1738,60 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job, { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - rgba_to_half (&colors[0], c); + rgba_to_half (&colors[0], color); - vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { 
c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; - vertices[3] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; } if (widths[1] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - rgba_to_half (&colors[1], c); + rgba_to_half (&colors[1], color); - vertices[0] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { 
max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; - vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; } if (widths[2] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - rgba_to_half (&colors[2], c); + rgba_to_half (&colors[2], color); - vertices[0] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], 
color[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; - vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { .position = { min_x , max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x , max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; } if (widths[3] > 0) { GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - rgba_to_half (&colors[3], c); + rgba_to_half (&colors[3], color); - vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[2] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + 
vertices[2] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; - vertices[3] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 1, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { c[0], c[1], c[2], c[3] } }; - vertices[5] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { c[0], c[1], c[2], c[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } }; } gsk_ngl_render_job_end_draw (job); @@ -1795,8 +1809,6 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, const GskRenderNode *node2) { const GskRenderNode *child = gsk_rounded_clip_node_get_child (node); - const GdkRGBA *color = gsk_border_node_get_colors (node2); - const GdkRGBA *color2 = gsk_color_node_get_color (child); const GskRoundedRect *rounded_outline = gsk_border_node_get_outline (node2); const float *widths = gsk_border_node_get_widths (node2); float min_x = job->offset_x + node2->bounds.origin.x; @@ -1805,13 +1817,14 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, float max_y = min_y + node2->bounds.size.height; GskRoundedRect outline; GskNglDrawVertex *vertices; - guint16 c[4], c2[4]; + guint16 color[4]; + guint16 color2[4]; if (node_is_invisible (node2)) return; - rgba_to_half (&color[0], c); - rgba_to_half (color2, c2); + rgba_to_half (&gsk_border_node_get_colors (node2)[0], color); + 
rgba_to_half (gsk_color_node_get_color (child), color2); gsk_ngl_render_job_transform_rounded_rect (job, rounded_outline, &outline); @@ -1827,12 +1840,12 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job, vertices = gsk_ngl_command_queue_add_vertices (job->command_queue); - vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; - vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; - vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; - vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; - vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; - vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { c[0], c[1], c[2], c[3] }, .color2 = { c2[0], c2[1], c2[2], c2[3] } }; + vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; + vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; + vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; + vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; + vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { color[0], color[1], 
color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; + vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } }; gsk_ngl_render_job_end_draw (job); } @@ -1957,6 +1970,7 @@ gsk_ngl_render_job_visit_unblurred_inset_shadow_node (GskNglRenderJob *job, { const GskRoundedRect *outline = gsk_inset_shadow_node_get_outline (node); GskRoundedRect transformed_outline; + guint16 color[4]; gsk_ngl_render_job_transform_rounded_rect (job, outline, &transformed_outline); @@ -1971,7 +1985,8 @@ gsk_ngl_render_job_visit_unblurred_inset_shadow_node (GskNglRenderJob *job, UNIFORM_INSET_SHADOW_OFFSET, 0, gsk_inset_shadow_node_get_dx (node), gsk_inset_shadow_node_get_dy (node)); - gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, gsk_inset_shadow_node_get_color (node)); + rgba_to_half (gsk_inset_shadow_node_get_color (node), color); + gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, color); gsk_ngl_render_job_end_draw (job); } @@ -1992,6 +2007,7 @@ gsk_ngl_render_job_visit_blurred_inset_shadow_node (GskNglRenderJob *job, int blurred_texture_id; GskTextureKey key; GskNglRenderOffscreen offscreen = {0}; + guint16 color[4]; g_assert (blur_radius > 0); @@ -2071,9 +2087,10 @@ gsk_ngl_render_job_visit_blurred_inset_shadow_node (GskNglRenderJob *job, UNIFORM_INSET_SHADOW_OFFSET, 0, offset_x * scale_x, offset_y * scale_y); + rgba_to_half (gsk_inset_shadow_node_get_color (node), color); gsk_ngl_render_job_draw_with_color (job, 0, 0, texture_width, texture_height, - gsk_inset_shadow_node_get_color (node)); + color); gsk_ngl_render_job_end_draw (job); gsk_ngl_render_job_pop_modelview (job); @@ -2156,7 +2173,7 @@ gsk_ngl_render_job_visit_unblurred_outset_shadow_node (GskNglRenderJob *job, float spread = gsk_outset_shadow_node_get_spread (node); float dx = gsk_outset_shadow_node_get_dx (node); float dy = 
gsk_outset_shadow_node_get_dy (node); - const GdkRGBA *color = gsk_outset_shadow_node_get_color (node); + guint16 color[4]; const float edge_sizes[] = { // Top, right, bottom, left spread - dy, spread + dx, spread + dy, spread - dx }; @@ -2167,6 +2184,8 @@ gsk_ngl_render_job_visit_unblurred_outset_shadow_node (GskNglRenderJob *job, { outline->corner[3].width + spread - dx, outline->corner[3].height + spread + dy }, }; + rgba_to_half (gsk_outset_shadow_node_get_color (node), color); + gsk_ngl_render_job_transform_rounded_rect (job, outline, &transformed_outline); gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, unblurred_outset_shadow)); @@ -2229,10 +2248,7 @@ static inline void gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job, const GskRenderNode *node) { - static const GdkRGBA white = { 1, 1, 1, 1 }; - const GskRoundedRect *outline = gsk_outset_shadow_node_get_outline (node); - const GdkRGBA *color = gsk_outset_shadow_node_get_color (node); float scale_x = job->scale_x; float scale_y = job->scale_y; float blur_radius = gsk_outset_shadow_node_get_blur_radius (node); @@ -2249,6 +2265,9 @@ gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job, int blurred_texture_id; int cached_tid; gboolean do_slicing; + guint16 color[4]; + + rgba_to_half (gsk_outset_shadow_node_get_color (node), color); /* scaled_outline is the minimal outline we need to draw the given drop shadow, * enlarged by the spread and offset by the blur radius. 
*/ @@ -2330,7 +2349,8 @@ gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job, /* Draw the outline using color program */ gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color)); - gsk_ngl_render_job_draw_with_color (job, 0, 0, texture_width, texture_height, &white); + gsk_ngl_render_job_draw_with_color (job, 0, 0, texture_width, texture_height, + (guint16[]){ FP16_ONE, FP16_ONE, FP16_ONE, FP16_ONE }); gsk_ngl_render_job_end_draw (job); /* Reset state from offscreen */ @@ -2844,6 +2864,7 @@ gsk_ngl_render_job_visit_shadow_node (GskNglRenderJob *job, const float dy = shadow->dy; GskNglRenderOffscreen offscreen = {0}; graphene_rect_t bounds; + guint16 color[4]; if (shadow->radius == 0 && gsk_render_node_get_node_type (shadow_child) == GSK_TEXT_NODE) @@ -2906,7 +2927,8 @@ gsk_ngl_render_job_visit_shadow_node (GskNglRenderJob *job, GL_TEXTURE_2D, GL_TEXTURE0, offscreen.texture_id); - gsk_ngl_render_job_draw_offscreen_with_color (job, &bounds, &offscreen, &shadow->color); + rgba_to_half (&shadow->color, color); + gsk_ngl_render_job_draw_offscreen_with_color (job, &bounds, &offscreen, color); gsk_ngl_render_job_end_draw (job); gsk_ngl_render_job_offset (job, -dx, -dy); } @@ -2963,7 +2985,7 @@ gsk_ngl_render_job_visit_blur_node (GskNglRenderJob *job, gsk_ngl_render_job_draw_coords (job, min_x, min_y, max_x, max_y, 0, 1, 1, 0, - &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f } ); + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO } ); gsk_ngl_render_job_end_draw (job); } @@ -3069,10 +3091,10 @@ static inline void gsk_ngl_render_job_visit_gl_shader_node_fallback (GskNglRenderJob *job, const GskRenderNode *node) { - static const GdkRGBA pink = { 255 / 255., 105 / 255., 180 / 255., 1.0 }; + guint16 pink[4] = { 15360, 13975, 14758, 15360 }; /* 255 105 180 */ gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color)); - gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, &pink); + gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, 
pink); gsk_ngl_render_job_end_draw (job); } @@ -3278,7 +3300,7 @@ gsk_ngl_render_job_visit_texture_node (GskNglRenderJob *job, gsk_ngl_render_job_draw_coords (job, x1, y1, x2, y2, 0, 0, 1, 1, - &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f }); + (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO }); } gsk_ngl_render_job_end_draw (job); From 9d81c129fc34b489d51bd314ec540bbd6de9794b Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Wed, 7 Apr 2021 19:41:54 -0400 Subject: [PATCH 5/7] Add an option to disable F16C And add a compile time check for the presence of this CPU feature. --- config.h.meson | 2 ++ gsk/meson.build | 1 - gsk/ngl/fp16private.h | 11 ++++++----- meson.build | 36 ++++++++++++++++++++++++++++++++++++ meson_options.txt | 5 +++++ 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/config.h.meson b/config.h.meson index 549a677379..fc2dd0b3d2 100644 --- a/config.h.meson +++ b/config.h.meson @@ -279,3 +279,5 @@ /* Define if tracker3 is available */ #mesondefine HAVE_TRACKER3 + +#mesondefine HAVE_F16C diff --git a/gsk/meson.build b/gsk/meson.build index 11be6a03f6..5c381b51c9 100644 --- a/gsk/meson.build +++ b/gsk/meson.build @@ -221,7 +221,6 @@ libgsk = static_library('gsk', '-DGTK_COMPILATION', '-DG_LOG_DOMAIN="Gsk"', '-DG_LOG_STRUCTURED=1', - '-mf16c' ] + common_cflags, link_with: libgdk, ) diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h index f54f7307a6..2bc9f84eea 100644 --- a/gsk/ngl/fp16private.h +++ b/gsk/ngl/fp16private.h @@ -1,4 +1,4 @@ -/* ninesliceprivate.h +/* fp16private.h * * Copyright 2021 Red Hat, Inc. 
* @@ -21,10 +21,11 @@ #ifndef __FP16_PRIVATE_H__ #define __FP16_PRIVATE_H__ +#include #include #include -#ifdef GRAPHENE_USE_SSE +#ifdef HAVE_F16C #include #endif @@ -34,7 +35,7 @@ G_BEGIN_DECLS #define FP16_ONE ((guint16)15360) #define FP16_MINUS_ONE ((guint16)48128) -#ifdef GRAPHENE_USE_SSE +#ifdef HAVE_F16C static inline void float_to_half4 (const float f[4], @@ -54,7 +55,7 @@ half_to_float4 (const guint16 h[4], _mm_store_ps (f, s); } -#else /* GRAPHENE_USE_SSE */ +#else /* GTK_HAS_F16C */ static inline guint as_uint (const float x) @@ -108,7 +109,7 @@ half_to_float4 (const guint16 h[4], f[3] = half_to_float (h[3]); } -#endif /* GRAPHENE_USE_SSE */ +#endif /* HAVE_F16C */ G_END_DECLS diff --git a/meson.build b/meson.build index 38d23ed61c..c9586acc29 100644 --- a/meson.build +++ b/meson.build @@ -685,6 +685,42 @@ if graphene_has_sse2 or graphene_has_gcc endif endif +f16c_cflags = [] +if get_option('f16c').enabled() + f16c_prog = ''' +#if defined(__GNUC__) +# if !defined(__amd64__) && !defined(__x86_64__) +# error "F16C intrinsics are only available on x86_64" +# endif +#elif defined (_MSC_VER) && !defined (_M_X64) && !defined (_M_AMD64) +# error "F16C intrinsics not supported on x86 MSVC builds" +#endif +#if defined(__SSE__) || (_M_X64 > 0) +# include +#else +# error "No F16C intrinsics available" +#endif +int main () { + float f[4] = { 0, }; + unsigned short h[4] = { 0, }; + __m128 s = _mm_loadu_ps (f); + __m128i i = _mm_cvtps_ph (s, 0); + _mm_storel_epi64 ((__m128i*)h, i); + return 0; +}''' + if cc.get_id() != 'msvc' + test_f16c_cflags = [ '-mf16c' ] + else + test_f16c_cflags = [] + endif + + if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics') + cdata.set('HAVE_F16C', 1) + f16c_cflags = test_f16c_cflags + common_cflags += test_f16c_cflags + endif +endif + subdir('gtk/css') subdir('gdk') subdir('gsk') diff --git a/meson_options.txt b/meson_options.txt index 16a1cbd9fe..4193415151 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ 
-86,6 +86,11 @@ option('sassc', value: 'auto', description: 'Rebuild themes using sassc') +option('f16c', + type: 'feature', + value: 'enabled', + description: 'Enable F16C fast paths (requires F16C)') + # Documentation and introspection option('gtk_doc', From 885a6b8ebc2acc7915c7fc7d8fe814c1d20d8aaf Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Wed, 7 Apr 2021 21:29:21 -0400 Subject: [PATCH 6/7] gsk: Add runtime checks for F16C Use an IFUNC resolver to determine whether we can use intrinsics for FP16 conversion. This requires the functions to be no longer inline. Sadly, it turns out that __builtin_cpu_supports ("f16c") doesn't compile on the systems where we want it to prevent us from getting a SIGILL at runtime. --- gsk/meson.build | 1 + gsk/ngl/fp16.c | 129 ++++++++++++++++++++++++++++++++++++++++++ gsk/ngl/fp16private.h | 84 ++------------------------- meson.build | 4 ++ 4 files changed, 138 insertions(+), 80 deletions(-) create mode 100644 gsk/ngl/fp16.c diff --git a/gsk/meson.build b/gsk/meson.build index 5c381b51c9..f351941c22 100644 --- a/gsk/meson.build +++ b/gsk/meson.build @@ -85,6 +85,7 @@ gsk_private_sources = files([ 'ngl/gskngltexturelibrary.c', 'ngl/gskngluniformstate.c', 'ngl/gskngltexturepool.c', + 'ngl/fp16.c', ]) gsk_public_headers = files([ diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c new file mode 100644 index 0000000000..22453ae210 --- /dev/null +++ b/gsk/ngl/fp16.c @@ -0,0 +1,129 @@ +/* fp16.c + * + * Copyright 2021 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program. If not, see . + * + * SPDX-License-Identifier: LGPL-2.1-or-later + */ + +#include + +#include "fp16private.h" + +#ifdef HAVE_F16C +#include +#endif + +static inline guint +as_uint (const float x) +{ + return *(guint*)&x; +} + +static inline float +as_float (const guint x) +{ + return *(float*)&x; +} + +// IEEE-754 16-bit floating-point format (without infinity): 1-5-10 + +static inline float +half_to_float (const guint16 x) +{ + const guint e = (x&0x7C00)>>10; // exponent + const guint m = (x&0x03FF)<<13; // mantissa + const guint v = as_uint((float)m)>>23; + return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000))); +} + +static inline guint16 +float_to_half (const float x) +{ + const guint b = as_uint(x)+0x00001000; // round-to-nearest-even + const guint e = (b&0x7F800000)>>23; // exponent + const guint m = b&0x007FFFFF; // mantissa + return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate +} + +static void +float_to_half4_c (const float f[4], + guint16 h[4]) +{ + h[0] = float_to_half (f[0]); + h[1] = float_to_half (f[1]); + h[2] = float_to_half (f[2]); + h[3] = float_to_half (f[3]); +} + +static void +half_to_float4_c (const guint16 h[4], + float f[4]) +{ + f[0] = half_to_float (h[0]); + f[1] = half_to_float (h[1]); + f[2] = half_to_float (h[2]); + f[3] = half_to_float (h[3]); +} + +#ifdef HAVE_F16C + +static void +float_to_half4_f16c (const float f[4], + guint16 h[4]) +{ + __m128 s = _mm_loadu_ps (f); + __m128i i = _mm_cvtps_ph (s, 0); + _mm_storel_epi64 ((__m128i*)h, i); +} + +static void +half_to_float4_f16c (const guint16 h[4], + float f[4]) +{ + __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h); + 
__m128 s = _mm_cvtph_ps (i); + _mm_store_ps (f, s); +} + +void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4"))); +void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4"))); + +static void * +resolve_float_to_half4 (void) +{ + __builtin_cpu_init (); + if (__builtin_cpu_supports ("f16c")) + return float_to_half4_f16c; + else + return float_to_half4_c; +} + +static void * +resolve_half_to_float4 (void) +{ + __builtin_cpu_init (); + if (__builtin_cpu_supports ("f16c")) + return half_to_float4_f16c; + else + return half_to_float4_c; +} + +#else + +void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c"))); +void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c"))); + +#endif /* GTK_HAS_F16C */ diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h index 2bc9f84eea..d76f18a04f 100644 --- a/gsk/ngl/fp16private.h +++ b/gsk/ngl/fp16private.h @@ -21,13 +21,7 @@ #ifndef __FP16_PRIVATE_H__ #define __FP16_PRIVATE_H__ -#include #include -#include - -#ifdef HAVE_F16C -#include -#endif G_BEGIN_DECLS @@ -35,81 +29,11 @@ G_BEGIN_DECLS #define FP16_ONE ((guint16)15360) #define FP16_MINUS_ONE ((guint16)48128) -#ifdef HAVE_F16C +void float_to_half4 (const float f[4], + guint16 h[4]); -static inline void -float_to_half4 (const float f[4], - guint16 h[4]) -{ - __m128 s = _mm_loadu_ps (f); - __m128i i = _mm_cvtps_ph (s, 0); - _mm_storel_epi64 ((__m128i*)h, i); -} - -static inline void -half_to_float4 (const guint16 h[4], - float f[4]) -{ - __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h); - __m128 s = _mm_cvtph_ps (i); - _mm_store_ps (f, s); -} - -#else /* GTK_HAS_F16C */ - -static inline guint -as_uint (const float x) -{ - return *(guint*)&x; -} - -static inline float -as_float (const guint x) -{ - return *(float*)&x; -} - -// IEEE-754 16-bit floating-point format (without infinity): 1-5-10 - -static inline float 
-half_to_float (const guint16 x) -{ - const guint e = (x&0x7C00)>>10; // exponent - const guint m = (x&0x03FF)<<13; // mantissa - const guint v = as_uint((float)m)>>23; - return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000))); -} - -static inline guint16 -float_to_half (const float x) -{ - const guint b = as_uint(x)+0x00001000; // round-to-nearest-even - const guint e = (b&0x7F800000)>>23; // exponent - const guint m = b&0x007FFFFF; // mantissa - return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate -} - -static inline void -float_to_half4 (const float f[4], - guint16 h[4]) -{ - h[0] = float_to_half (f[0]); - h[1] = float_to_half (f[1]); - h[2] = float_to_half (f[2]); - h[3] = float_to_half (f[3]); -} - -static inline void -half_to_float4 (const guint16 h[4], - float f[4]) -{ - f[0] = half_to_float (h[0]); - f[1] = half_to_float (h[1]); - f[2] = half_to_float (h[2]); - f[3] = half_to_float (h[3]); -} - -#endif /* HAVE_F16C */ +void half_to_float4 (const guint16 h[4], + float f[4]); /* converts the four half-precision values in h to floats in f; now only declared here and defined in gsk/ngl/fp16.c */ G_END_DECLS diff --git a/meson.build b/meson.build index c9586acc29..641208158c 100644 --- a/meson.build +++ b/meson.build @@ -706,6 +706,10 @@ int main () { __m128 s = _mm_loadu_ps (f); __m128i i = _mm_cvtps_ph (s, 0); _mm_storel_epi64 ((__m128i*)h, i); + + __builtin_cpu_init (); + __builtin_cpu_supports ("f16c"); + return 0; }''' if cc.get_id() != 'msvc' From 2d7169fd5f2467e6a6159978fcbd89478de85424 Mon Sep 17 00:00:00 2001 From: Matthias Clasen Date: Wed, 7 Apr 2021 22:38:47 -0400 Subject: [PATCH 7/7] Work around compiler shortcomings on macOS alias attributes don't work on Darwin, so do without.
--- gsk/ngl/fp16.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c index 22453ae210..e9a8c8fbce 100644 --- a/gsk/ngl/fp16.c +++ b/gsk/ngl/fp16.c @@ -123,7 +123,28 @@ resolve_half_to_float4 (void) #else +#ifdef __APPLE__ +// turns out aliases don't work on Darwin + +void +float_to_half4 (const float f[4], + guint16 h[4]) +{ + float_to_half4_c (f, h); /* forwarding call used in place of the alias attribute */ +} + +void +half_to_float4 (const guint16 h[4], + float f[4]) +{ + half_to_float4_c (h, f); /* forwarding call used in place of the alias attribute */ +} + +#else /* !__APPLE__ */ + void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c"))); void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c"))); +#endif /* __APPLE__ */ + #endif /* GTK_HAS_F16C */