From fcf504905a02bc867369044b13e501cbf649eba1 Mon Sep 17 00:00:00 2001
From: Matthias Clasen
Date: Wed, 24 Jan 2024 18:28:32 -0500
Subject: [PATCH] Add a SIMD implementation

This is strongly based on graphene's simd plumbing.

All three types get represented as graphene_simd4f_t,
scale as {x,y,0,0}, point as {x,y,0,0} and box as
{x0,y0,x1,y1}.
---
 gsk/boxprivate.h   | 261 +++++++++++++++++++++++++++++++++++++++++++++
 gsk/pointprivate.h | 150 ++++++++++++++++++++++++++
 gsk/scaleprivate.h |  97 +++++++++++++++++
 3 files changed, 508 insertions(+)

diff --git a/gsk/boxprivate.h b/gsk/boxprivate.h
index 32cf6a9b26..e9b8ca1b70 100644
--- a/gsk/boxprivate.h
+++ b/gsk/boxprivate.h
@@ -6,6 +6,8 @@
 #include "scaleprivate.h"
 #include "pointprivate.h"
 
+#ifndef USE_SIMD
+
 struct _Box
 {
   float x0, y0, x1, y1;
@@ -221,3 +223,262 @@ box_round_to_pixels (const Box box,
 {
   return box_sub (box_div (box_round_larger (box_mul (box_add (box, offset), scale)), scale), offset);
 }
+
+#else /* USE_SIMD */
+
+struct _Box
+{
+  GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
+};
+
+static inline float
+box_x0 (const Box box)
+{
+  return graphene_simd4f_get_x (box.v);
+}
+
+static inline float
+box_y0 (const Box box)
+{
+  return graphene_simd4f_get_y (box.v);
+}
+
+static inline float
+box_x1 (const Box box)
+{
+  return graphene_simd4f_get_z (box.v);
+}
+
+static inline float
+box_y1 (const Box box)
+{
+  return graphene_simd4f_get_w (box.v);
+}
+
+static inline float
+box_width (const Box box)
+{
+  return box_x1 (box) - box_x0 (box);
+}
+
+static inline float
+box_height (const Box box)
+{
+  return box_y1 (box) - box_y0 (box);
+}
+
+static inline Box
+box (float x0,
+     float y0,
+     float x1,
+     float y1)
+{
+  return (Box) { .v = graphene_simd4f_init (x0, y0, x1, y1) };
+}
+
+static inline Box
+box_from_rect (float x,
+               float y,
+               float w,
+               float h)
+{
+  return box (x, y, x + w, y + h);
+}
+
+static inline Box
+box_from_graphene (const graphene_rect_t *rect)
+{
+  return box_from_rect (rect->origin.x,
+                        rect->origin.y,
+                        rect->size.width,
+                        rect->size.height);
+}
+
+/* { a[0], a[1], b[0], b[1] } */
+# define graphene_simd4f_splat_xyxy(a,b) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_shuffle_ps ((a), (b), _MM_SHUFFLE (1, 0, 1, 0)); \
+  }))
+
+static inline Box
+box_from_points (Point p0,
+                 Point p1)
+{
+  return (Box) { .v = graphene_simd4f_splat_xyxy (p0.v, p1.v) };
+}
+
+static inline Point
+box_origin (const Box box)
+{
+  return (Point) { .v = graphene_simd4f_zero_zw (box.v) };
+}
+
+static inline Point
+box_opposite (const Box box)
+{
+  return (Point) { .v = graphene_simd4f_zero_zw (graphene_simd4f_shuffle_zwxy (box.v)) };
+}
+
+static inline void
+box_to_float (const Box box,
+              float     v[4])
+{
+  graphene_simd4f_dup_4f (box.v, v);
+  v[2] -= v[0];
+  v[3] -= v[1];
+}
+
+static inline Box
+box_inset (const Box box,
+           float     dx,
+           float     dy)
+{
+  return (Box) { .v = graphene_simd4f_add (box.v, graphene_simd4f_init (dx, dy, -dx, -dy)) };
+}
+
+/* return a[0] <= b[0] && a[1] <= b[1] */
+#ifndef graphene_simd4f_cmple_xy
+# define graphene_simd4f_cmple_xy(a,b) \
+  (__extension__ ({ \
+    __m128i __res = (__m128i) _mm_cmple_ps ((a), (b)); \
+    (bool) ((_mm_movemask_epi8 (__res) & 0xff) == 0xff); \
+  }))
+#endif
+
+static inline gboolean
+box_intersect (const Box  box1,
+               const Box  box2,
+               Box       *box)
+{
+  graphene_simd4f_t s, t, t1;
+
+  s = graphene_simd4f_max (box1.v, box2.v);
+  t = graphene_simd4f_min (box1.v, box2.v);
+  t1 = graphene_simd4f_shuffle_zwxy (t);
+
+  if (graphene_simd4f_cmple_xy (s, t1))
+    {
+      /* s.xy is the larger origin, t1.xy (= t.zw) the smaller opposite corner */
+      if (box)
+        box->v = graphene_simd4f_splat_xyxy (s, t1);
+      return TRUE;
+    }
+
+  return FALSE;
+}
+
+static inline gboolean
+box_equal (const Box box1,
+           const Box box2)
+{
+  return (gboolean) !!graphene_simd4f_cmp_eq (box1.v, box2.v);
+}
+
+static inline gboolean
+box_contains (const Box box1,
+              const Box box2)
+{
+  Box box;
+
+  if (box_intersect (box1, box2, &box))
+    return box_equal (box, box2);
+
+  return FALSE;
+}
+
+static inline gboolean
+box_empty (const Box box)
+{
+  /* FIXME simd */
+  return box_x0 (box) == box_x1 (box) || box_y0 (box) == box_y1 (box);
+}
+
+/* a splat variation */
+#ifndef graphene_simd4f_shuffle_xyxy
+# define graphene_simd4f_shuffle_xyxy(v) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 0, 1, 0)); \
+  }))
+#endif
+
+static inline Box
+box_add (const Box   box,
+         const Point offset)
+{
+  return (Box) { .v = graphene_simd4f_add (box.v, graphene_simd4f_shuffle_xyxy (offset.v)) };
+}
+
+static inline Box
+box_sub (const Box   box,
+         const Point offset)
+{
+  return (Box) { .v = graphene_simd4f_sub (box.v, graphene_simd4f_shuffle_xyxy (offset.v)) };
+}
+
+static inline Box
+box_mul (const Box   box,
+         const Scale scale)
+{
+  Box b = (Box) { .v = graphene_simd4f_mul (box.v, graphene_simd4f_shuffle_xyxy (scale.v)) };
+
+  if (G_UNLIKELY (!graphene_simd4f_cmple_xy (graphene_simd4f_init (0, 0, 0, 0), scale.v)))
+    {
+      graphene_simd4f_t v = graphene_simd4f_shuffle_zwxy (b.v);
+      graphene_simd4f_t s = graphene_simd4f_min (b.v, v);
+      graphene_simd4f_t t = graphene_simd4f_max (b.v, v);
+
+      return (Box) { .v = graphene_simd4f_splat_xyxy (s, t) };
+    }
+
+  return b;
+}
+
+static inline Box
+box_div (const Box   box,
+         const Scale scale)
+{
+  return box_mul (box, scale_inv (scale));
+}
+
+static inline void
+box_offset_to_float (const Box   box,
+                     const Point offset,
+                     float       v[4])
+{
+  box_to_float (box_add (box, offset), v);
+}
+
+#ifdef __SSE4_1__
+
+static inline Box
+box_round_larger (const Box box)
+{
+  graphene_simd4f_t lo = graphene_simd4f_floor (box.v);
+  graphene_simd4f_t hi = graphene_simd4f_ceil (box.v);
+
+  /* { lo[0], lo[1], hi[2], hi[3] }: round x0/y0 down and x1/y1 up */
+  return (Box) { .v = (graphene_simd4f_t) _mm_shuffle_ps (lo, hi, _MM_SHUFFLE (3, 2, 1, 0)) };
+}
+
+#else
+
+static inline Box
+box_round_larger (const Box b)
+{
+  return box (floorf (box_x0 (b)),
+              floorf (box_y0 (b)),
+              ceilf (box_x1 (b)),
+              ceilf (box_y1 (b)));
+}
+
+#endif
+
+static inline Box
+box_round_to_pixels (const Box   box,
+                     const Scale scale,
+                     const Point offset)
+{
+  return box_sub (box_div (box_round_larger (box_mul (box_add (box, offset), scale)), scale), offset);
+}
+
+#endif
diff --git a/gsk/pointprivate.h b/gsk/pointprivate.h
index 123e1aa103..0e00be7867 100644
--- a/gsk/pointprivate.h
+++ b/gsk/pointprivate.h
@@ -7,6 +7,8 @@
 
 #include "scaleprivate.h"
 
+#ifndef USE_SIMD
+
 struct _Point
 {
   float x, y;
@@ -102,3 +104,151 @@ point_round (const Point p)
 {
   return (Point) { .x = roundf (p.x), .y = roundf (p.y) };
 }
+
+#else /* USE_SIMD */
+
+#include <smmintrin.h> /* NOTE(review): header name assumed; declares _mm_floor_ps, _mm_ceil_ps, _mm_round_ps */
+
+struct _Point
+{
+  GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
+};
+
+static inline float
+point_x (const Point p)
+{
+  return graphene_simd4f_get_x (p.v);
+}
+
+static inline float
+point_y (const Point p)
+{
+  return graphene_simd4f_get_y (p.v);
+}
+
+static inline Point
+point (float x,
+       float y)
+{
+  return (Point) { .v = graphene_simd4f_init (x, y, 0.f, 0.f) };
+}
+
+static inline Point
+point_from_graphene (const graphene_point_t *p)
+{
+  return point (p->x, p->y);
+}
+
+static inline void
+point_to_float (const Point p,
+                float       v[2])
+{
+  graphene_simd4f_dup_2f (p.v, v);
+}
+
+static inline Point
+point_zero (void)
+{
+  return point (0, 0);
+}
+
+static inline Point
+point_neg (const Point p)
+{
+  return (Point) { .v = graphene_simd4f_neg (p.v) };
+}
+
+static inline Point
+point_mul (const Point p,
+           const Scale s)
+{
+  return (Point) { .v = graphene_simd4f_mul (p.v, s.v) };
+}
+
+static inline Point
+point_div (const Point p,
+           const Scale s)
+{
+  /* z and w divide 0 by 0; clear them again to keep the {x,y,0,0} layout */
+  return (Point) { .v = graphene_simd4f_zero_zw (graphene_simd4f_div (p.v, s.v)) };
+}
+
+static inline Point
+point_add (const Point p1,
+           const Point p2)
+{
+  return (Point) { .v = graphene_simd4f_add (p1.v, p2.v) };
+}
+
+static inline Point
+point_sub (const Point p1,
+           const Point p2)
+{
+  return (Point) { .v = graphene_simd4f_sub (p1.v, p2.v) };
+}
+
+#ifdef __SSE4_1__
+
+#ifndef graphene_simd4f_floor
+# define graphene_simd4f_floor(v) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_floor_ps ((v)); \
+  }))
+#endif
+
+#ifndef graphene_simd4f_ceil
+# define graphene_simd4f_ceil(v) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_ceil_ps ((v)); \
+  }))
+#endif
+
+/* Rounds to nearest with ties to even; roundf() rounds ties away from zero */
+#ifndef graphene_simd4f_round
+# define graphene_simd4f_round(v) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_round_ps ((v), _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); \
+  }))
+#endif
+
+static inline Point
+point_floor (const Point p)
+{
+  return (Point) { .v = graphene_simd4f_floor (p.v) };
+}
+
+static inline Point
+point_ceil (const Point p)
+{
+  return (Point) { .v = graphene_simd4f_ceil (p.v) };
+}
+
+static inline Point
+point_round (const Point p)
+{
+  return (Point) { .v = graphene_simd4f_round (p.v) };
+}
+
+#else
+
+static inline Point
+point_floor (const Point p)
+{
+  return point (floorf (point_x (p)), floorf (point_y (p)));
+}
+
+static inline Point
+point_ceil (const Point p)
+{
+  return point (ceilf (point_x (p)), ceilf (point_y (p)));
+}
+
+static inline Point
+point_round (const Point p)
+{
+  return point (roundf (point_x (p)), roundf (point_y (p)));
+}
+
+#endif
+
+#endif
diff --git a/gsk/scaleprivate.h b/gsk/scaleprivate.h
index e3fa752e28..6bb2011c53 100644
--- a/gsk/scaleprivate.h
+++ b/gsk/scaleprivate.h
@@ -4,6 +4,8 @@
 #include
 #include
 
+#ifndef USE_SIMD
+
 struct _Scale
 {
   float x, y;
@@ -86,3 +88,98 @@ scale_max (const Scale s)
 {
   return (Scale) { .x = MAX (s.x, s.y), .y = MAX (s.x, s.y) };
 }
+
+#else /* USE_SIMD */
+
+struct _Scale
+{
+  GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
+};
+
+static inline float
+scale_x (const Scale s)
+{
+  return graphene_simd4f_get_x (s.v);
+}
+
+static inline float
+scale_y (const Scale s)
+{
+  return graphene_simd4f_get_y (s.v);
+}
+
+static inline Scale
+scale (float x,
+       float y)
+{
+  return (Scale) { .v = graphene_simd4f_init (x, y, 0.f, 0.f) };
+}
+
+static inline Scale
+scale_from_float (float s)
+{
+  return scale (s, s);
+}
+
+static inline Scale
+scale_from_graphene (const graphene_vec2_t *v)
+{
+  return (Scale) { .v = v->__graphene_private_value };
+}
+
+static inline void
+scale_to_float (const Scale s,
+                float       v[2])
+{
+  graphene_simd4f_dup_2f (s.v, v);
+}
+
+static inline gboolean
+scale_equal (const Scale s1,
+             const Scale s2)
+{
+  return (gboolean) graphene_simd4f_cmp_eq (s1.v, s2.v);
+}
+
+static inline Scale
+scale_one (void)
+{
+  return scale (1, 1);
+}
+
+static inline Scale
+scale_inv (const Scale s)
+{
+  /* z and w are 1/0 here; clear them so scale_equal() keeps comparing {x,y,0,0} */
+  return (Scale) { .v = graphene_simd4f_zero_zw (graphene_simd4f_reciprocal (s.v)) };
+}
+
+static inline Scale
+scale_mul (const Scale s1,
+           const Scale s2)
+{
+  return (Scale) { .v = graphene_simd4f_mul (s1.v, s2.v) };
+}
+
+static inline Scale
+scale_div (const Scale s1,
+           const Scale s2)
+{
+  /* z and w divide 0 by 0 (NaN); clear them so scale_equal() keeps working */
+  return (Scale) { .v = graphene_simd4f_zero_zw (graphene_simd4f_div (s1.v, s2.v)) };
+}
+
+#ifndef graphene_simd4f_shuffle_yxzw
+# define graphene_simd4f_shuffle_yxzw(v) \
+  (__extension__ ({ \
+    (graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (3, 2, 0, 1)); \
+  }))
+#endif
+
+static inline Scale
+scale_max (const Scale s)
+{
+  return (Scale) { .v = graphene_simd4f_max (graphene_simd4f_shuffle_yxzw (s.v), s.v) };
+}
+
+#endif