Add an simd implementation

This is strongly based on graphenes simd plumbing.
All three types get represented as graphene_simf4d_t,
scale as {x,y,0,0}, point as {x,y,0,0} and box as
{x0,y0,x1,y1}.
This commit is contained in:
Matthias Clasen
2024-01-24 18:28:32 -05:00
parent f3c2cdb94d
commit 81da31d662
3 changed files with 525 additions and 0 deletions

View File

@@ -6,6 +6,8 @@
#include "scaleprivate.h"
#include "pointprivate.h"
#ifndef USE_SIMD
struct _Box
{
float x0, y0, x1, y1;
@@ -228,3 +230,268 @@ box_round_to_pixels (const Box box,
{
return box_sub (box_div (box_round_larger (box_mul (box_add (box, offset), scale)), scale), offset);
}
#else /* USE_SIMD */
struct _Box
{
GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
};
static inline float
box_x0 (const Box box)
{
return graphene_simd4f_get_x (box.v);
}
static inline float
box_y0 (const Box box)
{
return graphene_simd4f_get_y (box.v);
}
static inline float
box_x1 (const Box box)
{
return graphene_simd4f_get_z (box.v);
}
static inline float
box_y1 (const Box box)
{
return graphene_simd4f_get_w (box.v);
}
static inline float
box_width (const Box box)
{
return box_x1 (box) - box_x0 (box);
}
static inline float
box_height (const Box box)
{
return box_y1 (box) - box_y0 (box);
}
static inline Box
box (float x0,
float y0,
float x1,
float y1)
{
return (Box) { .v = graphene_simd4f_init (x0, y0, x1, y1) };
}
static inline Box
box_from_rect (float x,
float y,
float w,
float h)
{
return box (x, y, x + w, y + h);
}
static inline Box
box_from_graphene (const graphene_rect_t *rect)
{
return box_from_rect (rect->origin.x,
rect->origin.y,
rect->size.width,
rect->size.height);
}
static inline void
box_to_graphene (const Box box,
graphene_rect_t *rect)
{
graphene_rect_init (rect, box_x0 (box),
box_y0 (box),
box_x1 (box) - box_x0 (box),
box_y1 (box) - box_y0 (box));
}
/* Assumes p0.x <= p1.x && p0.y <= p1.y */
/* { a[0], a[1], b[0], b[1] } */
# define graphene_simd4f_splat_xyxy(a,b) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_shuffle_ps ((a), (b), _MM_SHUFFLE (1, 0, 1, 0)); \
}))
static inline Box
box_from_points (Point p0,
Point p1)
{
return (Box) { .v = graphene_simd4f_splat_xyxy (p0.v, p1.v) };
}
static inline Point
box_origin (const Box box)
{
return (Point) { .v = graphene_simd4f_zero_zw (box.v) };
}
static inline Point
box_opposite (const Box box)
{
return (Point) { .v = graphene_simd4f_zero_zw (graphene_simd4f_shuffle_zwxy (box.v)) };
}
static inline void
box_to_float (const Box box,
float v[4])
{
graphene_simd4f_dup_4f (box.v, v);
v[2] -= v[0];
v[3] -= v[1];
}
static inline Box
box_inset (const Box box,
float dx,
float dy)
{
return (Box) { .v = graphene_simd4f_add (box.v, graphene_simd4f_init (dx, dy, -dx, -dy)) };
}
/* return a[0] < b[0] && a[1] < b[1] */
#ifndef graphene_simd4f_cmple_xy
# define graphene_simd4f_cmple_xy(a,b) \
(__extension__ ({ \
__m128i __res = (__m128i) _mm_cmple_ps ((a), (b)); \
(bool) ((_mm_movemask_epi8 (__res) & 0xff) == 0xff); \
}))
#endif
static inline gboolean
box_intersect (const Box box1,
const Box box2,
Box *box)
{
graphene_simd4f_t s, t, t1;
s = graphene_simd4f_max (box1.v, box2.v);
t = graphene_simd4f_min (box1.v, box2.v);
t1 = graphene_simd4f_shuffle_zwxy (t);
if (graphene_simd4f_cmple_xy (s, t1))
{
if (box)
box->v = graphene_simd4f_splat_xyxy (s, t);
return TRUE;
}
return FALSE;
}
static inline gboolean
box_equal (const Box box1,
const Box box2)
{
return (gboolean) !!graphene_simd4f_cmp_eq (box1.v, box2.v);
}
static inline gboolean
box_contains (const Box box1,
const Box box2)
{
Box box;
if (box_intersect (box1, box2, &box))
return box_equal (box, box2);
return FALSE;
}
static inline gboolean
box_empty (const Box box)
{
/* FIXME simd */
return box_x0 (box) == box_x1 (box) || box_y0 (box) == box_y1 (box);
}
/* a splat variation */
#ifndef graphene_simd4f_shuffle_xyxy
# define graphene_simd4f_shuffle_xyxy(v) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (1, 0, 1, 0)); \
}))
#endif
static inline Box
box_add (const Box box,
const Point offset)
{
return (Box) { .v = graphene_simd4f_add (box.v, graphene_simd4f_shuffle_xyxy (offset.v)) };
}
static inline Box
box_sub (const Box box,
const Point offset)
{
return (Box) { .v = graphene_simd4f_sub (box.v, graphene_simd4f_shuffle_xyxy (offset.v)) };
}
static inline Box
box_mul (const Box box,
const Scale scale)
{
Box b = (Box) { .v = graphene_simd4f_mul (box.v, graphene_simd4f_shuffle_xyxy (scale.v)) };
if (G_UNLIKELY (!graphene_simd4f_cmple_xy (graphene_simd4f_init (0, 0, 0, 0), scale.v)))
{
graphene_simd4f_t v = graphene_simd4f_shuffle_zwxy (b.v);
graphene_simd4f_t s = graphene_simd4f_min (b.v, v);
graphene_simd4f_t t = graphene_simd4f_max (b.v, v);
return (Box) { .v = graphene_simd4f_splat_xyxy (s, t) };
}
return b;
}
static inline Box
box_div (const Box box,
const Scale scale)
{
return box_mul (box, scale_inv (scale));
}
static inline void
box_offset_to_float (const Box box,
const Point offset,
float v[4])
{
box_to_float (box_add (box, offset), v);
}
#ifdef __SSE4_1__
static inline Box
box_round_larger (const Box box)
{
return { (Box) .v = graphene_simd4f_splat_xyxy (graphene_simd4f_floor (b.v), graphene_simd4f_ceil (b.v)) };
}
#else
static inline Box
box_round_larger (const Box b)
{
return box (floorf (box_x0 (b)),
floorf (box_y0 (b)),
ceilf (box_x1 (b)),
ceilf (box_y1 (b)));
}
#endif
static inline Box
box_round_to_pixels (const Box box,
const Scale scale,
const Point offset)
{
return box_sub (box_div (box_round_larger (box_mul (box_add (box, offset), scale)), scale), offset);
}
#endif

View File

@@ -7,6 +7,8 @@
#include "scaleprivate.h"
#ifndef USE_SIMD
struct _Point
{
float x, y;
@@ -110,3 +112,157 @@ point_round (const Point p)
{
return (Point) { .x = roundf (p.x), .y = roundf (p.y) };
}
#else /* USE_SIMD */
#include <smmintrin.h>
struct _Point
{
GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
};
static inline float
point_x (const Point p)
{
return graphene_simd4f_get_x (p.v);
}
static inline float
point_y (const Point p)
{
return graphene_simd4f_get_y (p.v);
}
static inline Point
point (float x,
float y)
{
return (Point) { .v = graphene_simd4f_init (x, y, 0.f, 0.f) };
}
static inline Point
point_from_graphene (const graphene_point_t *p)
{
return point (p->x, p->y);
}
static inline void
point_to_graphene (const Point p,
graphene_point_t *v)
{
v->x = p.x;
v->y = p.y;
}
static inline void
point_to_float (const Point p,
float v[2])
{
graphene_simd4f_dup_2f (p.v, v);
}
static inline Point
point_zero (void)
{
return point (0, 0);
}
static inline Point
point_neg (const Point p)
{
return (Point) { .v = graphene_simd4f_neg (p.v) };
}
static inline Point
point_mul (const Point p,
const Scale s)
{
return (Point) { .v = graphene_simd4f_mul (p.v, s.v) };
}
static inline Point
point_div (const Point p,
const Scale s)
{
return (Point) { .v = graphene_simd4f_div (p.v, s.v) };
}
static inline Point
point_add (const Point p1,
const Point p2)
{
return (Point) { .v = graphene_simd4f_add (p1.v, p2.v) };
}
static inline Point
point_sub (const Point p1,
const Point p2)
{
return (Point) { .v = graphene_simd4f_sub (p1.v, p2.v) };
}
#ifdef __SSE4_1__
#ifndef graphene_simd4f_floor
# define graphene_simd4f_floor(v) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_floor_ps ((v)); \
}))
#endif
#ifndef graphene_simd4f_ceil
# define graphene_simd4f_ceil(v) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_ceil_ps ((v)); \
}))
#endif
#ifndef graphene_simd4f_round
# define graphene_simd4f_round(v) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_round_ps ((v)); \
}))
#endif
static inline Point
point_floor (const Point p)
{
return (Point) { .v = graphene_simd4f_floor (p.v) };
}
static inline Point
point_ceil (const Point p)
{
return (Point) { .v = graphene_simd4f_ceil (p.v) };
}
static inline Point
point_round (const Point p)
{
return (Point) { .v = graphene_simd4f_round (p.v) };
}
#else
static inline Point
point_floor (const Point p)
{
return point (floorf (point_x (p)), floorf (point_y (p)));
}
static inline Point
point_ceil (const Point p)
{
return point (ceilf (point_x (p)), ceilf (point_y (p)));
}
static inline Point
point_round (const Point p)
{
return point (roundf (point_x (p)), roundf (point_y (p)));
}
#endif
#endif

View File

@@ -4,6 +4,8 @@
#include <graphene.h>
#include <math.h>
#ifndef USE_SIMD
struct _Scale
{
float x, y;
@@ -93,3 +95,103 @@ scale_max (const Scale s)
{
return (Scale) { .x = MAX (s.x, s.y), .y = MAX (s.x, s.y) };
}
#else /* USE_SIMD */
struct _Scale
{
GRAPHENE_ALIGNED_DECL (graphene_simd4f_t v, 16);
};
static inline float
scale_x (const Scale s)
{
return graphene_simd4f_get_x (s.v);
}
static inline float
scale_y (const Scale s)
{
return graphene_simd4f_get_y (s.v);
}
static inline Scale
scale (float x,
float y)
{
return (Scale) { .v = graphene_simd4f_init (x, y, 0.f, 0.f) };
}
static inline Scale
scale_from_float (float s)
{
return scale (s, s);
}
static inline Scale
scale_from_graphene (const graphene_vec2_t *v)
{
return (Scale) { .v = v->__graphene_private_value };
}
static inline void
scale_to_graphene (const Scale s,
graphene_vec2_t *v)
{
v->__graphene_private_value = s.v;
}
static inline void
scale_to_float (const Scale s,
float v[2])
{
graphene_simd4f_dup_2f (s.v, v);
}
static inline gboolean
scale_equal (const Scale s1,
const Scale s2)
{
return (gboolean) graphene_simd4f_cmp_eq (s1.v, s2.v);
}
static inline Scale
scale_one (void)
{
return scale (1, 1);
}
static inline Scale
scale_inv (const Scale s)
{
return (Scale) { .v = graphene_simd4f_reciprocal (s.v) };
}
static inline Scale
scale_mul (const Scale s1,
const Scale s2)
{
return (Scale) { .v = graphene_simd4f_mul (s1.v, s2.v) };
}
static inline Scale
scale_div (const Scale s1,
const Scale s2)
{
return (Scale) { .v = graphene_simd4f_div (s1.v, s2.v) };
}
#ifndef graphene_simd4f_shuffle_yxzw
# define graphene_simd4f_shuffle_yxzw(v) \
(__extension__ ({ \
(graphene_simd4f_t) _mm_shuffle_ps ((v), (v), _MM_SHUFFLE (3, 2, 0, 1)); \
}))
#endif
static inline Scale
scale_max (const Scale s)
{
return (Scale) { .v = graphene_simd4f_max (graphene_simd4f_shuffle_yxzw (s.v), s.v) };
}
#endif