gsk: Add runtime checks for F16C

Use an IFUNC resolver to determine whether we can use
intrinsics for FP16 conversion. This requires the functions
to be no longer inline.

Sadly, it turns out that __builtin_cpu_supports ("f16c")
doesn't compile on the systems where we want it to prevent
us from getting a SIGILL at runtime.
This commit is contained in:
Matthias Clasen
2021-04-07 21:29:21 -04:00
parent 9d81c129fc
commit 885a6b8ebc
4 changed files with 138 additions and 80 deletions

View File

@@ -85,6 +85,7 @@ gsk_private_sources = files([
'ngl/gskngltexturelibrary.c',
'ngl/gskngluniformstate.c',
'ngl/gskngltexturepool.c',
'ngl/fp16.c',
])
gsk_public_headers = files([

129
gsk/ngl/fp16.c Normal file
View File

@@ -0,0 +1,129 @@
/* fp16.c
*
* Copyright 2021 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#include <config.h>
#include "fp16private.h"
#ifdef HAVE_F16C
#include <immintrin.h>
#endif
static inline guint
as_uint (const float x)
{
return *(guint*)&x;
}
static inline float
as_float (const guint x)
{
return *(float*)&x;
}
// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
static inline float
half_to_float (const guint16 x)
{
const guint e = (x&0x7C00)>>10; // exponent
const guint m = (x&0x03FF)<<13; // mantissa
const guint v = as_uint((float)m)>>23;
return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000)));
}
static inline guint16
float_to_half (const float x)
{
const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
const guint e = (b&0x7F800000)>>23; // exponent
const guint m = b&0x007FFFFF; // mantissa
return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
}
static void
float_to_half4_c (const float f[4],
guint16 h[4])
{
h[0] = float_to_half (f[0]);
h[1] = float_to_half (f[1]);
h[2] = float_to_half (f[2]);
h[3] = float_to_half (f[3]);
}
static void
half_to_float4_c (const guint16 h[4],
float f[4])
{
f[0] = half_to_float (h[0]);
f[1] = half_to_float (h[1]);
f[2] = half_to_float (h[2]);
f[3] = half_to_float (h[3]);
}
#ifdef HAVE_F16C
static void
float_to_half4_f16c (const float f[4],
guint16 h[4])
{
__m128 s = _mm_loadu_ps (f);
__m128i i = _mm_cvtps_ph (s, 0);
_mm_storel_epi64 ((__m128i*)h, i);
}
static void
half_to_float4_f16c (const guint16 h[4],
float f[4])
{
__m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
__m128 s = _mm_cvtph_ps (i);
_mm_store_ps (f, s);
}
void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
static void *
resolve_float_to_half4 (void)
{
__builtin_cpu_init ();
if (__builtin_cpu_supports ("f16c"))
return float_to_half4_f16c;
else
return float_to_half4_c;
}
static void *
resolve_half_to_float4 (void)
{
__builtin_cpu_init ();
if (__builtin_cpu_supports ("f16c"))
return half_to_float4_f16c;
else
return half_to_float4_c;
}
#else
void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c")));
void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c")));
#endif /* GTK_HAS_F16C */

View File

@@ -21,13 +21,7 @@
#ifndef __FP16_PRIVATE_H__
#define __FP16_PRIVATE_H__
#include <config.h>
#include <glib.h>
#include <graphene.h>
#ifdef HAVE_F16C
#include <immintrin.h>
#endif
G_BEGIN_DECLS
@@ -35,81 +29,11 @@ G_BEGIN_DECLS
#define FP16_ONE ((guint16)15360)
#define FP16_MINUS_ONE ((guint16)48128)
#ifdef HAVE_F16C
void float_to_half4 (const float f[4],
guint16 h[4]);
static inline void
float_to_half4 (const float f[4],
guint16 h[4])
{
__m128 s = _mm_loadu_ps (f);
__m128i i = _mm_cvtps_ph (s, 0);
_mm_storel_epi64 ((__m128i*)h, i);
}
static inline void
half_to_float4 (const guint16 h[4],
float f[4])
{
__m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
__m128 s = _mm_cvtph_ps (i);
_mm_store_ps (f, s);
}
#else /* GTK_HAS_F16C */
static inline guint
as_uint (const float x)
{
return *(guint*)&x;
}
static inline float
as_float (const guint x)
{
return *(float*)&x;
}
// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
static inline float
half_to_float (const guint16 x)
{
const guint e = (x&0x7C00)>>10; // exponent
const guint m = (x&0x03FF)<<13; // mantissa
const guint v = as_uint((float)m)>>23;
return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000)));
}
static inline guint16
float_to_half (const float x)
{
const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
const guint e = (b&0x7F800000)>>23; // exponent
const guint m = b&0x007FFFFF; // mantissa
return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
}
static inline void
float_to_half4 (const float f[4],
guint16 h[4])
{
h[0] = float_to_half (f[0]);
h[1] = float_to_half (f[1]);
h[2] = float_to_half (f[2]);
h[3] = float_to_half (f[3]);
}
static inline void
half_to_float4 (const guint16 h[4],
float f[4])
{
f[0] = half_to_float (h[0]);
f[1] = half_to_float (h[1]);
f[2] = half_to_float (h[2]);
f[3] = half_to_float (h[3]);
}
#endif /* HAVE_F16C */
void half_to_float4 (const guint16 h[4],
float f[4]);
G_END_DECLS

View File

@@ -706,6 +706,10 @@ int main () {
__m128 s = _mm_loadu_ps (f);
__m128i i = _mm_cvtps_ph (s, 0);
_mm_storel_epi64 ((__m128i*)h, i);
__builtin_cpu_init ();
__builtin_cpu_supports ("f16c");
return 0;
}'''
if cc.get_id() != 'msvc'