Skip to content

Commit 70c325b

Browse files
authored
[libclc] Move fp32 sincos helpers to CLC library (#132753)
This commit moves most of the sincos helper functions to the CLC library. It simultaneously vectorizes them, with the aim of increasing performance for vector types by avoiding scalarization. Some helpers for double types have not been moved, as they rely on features that are not yet ready, like 'fract', which in turn relies on 'fmin'; neither of these is in the CLC library yet. They also use table lookups and type punning, which don't translate well to vector versions. As a proof of concept, the float and half versions of the sin and cos builtins are now vectorized and use the CLC helpers to do so. They remain in the OpenCL layer but will be simpler to move to the CLC library when the double versions are ready.
1 parent a6a56a3 commit 70c325b

File tree

13 files changed

+478
-352
lines changed

13 files changed

+478
-352
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef __CLC_MATH_CLC_SINCOS_HELPERS_H__
10+
#define __CLC_MATH_CLC_SINCOS_HELPERS_H__
11+
12+
#define __FLOAT_ONLY
13+
#define __CLC_BODY <clc/math/clc_sincos_helpers.inc>
14+
15+
#include <clc/math/gentype.inc>
16+
17+
#undef __CLC_BODY
18+
#undef __FLOAT_ONLY
19+
20+
#endif // __CLC_MATH_CLC_SINCOS_HELPERS_H__
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// Declares the __clc_sinf_piby4 overload for the current __CLC_FLOATN type:
// it takes two __CLC_FLOATN values (x, y) and returns a __CLC_FLOATN.
// NOTE(review): the name suggests a sine kernel for an argument already
// reduced to [-pi/4, pi/4], with y presumably the low-order tail of x --
// confirm against the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
10+
__CLC_FLOATN y);
11+
// Declares the __clc_cosf_piby4 overload for the current __CLC_FLOATN type:
// it takes two __CLC_FLOATN values (x, y) and returns a __CLC_FLOATN.
// NOTE(review): the name suggests a cosine kernel for an argument already
// reduced to [-pi/4, pi/4], with y presumably the low-order tail of x --
// confirm against the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
12+
__CLC_FLOATN y);
13+
14+
// Declares the __clc_argReductionS overload for the current __CLC_FLOATN type.
// It takes the input x by value plus two pointers (r, rr) into the private
// address space, and returns a __CLC_INTN.
// NOTE(review): presumably r and rr receive the head and tail of the reduced
// argument and the return value selects the quadrant/region -- confirm against
// the definition.
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_FLOATN *r,
15+
private __CLC_FLOATN *rr,
16+
__CLC_FLOATN x);

libclc/clc/include/clc/math/gentype.inc

+14
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
#define __CLC_UINTN __CLC_XCONCAT(uint, __CLC_VECSIZE)
3030
#define __CLC_ULONGN __CLC_XCONCAT(ulong, __CLC_VECSIZE)
3131

32+
#define __CLC_AS_HALFN __CLC_XCONCAT(__clc_as_, __CLC_HALFN)
33+
#define __CLC_AS_FLOATN __CLC_XCONCAT(__clc_as_, __CLC_FLOATN)
34+
#define __CLC_AS_DOUBLEN __CLC_XCONCAT(__clc_as_, __CLC_DOUBLEN)
35+
3236
#define __CLC_AS_CHARN __CLC_XCONCAT(__clc_as_, __CLC_CHARN)
3337
#define __CLC_AS_SHORTN __CLC_XCONCAT(__clc_as_, __CLC_SHORTN)
3438
#define __CLC_AS_INTN __CLC_XCONCAT(__clc_as_, __CLC_INTN)
@@ -67,6 +71,7 @@
6771
#define __CLC_SCALAR_GENTYPE float
6872
#define __CLC_FPSIZE 32
6973
#define __CLC_FP_LIT(x) x##F
74+
#define __CLC_GENTYPE_NAN FLT_NAN
7075

7176
#define __CLC_S_GENTYPE __CLC_XCONCAT(int, __CLC_VECSIZE)
7277
#define __CLC_U_GENTYPE __CLC_XCONCAT(uint, __CLC_VECSIZE)
@@ -123,6 +128,7 @@
123128

124129
#undef __CLC_U_GENTYPE
125130
#undef __CLC_S_GENTYPE
131+
#undef __CLC_GENTYPE_NAN
126132
#undef __CLC_FP_LIT
127133
#undef __CLC_FPSIZE
128134
#undef __CLC_SCALAR_GENTYPE
@@ -134,6 +140,7 @@
134140
#define __CLC_SCALAR_GENTYPE double
135141
#define __CLC_FPSIZE 64
136142
#define __CLC_FP_LIT(x) (x)
143+
#define __CLC_GENTYPE_NAN DBL_NAN
137144

138145
#define __CLC_S_GENTYPE __CLC_XCONCAT(long, __CLC_VECSIZE)
139146
#define __CLC_U_GENTYPE __CLC_XCONCAT(ulong, __CLC_VECSIZE)
@@ -190,6 +197,7 @@
190197

191198
#undef __CLC_U_GENTYPE
192199
#undef __CLC_S_GENTYPE
200+
#undef __CLC_GENTYPE_NAN
193201
#undef __CLC_FP_LIT
194202
#undef __CLC_FPSIZE
195203
#undef __CLC_SCALAR_GENTYPE
@@ -203,6 +211,7 @@
203211
#define __CLC_SCALAR_GENTYPE half
204212
#define __CLC_FPSIZE 16
205213
#define __CLC_FP_LIT(x) x##H
214+
#define __CLC_GENTYPE_NAN HALF_NAN
206215

207216
#define __CLC_S_GENTYPE __CLC_XCONCAT(short, __CLC_VECSIZE)
208217
#define __CLC_U_GENTYPE __CLC_XCONCAT(ushort, __CLC_VECSIZE)
@@ -259,6 +268,7 @@
259268

260269
#undef __CLC_U_GENTYPE
261270
#undef __CLC_S_GENTYPE
271+
#undef __CLC_GENTYPE_NAN
262272
#undef __CLC_FP_LIT
263273
#undef __CLC_FPSIZE
264274
#undef __CLC_SCALAR_GENTYPE
@@ -278,6 +288,10 @@
278288
#undef __CLC_AS_INTN
279289
#undef __CLC_AS_LONGN
280290

291+
#undef __CLC_AS_HALFN
292+
#undef __CLC_AS_FLOATN
293+
#undef __CLC_AS_DOUBLEN
294+
281295
#undef __CLC_AS_UCHARN
282296
#undef __CLC_AS_USHORTN
283297
#undef __CLC_AS_UINTN

libclc/clc/lib/generic/SOURCES

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ math/clc_nextafter.cl
3535
math/clc_rint.cl
3636
math/clc_round.cl
3737
math/clc_rsqrt.cl
38+
math/clc_sincos_helpers.cl
3839
math/clc_sqrt.cl
3940
math/clc_sw_fma.cl
4041
math/clc_trunc.cl
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/clc_convert.h>
10+
#include <clc/integer/clc_clz.h>
11+
#include <clc/integer/clc_mul_hi.h>
12+
#include <clc/internal/clc.h>
13+
#include <clc/math/clc_fma.h>
14+
#include <clc/math/clc_mad.h>
15+
#include <clc/math/clc_trunc.h>
16+
#include <clc/math/math.h>
17+
18+
// Funnel shift: ORs `hi` shifted left by (32 - shift) with `lo` shifted right
// by `shift`. For 32-bit unsigned operands this yields the low 32 bits of the
// 64-bit value hi:lo shifted right by `shift`.
//
// `shift` must be in [1, 31]: with shift == 0 the left-shift count becomes 32,
// the full operand width, which does not produce the intended result.
//
// The expansion is a single fully parenthesized expression with no trailing
// semicolon, so it composes safely inside larger expressions; callers supply
// their own statement terminator. `shift` is evaluated twice, so it must not
// have side effects.
#define bitalign(hi, lo, shift) \
  (((hi) << (32 - (shift))) | ((lo) >> (shift)))
19+
20+
// Full-width multiply: stores the low half of A * B in LO and the high half,
// as returned by __clc_mul_hi(A, B), in HI.
//
// A and B are parenthesized so that compound argument expressions (e.g.
// `x + 1`) are multiplied as a whole. The two assignments are wrapped in
// do { } while (0) so the macro behaves as one statement (safe under an
// unbraced if/else); invoke it with a trailing semicolon.
// A and B are each evaluated twice, so they must not have side effects.
#define FULL_MUL(A, B, HI, LO) \
  do { \
    LO = (A) * (B); \
    HI = __clc_mul_hi((A), (B)); \
  } while (0)
23+
24+
// Full-width multiply-add: stores the low half of A * B + C in LO, and in HI
// the high half of A * B (from __clc_mul_hi) plus the carry out of the
// low-half addition. The carry is detected by the wrapped sum being smaller
// than the addend C.
//
// Every argument is parenthesized, including C in the carry test, so compound
// argument expressions keep their intended grouping. The statements are
// wrapped in do { } while (0) so the macro behaves as one statement (safe
// under an unbraced if/else); invoke it with a trailing semicolon.
// A, B and C are each evaluated twice, so they must not have side effects.
#define FULL_MAD(A, B, C, HI, LO) \
  do { \
    LO = ((A) * (B) + (C)); \
    HI = __clc_mul_hi((A), (B)); \
    HI += ((LO) < (C)) ? 1U : 0U; \
  } while (0)
28+
29+
#define __FLOAT_ONLY
30+
#define __CLC_BODY <clc_sincos_helpers.inc>
31+
32+
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)