Skip to content

Commit a35c64c

Browse files
committed
[Clang][Attribute] Introduce maybe_undef attribute for function arguments which accepts undef values
Add the ability to put __attribute__((maybe_undef)) on function arguments. Clang codegen introduces a freeze instruction on the argument. Differential Revision: https://reviews.llvm.org/D130224
1 parent cc47db6 commit a35c64c

File tree

9 files changed

+291
-3
lines changed

9 files changed

+291
-3
lines changed

clang/include/clang/Basic/Attr.td

+7
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,13 @@ def NoEscape : Attr {
20232023
let Documentation = [NoEscapeDocs];
20242024
}
20252025

2026+
def MaybeUndef : InheritableAttr {
2027+
let Spellings = [Clang<"maybe_undef">];
2028+
let Subjects = SubjectList<[ParmVar]>;
2029+
let Documentation = [MaybeUndefDocs];
2030+
let SimpleHandler = 1;
2031+
}
2032+
20262033
def AssumeAligned : InheritableAttr {
20272034
let Spellings = [GCC<"assume_aligned">];
20282035
let Subjects = SubjectList<[ObjCMethod, Function]>;

clang/include/clang/Basic/AttrDocs.td

+22
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,28 @@ applies to copies of the block. For example:
257257
}];
258258
}
259259

260+
def MaybeUndefDocs : Documentation {
261+
let Category = DocCatVariable;
262+
let Content = [{
263+
The ``maybe_undef`` attribute can be placed on a function parameter. It indicates
264+
that the parameter is allowed to use undef values. It informs the compiler
265+
to insert a freeze LLVM IR instruction on the function parameter.
266+
Please note that this is an attribute that is used as an internal
267+
implementation detail and not intended to be used by external users.
268+
269+
In languages HIP, CUDA etc., some functions have multi-threaded semantics and
270+
it is enough for only one or some threads to provide defined arguments.
271+
Depending on semantics, undef arguments in some threads don't produce
272+
undefined results in the function call. Since, these functions accept undefined
273+
arguments, ``maybe_undef`` attribute can be placed.
274+
275+
Sample usage:
276+
.. code-block:: c
277+
278+
void maybeundeffunc(int __attribute__((maybe_undef))param);
279+
}];
280+
}
281+
260282
def CarriesDependencyDocs : Documentation {
261283
let Category = DocCatFunction;
262284
let Content = [{

clang/lib/CodeGen/CGCall.cpp

+47-3
Original file line numberDiff line numberDiff line change
@@ -2046,6 +2046,27 @@ static bool DetermineNoUndef(QualType QTy, CodeGenTypes &Types,
20462046
return false;
20472047
}
20482048

2049+
/// Check if the argument of a function has maybe_undef attribute.
2050+
static bool IsArgumentMaybeUndef(const Decl *TargetDecl,
2051+
unsigned NumRequiredArgs, unsigned ArgNo) {
2052+
const auto *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl);
2053+
if (!FD)
2054+
return false;
2055+
2056+
// Assume variadic arguments do not have maybe_undef attribute.
2057+
if (ArgNo >= NumRequiredArgs)
2058+
return false;
2059+
2060+
// Check if argument has maybe_undef attribute.
2061+
if (ArgNo < FD->getNumParams()) {
2062+
const ParmVarDecl *Param = FD->getParamDecl(ArgNo);
2063+
if (Param && Param->hasAttr<MaybeUndefAttr>())
2064+
return true;
2065+
}
2066+
2067+
return false;
2068+
}
2069+
20492070
/// Construct the IR attribute list of a function or call.
20502071
///
20512072
/// When adding an attribute, please consider where it should be handled:
@@ -4821,6 +4842,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
48214842
unsigned FirstIRArg, NumIRArgs;
48224843
std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo);
48234844

4845+
bool ArgHasMaybeUndefAttr =
4846+
IsArgumentMaybeUndef(TargetDecl, CallInfo.getNumRequiredArgs(), ArgNo);
4847+
48244848
switch (ArgInfo.getKind()) {
48254849
case ABIArgInfo::InAlloca: {
48264850
assert(NumIRArgs == 0);
@@ -4879,7 +4903,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
48794903
// Make a temporary alloca to pass the argument.
48804904
Address Addr = CreateMemTempWithoutCast(
48814905
I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
4882-
IRCallArgs[FirstIRArg] = Addr.getPointer();
4906+
4907+
llvm::Value *Val = Addr.getPointer();
4908+
if (ArgHasMaybeUndefAttr)
4909+
Val = Builder.CreateFreeze(Addr.getPointer());
4910+
IRCallArgs[FirstIRArg] = Val;
48834911

48844912
I->copyInto(*this, Addr);
48854913
} else {
@@ -4937,7 +4965,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
49374965
// Create an aligned temporary, and copy to it.
49384966
Address AI = CreateMemTempWithoutCast(
49394967
I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
4940-
IRCallArgs[FirstIRArg] = AI.getPointer();
4968+
llvm::Value *Val = AI.getPointer();
4969+
if (ArgHasMaybeUndefAttr)
4970+
Val = Builder.CreateFreeze(AI.getPointer());
4971+
IRCallArgs[FirstIRArg] = Val;
49414972

49424973
// Emit lifetime markers for the temporary alloca.
49434974
llvm::TypeSize ByvalTempElementSize =
@@ -4956,9 +4987,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
49564987
auto *T = llvm::PointerType::getWithSamePointeeType(
49574988
cast<llvm::PointerType>(V->getType()),
49584989
CGM.getDataLayout().getAllocaAddrSpace());
4959-
IRCallArgs[FirstIRArg] = getTargetHooks().performAddrSpaceCast(
4990+
4991+
llvm::Value *Val = getTargetHooks().performAddrSpaceCast(
49604992
*this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T,
49614993
true);
4994+
if (ArgHasMaybeUndefAttr)
4995+
Val = Builder.CreateFreeze(Val);
4996+
IRCallArgs[FirstIRArg] = Val;
49624997
}
49634998
}
49644999
break;
@@ -5012,6 +5047,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
50125047
V->getType() != IRFuncTy->getParamType(FirstIRArg))
50135048
V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg));
50145049

5050+
if (ArgHasMaybeUndefAttr)
5051+
V = Builder.CreateFreeze(V);
50155052
IRCallArgs[FirstIRArg] = V;
50165053
break;
50175054
}
@@ -5056,6 +5093,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
50565093
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
50575094
Address EltPtr = Builder.CreateStructGEP(Src, i);
50585095
llvm::Value *LI = Builder.CreateLoad(EltPtr);
5096+
if (ArgHasMaybeUndefAttr)
5097+
LI = Builder.CreateFreeze(LI);
50595098
IRCallArgs[FirstIRArg + i] = LI;
50605099
}
50615100
} else {
@@ -5072,6 +5111,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
50725111
if (ATy != nullptr && isa<RecordType>(I->Ty.getCanonicalType()))
50735112
Load = EmitCMSEClearRecord(Load, ATy, I->Ty);
50745113
}
5114+
5115+
if (ArgHasMaybeUndefAttr)
5116+
Load = Builder.CreateFreeze(Load);
50755117
IRCallArgs[FirstIRArg] = Load;
50765118
}
50775119

@@ -5117,6 +5159,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
51175159
if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
51185160
Address eltAddr = Builder.CreateStructGEP(addr, i);
51195161
llvm::Value *elt = Builder.CreateLoad(eltAddr);
5162+
if (ArgHasMaybeUndefAttr)
5163+
elt = Builder.CreateFreeze(elt);
51205164
IRCallArgs[IRArgPos++] = elt;
51215165
}
51225166
assert(IRArgPos == FirstIRArg + NumIRArgs);

clang/lib/Sema/SemaDeclAttr.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -8634,6 +8634,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
86348634
case ParsedAttr::AT_NoEscape:
86358635
handleNoEscapeAttr(S, D, AL);
86368636
break;
8637+
case ParsedAttr::AT_MaybeUndef:
8638+
handleSimpleAttribute<MaybeUndefAttr>(S, D, AL);
8639+
break;
86378640
case ParsedAttr::AT_AssumeAligned:
86388641
handleAssumeAlignedAttr(S, D, AL);
86398642
break;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
2+
3+
// CHECK-LABEL: define{{.*}} void @{{.*}}test4{{.*}}(float
4+
// CHECK-NEXT: entry:
5+
// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
6+
// CHECK-NEXT: store float [[TMP1:%.*]], float* [[TMP2:%.*]], align 4
7+
// CHECK-NEXT: ret void
8+
9+
// CHECK-LABEL: define{{.*}} void @{{.*}}test4{{.*}}(i32
10+
// CHECK-NEXT: entry:
11+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
12+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4
13+
// CHECK-NEXT: ret void
14+
15+
// CHECK-LABEL: define{{.*}} void @{{.*}}test{{.*}}(
16+
// CHECK-NEXT: entry:
17+
// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
18+
// CHECK-NEXT: [[TMP2:%.*]] = alloca float, align 4
19+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
20+
// CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3:%.*]]
21+
// CHECK-NEXT: call void @{{.*}}test4{{.*}}(i32 noundef [[TMP4:%.*]])
22+
// CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP2:%.*]], align 4
23+
// CHECK-NEXT: [[TMP6:%.*]] = freeze float [[TMP5:%.*]]
24+
// CHECK-NEXT: call void @{{.*}}test4{{.*}}(float noundef [[TMP6:%.*]])
25+
// CHECK-NEXT: ret void
26+
27+
template<class T>
28+
void test4(T __attribute__((maybe_undef)) arg) {
29+
return;
30+
}
31+
32+
template
33+
void test4<float>(float arg);
34+
35+
template
36+
void test4<int>(int arg);
37+
38+
void test() {
39+
int Var1;
40+
float Var2;
41+
test4<int>(Var1);
42+
test4<float>(Var2);
43+
}

clang/test/CodeGen/attr-maybeundef.c

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm %s -o - | FileCheck %s
2+
3+
#define __maybe_undef __attribute__((maybe_undef))
4+
5+
// CHECK: define{{.*}} void @t1(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]])
6+
// CHECK-NEXT: entry:
7+
// CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4
8+
// CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4
9+
// CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4
10+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP4:%.*]], align 4
11+
// CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP5:%.*]], align 4
12+
// CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP6:%.*]], align 4
13+
// CHECK-NEXT: ret void
14+
15+
// CHECK: define{{.*}} void @t2(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]])
16+
// CHECK-NEXT: entry:
17+
// CHECK-NEXT: [[TMP4:%.*]] = alloca i32, align 4
18+
// CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4
19+
// CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4
20+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP4:%.*]], align 4
21+
// CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[TMP5:%.*]], align 4
22+
// CHECK-NEXT: store i32 [[TMP3:%.*]], i32* [[TMP6:%.*]], align 4
23+
// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP4:%.*]], align 4
24+
// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP5:%.*]], align 4
25+
// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP6:%.*]], align 4
26+
// CHECK-NEXT: [[TMP10:%.*]] = freeze i32 [[TMP8:%.*]]
27+
// CHECK-NEXT: call void @t1(i32 noundef [[TMP7:%.*]], i32 noundef [[TMP10:%.*]], i32 noundef [[TMP9:%.*]])
28+
// CHECK-NEXT: ret void
29+
30+
void t1(int param1, int __maybe_undef param2, int param3) {}
31+
32+
void t2(int param1, int param2, int param3) {
33+
t1(param1, param2, param3);
34+
}
35+
36+
// CHECK: define{{.*}} void @TestVariadicFunction(i32 noundef [[TMP0:%.*]], ...)
37+
// CHECK-NEXT: entry:
38+
// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
39+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
40+
// CHECK-NEXT: store i32 [[TMP0:%.*]], i32* [[TMP1:%.*]], align 4
41+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
42+
// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2:%.*]], align 4
43+
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP2:%.*]], align 4
44+
// CHECK-NEXT: [[TMP5:%.*]] = freeze i32 [[TMP2:%.*]]
45+
// CHECK-NEXT: call void (i32, ...) @VariadicFunction(i32 noundef [[TMP6:%.*]], i32 noundef [[TMP4:%.*]], i32 noundef [[TMP5:%.*]])
46+
// CHECK-NEXT: ret void
47+
48+
// CHECK: declare{{.*}} void @VariadicFunction(i32 noundef, ...)
49+
50+
void VariadicFunction(int __maybe_undef x, ...);
51+
void TestVariadicFunction(int x, ...) {
52+
int Var;
53+
return VariadicFunction(x, Var, Var);
54+
}
55+
56+
// CHECK: define{{.*}} void @other()
57+
// CHECK-NEXT: entry:
58+
// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
59+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
60+
// CHECK-NEXT: call void @func(i32 noundef [[TMP2:%.*]])
61+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
62+
// CHECK-NEXT: [[TMP4:%.*]] = freeze i32 [[TMP3:%.*]]
63+
// CHECK-NEXT: call void @func1(i32 noundef [[TMP4:%.*]])
64+
// CHECK-NEXT: ret void
65+
66+
// CHECK: define{{.*}} void @func(i32 noundef [[TMP1:%.*]])
67+
// CHECK-NEXT: entry:
68+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
69+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4
70+
// CHECK-NEXT: ret void
71+
72+
// CHECK: define{{.*}} void @func1(i32 noundef [[TMP1:%.*]])
73+
// CHECK-NEXT: entry:
74+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
75+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4
76+
// CHECK-NEXT: ret void
77+
78+
void func(int param);
79+
void func1(int __maybe_undef param);
80+
81+
void other() {
82+
int Var;
83+
func(Var);
84+
func1(Var);
85+
}
86+
87+
void func(__maybe_undef int param) {}
88+
void func1(int param) {}
89+
90+
// CHECK: define{{.*}} void @foo(i32 noundef [[TMP1:%.*]])
91+
// CHECK-NEXT: entry:
92+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4
93+
// CHECK-NEXT: store i32 [[TMP1:%.*]], i32* [[TMP2:%.*]], align 4
94+
// CHECK-NEXT: ret void
95+
96+
// CHECK: define{{.*}} void @bar()
97+
// CHECK-NEXT: entry:
98+
// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4
99+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
100+
// CHECK-NEXT: call void @foo(i32 noundef [[TMP2:%.*]])
101+
// CHECK-NEXT: ret void
102+
103+
void foo(__maybe_undef int param);
104+
void foo(int param) {}
105+
106+
void bar() {
107+
int Var;
108+
foo(Var);
109+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -target-cpu gfx906 -x hip -fcuda-is-device -emit-llvm %s \
2+
// RUN: -o - | FileCheck %s
3+
4+
// CHECK: define dso_local amdgpu_kernel void @_Z13shufflekernelv()
5+
// CHECK-NEXT: entry:
6+
// CHECK-NEXT: [[TMP1:%.*]] = alloca i32, align 4, addrspace(5)
7+
// CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4, addrspace(5)
8+
// CHECK-NEXT: [[TMP3:%.*]] = addrspacecast i32 addrspace(5)* [[TMP1:%.*]] to i32*
9+
// CHECK-NEXT: [[TMP4:%.*]] = addrspacecast i32 addrspace(5)* [[TMP2:%.*]] to i32*
10+
// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3:%.*]], align 4
11+
// CHECK-NEXT: [[TMP6:%.*]] = freeze i32 [[TMP5:%.*]]
12+
// CHECK-NEXT: %call = call noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP6:%.*]], i32 noundef 64, i32 noundef 0) #4
13+
// CHECK-NEXT: store i32 %call, i32* [[TMP4:%.*]], align 4
14+
// CHECK-NEXT: ret void
15+
16+
// CHECK: define linkonce_odr noundef i32 @_Z11__shfl_synciii(i32 noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]], i32 noundef [[TMP3:%.*]])
17+
18+
#define __global__ __attribute__((global))
19+
#define __device__ __attribute__((device))
20+
#define __maybe_undef __attribute__((maybe_undef))
21+
#define WARP_SIZE 64
22+
23+
static constexpr int warpSize = __AMDGCN_WAVEFRONT_SIZE;
24+
25+
__device__ static inline unsigned int __lane_id() {
26+
return __builtin_amdgcn_mbcnt_hi(
27+
-1, __builtin_amdgcn_mbcnt_lo(-1, 0));
28+
}
29+
30+
__device__
31+
inline
32+
int __shfl_sync(int __maybe_undef var, int src_lane, int width = warpSize) {
33+
int self = __lane_id();
34+
int index = src_lane + (self & ~(width-1));
35+
return __builtin_amdgcn_ds_bpermute(index<<2, var);
36+
}
37+
38+
__global__ void
39+
shufflekernel()
40+
{
41+
int t;
42+
int res;
43+
res = __shfl_sync(t, WARP_SIZE, 0);
44+
}

clang/test/Misc/pragma-attribute-supported-attributes-list.test

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
// CHECK-NEXT: Lockable (SubjectMatchRule_record)
8484
// CHECK-NEXT: MIGServerRoutine (SubjectMatchRule_function, SubjectMatchRule_objc_method, SubjectMatchRule_block)
8585
// CHECK-NEXT: MSStruct (SubjectMatchRule_record)
86+
// CHECK-NEXT: MaybeUndef (SubjectMatchRule_variable_is_parameter)
8687
// CHECK-NEXT: MicroMips (SubjectMatchRule_function)
8788
// CHECK-NEXT: MinSize (SubjectMatchRule_function, SubjectMatchRule_objc_method)
8889
// CHECK-NEXT: MinVectorWidth (SubjectMatchRule_function)

clang/test/Sema/attr-maybeundef.c

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// RUN: %clang_cc1 -fsyntax-only -verify %s
2+
3+
// Decl annotations.
4+
void f(int __attribute__((maybe_undef)) *a);
5+
void (*fp)(int __attribute__((maybe_undef)) handle);
6+
__attribute__((maybe_undef)) int i(); // expected-warning {{'maybe_undef' attribute only applies to parameters}}
7+
int __attribute__((maybe_undef)) a; // expected-warning {{'maybe_undef' attribute only applies to parameters}}
8+
int (* __attribute__((maybe_undef)) fpt)(char *); // expected-warning {{'maybe_undef' attribute only applies to parameters}}
9+
void h(int *a __attribute__((maybe_undef("RandomString")))); // expected-error {{'maybe_undef' attribute takes no arguments}}
10+
11+
// Type annotations.
12+
int __attribute__((maybe_undef)) ta; // expected-warning {{'maybe_undef' attribute only applies to parameters}}
13+
14+
// Typedefs.
15+
typedef int callback(char *) __attribute__((maybe_undef)); // expected-warning {{'maybe_undef' attribute only applies to parameters}}

0 commit comments

Comments
 (0)