Skip to content

Commit 133a61c

Browse files
committed
Unicode upgrades.
Also closes llvm#10.
1 parent d1cd625 commit 133a61c

File tree

9 files changed

+252
-119
lines changed

9 files changed

+252
-119
lines changed

clang/include/clang/Lex/Lexer.h

+2
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,8 @@ class Lexer : public PreprocessorLexer {
602602
/// Check if this is the first time we're lexing the input file.
603603
bool isFirstTimeLexingFile() const { return IsFirstTimeLexingFile; }
604604

605+
bool validateAndRewriteIdentifier(std::string &In);
606+
605607
private:
606608
//===--------------------------------------------------------------------===//
607609
// Internal implementation interfaces.

clang/lib/Lex/Lexer.cpp

+79
Original file line numberDiff line numberDiff line change
@@ -4689,3 +4689,82 @@ bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
46894689
convertDependencyDirectiveToken(DDTok, Result);
46904690
return false;
46914691
}
4692+
4693+
bool Lexer::validateAndRewriteIdentifier(std::string &In) {
4694+
SmallVector<llvm::UTF32, 30> RewriteUTF32;
4695+
4696+
static const llvm::sys::UnicodeCharRange DigitRanges[] = {
4697+
{0x0030, 0x0039}
4698+
};
4699+
static llvm::sys::UnicodeCharRange NondigitRanges[] = {
4700+
{0x0041, 0x005A}, {0x005F, 0x005F}, {0x0061, 0x007A}
4701+
};
4702+
static const llvm::sys::UnicodeCharSet DigitChars(DigitRanges);
4703+
static const llvm::sys::UnicodeCharSet NondigitChars(NondigitRanges);
4704+
static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
4705+
static const llvm::sys::UnicodeCharSet XIDContinueChars(XIDContinueRanges);
4706+
4707+
if (In.size() == 0)
4708+
return false;
4709+
4710+
const auto *Cursor = &In[0];
4711+
const auto *End = Cursor + In.size();
4712+
4713+
// Validate leading character.
4714+
if (*Cursor == '\\') {
4715+
const char *SlashLoc = Cursor++;
4716+
std::optional<uint32_t> UCN = tryReadUCN(Cursor, SlashLoc, nullptr);
4717+
if (!UCN || !XIDStartChars.contains(UCN.value()))
4718+
return false;
4719+
RewriteUTF32.push_back(UCN.value());
4720+
} else {
4721+
llvm::UTF32 CodePoint;
4722+
4723+
if (llvm::conversionOK != llvm::convertUTF8Sequence(
4724+
reinterpret_cast<const llvm::UTF8 **>(&Cursor),
4725+
reinterpret_cast<const llvm::UTF8 *>(End), &CodePoint,
4726+
llvm::ConversionFlags::strictConversion))
4727+
return false;
4728+
4729+
if (!NondigitChars.contains(CodePoint) &&
4730+
!XIDStartChars.contains(CodePoint))
4731+
return false;
4732+
RewriteUTF32.push_back(CodePoint);
4733+
}
4734+
4735+
// Validate remaining characters.
4736+
while (Cursor < End) {
4737+
if (*Cursor == '\\') {
4738+
const char *SlashLoc = Cursor++;
4739+
std::optional<uint32_t> UCN = tryReadUCN(Cursor, SlashLoc, nullptr);
4740+
if (!UCN || !(XIDStartChars.contains(UCN.value()) ||
4741+
XIDContinueChars.contains(UCN.value())))
4742+
return false;
4743+
RewriteUTF32.push_back(UCN.value());
4744+
} else {
4745+
llvm::UTF32 CodePoint;
4746+
4747+
if (llvm::conversionOK != llvm::convertUTF8Sequence(
4748+
reinterpret_cast<const llvm::UTF8 **>(&Cursor),
4749+
reinterpret_cast<const llvm::UTF8 *>(End), &CodePoint,
4750+
llvm::ConversionFlags::strictConversion))
4751+
return false;
4752+
4753+
if (!DigitChars.contains(CodePoint) &&
4754+
!NondigitChars.contains(CodePoint) &&
4755+
!XIDStartChars.contains(CodePoint) &&
4756+
!XIDContinueChars.contains(CodePoint))
4757+
return false;
4758+
RewriteUTF32.push_back(CodePoint);
4759+
}
4760+
}
4761+
assert(Cursor == End);
4762+
4763+
std::string Rewrite;
4764+
Rewrite.reserve(RewriteUTF32.size() * 4);
4765+
if (!llvm::convertUTF32ToUTF8String(RewriteUTF32, Rewrite))
4766+
return true;
4767+
In = Rewrite;
4768+
4769+
return true;
4770+
}

clang/lib/Sema/Metafunctions.cpp

+61-74
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "clang/AST/RecordLayout.h"
2121
#include "clang/AST/Reflection.h"
2222
#include "clang/Basic/SourceManager.h"
23+
#include "clang/Lex/Lexer.h"
2324
#include "clang/Lex/Preprocessor.h"
2425
#include "clang/Sema/Metafunction.h"
2526
#include "clang/Sema/ParsedTemplate.h"
@@ -392,7 +393,7 @@ static constexpr Metafunction Metafunctions[] = {
392393
{ Metafunction::MFRK_bool, 1, 1, is_special_member },
393394
{ Metafunction::MFRK_metaInfo, 2, 2, reflect_result },
394395
{ Metafunction::MFRK_metaInfo, 5, 5, reflect_invoke },
395-
{ Metafunction::MFRK_metaInfo, 9, 9, data_member_spec },
396+
{ Metafunction::MFRK_metaInfo, 10, 10, data_member_spec },
396397
{ Metafunction::MFRK_metaInfo, 3, 3, define_class },
397398
{ Metafunction::MFRK_sizeT, 1, 1, offset_of },
398399
{ Metafunction::MFRK_sizeT, 1, 1, size_of },
@@ -527,6 +528,7 @@ static void getTypeName(std::string &Result, ASTContext &C, QualType QT,
527528
bool BasicOnly) {
528529
PrintingPolicy PP = C.getPrintingPolicy();
529530
PP.SuppressTagKeyword = true;
531+
PP.SuppressScope = true;
530532

531533
encodeName(Result, QT.getAsString(PP), BasicOnly);
532534
}
@@ -1779,27 +1781,20 @@ bool can_substitute(APValue &Result, Sema &S, EvalFn Evaluator,
17791781

17801782
for (uint64_t k = 0; k < nArgs; ++k) {
17811783
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
1782-
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
1783-
S.Context.getSizeType(),
1784-
Args[1]->getExprLoc());
1785-
1786-
ArraySubscriptExpr *SubscriptExpr =
1787-
new (S.Context) ArraySubscriptExpr(Args[1], IdxExpr,
1788-
S.Context.MetaInfoTy,
1789-
VK_LValue, OK_Ordinary,
1790-
Range.getBegin());
1791-
1792-
ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
1793-
S.Context.MetaInfoTy,
1794-
CK_LValueToRValue,
1795-
SubscriptExpr,
1796-
nullptr, VK_PRValue,
1797-
FPOptionsOverride());
1798-
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
1784+
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
1785+
S.Context.getSizeType(),
1786+
Args[1]->getExprLoc());
1787+
1788+
Synthesized = new (S.Context) ArraySubscriptExpr(Args[1], Synthesized,
1789+
S.Context.MetaInfoTy,
1790+
VK_LValue, OK_Ordinary,
1791+
Range.getBegin());
1792+
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
17991793
return true;
18001794

18011795
APValue Unwrapped;
1802-
if (!Evaluator(Unwrapped, RVExpr, true) || !Unwrapped.isReflection() ||
1796+
if (!Evaluator(Unwrapped, Synthesized, true) ||
1797+
!Unwrapped.isReflection() ||
18031798
!CanActAsTemplateArg(Unwrapped.getReflection()))
18041799
return true;
18051800

@@ -1852,27 +1847,20 @@ bool substitute(APValue &Result, Sema &S, EvalFn Evaluator, QualType ResultTy,
18521847

18531848
for (uint64_t k = 0; k < nArgs; ++k) {
18541849
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
1855-
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
1856-
S.Context.getSizeType(),
1857-
Args[1]->getExprLoc());
1858-
1859-
ArraySubscriptExpr *SubscriptExpr =
1860-
new (S.Context) ArraySubscriptExpr(Args[1], IdxExpr,
1861-
S.Context.MetaInfoTy,
1862-
VK_LValue, OK_Ordinary,
1863-
Range.getBegin());
1864-
1865-
ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
1866-
S.Context.MetaInfoTy,
1867-
CK_LValueToRValue,
1868-
SubscriptExpr,
1869-
nullptr, VK_PRValue,
1870-
FPOptionsOverride());
1871-
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
1850+
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
1851+
S.Context.getSizeType(),
1852+
Args[1]->getExprLoc());
1853+
1854+
Synthesized = new (S.Context) ArraySubscriptExpr(Args[1], Synthesized,
1855+
S.Context.MetaInfoTy,
1856+
VK_LValue, OK_Ordinary,
1857+
Range.getBegin());
1858+
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
18721859
return true;
18731860

18741861
APValue Unwrapped;
1875-
if (!Evaluator(Unwrapped, RVExpr, true) || !Unwrapped.isReflection() ||
1862+
if (!Evaluator(Unwrapped, Synthesized, true) ||
1863+
!Unwrapped.isReflection() ||
18761864
!CanActAsTemplateArg(Unwrapped.getReflection()))
18771865
return true;
18781866

@@ -3363,34 +3351,41 @@ bool data_member_spec(APValue &Result, Sema &S, EvalFn Evaluator,
33633351
size_t nameLen = Scratch.getInt().getExtValue();
33643352
Name.emplace(nameLen, '\0');
33653353

3354+
// Evaluate the character type.
3355+
if (!Evaluator(Scratch, Args[ArgIdx++], true))
3356+
return true;
3357+
QualType CharTy = Scratch.getReflectedType();
3358+
3359+
// Evaluate the data contents.
33663360
for (uint64_t k = 0; k < nameLen; ++k) {
33673361
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
3368-
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
3369-
S.Context.getSizeType(),
3370-
Args[ArgIdx]->getExprLoc());
3371-
3372-
ArraySubscriptExpr *SubscriptExpr =
3373-
new (S.Context) ArraySubscriptExpr(Args[ArgIdx], IdxExpr,
3374-
S.Context.CharTy,
3375-
VK_LValue, OK_Ordinary,
3376-
Range.getBegin());
3377-
3378-
ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
3379-
S.Context.CharTy,
3380-
CK_LValueToRValue,
3381-
SubscriptExpr,
3382-
nullptr, VK_PRValue,
3383-
FPOptionsOverride());
3384-
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent())
3362+
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
3363+
S.Context.getSizeType(),
3364+
Args[ArgIdx]->getExprLoc());
3365+
3366+
Synthesized = new (S.Context) ArraySubscriptExpr(Args[ArgIdx],
3367+
Synthesized, CharTy,
3368+
VK_LValue, OK_Ordinary,
3369+
Range.getBegin());
3370+
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent())
33853371
return true;
33863372

3387-
if (!Evaluator(Scratch, RVExpr, true))
3373+
if (!Evaluator(Scratch, Synthesized, true))
33883374
return true;
3375+
33893376
(*Name)[k] = static_cast<char>(Scratch.getInt().getExtValue());
33903377
}
33913378
ArgIdx++;
33923379
} else {
3393-
ArgIdx += 2;
3380+
ArgIdx += 3;
3381+
}
3382+
3383+
// Validate the name as an identifier.
3384+
if (Name) {
3385+
Lexer Lex(Range.getBegin(), S.getLangOpts(), Name->data(), Name->data(),
3386+
Name->data() + Name->size(), false);
3387+
if (!Lex.validateAndRewriteIdentifier(*Name))
3388+
return true;
33943389
}
33953390

33963391
// Evaluate whether an alignment was provided.
@@ -3581,28 +3576,20 @@ bool define_class(APValue &Result, Sema &S, EvalFn Evaluator, QualType ResultTy,
35813576
for (size_t k = 0; k < NumMembers; ++k) {
35823577
// Extract the reflection from the list of member specs.
35833578
llvm::APInt Idx(S.Context.getTypeSize(S.Context.getSizeType()), k, false);
3584-
Expr *IdxExpr = IntegerLiteral::Create(S.Context, Idx,
3585-
S.Context.getSizeType(),
3586-
Args[2]->getExprLoc());
3587-
3588-
ArraySubscriptExpr *SubscriptExpr =
3589-
new (S.Context) ArraySubscriptExpr(Args[2], IdxExpr,
3590-
S.Context.MetaInfoTy,
3591-
VK_LValue, OK_Ordinary,
3592-
Range.getBegin());
3593-
3594-
ImplicitCastExpr *RVExpr = ImplicitCastExpr::Create(S.Context,
3595-
S.Context.MetaInfoTy,
3596-
CK_LValueToRValue,
3597-
SubscriptExpr,
3598-
nullptr, VK_PRValue,
3599-
FPOptionsOverride());
3600-
if (RVExpr->isValueDependent() || RVExpr->isTypeDependent()) {
3579+
Expr *Synthesized = IntegerLiteral::Create(S.Context, Idx,
3580+
S.Context.getSizeType(),
3581+
Args[2]->getExprLoc());
3582+
3583+
Synthesized = new (S.Context) ArraySubscriptExpr(Args[2], Synthesized,
3584+
S.Context.MetaInfoTy,
3585+
VK_LValue, OK_Ordinary,
3586+
Range.getBegin());
3587+
if (Synthesized->isValueDependent() || Synthesized->isTypeDependent()) {
36013588
RestoreDC();
36023589
return true;
36033590
}
36043591

3605-
if (!Evaluator(Scratch, RVExpr, true) ||
3592+
if (!Evaluator(Scratch, Synthesized, true) ||
36063593
Scratch.getReflection().getKind() !=
36073594
ReflectionValue::RK_data_member_spec) {
36083595
RestoreDC();

clang/lib/Sema/SemaTemplate.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -8843,6 +8843,8 @@ static ExprResult
88438843
BuildExpressionFromReflection(Sema &S, const ReflectionValue &R,
88448844
SourceLocation Loc) {
88458845
switch (R.getKind()) {
8846+
case ReflectionValue::RK_null:
8847+
return CXXReflectExpr::Create(S.Context, Loc, Loc);
88468848
case ReflectionValue::RK_type:
88478849
return CXXReflectExpr::Create(S.Context, Loc, Loc, R.getAsType());
88488850
case ReflectionValue::RK_expr_result:

libcxx/include/experimental/meta

+28-7
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ consteval auto has_default_argument(info) -> bool;
158158
#include <span>
159159
#include <string_view>
160160
#include <type_traits>
161+
#include <variant>
161162
#include <vector>
162163

163164
#if __has_feature(reflection)
@@ -942,7 +943,17 @@ consteval auto reflect_invoke(info target, R1 &&targs, R2 &&args) -> info {
942943

943944
// Representation of a data member which may be passed to 'data_member_spec'.
944945
struct data_member_options_t {
945-
optional<string_view> name = nullopt;
946+
struct name_type {
947+
std::variant<u8string, string> impl;
948+
949+
template <typename T> requires constructible_from<u8string, T>
950+
consteval name_type(T &&in) : impl(std::in_place_type<u8string>, in) {}
951+
952+
template <typename T> requires constructible_from<string, T>
953+
consteval name_type(T &&in) : impl(std::in_place_type<string>, in) {}
954+
};
955+
956+
optional<name_type> name = nullopt;
946957
bool is_static = false;
947958
optional<int> alignment = nullopt;
948959
optional<int> width = nullopt;
@@ -952,16 +963,26 @@ struct data_member_options_t {
952963
// be used with 'define_class' to define a record type.
953964
consteval auto data_member_spec(info member_type,
954965
data_member_options_t options = {}) -> info {
955-
string_view name = options.name.value_or("");
966+
auto name = options.name.value_or(u8"").impl;
956967
bool is_static = options.is_static;
957968
int alignment = options.alignment.value_or(0);
958969
int width = options.width.value_or(0);
959970

960-
return __metafunction(detail::__metafn_data_member_spec,
961-
member_type, is_static,
962-
options.name.has_value(), name.size(), name.data(),
963-
options.alignment.has_value(), alignment,
964-
options.width.has_value(), width);
971+
if (holds_alternative<u8string>(name)) {
972+
const u8string &s = get<u8string>(name);
973+
return __metafunction(detail::__metafn_data_member_spec,
974+
member_type, is_static, options.name.has_value(),
975+
s.size(), ^const char8_t *, s.data(),
976+
options.alignment.has_value(), alignment,
977+
options.width.has_value(), width);
978+
} else {
979+
const string &s = get<string>(name);
980+
return __metafunction(detail::__metafn_data_member_spec,
981+
member_type, is_static, options.name.has_value(),
982+
s.size(), ^const char *, s.data(),
983+
options.alignment.has_value(), alignment,
984+
options.width.has_value(), width);
985+
}
965986
}
966987

967988
// Completes the definition of the record type reflected by 'class_type' with

0 commit comments

Comments
 (0)