-
Notifications
You must be signed in to change notification settings - Fork 15.1k
clang-format: Add splitting for strings with user-defined suffixes #167150
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository, in which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-clang-format Author: Shivram (shivrm) Changes: String literals with user-defined suffixes can now be split between lines.
Fixes #165617 Full diff: https://github.com/llvm/llvm-project/pull/167150.diff 4 Files Affected:
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index 994a427517ffc..dd9d4ecb2f3c7 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -253,10 +253,13 @@ unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
BreakableStringLiteral::BreakableStringLiteral(
const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
- StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
- encoding::Encoding Encoding, const FormatStyle &Style)
+ StringRef Postfix, StringRef ContinuationPrefix,
+ StringRef ContinuationPostfix, unsigned UnbreakableTailLength,
+ bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
: BreakableToken(Tok, InPPDirective, Encoding, Style),
StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+ ContinuationPrefix(ContinuationPrefix),
+ ContinuationPostfix(ContinuationPostfix),
UnbreakableTailLength(UnbreakableTailLength) {
assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
Line = Tok.TokenText.substr(
@@ -274,9 +277,14 @@ void BreakableStringLiteral::insertBreak(unsigned LineIndex,
unsigned TailOffset, Split Split,
unsigned ContentIndent,
WhitespaceManager &Whitespaces) const {
+
+ const unsigned SplitEnd = TailOffset + Split.first + Split.second;
+ const bool IsLastFragment = SplitEnd > Line.size() - UnbreakableTailLength;
+ StringRef LocalPostfix = (IsLastFragment) ? Postfix : ContinuationPostfix;
+
Whitespaces.replaceWhitespaceInToken(
- Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
- Prefix, InPPDirective, 1, StartColumn);
+ Tok, ContinuationPrefix.size() + TailOffset + Split.first, Split.second,
+ LocalPostfix, ContinuationPrefix, InPPDirective, 1, StartColumn);
}
BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
@@ -288,6 +296,10 @@ BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
: QuoteStyle == AtDoubleQuotes ? "@\""
: "\"",
/*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
+ /*ContinuationPrefix=*/QuoteStyle == SingleQuotes ? "'"
+ : QuoteStyle == AtDoubleQuotes ? "@\""
+ : "\"",
+ /*ContinuationPostfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
UnbreakableTailLength, InPPDirective, Encoding, Style),
BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
QuoteStyle(QuoteStyle) {
diff --git a/clang/lib/Format/BreakableToken.h b/clang/lib/Format/BreakableToken.h
index 45c00b35fd01e..2ee37d3e0e059 100644
--- a/clang/lib/Format/BreakableToken.h
+++ b/clang/lib/Format/BreakableToken.h
@@ -252,6 +252,8 @@ class BreakableStringLiteral : public BreakableToken {
/// after formatting.
BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn,
StringRef Prefix, StringRef Postfix,
+ StringRef ContinuationPrefix,
+ StringRef ContinuationPostfix,
unsigned UnbreakableTailLength, bool InPPDirective,
encoding::Encoding Encoding, const FormatStyle &Style);
@@ -274,15 +276,21 @@ class BreakableStringLiteral : public BreakableToken {
protected:
// The column in which the token starts.
unsigned StartColumn;
- // The prefix a line needs after a break in the token.
+ // The prefix a line needs at the start
StringRef Prefix;
- // The postfix a line needs before introducing a break.
+ // The postfix a line needs at the end
StringRef Postfix;
+ // The prefix every line except the first line needs
+ StringRef ContinuationPrefix;
+ // The postfix every line except the last line needs
+ StringRef ContinuationPostfix;
// The token text excluding the prefix and postfix.
StringRef Line;
// Length of the sequence of tokens after this string literal that cannot
// contain line breaks.
unsigned UnbreakableTailLength;
+ // Whether the string prefix and postfix should be repeated on each line
+ // when breaking the string.
};
class BreakableStringLiteralUsingOperators : public BreakableStringLiteral {
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 9ab024a03fbd7..5badd6edf4a7b 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -2540,22 +2540,46 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
StringRef Prefix;
StringRef Postfix;
+
// FIXME: Handle whitespace between '_T', '(', '"..."', and ')'.
// FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to
// reduce the overhead) for each FormatToken, which is a string, so that we
// don't run multiple checks here on the hot path.
- if ((Text.ends_with(Postfix = "\"") &&
- (Text.starts_with(Prefix = "@\"") || Text.starts_with(Prefix = "\"") ||
- Text.starts_with(Prefix = "u\"") ||
- Text.starts_with(Prefix = "U\"") ||
- Text.starts_with(Prefix = "u8\"") ||
- Text.starts_with(Prefix = "L\""))) ||
- (Text.starts_with(Prefix = "_T(\"") &&
- Text.ends_with(Postfix = "\")"))) {
+ if (Text.starts_with(Prefix = "_T(\"") && Text.ends_with(Postfix = "\")")) {
+ // We need to put `_T("` and `")` on each line because it is a macro
+ llvm::StringRef ContinuationPrefix = Prefix;
+ llvm::StringRef ContinuationPostfix = Postfix;
+
return std::make_unique<BreakableStringLiteral>(
- Current, StartColumn, Prefix, Postfix, UnbreakableTailLength,
- State.Line->InPPDirective, Encoding, Style);
+ Current, StartColumn, Prefix, Postfix, ContinuationPrefix,
+ ContinuationPostfix, UnbreakableTailLength, State.Line->InPPDirective,
+ Encoding, Style);
+ }
+
+ static const auto PostfixRegex =
+ llvm::Regex(R"("(_[a-zA-Z_][a-zA-Z0-9_]*)?$)");
+ llvm::SmallVector<llvm::StringRef, 1> Matches;
+
+ if (PostfixRegex.match(Text, &Matches)) {
+ Postfix = Matches.front();
+
+ if ((Text.starts_with(Prefix = "@\"") ||
+ Text.starts_with(Prefix = "\"") ||
+ Text.starts_with(Prefix = "u\"") ||
+ Text.starts_with(Prefix = "U\"") ||
+ Text.starts_with(Prefix = "u8\"") ||
+ Text.starts_with(Prefix = "L\""))) {
+
+ // Repeat the prefix on every line but don't repeat the suffix
+ llvm::StringRef ContinuationPrefix = Prefix;
+ llvm::StringRef ContinuationPostfix = "\"";
+ return std::make_unique<BreakableStringLiteral>(
+ Current, StartColumn, Prefix, Postfix, ContinuationPrefix,
+ ContinuationPostfix, UnbreakableTailLength,
+ State.Line->InPPDirective, Encoding, Style);
+ }
}
+
} else if (Current.is(TT_BlockComment)) {
if (Style.ReflowComments == FormatStyle::RCS_Never ||
// If a comment token switches formatting, like
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 24235b966399d..4c7593b88202f 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -15838,6 +15838,10 @@ TEST_F(FormatTest, BreaksWideAndNSStringLiterals) {
"@\"NSString literal\";", getGoogleStyleWithColumns(19));
verifyFormat(R"(NSString *s = @"那那那那";)", getLLVMStyleWithColumns(26));
+ EXPECT_EQ("L\"suffixed \"\n"
+ "L\"string\"_s;",
+ format("L\"suffixed string\"_s;", getLLVMStyleWithColumns(19)));
+
// This input makes clang-format try to split the incomplete unicode escape
// sequence, which used to lead to a crasher.
verifyNoCrash(
|
String literals with user-defined suffixes can now be split between lines.
Adds `ContinuationPrefix` and `ContinuationPostfix` attributes to `BreakableStringLiteral` to have different postfixes for the last line and all the other lines. `ContinuationPrefix` is currently unused - prefixes are still placed on every line when splitting. I've kept it for completeness. Fixes #165617