@@ -1213,6 +1213,197 @@ static bool isRemark(const Record &Diag) {
12131213 return ClsName == " CLASS_REMARK" ;
12141214}
12151215
1216+ // Presumes the text has been split at the first whitespace or hyphen.
1217+ static bool isExemptAtStart (StringRef Text) {
1218+ // Fast path, the first character is lowercase or not alphanumeric.
1219+ if (Text.empty () || isLower (Text[0 ]) || !isAlnum (Text[0 ]))
1220+ return true ;
1221+
1222+ // If the text is all uppercase (or numbers, +, or _), then we assume it's an
1223+ // acronym and that's allowed. This covers cases like ISO, C23, C++14, and
1224+ // OBJECT_MODE. However, if there's only a single letter other than "C", we
1225+ // do not exempt it so that we catch a case like "A really bad idea" while
1226+ // still allowing a case like "C does not allow...".
1227+ if (llvm::all_of (Text, [](char C) {
1228+ return isUpper (C) || isDigit (C) || C == ' +' || C == ' _' ;
1229+ }))
1230+ return Text.size () > 1 || Text[0 ] == ' C' ;
1231+
1232+ // Otherwise, there are a few other exemptions.
1233+ return StringSwitch<bool >(Text)
1234+ .Case (" AddressSanitizer" , true )
1235+ .Case (" CFString" , true )
1236+ .Case (" Clang" , true )
1237+ .Case (" Fuchsia" , true )
1238+ .Case (" GNUstep" , true )
1239+ .Case (" IBOutletCollection" , true )
1240+ .Case (" Microsoft" , true )
1241+ .Case (" Neon" , true )
1242+ .StartsWith (" NSInvocation" , true ) // NSInvocation, NSInvocation's
1243+ .Case (" Objective" , true ) // Objective-C (hyphen is a word boundary)
1244+ .Case (" OpenACC" , true )
1245+ .Case (" OpenCL" , true )
1246+ .Case (" OpenMP" , true )
1247+ .Case (" Pascal" , true )
1248+ .Case (" Swift" , true )
1249+ .Case (" Unicode" , true )
1250+ .Case (" Vulkan" , true )
1251+ .Case (" WebAssembly" , true )
1252+ .Default (false );
1253+ }
1254+
1255+ // Does not presume the text has been split at all.
1256+ static bool isExemptAtEnd (StringRef Text) {
1257+ // Rather than come up with a list of characters that are allowed, we go the
1258+ // other way and look only for characters that are not allowed.
1259+ switch (Text.back ()) {
1260+ default :
1261+ return true ;
1262+ case ' ?' :
1263+ // Explicitly allowed to support "; did you mean?".
1264+ return true ;
1265+ case ' .' :
1266+ case ' !' :
1267+ return false ;
1268+ }
1269+ }
1270+
1271+ static void verifyDiagnosticWording (const Record &Diag) {
1272+ StringRef FullDiagText = Diag.getValueAsString (" Summary" );
1273+
1274+ auto DiagnoseStart = [&](StringRef Text) {
1275+ // Verify that the text does not start with a capital letter, except for
1276+ // special cases that are exempt like ISO and C++. Find the first word
1277+ // by looking for a word breaking character.
1278+ char Separators[] = {' ' , ' -' , ' ,' , ' }' };
1279+ auto Iter = std::find_first_of (
1280+ Text.begin (), Text.end (), std::begin (Separators), std::end (Separators));
1281+
1282+ StringRef First = Text.substr (0 , Iter - Text.begin ());
1283+ if (!isExemptAtStart (First)) {
1284+ PrintError (&Diag,
1285+ " Diagnostics should not start with a capital letter; '" +
1286+ First + " ' is invalid" );
1287+ }
1288+ };
1289+
1290+ auto DiagnoseEnd = [&](StringRef Text) {
1291+ // Verify that the text does not end with punctuation like '.' or '!'.
1292+ if (!isExemptAtEnd (Text)) {
1293+ PrintError (&Diag, " Diagnostics should not end with punctuation; '" +
1294+ Text.substr (Text.size () - 1 , 1 ) + " ' is invalid" );
1295+ }
1296+ };
1297+
1298+ // If the diagnostic starts with %select, look through it to see whether any
1299+ // of the options will cause a problem.
1300+ if (FullDiagText.starts_with (" %select{" )) {
1301+ // Do a balanced delimiter scan from the start of the text to find the
1302+ // closing '}', skipping intermediary {} pairs.
1303+
1304+ size_t BraceCount = 1 ;
1305+ constexpr size_t PercentSelectBraceLen = sizeof (" %select{" ) - 1 ;
1306+ auto Iter = FullDiagText.begin () + PercentSelectBraceLen;
1307+ for (auto End = FullDiagText.end (); Iter != End; ++Iter) {
1308+ char Ch = *Iter;
1309+ if (Ch == ' {' )
1310+ ++BraceCount;
1311+ else if (Ch == ' }' )
1312+ --BraceCount;
1313+ if (!BraceCount)
1314+ break ;
1315+ }
1316+ // Defending against a malformed diagnostic string.
1317+ if (BraceCount != 0 )
1318+ return ;
1319+
1320+ StringRef SelectText =
1321+ FullDiagText.substr (PercentSelectBraceLen, Iter - FullDiagText.begin () -
1322+ PercentSelectBraceLen);
1323+ SmallVector<StringRef, 4 > SelectPieces;
1324+ SelectText.split (SelectPieces, ' |' );
1325+
1326+ // Walk over all of the individual pieces of select text to see if any of
1327+ // them start with an invalid character. If any of the select pieces is
1328+ // empty, we need to look at the first word after the %select to see
1329+ // whether that is invalid or not. If all of the pieces are fine, then we
1330+ // don't need to check anything else about the start of the diagnostic.
1331+ bool CheckSecondWord = false ;
1332+ for (StringRef Piece : SelectPieces) {
1333+ if (Piece.empty ())
1334+ CheckSecondWord = true ;
1335+ else
1336+ DiagnoseStart (Piece);
1337+ }
1338+
1339+ if (CheckSecondWord) {
1340+ // There was an empty select piece, so we need to check the second
1341+ // word. This catches situations like '%select{|fine}0 Not okay'. Add
1342+ // two to account for the closing curly brace and the number after it.
1343+ StringRef AfterSelect =
1344+ FullDiagText.substr (Iter - FullDiagText.begin () + 2 ).ltrim ();
1345+ DiagnoseStart (AfterSelect);
1346+ }
1347+ } else {
1348+ // If the start of the diagnostic is not %select, we can check the first
1349+ // word and be done with it.
1350+ DiagnoseStart (FullDiagText);
1351+ }
1352+
1353+ // If the last character in the diagnostic is a number preceded by a }, scan
1354+ // backwards to see if this is for a %select{...}0. If it is, we need to look
1355+ // at each piece to see whether it ends in punctuation or not.
1356+ bool StillNeedToDiagEnd = true ;
1357+ if (isDigit (FullDiagText.back ()) && *(FullDiagText.end () - 2 ) == ' }' ) {
1358+ // Scan backwards to find the opening curly brace.
1359+ size_t BraceCount = 1 ;
1360+ auto Iter = FullDiagText.end () - sizeof (" }0" );
1361+ for (auto End = FullDiagText.begin (); Iter != End; --Iter) {
1362+ char Ch = *Iter;
1363+ if (Ch == ' }' )
1364+ ++BraceCount;
1365+ else if (Ch == ' {' )
1366+ --BraceCount;
1367+ if (!BraceCount)
1368+ break ;
1369+ }
1370+ // Defending against a malformed diagnostic string.
1371+ if (BraceCount != 0 )
1372+ return ;
1373+
1374+ // Continue the backwards scan to find the word before the '{' to see if it
1375+ // is 'select'.
1376+ constexpr size_t SelectLen = sizeof (" select" ) - 1 ;
1377+ bool IsSelect =
1378+ (FullDiagText.substr (Iter - SelectLen - FullDiagText.begin (),
1379+ SelectLen) == " select" );
1380+ if (IsSelect) {
1381+ // Gather the content between the {} for the select in question so we can
1382+ // split it into pieces.
1383+ StillNeedToDiagEnd = false ; // No longer need to handle the end.
1384+ StringRef SelectText =
1385+ FullDiagText.substr (Iter - FullDiagText.begin () + /* {*/ 1 ,
1386+ FullDiagText.end () - Iter - /* pos before }0*/ 3 );
1387+ SmallVector<StringRef, 4 > SelectPieces;
1388+ SelectText.split (SelectPieces, ' |' );
1389+ for (StringRef Piece : SelectPieces) {
1390+ // Not worrying about a situation like: "this is bar. %select{foo|}0".
1391+ if (!Piece.empty ())
1392+ DiagnoseEnd (Piece);
1393+ }
1394+ }
1395+ }
1396+
1397+ // If we didn't already cover the diagnostic because of a %select, handle it
1398+ // now.
1399+ if (StillNeedToDiagEnd)
1400+ DiagnoseEnd (FullDiagText);
1401+
1402+ // FIXME: This could also be improved by looking for instances of clang or
1403+ // gcc in the diagnostic and recommend Clang or GCC instead. However, this
1404+ // runs into odd situations like [[clang::warn_unused_result]],
1405+ // #pragma clang, or --unwindlib=libgcc.
1406+ }
12161407
12171408// / ClangDiagsDefsEmitter - The top-level class emits .def files containing
12181409// / declarations of Clang diagnostics.
@@ -1273,6 +1464,9 @@ void clang::EmitClangDiagsDefs(RecordKeeper &Records, raw_ostream &OS,
12731464 if (!Component.empty () && Component != R.getValueAsString (" Component" ))
12741465 continue ;
12751466
1467+ // Validate diagnostic wording for common issues.
1468+ verifyDiagnosticWording (R);
1469+
12761470 OS << " DIAG(" << R.getName () << " , " ;
12771471 OS << R.getValueAsDef (" Class" )->getName ();
12781472 OS << " , (unsigned)diag::Severity::"
0 commit comments