2323#include < clang/AST/RawCommentList.h>
2424#include < clang/Lex/Lexer.h>
2525#include < clang/Basic/SourceLocation.h>
26+ #include " lib/AST/ParseRef.hpp"
27+
2628#ifdef _MSC_VER
2729#pragma warning(push)
2830#pragma warning(disable: 5054) // C5054: operator '+': deprecated between enumerations of different types
@@ -1072,6 +1074,21 @@ std::string
10721074JavadocVisitor::
10731075fixReference (std::string& ref)
10741076{
1077+ auto peekNextIt = [&]() -> std::optional<std::string_view>
1078+ {
1079+ ++it_;
1080+ if (it_ == end_ ||
1081+ (*it_)->getCommentKind () != CommentKind::TextComment)
1082+ {
1083+ --it_;
1084+ return std::nullopt ;
1085+ }
1086+ Comment const * c = *it_;
1087+ std::string_view text = static_cast <TextComment const *>(c)->getText ();
1088+ --it_;
1089+ return text;
1090+ };
1091+
10751092 // If the ref is only "operator", the next text comment
10761093 // might contain a simple operator name/type, or a
10771094 // full operator overload.
@@ -1080,249 +1097,126 @@ fixReference(std::string& ref)
10801097 // we find an unbalanced '('.
10811098 // Simply including the next text comment is enough
10821099 // for the next step.
1083- std::string_view trimmed = trim (ref);
1084- bool const isNoSuffixOperator =
1085- trimmed == " operator" ||
1086- trimmed.ends_with (" ::operator" );
1087- if (isNoSuffixOperator)
1088- {
1089- ++it_;
1090- if (it_ == end_)
1091- {
1092- return ref;
1093- }
1094- Comment const * c = *it_;
1095- if (c->getCommentKind () == CommentKind::TextComment)
1096- {
1097- ref += static_cast <TextComment const *>(c)->getText ();
1098- }
1099- else
1100+ ParsedRef v;
1101+ while (true )
1102+ {
1103+ // Attempt to parse ref
1104+ char const * first = ref.data ();
1105+ char const * last = first + ref.size ();
1106+ auto const pres = parse (first, last, v);
1107+ if (!pres)
11001108 {
1101- return ref;
1102- }
1103- }
1104- static constexpr std::string_view idChars =
1105- " abcdefghijklmnopqrstuvwxyz"
1106- " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1107- " 0123456789"
1108- " _:" ;
1109- bool const isNoFunctionOperator =
1110- isNoSuffixOperator ||
1111- [trimmed]{
1112- if (contains_n (trimmed, ' (' , 1 ))
1113- {
1114- return false ;
1115- }
1116- std::size_t pos = trimmed.rfind (" ::" );
1117- std::string_view last = trimmed;
1118- if (pos != std::string::npos) {
1119- last = trimmed.substr (pos + 2 );
1120- }
1121- if (!last.starts_with (" operator" ))
1122- {
1123- return false ;
1124- }
1125- last.remove_prefix (8 );
1126- if (last.empty ())
1109+ // The ref could not be parsed, add content from next
1110+ // text comment to the ref
1111+ auto const nextTextOpt = peekNextIt ();
1112+ if (!nextTextOpt)
11271113 {
1128- return true ;
1114+ return {} ;
11291115 }
1130- return !contains (idChars, last.front ());
1131- }();
1132-
1133- // Clang parses the copydoc command breaking
1134- // before the complete overload information. For instance,
1135- // `@copydoc operator()(unsigned char) const` will create
1136- // a node with the text `operator()(unsigned` and another
1137- // with `char) const`. We need to merge these nodes.
1138- // If the ref contains an unbalanced '(', then it's
1139- // a function, and we need to merge the next text comments
1140- // until we find a balanced ')'.
1141- bool const isFunction = contains (ref, ' (' );
1142- if (isFunction)
1143- {
1144- while (std::ranges::count (ref, ' (' ) != std::ranges::count (ref, ' )' ))
1145- {
1116+ ref += *nextTextOpt;
11461117 ++it_;
1147- if (it_ == end_)
1148- {
1149- break ;
1150- }
1151- Comment const * c = *it_;
1152- if (c->getCommentKind () == CommentKind::TextComment)
1153- {
1154- ref += static_cast <TextComment const *>(c)->getText ();
1155- }
1156- else
1118+ continue ;
1119+ }
1120+
1121+ // The ref is fully parsed
1122+ if (pres.ptr != last)
1123+ {
1124+ // The ref didn't consume all the text, so we need to
1125+ // remove the leftover text from the ref and return it
1126+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1127+ // If leftover is only whitespace, the ref might need
1128+ // the next text comment to complete it.
1129+ if (!isWhitespace (leftover))
11571130 {
1158- break ;
1131+ ref.erase (pres.ptr - first);
1132+ return leftover;
11591133 }
11601134 }
1161- if (rtrim (ref).ends_with (' )' ))
1135+
1136+ // The ref is fully parsed, but we might want to
1137+ // include the next text comment if it contains
1138+ // a valid continuation to the ref.
1139+ bool const mightHaveMoreQualifiers =
1140+ v.HasFunctionParameters &&
1141+ v.ExceptionSpec .Implicit &&
1142+ v.ExceptionSpec .Operand .empty ();
1143+ if (mightHaveMoreQualifiers)
11621144 {
1163- static constexpr std::array<std::string_view, 5 > qualifiers = {
1164- " const" ,
1165- " volatile" ,
1166- " noexcept" ,
1167- " &&" ,
1168- " &" ,
1169- };
1170- auto isQualifiersOnly = [](std::string_view str)
1145+ llvm::SmallVector<std::string_view, 4 > potentialQualifiers;
1146+ if (v.Kind == ReferenceKind::None)
11711147 {
1172- // Iterate all words between spaces and check if
1173- // they are qualifiers
1174- std::size_t pos = 0 ;
1175- while (pos < str.size ())
1148+ // "&&" or "&" not defined yet
1149+ if (!v.IsConst )
11761150 {
1177- std::size_t const start = str.find_first_not_of (' ' , pos);
1178- if (start == std::string::npos)
1179- {
1180- break ;
1181- }
1182- std::size_t const end = str.find_first_of (' ' , start);
1183- std::string_view word = str.substr (start, end - start);
1184- if (std::ranges::find (qualifiers, word) == qualifiers.end ())
1185- {
1186- return false ;
1187- }
1188- pos = end;
1151+ potentialQualifiers.push_back (" const" );
11891152 }
1190- return true ;
1191- };
1192- auto isWhitespaceOnly = [](std::string_view str)
1193- {
1194- return str.empty () || str.find_first_not_of (' ' ) == std::string::npos;
1195- };
1196-
1197- // peek next comment
1198- std::string functionContinuation;
1199- auto originalIt = it_;
1200- ++it_;
1201- while (
1202- it_ != end_ &&
1203- (isWhitespaceOnly (functionContinuation) ||
1204- isQualifiersOnly (functionContinuation)))
1205- {
1206- Comment const * c = *it_;
1207- if (c->getCommentKind () != CommentKind::TextComment)
1153+ if (!v.IsVolatile )
12081154 {
1209- break ;
1155+ potentialQualifiers. push_back ( " volatile " ) ;
12101156 }
1211- functionContinuation += static_cast <TextComment const *>(c)->getText ();
1212- ++it_;
1157+ potentialQualifiers.push_back (" &" );
12131158 }
1214- if (isWhitespaceOnly (functionContinuation))
1159+ else if (
1160+ v.Kind == ReferenceKind::LValue &&
1161+ ref.ends_with (' &' ))
12151162 {
1216- it_ = originalIt;
1163+ // The second "&" might be in the next Text block
1164+ potentialQualifiers.push_back (" &" );
12171165 }
1218- else /* if (!functionContinuation.empty()) */
1166+ potentialQualifiers.push_back (" noexcept" );
1167+ auto const nextTextOpt = peekNextIt ();
1168+ if (!nextTextOpt)
12191169 {
1220- --it_;
1221- std::string_view suffix = functionContinuation;
1222- std::string_view leftover = functionContinuation;
1223- bool foundAny = false ;
1224- std::size_t totalRemoved = 0 ;
1225- while (!suffix.empty ())
1226- {
1227- bool found = false ;
1228- std::size_t const initialWhitespace = std::min (
1229- suffix.find_first_not_of (" " ), suffix.size ());
1230- for (auto const & q : qualifiers)
1231- {
1232- if (suffix.substr (initialWhitespace).starts_with (q))
1233- {
1234- std::size_t const toRemove = initialWhitespace + q.size ();
1235- if (
1236- contains (idChars, q.back ()) &&
1237- suffix.size () > toRemove &&
1238- contains (idChars, suffix[toRemove]))
1239- {
1240- // This is not a qualifier, but part of
1241- // an identifier
1242- continue ;
1243- }
1244- suffix.remove_prefix (toRemove);
1245- totalRemoved += toRemove;
1246- found = true ;
1247- foundAny = true ;
1248- break ;
1249- }
1250- }
1251- if (!found)
1170+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1171+ ref.erase (pres.ptr - first);
1172+ return leftover;
1173+ }
1174+ std::string_view const nextText = *nextTextOpt;
1175+ std::string_view const trimmed = ltrim (nextText);
1176+ if (trimmed.empty () ||
1177+ std::ranges::any_of (
1178+ potentialQualifiers,
1179+ [&](std::string_view s)
12521180 {
1253- break ;
1254- }
1255- }
1256- if (foundAny)
1257- {
1258- leftover = leftover.substr (0 , totalRemoved);
1259- ref += leftover;
1260- return std::string (suffix);
1261- }
1181+ return trimmed.starts_with (s);
1182+ }))
1183+ {
1184+ ref += nextText;
1185+ ++it_;
1186+ continue ;
12621187 }
12631188 }
1264- }
12651189
1266-
1267- // Clang refs can also contain invalid characters
1268- // at the end, especially punctuation. We need to
1269- // truncate the ref at the last valid identifier
1270- // character.
1271- // The last identifier character depends on the type
1272- // of ref.
1273- // - If it's an operator but not a function, then
1274- // we also consider operator chars as valid.
1275- // - If it's a function, then we also consider ')'
1276- // as valid.
1277- // - In all cases, we consider the identifier chars
1278- // as valid.
1279- static constexpr std::string_view operatorChars =
1280- " ~!%^&*()-+=|[]{};:,.<>?/" ;
1281- static constexpr std::string_view parenChars =
1282- " ()" ;
1283- std::string leftover;
1284- bool const isRegularIdentifier = !isFunction && !isNoFunctionOperator;
1285- if (isRegularIdentifier)
1286- {
1287- auto const lastIdChar = ref.find_last_of (idChars);
1288- auto const firstLeftoverChar = lastIdChar + 1 ;
1289- if (firstLeftoverChar < ref.size ())
1290- {
1291- leftover = std::string_view (ref).substr (lastIdChar + 1 );
1292- ref = ref.substr (0 , lastIdChar + 1 );
1293- }
1294- }
1295- else if (isFunction)
1296- {
1297- auto reservedCharsets = {idChars, parenChars};
1298- auto reservedChars = std::views::join (reservedCharsets);
1299- auto const lastIdOrParen = find_last_of (ref, reservedChars);
1300- auto const firstLeftoverChar =
1301- lastIdOrParen == ref.end () ?
1302- ref.end () :
1303- std::next (lastIdOrParen);
1304- if (firstLeftoverChar != ref.end ())
1190+ // The ref might have more components
1191+ bool const mightHaveMoreComponents =
1192+ !v.HasFunctionParameters ;
1193+ if (mightHaveMoreComponents)
13051194 {
1306- leftover = std::string_view (firstLeftoverChar, ref.end ());
1307- ref = ref.substr (0 , std::distance (ref.begin (), firstLeftoverChar));
1308- }
1309- }
1310- else /* if (isNoFunctionOperator) */
1311- {
1312- auto reservedCharsets = {idChars, operatorChars};
1313- auto reservedChars = std::views::join (reservedCharsets);
1314- auto const lastIdOrOperator = find_last_of (ref, reservedChars);
1315- auto const firstLeftoverChar =
1316- lastIdOrOperator == ref.end () ?
1317- ref.end () :
1318- std::next (lastIdOrOperator);
1319- if (firstLeftoverChar != ref.end ())
1320- {
1321- leftover = std::string_view (firstLeftoverChar, ref.end ());
1322- ref = ref.substr (0 , std::distance (ref.begin (), firstLeftoverChar));
1195+ auto const nextTextOpt = peekNextIt ();
1196+ if (!nextTextOpt)
1197+ {
1198+ auto leftover = std::string (pres.ptr , last - pres.ptr );
1199+ ref.erase (pres.ptr - first);
1200+ return leftover;
1201+ }
1202+ std::string_view const nextText = *nextTextOpt;
1203+ std::string_view const trimmed = ltrim (nextText);
1204+ static constexpr std::string_view idChars
1205+ = " abcdefghijklmnopqrstuvwxyz"
1206+ " ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1207+ " 0123456789"
1208+ " _:" ;
1209+ if (trimmed.empty () ||
1210+ contains (idChars, trimmed.front ()))
1211+ {
1212+ ref += nextText;
1213+ ++it_;
1214+ continue ;
1215+ }
13231216 }
1217+
1218+ return {};
13241219 }
1325- return leftover;
13261220}
13271221
13281222// ------------------------------------------------
0 commit comments