@@ -189,41 +189,130 @@ struct ExtractInactiveRanges : public ASTWalker {
189189};
190190} // end anonymous namespace
191191
192+ // / Appends the textual contents of the provided source range, stripping
193+ // / the contents of comments that appear in the source.
194+ // /
195+ // / Given that comments are treated as whitespace, this also appends a
196+ // / space or newline (depending if the comment was multi-line and itself
197+ // / had newlines in the body) in place of the comment, to avoid fusing tokens
198+ // / together.
199+ static void appendRange (
200+ SourceManager &sourceMgr, SourceLoc start, SourceLoc end,
201+ SmallVectorImpl<char > &scratch) {
202+ unsigned bufferID = sourceMgr.findBufferContainingLoc (start);
203+ unsigned offset = sourceMgr.getLocOffsetInBuffer (start, bufferID);
204+ unsigned endOffset = sourceMgr.getLocOffsetInBuffer (end, bufferID);
205+
206+ // Strip comments from the chunk before adding it by re-lexing the range.
207+ LangOptions FakeLangOpts;
208+ Lexer lexer (FakeLangOpts, sourceMgr, bufferID, nullptr , LexerMode::Swift,
209+ HashbangMode::Disallowed, CommentRetentionMode::ReturnAsTokens,
210+ offset, endOffset);
211+
212+ SourceLoc nonCommentStart = start;
213+ Token token;
214+
215+ // Re-lex the range, and skip the full text of `tok::comment` tokens.
216+ while (!token.is (tok::eof)) {
217+ lexer.lex (token);
218+
219+ // Skip over #sourceLocation's in the file.
220+ if (token.is (tok::pound_sourceLocation)) {
221+
222+ // Append the text leading up to the #sourceLocation
223+ auto charRange = CharSourceRange (
224+ sourceMgr, nonCommentStart, token.getLoc ());
225+ StringRef text = sourceMgr.extractText (charRange);
226+ scratch.append (text.begin (), text.end ());
227+
228+ // Skip to the right paren. We know the AST is already valid, so there's
229+ // definitely a right paren.
230+ while (!token.is (tok::r_paren)) {
231+ lexer.lex (token);
232+ }
233+
234+ nonCommentStart = Lexer::getLocForEndOfToken (sourceMgr, token.getLoc ());
235+ }
236+
237+ if (token.is (tok::comment)) {
238+ // Grab the start of the full comment token (with leading trivia as well)
239+ SourceLoc commentLoc = token.getLoc ();
240+
241+ // Find the end of the token (with trailing trivia)
242+ SourceLoc endLoc = Lexer::getLocForEndOfToken (sourceMgr, token.getLoc ());
243+
244+ // The comment token's range includes leading/trailing whitespace, so trim
245+ // whitespace and only strip the portions of the comment that are not
246+ // whitespace.
247+ CharSourceRange range = CharSourceRange (sourceMgr, commentLoc, endLoc);
248+ StringRef fullTokenText = sourceMgr.extractText (range);
249+ unsigned leadingWhitespace = fullTokenText.size () -
250+ fullTokenText.ltrim ().size ();
251+ if (leadingWhitespace > 0 ) {
252+ commentLoc = commentLoc.getAdvancedLoc (leadingWhitespace);
253+ }
254+
255+ unsigned trailingWhitespace = fullTokenText.size () -
256+ fullTokenText.rtrim ().size ();
257+ if (trailingWhitespace > 0 ) {
258+ endLoc = endLoc.getAdvancedLoc (-trailingWhitespace);
259+ }
260+
261+ // First, extract the text up to the start of the comment, including the
262+ // whitespace.
263+ auto charRange = CharSourceRange (sourceMgr, nonCommentStart, commentLoc);
264+ StringRef text = sourceMgr.extractText (charRange);
265+ scratch.append (text.begin (), text.end ());
266+
267+ // Next, search through the comment text to see if it's a block comment
268+ // with a newline. If so we need to re-insert a newline to avoid fusing
269+ // multi-line tokens together.
270+ auto commentTextRange = CharSourceRange (sourceMgr, commentLoc, endLoc);
271+ StringRef commentText = sourceMgr.extractText (commentTextRange);
272+ bool hasNewline = commentText.find_first_of (" \n\r " ) != StringRef::npos;
273+
274+ // Use a newline as a filler character if the comment itself had a newline
275+ // in it.
276+ char filler = hasNewline ? ' \n ' : ' ' ;
277+
278+ // Append a single whitespace filler character, to avoid fusing tokens.
279+ scratch.push_back (filler);
280+
281+ // Start the next region after the contents of the comment.
282+ nonCommentStart = endLoc;
283+ }
284+ }
285+
286+ if (nonCommentStart.isValid () && nonCommentStart != end) {
287+ auto charRange = CharSourceRange (sourceMgr, nonCommentStart, end);
288+ StringRef text = sourceMgr.extractText (charRange);
289+ scratch.append (text.begin (), text.end ());
290+ }
291+ }
292+
192293StringRef swift::extractInlinableText (SourceManager &sourceMgr, ASTNode node,
193294 SmallVectorImpl<char > &scratch) {
194295 // Extract inactive ranges from the text of the node.
195296 ExtractInactiveRanges extractor (sourceMgr);
196297 node.walk (extractor);
197298
198- // If there were no inactive ranges, then there were no #if configs.
199- // Return an unowned buffer directly into the source file.
200- if (extractor.ranges .empty ()) {
201- auto range =
202- Lexer::getCharSourceRangeFromSourceRange (
203- sourceMgr, node.getSourceRange ());
204- return sourceMgr.extractText (range);
205- }
206-
207299 // Begin piecing together active code ranges.
208300
209301 // Get the full start and end of the provided node, as character locations.
210302 SourceLoc start = node.getStartLoc ();
211303 SourceLoc end = Lexer::getLocForEndOfToken (sourceMgr, node.getEndLoc ());
212304 for (auto &range : extractor.getSortedRanges ()) {
213305 // Add the text from the current 'start' to this ignored range's start.
214- auto charRange = CharSourceRange (sourceMgr, start, range.getStart ());
215- auto chunk = sourceMgr.extractText (charRange);
216- scratch.append (chunk.begin (), chunk.end ());
306+ appendRange (sourceMgr, start, range.getStart (), scratch);
217307
218308 // Set 'start' to the end of this range, effectively skipping it.
219309 start = range.getEnd ();
220310 }
221311
222312 // If there's leftover unignored text, add it.
223313 if (start != end) {
224- auto range = CharSourceRange (sourceMgr, start, end);
225- auto chunk = sourceMgr.extractText (range);
226- scratch.append (chunk.begin (), chunk.end ());
314+ appendRange (sourceMgr, start, end, scratch);
227315 }
316+
228317 return { scratch.data (), scratch.size () };
229318}
0 commit comments