1//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
18#include "llvm/Support/Allocator.h"
23/// Get comment kind and bool describing if it is a trailing comment.
24std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25 bool ParseAllComments) {
26 const size_t MinCommentLength = ParseAllComments ? 2 : 3;
27 if ((Comment.size() < MinCommentLength) || Comment[0] !=
'/')
31 if (Comment[1] ==
'/') {
32 if (Comment.size() < 3)
35 if (Comment[2] ==
'/')
37 else if (Comment[2] ==
'!')
42 assert(Comment.size() >= 4);
44 // Comment lexer does not understand escapes in comment markers, so pretend
45 // that this is not a comment.
46 if (Comment[1] !=
'*' ||
47 Comment[Comment.size() - 2] !=
'*' ||
48 Comment[Comment.size() - 1] !=
'/')
51 if (Comment[2] ==
'*')
53 else if (Comment[2] ==
'!')
58 const bool TrailingComment = (Comment.size() > 3) && (Comment[3] ==
'<');
59 return std::make_pair(K, TrailingComment);
62bool mergedCommentIsTrailingComment(StringRef Comment) {
63 return (Comment.size() > 3) && (Comment[3] ==
'<');
66/// Returns true if R1 and R2 both have valid locations that start on the same
73 unsigned C1 =
SM.getPresumedColumnNumber(L1, &
Invalid);
75 unsigned C2 =
SM.getPresumedColumnNumber(L2, &
Invalid);
82/// Determines whether there is only whitespace in `Buffer` between `P`
83/// and the previous line.
84/// \param Buffer The buffer to search in.
85/// \param P The offset from the beginning of `Buffer` to start from.
86/// \return true if all of the characters in `Buffer` ranging from the closest
87/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
90 // Search backwards until we see linefeed or carriage return.
91 for (
unsigned I = P; I != 0; --I) {
92 char C = Buffer[I - 1];
98 // We hit the beginning of the buffer.
102/// Returns whether `K` is an ordinary comment kind.
110 Range(SR), RawTextValid(
false), BriefTextValid(
false),
112 IsAlmostTrailingComment(
false) {
113 // Extract raw comment text, if possible.
119 // Guess comment kind.
120 std::pair<CommentKind, bool> K =
123 // Guess whether an ordinary comment is trailing.
126 unsigned BeginOffset;
127 std::tie(BeginFileID, BeginOffset) =
128 SourceMgr.getDecomposedLoc(Range.getBegin());
129 if (BeginOffset != 0) {
132 SourceMgr.getBufferData(BeginFileID, &
Invalid).data();
140 IsTrailingComment |= K.second;
142 IsAlmostTrailingComment =
143 RawText.starts_with(
"//<") || RawText.starts_with(
"/*<");
147 IsTrailingComment || mergedCommentIsTrailingComment(RawText);
151StringRef RawComment::getRawTextSlow(
const SourceManager &SourceMgr)
const {
154 unsigned BeginOffset;
157 std::tie(BeginFileID, BeginOffset) =
158 SourceMgr.getDecomposedLoc(Range.
getBegin());
159 std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.
getEnd());
161 const unsigned Length = EndOffset - BeginOffset;
165 // The comment can't begin in one file and end in another.
166 assert(BeginFileID == EndFileID);
169 const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
174 return StringRef(BufferStart + BeginOffset, Length);
177const char *RawComment::extractBriefText(
const ASTContext &Context)
const {
178 // Lazily initialize RawText using the accessor before using it.
181 // Since we will be copying the resulting text, all allocations made during
182 // parsing are garbage after the resulting string is formed. Thus we can use
183 // a separate allocator for all temporary stuff.
184 llvm::BumpPtrAllocator Allocator;
187 Context.getCommentCommandTraits(),
189 RawText.begin(), RawText.end());
192 const std::string Result = P.Parse();
193 const unsigned BriefTextLength = Result.size();
194 char *BriefTextPtr =
new (Context)
char[BriefTextLength + 1];
195 memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196 BriefText = BriefTextPtr;
197 BriefTextValid =
true;
204 const Decl *D)
const {
205 // Lazily initialize RawText using the accessor before using it.
209 Context.getCommentCommandTraits(),
211 RawText.begin(), RawText.end());
212 comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213 Context.getDiagnostics(),
214 Context.getCommentCommandTraits(),
217 comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218 Context.getDiagnostics(),
219 Context.getCommentCommandTraits());
226 unsigned MaxNewlinesAllowed) {
230 // Question does not make sense if locations are in different files.
231 if (Loc1Info.first != Loc2Info.first)
235 const char *Buffer =
SM.getBufferData(Loc1Info.first, &
Invalid).data();
239 unsigned NumNewlines = 0;
240 assert(Loc1Info.second <= Loc2Info.second &&
"Loc1 after Loc2!");
241 // Look for non-whitespace characters and remember any newlines seen.
242 for (
unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
255 // Check if we have found more than the maximum allowed number of
257 if (NumNewlines > MaxNewlinesAllowed)
260 // Collapse \r\n and \n\r into a single newline.
261 if (I + 1 != Loc2Info.second &&
262 (Buffer[I + 1] ==
'\n' || Buffer[I + 1] ==
'\r') &&
263 Buffer[I] != Buffer[I + 1])
274 llvm::BumpPtrAllocator &Allocator) {
278 // Ordinary comments are not interesting for us.
284 const FileID CommentFile = Loc.first;
285 const unsigned CommentOffset = Loc.second;
287 // If this is the first Doxygen comment, save it (because there isn't
288 // anything to merge it with).
289 auto &OC = OrderedComments[CommentFile];
291 OC[CommentOffset] =
new (Allocator)
RawComment(RC);
298 // Merge comments only if there is only whitespace between them.
299 // Can't merge trailing and non-trailing comments unless the second is
300 // non-trailing ordinary in the same column, as in the case:
301 // int x; // documents x
304 // int x; // documents x
305 // int y; // documents y
307 // int x; // documents x
310 // Merge comments if they are on same or consecutive lines.
314 commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
316 /*MaxNewlinesAllowed=*/1)) {
318 *OrderedComments[CommentFile].rbegin()->second =
319 RawComment(SourceMgr, MergedRange, CommentOpts,
true);
321 OrderedComments[CommentFile][CommentOffset] =
326const std::map<unsigned, RawComment *> *
328 auto CommentsInFile = OrderedComments.find(
File);
329 if (CommentsInFile == OrderedComments.end())
332 return &CommentsInFile->second;
338 unsigned Offset)
const {
339 auto Cached = CommentBeginLine.find(
C);
340 if (Cached != CommentBeginLine.end())
341 return Cached->second;
342 const unsigned Line = SourceMgr.getLineNumber(
File, Offset);
343 CommentBeginLine[
C] =
Line;
348 auto Cached = CommentEndOffset.find(
C);
349 if (Cached != CommentEndOffset.end())
350 return Cached->second;
351 const unsigned Offset =
352 SourceMgr.getDecomposedLoc(
C->getSourceRange().getEnd()).second;
353 CommentEndOffset[
C] = Offset;
359 llvm::StringRef CommentText =
getRawText(SourceMgr);
360 if (CommentText.empty())
368 auto LastChar =
Result.find_last_not_of(
'\n');
374std::vector<RawComment::CommentLine>
377 llvm::StringRef CommentText =
getRawText(SourceMgr);
378 if (CommentText.empty())
381 llvm::BumpPtrAllocator Allocator;
382 // We do not parse any commands, so CommentOptions are ignored by
383 // comments::Lexer. Therefore, we just use default-constructed options.
387 CommentText.begin(), CommentText.end(),
388 /*ParseCommands=*/false);
390 std::vector<RawComment::CommentLine>
Result;
391 // A column number of the first non-whitespace token in the comment text.
392 // We skip whitespace up to this column, but keep the whitespace after this
393 // column. IndentColumn is calculated when lexing the first line and reused
394 // for the rest of lines.
395 unsigned IndentColumn = 0;
397 // Record the line number of the last processed comment line.
398 // For block-style comments, an extra newline token will be produced after
399 // the end-comment marker, e.g.:
400 // /** This is a multi-line comment block.
401 // The lexer will produce two newline tokens here > */
402 // PreviousLine will record the line number when we previously saw a newline
403 // token and recorded a comment line. If we see another newline token on the
404 // same line, don't record anything in between.
405 unsigned PreviousLine = 0;
407 // Processes one line of the comment and adds it to the result.
408 // Handles skipping the indent at the start of the line.
409 // Returns false when eof is reached and true otherwise.
410 auto LexLine = [&](
bool IsFirstLine) ->
bool {
412 // Lex the first token on the line. We handle it separately, because we need to
413 // fix up its indentation.
419 if (Loc.
getLine() != PreviousLine) {
420 Result.emplace_back(
"", Loc, Loc);
427 bool LocInvalid =
false;
429 SourceMgr.getSpellingColumnNumber(
Tok.getLocation(), &LocInvalid);
430 assert(!LocInvalid &&
"getFormattedText for invalid location");
432 // Amount of leading whitespace in TokText.
433 size_t WhitespaceLen = TokText.find_first_not_of(
" \t");
434 if (WhitespaceLen == StringRef::npos)
435 WhitespaceLen = TokText.size();
436 // Remember the amount of whitespace we skipped in the first line to remove
437 // indent up to that column in the following lines.
439 IndentColumn = TokColumn + WhitespaceLen;
441 // Amount of leading whitespace we actually want to skip.
442 // For the first line we skip all the whitespace.
443 // For the rest of the lines, we skip whitespace up to IndentColumn.
449 std::max<int>(
static_cast<int>(IndentColumn) - TokColumn, 0));
450 llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
452 // Get the beginning location of the adjusted comment line.
454 SourceMgr.getPresumedLoc(
Tok.getLocation().getLocWithOffset(SkipLen));
456 // Lex all tokens in the rest of the line.
459 // Get the ending location of the comment line.
461 if (End.
getLine() != PreviousLine) {
471 // We've reached the end of file token.
475 // Process first line separately to remember indent for the following lines.
476 if (!LexLine(
/*IsFirstLine=*/true))
478 // Process the rest of the lines.
479 while (LexLine(
/*IsFirstLine=*/false))
Defines the clang::ASTContext interface.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Decl - This represents one declaration (or definition), e.g.
Concrete class used by the front-end to report problems and issues.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getLine() const
Return the presumed line number of this location.
Encodes a location in the source.
This class handles loading and caching of source files into memory.
A trivial tuple used to represent a source range.
SourceLocation getEnd() const
SourceLocation getBegin() const
The JSON file list parser is used to communicate input to InstallAPI.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
std::pair< FileID, unsigned > FileIDAndOffset
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
@ Result
The result type of a method or function.