Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 21c8b2a

Browse files
committed
added way to detect comments during parsing
1 parent 28e102a commit 21c8b2a

File tree

8 files changed

+267
-75
lines changed

8 files changed

+267
-75
lines changed

‎src/main/java/com/igormaznitsa/prologparser/GenericPrologParser.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
* Generic version of prolog parser.
2828
*/
2929
public class GenericPrologParser extends PrologParser {
30-
public GenericPrologParser(final Reader reader, final ParserContext context) {
31-
super(reader, context);
30+
public GenericPrologParser(final Reader reader, final ParserContext context,
31+
final TokenizedCommentListener... tokenizedCommentListeners) {
32+
super(reader, context, tokenizedCommentListeners);
3233
}
3334
}

‎src/main/java/com/igormaznitsa/prologparser/ParserContext.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ public interface ParserContext {
6868
* @since 2.0.2
6969
*/
7070
int FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR = 64;
71+
/**
72+
* Return comments as atoms.
73+
*
74+
* @since 2.2.0
75+
*/
76+
int FLAG_COMMENTS_AS_ATOMS = 128;
7177

7278
/**
7379
* Check that the context contains an operator that starts with some string.

‎src/main/java/com/igormaznitsa/prologparser/PrologParser.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
package com.igormaznitsa.prologparser;
2323

24+
import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;
2425
import static com.igormaznitsa.prologparser.ParserContext.FLAG_DOT2_AS_LIST;
2526
import static com.igormaznitsa.prologparser.ParserContext.FLAG_NONE;
2627
import static com.igormaznitsa.prologparser.ParserContext.FLAG_VAR_AS_FUNCTOR;
@@ -102,13 +103,25 @@ public abstract class PrologParser implements Iterable<PrologTerm>, AutoCloseabl
102103

103104
protected final ParserContext context;
104105
protected final int parserFlags;
106+
private final boolean commentsAsAtoms;
105107
private final Tokenizer tokenizer;
106108
private boolean autoCloseReaderFlag;
109+
private final List<TokenizedCommentListener> commentTokenListeners;
107110

108-
protected PrologParser(final Reader source, final ParserContext context) {
111+
protected PrologParser(
112+
final Reader source,
113+
final ParserContext context,
114+
final TokenizedCommentListener... tokenizedCommentListeners
115+
) {
109116
this.context = context == null ? DefaultParserContext.of(ParserContext.FLAG_NONE) : context;
110117
this.parserFlags = context == null ? FLAG_NONE : context.getFlags();
118+
this.commentsAsAtoms = (this.parserFlags & FLAG_COMMENTS_AS_ATOMS) != 0;
111119
this.tokenizer = new Tokenizer(this, META_OP_MAP, requireNonNull(source));
120+
if (tokenizedCommentListeners.length == 0) {
121+
this.commentTokenListeners = List.of();
122+
} else {
123+
this.commentTokenListeners = List.of(tokenizedCommentListeners);
124+
}
112125
}
113126

114127
public static Op findBaseMetaOperator(final String text, final OpAssoc type) {
@@ -178,14 +191,36 @@ public boolean hasNext() {
178191
return this.tokenizer.peek() != null;
179192
}
180193

194+
private TokenizerResult readNextTokenCommentAware() {
195+
TokenizerResult result;
196+
if (this.commentsAsAtoms) {
197+
while (true) {
198+
result = this.tokenizer.readNextToken();
199+
if (result != null
200+
&& (result.getResult().getQuotation() == Quotation.COMMENT_BLOCK ||
201+
result.getResult().getQuotation() == Quotation.COMMENT_LINE)) {
202+
for (final TokenizedCommentListener listener : this.commentTokenListeners) {
203+
listener.onCommentToken(this, result);
204+
}
205+
} else {
206+
break;
207+
}
208+
}
209+
} else {
210+
result = this.tokenizer.readNextToken();
211+
}
212+
return result;
213+
}
214+
181215
public PrologTerm next() {
182216
final PrologTerm found = readBlock(OPERATORS_PHRASE);
183217
if (found == null) {
184218
throw new NoSuchElementException("No terms in source");
185219
} else {
186-
final TokenizerResult endAtom = this.tokenizer.readNextToken();
220+
final TokenizerResult endAtom = this.readNextTokenCommentAware();
187221
if (endAtom == null || !endAtom.getResult().getText().equals(OPERATOR_DOT.getText())) {
188-
throw new PrologParserException("End operator is not found", this.tokenizer.getLine(),
222+
throw new PrologParserException("End operator is not found",
223+
this.tokenizer.getLine(),
189224
this.tokenizer.getPos());
190225
}
191226
}
@@ -203,7 +238,7 @@ private PrologStruct readStruct(final PrologTerm functor) {
203238
return null;
204239
}
205240

206-
final TokenizerResult nextAtom = this.tokenizer.readNextToken();
241+
final TokenizerResult nextAtom = this.readNextTokenCommentAware();
207242
if (nextAtom == null) {
208243
throw new PrologParserException("Can't read next token in block", this.tokenizer.getLine(),
209244
this.tokenizer.getPos());
@@ -242,7 +277,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {
242277
while (continueReading) {
243278
final PrologTerm block = readBlock(OPERATORS_INSIDE_LIST);
244279

245-
final TokenizerResult nextAtom = this.tokenizer.readNextToken();
280+
final TokenizerResult nextAtom = this.readNextTokenCommentAware();
246281
if (nextAtom == null) {
247282
throw new PrologParserException("Can't read next token in list", this.tokenizer.getLine(),
248283
this.tokenizer.getPos());
@@ -281,7 +316,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {
281316
tokenizer.getLastTokenPos(), null);
282317
}
283318

284-
final TokenizerResult nextAtomTwo = tokenizer.readNextToken();
319+
final TokenizerResult nextAtomTwo = this.readNextTokenCommentAware();
285320
if (nextAtomTwo == null) {
286321
throw new PrologParserException("Can't find expected token in list",
287322
this.tokenizer.getLine(), this.tokenizer.getPos());
@@ -353,7 +388,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
353388

354389
while (true) {
355390
// read next atom from tokenizer
356-
TokenizerResult readAtomContainer = this.tokenizer.readNextToken();
391+
TokenizerResult readAtomContainer = this.readNextTokenCommentAware();
357392

358393
if (readAtomContainer == null) {
359394
if (currentTreeItem == null) {
@@ -470,7 +505,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
470505
readAtomContainer.getLine(), readAtomContainer.getPos());
471506
}
472507

473-
final TokenizerResult token = this.tokenizer.readNextToken();
508+
final TokenizerResult token = this.readNextTokenCommentAware();
474509

475510
final PrologTerm closingAtom;
476511
if (token == null) {
@@ -499,7 +534,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {
499534
}
500535
} else {
501536
if (readAtom.getType() != TermType.VAR || (this.parserFlags & FLAG_VAR_AS_FUNCTOR) != 0) {
502-
TokenizerResult nextToken = this.tokenizer.readNextToken();
537+
TokenizerResult nextToken = this.readNextTokenCommentAware();
503538

504539
if (nextToken == null) {
505540
throw new PrologParserException("Non-closed clause", this.tokenizer.getLastTokenLine(),
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package com.igormaznitsa.prologparser;
2+
3+
import com.igormaznitsa.prologparser.tokenizer.TokenizerResult;
4+
5+
/**
6+
* Listener notified by the parser about each comment it meets during parsing, if the detection flag is on.
7+
*
8+
* @see ParserContext#FLAG_COMMENTS_AS_ATOMS
9+
* @since 2.2.0
10+
*/
11+
@FunctionalInterface
12+
public interface TokenizedCommentListener {
13+
/**
14+
* Notification from the parser that the tokenizer has produced either a line comment or a block comment.
15+
*
16+
* @param parser source prolog parser, must not be null
17+
* @param comment detected comment token as an atom, must not be null
18+
*/
19+
void onCommentToken(PrologParser parser, TokenizerResult comment);
20+
}

‎src/main/java/com/igormaznitsa/prologparser/terms/Quotation.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,19 @@ public enum Quotation {
4747
* Term is back tick quotation
4848
* example: `hello`
4949
*/
50-
BACK_TICK("`");
50+
BACK_TICK("`"),
51+
/**
52+
* Special variant showing that the content is a line comment.
53+
*
54+
* @since 2.2.0
55+
*/
56+
COMMENT_LINE("%"),
57+
/**
58+
* Special variant showing that the content is a block comment.
59+
*
60+
* @since 2.2.0
61+
*/
62+
COMMENT_BLOCK("/*");
5163

5264
private final String quotationMark;
5365
public static final List<Quotation> VALUES = List.of(Quotation.values());
@@ -72,6 +84,13 @@ public String getQuotationMark() {
7284
* @return quoted string
7385
*/
7486
public String quoteString(final String str) {
75-
return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;
87+
switch (this) {
88+
case COMMENT_LINE:
89+
return COMMENT_LINE.quotationMark + str;
90+
case COMMENT_BLOCK:
91+
return COMMENT_BLOCK.quotationMark + str + "*/";
92+
default:
93+
return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;
94+
}
7695
}
7796
}

‎src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
package com.igormaznitsa.prologparser.tokenizer;
2323

2424
import static com.igormaznitsa.prologparser.ParserContext.FLAG_BLOCK_COMMENTS;
25+
import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;
2526
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR;
2627
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_CHARCODE;
2728
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.ATOM;
@@ -58,6 +59,7 @@ public final class Tokenizer {
5859
private final StringBuilderEx specCharBuf;
5960
private final StringBuilderEx insideCharBuffer;
6061
private final boolean blockCommentsAllowed;
62+
private final boolean returnCommentsAsToken;
6163
private final boolean zeroSingleQuotationAllowed;
6264
private final boolean zeroQuotationAllowsWhitespaceChar;
6365
private final Reader reader;
@@ -82,6 +84,8 @@ public Tokenizer(final PrologParser parser, final Koi7CharOpMap metaOperators,
8284

8385
final int maxAllowedCharBufferSize = parser.getContext() == null ? Integer.MAX_VALUE :
8486
parser.getContext().getMaxTokenizerBufferLength();
87+
this.returnCommentsAsToken = parser.getContext() != null
88+
&& ((parser.getContext().getFlags() & FLAG_COMMENTS_AS_ATOMS) != 0);
8589
this.blockCommentsAllowed = parser.getContext() != null
8690
&& ((parser.getContext().getFlags() & FLAG_BLOCK_COMMENTS) != 0);
8791
this.zeroSingleQuotationAllowed = parser.getContext() != null
@@ -269,25 +273,47 @@ public void fixPosition() {
269273
this.lastTokenPos = this.pos - 1;
270274
}
271275

272-
private void skipUntilBlockCommentEnd() throws IOException {
276+
private String skipTillBlockCommentEnd(final boolean accumulateText) throws IOException {
277+
final StringBuilder result = accumulateText ? new StringBuilder() : null;
273278
boolean starCharDetected = false;
274279
while (true) {
275280
final int readChar = this.doReadChar();
276-
if (readChar < 0 || (readChar == '/' && starCharDetected)) {
281+
if (readChar < 0) {
277282
break;
283+
} else if (readChar == '/') {
284+
if (starCharDetected) {
285+
if (accumulateText) {
286+
result.setLength(result.length() - 1);
287+
}
288+
break;
289+
} else {
290+
if (accumulateText) {
291+
result.append((char) readChar);
292+
}
293+
}
278294
} else {
279295
starCharDetected = readChar == '*';
296+
if (accumulateText) {
297+
result.append((char) readChar);
298+
}
280299
}
281300
}
301+
return accumulateText ? result.toString() : null;
282302
}
283303

284-
private void skipUntilNextString() throws IOException {
304+
private String skipTillNextLine(final boolean accumulateText) throws IOException {
305+
final StringBuilder result = accumulateText ? new StringBuilder() : null;
306+
285307
while (true) {
286308
final int readChar = this.doReadChar();
287309
if (readChar < 0 || readChar == '\n') {
288310
break;
289311
}
312+
if (accumulateText) {
313+
result.append((char) readChar);
314+
}
290315
}
316+
return accumulateText ? result.toString() : null;
291317
}
292318

293319
public TokenizerResult pop() {
@@ -324,10 +350,10 @@ public TokenizerResult readNextToken() {
324350
final StringBuilderEx strBuffer = this.strBuf;
325351
final StringBuilderEx specCharBuffer = this.specCharBuf;
326352

327-
OpContainer lastFoundFullOperator = null;
353+
final boolean commentsAsAtoms = this.returnCommentsAsToken;
328354

355+
OpContainer lastFoundFullOperator = null;
329356
boolean letterOrDigitOnly = false;
330-
331357
boolean foundUnderscoreInNumber = false;
332358

333359
try {
@@ -416,18 +442,30 @@ public TokenizerResult readNextToken() {
416442

417443
final char chr = (char) readChar;
418444

419-
if (state != STRING && this.blockCommentsAllowed && chr == '*'
420-
&& this.strBuf.isLastChar('/')) {
445+
if (state != STRING
446+
&& this.blockCommentsAllowed
447+
&& chr == '*'
448+
&& this.strBuf.isLastChar('/')
449+
) {
421450
if (this.strBuf.isSingleChar('/')) {
422451
this.strBuf.pop();
423452
state = this.strBuf.isEmpty() ? LOOK_FOR : state;
424453
} else if (state == OPERATOR) {
425454
throw new PrologParserException("Operator can be mixed with comment block: "
426-
+ this.strBuf + chr, getLastTokenLine(), getLastTokenPos());
455+
+ this.strBuf + chr, this.getLastTokenLine(), this.getLastTokenPos());
427456
}
428457

429-
skipUntilBlockCommentEnd();
430-
458+
if (commentsAsAtoms) {
459+
final String commentText = this.skipTillBlockCommentEnd(true);
460+
return new TokenizerResult(
461+
new PrologAtom(commentText, Quotation.COMMENT_BLOCK),
462+
state,
463+
this.getLastTokenLine(),
464+
this.getLastTokenPos()
465+
);
466+
} else {
467+
this.skipTillBlockCommentEnd(false);
468+
}
431469
} else {
432470
switch (state) {
433471
case LOOK_FOR: {
@@ -437,7 +475,16 @@ public TokenizerResult readNextToken() {
437475

438476
switch (chr) {
439477
case '%': {
440-
skipUntilNextString();
478+
this.fixPosition();
479+
final String text = skipTillNextLine(commentsAsAtoms);
480+
if (commentsAsAtoms) {
481+
return new TokenizerResult(
482+
new PrologAtom(text, Quotation.COMMENT_LINE),
483+
state,
484+
this.getLastTokenLine(),
485+
this.getLastTokenPos()
486+
);
487+
}
441488
}
442489
break;
443490
case '_': {

‎src/main/java/com/igormaznitsa/prologparser/tokenizer/TokenizerState.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,7 @@ public enum TokenizerState {
3232
OPERATOR,
3333
VAR,
3434
INTEGER,
35-
FLOAT
35+
FLOAT,
36+
LINE_COMMENT,
37+
BLOCK_COMMENT
3638
}

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /