Commit 21c8b2a

committed

added way to detect comments during parsing

1 parent 28e102a commit 21c8b2a (Copy full SHA for 21c8b2a)

File tree

8 files changed

+267

-75

lines changed

src
- main/java/com/igormaznitsa/prologparser
- test/java/com/igormaznitsa/prologparser/tokenizer
  - TokenizerTest.java

8 files changed

+267

-75

lines changed

`‎src/main/java/com/igormaznitsa/prologparser/GenericPrologParser.java`

Lines changed: 3 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,8 @@`
`27`	`27`	`* Generic version of prolog parser.`
`28`	`28`	`*/`
`29`	`29`	`public class GenericPrologParser extends PrologParser {`
`30`		`- public GenericPrologParser(final Reader reader, final ParserContext context) {`
`31`		`- super(reader, context);`
	`30`	`+ public GenericPrologParser(final Reader reader, final ParserContext context,`
	`31`	`+ final TokenizedCommentListener... tokenizedCommentListeners) {`
	`32`	`+ super(reader, context, tokenizedCommentListeners);`
`32`	`33`	`}`
`33`	`34`	`}`

`‎src/main/java/com/igormaznitsa/prologparser/ParserContext.java`

Lines changed: 6 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -68,6 +68,12 @@ public interface ParserContext {`
`68`	`68`	`* @since 2.0.2`
`69`	`69`	`*/`
`70`	`70`	`int FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR = 64;`
	`71`	`+ /**`
	`72`	`+ * Return commentaries as atoms.`
	`73`	`+ *`
	`74`	`+ * @since 2.2.0`
	`75`	`+ */`
	`76`	`+ int FLAG_COMMENTS_AS_ATOMS = 128;`
`71`	`77`
`72`	`78`	`/**`
`73`	`79`	`* Check that the context contains an operator starts with some string.`

`‎src/main/java/com/igormaznitsa/prologparser/PrologParser.java`

Lines changed: 44 additions & 9 deletions

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`
`22`	`22`	`package com.igormaznitsa.prologparser;`
`23`	`23`
	`24`	`+import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;`
`24`	`25`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_DOT2_AS_LIST;`
`25`	`26`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_NONE;`
`26`	`27`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_VAR_AS_FUNCTOR;`
`@@ -102,13 +103,25 @@ public abstract class PrologParser implements Iterable<PrologTerm>, AutoCloseabl`
`102`	`103`
`103`	`104`	`protected final ParserContext context;`
`104`	`105`	`protected final int parserFlags;`
	`106`	`+ private final boolean commentsAsAtoms;`
`105`	`107`	`private final Tokenizer tokenizer;`
`106`	`108`	`private boolean autoCloseReaderFlag;`
	`109`	`+ private final List<TokenizedCommentListener> commentTokenListeners;`
`107`	`110`
`108`		`- protected PrologParser(final Reader source, final ParserContext context) {`
	`111`	`+ protected PrologParser(`
	`112`	`+ final Reader source,`
	`113`	`+ final ParserContext context,`
	`114`	`+ final TokenizedCommentListener... tokenizedCommentListeners`
	`115`	`+ ) {`
`109`	`116`	`this.context = context == null ? DefaultParserContext.of(ParserContext.FLAG_NONE) : context;`
`110`	`117`	`this.parserFlags = context == null ? FLAG_NONE : context.getFlags();`
	`118`	`+ this.commentsAsAtoms = (this.parserFlags & FLAG_COMMENTS_AS_ATOMS) != 0;`
`111`	`119`	`this.tokenizer = new Tokenizer(this, META_OP_MAP, requireNonNull(source));`
	`120`	`+ if (tokenizedCommentListeners.length == 0) {`
	`121`	`+ this.commentTokenListeners = List.of();`
	`122`	`+ } else {`
	`123`	`+ this.commentTokenListeners = List.of(tokenizedCommentListeners);`
	`124`	`+ }`
`112`	`125`	`}`
`113`	`126`
`114`	`127`	`public static Op findBaseMetaOperator(final String text, final OpAssoc type) {`
`@@ -178,14 +191,36 @@ public boolean hasNext() {`
`178`	`191`	`return this.tokenizer.peek() != null;`
`179`	`192`	`}`
`180`	`193`
	`194`	`+ private TokenizerResult readNextTokenCommentAware() {`
	`195`	`+ TokenizerResult result;`
	`196`	`+ if (this.commentsAsAtoms) {`
	`197`	`+ while (true) {`
	`198`	`+ result = this.tokenizer.readNextToken();`
	`199`	`+ if (result != null`
	`200`	`+ && (result.getResult().getQuotation() == Quotation.COMMENT_BLOCK \|\|`
	`201`	`+ result.getResult().getQuotation() == Quotation.COMMENT_LINE)) {`
	`202`	`+ for (final TokenizedCommentListener listener : this.commentTokenListeners) {`
	`203`	`+ listener.onCommentToken(this, result);`
	`204`	`+ }`
	`205`	`+ } else {`
	`206`	`+ break;`
	`207`	`+ }`
	`208`	`+ }`
	`209`	`+ } else {`
	`210`	`+ result = this.tokenizer.readNextToken();`
	`211`	`+ }`
	`212`	`+ return result;`
	`213`	`+ }`
	`214`	`+`
`181`	`215`	`public PrologTerm next() {`
`182`	`216`	`final PrologTerm found = readBlock(OPERATORS_PHRASE);`
`183`	`217`	`if (found == null) {`
`184`	`218`	`throw new NoSuchElementException("No terms in source");`
`185`	`219`	`} else {`
`186`		`- final TokenizerResult endAtom = this.tokenizer.readNextToken();`
	`220`	`+ final TokenizerResult endAtom = this.readNextTokenCommentAware();`
`187`	`221`	`if (endAtom == null \|\| !endAtom.getResult().getText().equals(OPERATOR_DOT.getText())) {`
`188`		`- throw new PrologParserException("End operator is not found", this.tokenizer.getLine(),`
	`222`	`+ throw new PrologParserException("End operator is not found",`
	`223`	`+ this.tokenizer.getLine(),`
`189`	`224`	`this.tokenizer.getPos());`
`190`	`225`	`}`
`191`	`226`	`}`
`@@ -203,7 +238,7 @@ private PrologStruct readStruct(final PrologTerm functor) {`
`203`	`238`	`return null;`
`204`	`239`	`}`
`205`	`240`
`206`		`- final TokenizerResult nextAtom = this.tokenizer.readNextToken();`
	`241`	`+ final TokenizerResult nextAtom = this.readNextTokenCommentAware();`
`207`	`242`	`if (nextAtom == null) {`
`208`	`243`	`throw new PrologParserException("Can't read next token in block", this.tokenizer.getLine(),`
`209`	`244`	`this.tokenizer.getPos());`
`@@ -242,7 +277,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {`
`242`	`277`	`while (continueReading) {`
`243`	`278`	`final PrologTerm block = readBlock(OPERATORS_INSIDE_LIST);`
`244`	`279`
`245`		`- final TokenizerResult nextAtom = this.tokenizer.readNextToken();`
	`280`	`+ final TokenizerResult nextAtom = this.readNextTokenCommentAware();`
`246`	`281`	`if (nextAtom == null) {`
`247`	`282`	`throw new PrologParserException("Can't read next token in list", this.tokenizer.getLine(),`
`248`	`283`	`this.tokenizer.getPos());`
`@@ -281,7 +316,7 @@ private PrologTerm readList(final TokenizerResult openingBracket) {`
`281`	`316`	`tokenizer.getLastTokenPos(), null);`
`282`	`317`	`}`
`283`	`318`
`284`		`- final TokenizerResult nextAtomTwo = tokenizer.readNextToken();`
	`319`	`+ final TokenizerResult nextAtomTwo = this.readNextTokenCommentAware();`
`285`	`320`	`if (nextAtomTwo == null) {`
`286`	`321`	`throw new PrologParserException("Can't find expected token in list",`
`287`	`322`	`this.tokenizer.getLine(), this.tokenizer.getPos());`
`@@ -353,7 +388,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {`
`353`	`388`
`354`	`389`	`while (true) {`
`355`	`390`	`// read next atom from tokenizer`
`356`		`- TokenizerResult readAtomContainer = this.tokenizer.readNextToken();`
	`391`	`+ TokenizerResult readAtomContainer = this.readNextTokenCommentAware();`
`357`	`392`
`358`	`393`	`if (readAtomContainer == null) {`
`359`	`394`	`if (currentTreeItem == null) {`
`@@ -470,7 +505,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {`
`470`	`505`	`readAtomContainer.getLine(), readAtomContainer.getPos());`
`471`	`506`	`}`
`472`	`507`
`473`		`- final TokenizerResult token = this.tokenizer.readNextToken();`
	`508`	`+ final TokenizerResult token = this.readNextTokenCommentAware();`
`474`	`509`
`475`	`510`	`final PrologTerm closingAtom;`
`476`	`511`	`if (token == null) {`
`@@ -499,7 +534,7 @@ private PrologTerm readBlock(final Koi7CharOpMap endOperators) {`
`499`	`534`	`}`
`500`	`535`	`} else {`
`501`	`536`	`if (readAtom.getType() != TermType.VAR \|\| (this.parserFlags & FLAG_VAR_AS_FUNCTOR) != 0) {`
`502`		`- TokenizerResult nextToken = this.tokenizer.readNextToken();`
	`537`	`+ TokenizerResult nextToken = this.readNextTokenCommentAware();`
`503`	`538`
`504`	`539`	`if (nextToken == null) {`
`505`	`540`	`throw new PrologParserException("Non-closed clause", this.tokenizer.getLastTokenLine(),`

`‎src/main/java/com/igormaznitsa/prologparser/TokenizedCommentListener.java`

Lines changed: 20 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,20 @@`
	`1`	`+package com.igormaznitsa.prologparser;`
	`2`	`+`
	`3`	`+import com.igormaznitsa.prologparser.tokenizer.TokenizerResult;`
	`4`	`+`
	`5`	`+/**`
	`6`	`+ * Listener gets notifications from parser for met parsed comments if detection flag is on`
	`7`	`+ *`
	`8`	`+ * @see ParserContext#FLAG_COMMENTS_AS_ATOMS`
	`9`	`+ * @since 2.2.0`
	`10`	`+ */`
	`11`	`+@FunctionalInterface`
	`12`	`+public interface TokenizedCommentListener {`
	`13`	`+ /**`
	`14`	`+ * Notification from parser that either a line comment or a block comment has got as a tokenizer result.`
	`15`	`+ *`
	`16`	`+ * @param parser source prolog parser, must not be null`
	`17`	`+ * @param comment detected comment token as an atom, must not be null`
	`18`	`+ */`
	`19`	`+ void onCommentToken(PrologParser parser, TokenizerResult comment);`
	`20`	`+}`

`‎src/main/java/com/igormaznitsa/prologparser/terms/Quotation.java`

Lines changed: 21 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,19 @@ public enum Quotation {`
`47`	`47`	`* Term is back tick quotation`
`48`	`48`	``* example: `hello` ``
`49`	`49`	`*/`
`50`		``- BACK_TICK("`");``
	`50`	``+ BACK_TICK("`"),``
	`51`	`+ /**`
	`52`	`+ * Special variant shows that content is line comment`
	`53`	`+ *`
	`54`	`+ * @since 2.2.0`
	`55`	`+ */`
	`56`	`+ COMMENT_LINE("%"),`
	`57`	`+ /**`
	`58`	`+ * Special variant shows that content is block comment`
	`59`	`+ *`
	`60`	`+ * @since 2.2.0`
	`61`	`+ */`
	`62`	`+ COMMENT_BLOCK("/*");`
`51`	`63`
`52`	`64`	`private final String quotationMark;`
`53`	`65`	`public static final List<Quotation> VALUES = List.of(Quotation.values());`
`@@ -72,6 +84,13 @@ public String getQuotationMark() {`
`72`	`84`	`* @return quoted string`
`73`	`85`	`*/`
`74`	`86`	`public String quoteString(final String str) {`
`75`		`- return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;`
	`87`	`+ switch (this) {`
	`88`	`+ case COMMENT_LINE:`
	`89`	`+ return COMMENT_LINE.quotationMark + str;`
	`90`	`+ case COMMENT_BLOCK:`
	`91`	`+ return COMMENT_BLOCK.quotationMark + str + "*/";`
	`92`	`+ default:`
	`93`	`+ return this.quotationMark + escapeString(str == null ? "" : str, this) + this.quotationMark;`
	`94`	`+ }`
`76`	`95`	`}`
`77`	`96`	`}`

`‎src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java`

Lines changed: 58 additions & 11 deletions

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@`
`22`	`22`	`package com.igormaznitsa.prologparser.tokenizer;`
`23`	`23`
`24`	`24`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_BLOCK_COMMENTS;`
	`25`	`+import static com.igormaznitsa.prologparser.ParserContext.FLAG_COMMENTS_AS_ATOMS;`
`25`	`26`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_ALLOWS_WHITESPACE_CHAR;`
`26`	`27`	`import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_QUOTATION_CHARCODE;`
`27`	`28`	`import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.ATOM;`
`@@ -58,6 +59,7 @@ public final class Tokenizer {`
`58`	`59`	`private final StringBuilderEx specCharBuf;`
`59`	`60`	`private final StringBuilderEx insideCharBuffer;`
`60`	`61`	`private final boolean blockCommentsAllowed;`
	`62`	`+ private final boolean returnCommentsAsToken;`
`61`	`63`	`private final boolean zeroSingleQuotationAllowed;`
`62`	`64`	`private final boolean zeroQuotationAllowsWhitespaceChar;`
`63`	`65`	`private final Reader reader;`
`@@ -82,6 +84,8 @@ public Tokenizer(final PrologParser parser, final Koi7CharOpMap metaOperators,`
`82`	`84`
`83`	`85`	`final int maxAllowedCharBufferSize = parser.getContext() == null ? Integer.MAX_VALUE :`
`84`	`86`	`parser.getContext().getMaxTokenizerBufferLength();`
	`87`	`+ this.returnCommentsAsToken = parser.getContext() != null`
	`88`	`+ && ((parser.getContext().getFlags() & FLAG_COMMENTS_AS_ATOMS) != 0);`
`85`	`89`	`this.blockCommentsAllowed = parser.getContext() != null`
`86`	`90`	`&& ((parser.getContext().getFlags() & FLAG_BLOCK_COMMENTS) != 0);`
`87`	`91`	`this.zeroSingleQuotationAllowed = parser.getContext() != null`
`@@ -269,25 +273,47 @@ public void fixPosition() {`
`269`	`273`	`this.lastTokenPos = this.pos - 1;`
`270`	`274`	`}`
`271`	`275`
`272`		`- private void skipUntilBlockCommentEnd() throws IOException {`
	`276`	`+ private String skipTillBlockCommentEnd(final boolean accumulateText) throws IOException {`
	`277`	`+ final StringBuilder result = accumulateText ? new StringBuilder() : null;`
`273`	`278`	`boolean starCharDetected = false;`
`274`	`279`	`while (true) {`
`275`	`280`	`final int readChar = this.doReadChar();`
`276`		`- if (readChar < 0 \|\| (readChar == '/' && starCharDetected)) {`
	`281`	`+ if (readChar < 0) {`
`277`	`282`	`break;`
	`283`	`+ } else if (readChar == '/') {`
	`284`	`+ if (starCharDetected) {`
	`285`	`+ if (accumulateText) {`
	`286`	`+ result.setLength(result.length() - 1);`
	`287`	`+ }`
	`288`	`+ break;`
	`289`	`+ } else {`
	`290`	`+ if (accumulateText) {`
	`291`	`+ result.append((char) readChar);`
	`292`	`+ }`
	`293`	`+ }`
`278`	`294`	`} else {`
`279`	`295`	`starCharDetected = readChar == '*';`
	`296`	`+ if (accumulateText) {`
	`297`	`+ result.append((char) readChar);`
	`298`	`+ }`
`280`	`299`	`}`
`281`	`300`	`}`
	`301`	`+ return accumulateText ? result.toString() : null;`
`282`	`302`	`}`
`283`	`303`
`284`		`- private void skipUntilNextString() throws IOException {`
	`304`	`+ private String skipTillNextLine(final boolean accumulateText) throws IOException {`
	`305`	`+ final StringBuilder result = accumulateText ? new StringBuilder() : null;`
	`306`	`+`
`285`	`307`	`while (true) {`
`286`	`308`	`final int readChar = this.doReadChar();`
`287`	`309`	`if (readChar < 0 \|\| readChar == '\n') {`
`288`	`310`	`break;`
`289`	`311`	`}`
	`312`	`+ if (accumulateText) {`
	`313`	`+ result.append((char) readChar);`
	`314`	`+ }`
`290`	`315`	`}`
	`316`	`+ return accumulateText ? result.toString() : null;`
`291`	`317`	`}`
`292`	`318`
`293`	`319`	`public TokenizerResult pop() {`
`@@ -324,10 +350,10 @@ public TokenizerResult readNextToken() {`
`324`	`350`	`final StringBuilderEx strBuffer = this.strBuf;`
`325`	`351`	`final StringBuilderEx specCharBuffer = this.specCharBuf;`
`326`	`352`
`327`		`- OpContainer lastFoundFullOperator = null;`
	`353`	`+ final boolean commentsAsAtoms = this.returnCommentsAsToken;`
`328`	`354`
	`355`	`+ OpContainer lastFoundFullOperator = null;`
`329`	`356`	`boolean letterOrDigitOnly = false;`
`330`		`-`
`331`	`357`	`boolean foundUnderscoreInNumber = false;`
`332`	`358`
`333`	`359`	`try {`
`@@ -416,18 +442,30 @@ public TokenizerResult readNextToken() {`
`416`	`442`
`417`	`443`	`final char chr = (char) readChar;`
`418`	`444`
`419`		`- if (state != STRING && this.blockCommentsAllowed && chr == '*'`
`420`		`- && this.strBuf.isLastChar('/')) {`
	`445`	`+ if (state != STRING`
	`446`	`+ && this.blockCommentsAllowed`
	`447`	`+ && chr == '*'`
	`448`	`+ && this.strBuf.isLastChar('/')`
	`449`	`+ ) {`
`421`	`450`	`if (this.strBuf.isSingleChar('/')) {`
`422`	`451`	`this.strBuf.pop();`
`423`	`452`	`state = this.strBuf.isEmpty() ? LOOK_FOR : state;`
`424`	`453`	`} else if (state == OPERATOR) {`
`425`	`454`	`throw new PrologParserException("Operator can be mixed with comment block: "`
`426`		`- + this.strBuf + chr, getLastTokenLine(), getLastTokenPos());`
	`455`	`+ + this.strBuf + chr, this.getLastTokenLine(), this.getLastTokenPos());`
`427`	`456`	`}`
`428`	`457`
`429`		`- skipUntilBlockCommentEnd();`
`430`		`-`
	`458`	`+ if (commentsAsAtoms) {`
	`459`	`+ final String commentText = this.skipTillBlockCommentEnd(true);`
	`460`	`+ return new TokenizerResult(`
	`461`	`+ new PrologAtom(commentText, Quotation.COMMENT_BLOCK),`
	`462`	`+ state,`
	`463`	`+ this.getLastTokenLine(),`
	`464`	`+ this.getLastTokenPos()`
	`465`	`+ );`
	`466`	`+ } else {`
	`467`	`+ this.skipTillBlockCommentEnd(false);`
	`468`	`+ }`
`431`	`469`	`} else {`
`432`	`470`	`switch (state) {`
`433`	`471`	`case LOOK_FOR: {`
`@@ -437,7 +475,16 @@ public TokenizerResult readNextToken() {`
`437`	`475`
`438`	`476`	`switch (chr) {`
`439`	`477`	`case '%': {`
`440`		`- skipUntilNextString();`
	`478`	`+ this.fixPosition();`
	`479`	`+ final String text = skipTillNextLine(commentsAsAtoms);`
	`480`	`+ if (commentsAsAtoms) {`
	`481`	`+ return new TokenizerResult(`
	`482`	`+ new PrologAtom(text, Quotation.COMMENT_LINE),`
	`483`	`+ state,`
	`484`	`+ this.getLastTokenLine(),`
	`485`	`+ this.getLastTokenPos()`
	`486`	`+ );`
	`487`	`+ }`
`441`	`488`	`}`
`442`	`489`	`break;`
`443`	`490`	`case '_': {`

`‎src/main/java/com/igormaznitsa/prologparser/tokenizer/TokenizerState.java`

Lines changed: 3 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -32,5 +32,7 @@ public enum TokenizerState {`
`32`	`32`	`OPERATOR,`
`33`	`33`	`VAR,`
`34`	`34`	`INTEGER,`
`35`		`- FLOAT`
	`35`	`+ FLOAT,`
	`36`	`+ LINE_COMMENT,`
	`37`	`+ BLOCK_COMMENT`
`36`	`38`	`}`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 21c8b2a

File tree

8 files changed

8 files changed

`‎src/main/java/com/igormaznitsa/prologparser/GenericPrologParser.java`

`‎src/main/java/com/igormaznitsa/prologparser/ParserContext.java`

`‎src/main/java/com/igormaznitsa/prologparser/PrologParser.java`

`‎src/main/java/com/igormaznitsa/prologparser/TokenizedCommentListener.java`

`‎src/main/java/com/igormaznitsa/prologparser/terms/Quotation.java`

`‎src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java`

`‎src/main/java/com/igormaznitsa/prologparser/tokenizer/TokenizerState.java`

0 commit comments