|
2 | 2 |
|
3 | 3 |
|
4 | 4 | class Token:
|
5 | | - def __init__(self, token, lexim, position): |
6 | | - self.token = token |
7 | | - self.lexim = lexim |
| 5 | + """Represents a single token produced by the scanner. |
| 6 | + """ |
| 7 | + |
| 8 | + def __init__(self, token_type, value, position): |
| 9 | + """ |
| 10 | + Args: |
| 11 | + token_type (str): the type of the token, e.g. "ID" or "SEMICOLON". |
| 12 | + value (str): the matched text (lexeme) of the token, e.g. "some_variable" or ";". |
| 13 | + position (int): index of the token in the input buffer. |
| 14 | + """ |
| 15 | + self.token_type = token_type |
| 16 | + self.value = value |
8 | 17 | self.position = position
|
9 | 18 |
|
10 | 19 | def __str__(self):
|
11 | | - return f'{self.position}\t{self.lexim}\t{self.token}' |
| 20 | + return f'{self.position}\t{self.value}\t{self.token_type}' |
12 | 21 |
|
13 | 22 | def __repr__(self):
|
14 | 23 | return self.__str__()
|
15 | 24 |
|
16 | 25 |
|
17 | 26 | class UnknownTokenError(Exception):
|
| 27 | + """Custom exception raised when the scanner encounters an unknown token. |
| 28 | + """ |
| 29 | + |
18 | 30 | def __init__(self, buffer, position):
|
| 31 | + """ |
| 32 | + Args: |
| 33 | + buffer (str): the input buffer in which the error was found. |
| 34 | + position (int): index of the error in the buffer. |
| 35 | + """ |
19 | 36 | super().__init__()
|
20 | | - self.buffer = buffer.strip() |
| 37 | + self.buffer = buffer |
21 | 38 | self.position = position
|
22 | 39 |
|
23 | 40 | def __str__(self):
|
24 | 41 | return f'\nLexerError: Unknown token!\n\n▼\n{self.buffer[self.position:self.position + 30]}'
|
25 | 42 |
|
26 | 43 |
|
27 | 44 | class Scanner:
|
| 45 | + """A regex-based lexical analyzer. |
| 46 | + """ |
| 47 | + |
28 | 48 | def __init__(self, rules, buffer):
|
| 49 | + """ |
| 50 | + Args: |
| 51 | + rules (list): a list of (token_type, regex) tuples, e.g. [("SEMICOLON", ";"), ...]. |
| 52 | + buffer (str): the text to be scanned. |
| 53 | + """ |
29 | 54 | rules_list = [f'(?P<{typ}>{reg})' for typ, reg in rules]
|
30 | 55 | self.regex = re.compile('|'.join(rules_list))
|
31 | 56 | self.buffer = buffer
|
| 57 | + self.position = 0 |
32 | 58 |
|
33 | 59 | def token(self):
|
| 60 | + """Return the next token found in the buffer. |
| 61 | + |
| 62 | + Raises: |
| 63 | + UnknownTokenError: raised if the text at the current position matches none of the rules. |
| 64 | + |
| 65 | + Returns: |
| 66 | + Token: the next token, or None if only whitespace remains in the buffer. |
| 67 | + """ |
34 | 68 | if self.position < len(self.buffer):
|
35 | 69 | if match := re.compile('\S').search(self.buffer, self.position):
|
36 | 70 | self.position = match.start()
|
37 | 71 | else:
|
38 | 72 | return None
|
39 | 73 |
|
40 | 74 | if match := self.regex.match(self.buffer, self.position):
|
41 | | - token = Token(token=match.lastgroup, lexim=match.group(match.lastgroup), position=self.position) |
| 75 | + token = Token(token_type=match.lastgroup, value=match.group(match.lastgroup), position=self.position) |
42 | 76 | self.position = match.end()
|
43 | 77 | return token
|
44 | 78 | else:
|
45 | 79 | raise UnknownTokenError(self.buffer, self.position)
|
46 | 80 |
|
47 | | - def tokens_generator(self): |
| 81 | + def token_generator(self): |
| 82 | + """Yield every token in the buffer, starting from the beginning. |
| 83 | + |
| 84 | + Yields: |
| 85 | + Token: the next token found in the buffer. |
| 86 | + """ |
48 | 87 | self.position = 0
|
49 | 88 | while token := self.token():
|
50 | 89 | yield token
|
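For reference, here is a minimal usage sketch of the scanner defined above. The rule names and regular expressions are illustrative assumptions made for this example, not the project's actual token set; only Token, UnknownTokenError and Scanner come from the code in the diff.

# Hypothetical rule set; the real project may use different names and regexes.
rules = [
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z_]\w*'),
    ('ASSIGN', r'='),
    ('SEMICOLON', r';'),
]

# The trailing newline leaves only whitespace after the last token,
# so token() returns None and the generator stops cleanly.
scanner = Scanner(rules, 'count = 42;\n')
for tok in scanner.token_generator():
    print(tok)  # prints "position\tvalue\ttoken_type", e.g. "0\tcount\tID"

# Input that matches none of the rules raises UnknownTokenError.
try:
    list(Scanner(rules, 'count @ 1\n').token_generator())
except UnknownTokenError as err:
    print(err)  # the error message points at the unknown "@"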