|
2 | 2 |
|
3 | 3 |
|
4 | 4 | class Token:
|
5 | | - def __init__(self, token, lexim, position): |
6 | | - self.token = token |
7 | | - self.lexim = lexim |
| 5 | + """Represents a single token produced by the scanner. |
| 6 | + """ |
| 7 | + |
| 8 | + def __init__(self, token_type, value, position): |
| 9 | + """ |
| 10 | + Args: |
| 11 | + token_type (str): the type of the token, e.g. "ID" or "SEMICOLON". |
| 12 | + value (str): the matched text (lexeme) of the token, e.g. "some_variable" or ";". |
| 13 | + position (int): index of the token in the input buffer. |
| 14 | + """ |
| 15 | + self.token_type = token_type |
| 16 | + self.value = value |
8 | 17 | self.position = position
|
9 | 18 |
|
10 | 19 | def __str__(self):
|
11 | | - return f'{self.position}\t{self.lexim}\t{self.token}' |
| 20 | + return f'{self.position}\t{self.value}\t{self.token_type}' |
12 | 21 |
|
13 | 22 | def __repr__(self):
|
14 | 23 | return self.__str__()
|
15 | 24 |
|
16 | 25 |
|
17 | 26 | class UnknownTokenError(Exception):
|
| 27 | + """Custom exception raised when the scanner encounters an unknown token. |
| 28 | + """ |
| 29 | + |
18 | 30 | def __init__(self, buffer, position):
|
| 31 | + """ |
| 32 | + Args: |
| 33 | + buffer (str): the input buffer in which the error was found. |
| 34 | + position (int): index of the error in the buffer. |
| 35 | + """ |
19 | 36 | super().__init__()
|
20 | | - self.buffer = buffer.strip() |
| 37 | + self.buffer = buffer |
21 | 38 | self.position = position
|
22 | 39 |
|
23 | 40 | def __str__(self):
|
24 | 41 | return f'\nLexerError: Unknown token!\n\n▼\n{self.buffer[self.position:self.position + 30]}'
|
25 | 42 |
|
26 | 43 |
|
27 | 44 | class Scanner:
|
| 45 | + """A regex-based lexical analyzer. |
| 46 | + """ |
| 47 | + |
28 | 48 | def __init__(self, rules, buffer):
|
| 49 | + """ |
| 50 | + Args: |
| 51 | + rules (list): a list of (token_type, regex) tuples, e.g. [("SEMICOLON", ";"), ...]. |
| 52 | + buffer (str): the text to be scanned. |
| 53 | + """ |
29 | 54 | rules_list = [f'(?P<{typ}>{reg})' for typ, reg in rules]
|
30 | 55 | self.regex = re.compile('|'.join(rules_list))
|
31 | 56 | self.buffer = buffer
|
| 57 | + self.position = 0 |
32 | 58 |
|
33 | 59 | def token(self):
|
| 60 | + """Return the next token found in the buffer. |
| 61 | + |
| 62 | + Raises: |
| 63 | + UnknownTokenError: raised if the text at the current position matches none of the rules. |
| 64 | + |
| 65 | + Returns: |
| 66 | + Token: the next token, or None if only whitespace remains in the buffer. |
| 67 | + """ |
34 | 68 | if self.position < len(self.buffer):
|
35 | 69 | if match := re.compile('\S').search(self.buffer, self.position):
|
36 | 70 | self.position = match.start()
|
37 | 71 | else:
|
38 | 72 | return None
|
39 | 73 |
|
40 | 74 | if match := self.regex.match(self.buffer, self.position):
|
41 | | - token = Token(token=match.lastgroup, lexim=match.group(match.lastgroup), position=self.position) |
| 75 | + token = Token(token_type=match.lastgroup, value=match.group(match.lastgroup), position=self.position) |
42 | 76 | self.position = match.end()
|
43 | 77 | return token
|
44 | 78 | else:
|
45 | 79 | raise UnknownTokenError(self.buffer, self.position)
|
46 | 80 |
|
47 | | - def tokens_generator(self): |
| 81 | + def token_generator(self): |
| 82 | + """Yield every token in the buffer, starting from the beginning. |
| 83 | + |
| 84 | + Yields: |
| 85 | + Token: the next token found in the buffer. |
| 86 | + """ |
48 | 87 | self.position = 0
|
49 | 88 | while token := self.token():
|
50 | 89 | yield token
|
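For reference, here is a minimal usage sketch of the scanner defined above. The rule names and regular expressions are illustrative assumptions made for this example, not the project's actual token set; only Token, UnknownTokenError and Scanner come from the code in the diff.

# Hypothetical rule set; the real project may use different names and regexes.
rules = [
    ('NUMBER', r'\d+'),
    ('ID', r'[A-Za-z_]\w*'),
    ('ASSIGN', r'='),
    ('SEMICOLON', r';'),
]

# The trailing newline leaves only whitespace after the last token,
# so token() returns None and the generator stops cleanly.
scanner = Scanner(rules, 'count = 42;\n')
for tok in scanner.token_generator():
    print(tok)  # prints "position\tvalue\ttoken_type", e.g. "0\tcount\tID"

# Input that matches none of the rules raises UnknownTokenError.
try:
    list(Scanner(rules, 'count @ 1\n').token_generator())
except UnknownTokenError as err:
    print(err)  # the error message points at the unknown "@"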