|
| 1 | +# Compiler Design Course: Tokenizer |
| 2 | + |
| 3 | +You can find the [repository on GitHub](https://github.com/danialkeimasi/python-regex-based-scanner). |
| 4 | + |
| 5 | +### credits |
| 6 | + |
| 7 | +Danial Keimasi |
| 8 | + |
| 9 | +9612358036 |
| 10 | + |
| 11 | +# Grammar |
| 12 | + |
| 13 | +``` |
| 14 | +program -> Statements |
| 15 | +---- |
| 16 | +Statements -> Statement; Statements | Statement; |
| 17 | +---- |
| 18 | +Statement -> IfStatement | ID(ParamList) |
| 19 | +---- |
| 20 | +IfStatement -> if (Exp) { Statements } | if (Exp) { Statements } else { Statements } |
| 21 | +---- |
| 22 | +Exp -> Param BIN_LOGIC_OP Param | Param |
| 23 | +---- |
| 24 | +ParamList -> Param, ParamList | λ |
| 25 | +---- |
| 26 | +Param -> CONST | ID |
| 27 | +---- |
| 28 | +CONST -> CONSTSTR | CONSTNUM |
| 29 | +``` |
| 30 | + |
| 31 | +# How to run the program |
| 32 | + |
| 33 | +This is how you can compile your code using `compiler_cli.py`: |
| 34 | + |
| 35 | +```sh |
| 36 | +python compiler_cli.py path/to/file |
| 37 | +``` |
| 38 | + |
| 39 | +# Input/output example |
| 40 | + |
| 41 | +- We want to tokenize this file: |
| 42 | + |
| 43 | +random_program/main.cpp |
| 44 | + |
| 45 | +```cpp |
| 46 | +some_var = "hello world" * 123; |
| 47 | + |
| 48 | +if (some_var == 2) { |
| 49 | + print("it's equal"); |
| 50 | +} |
| 51 | +``` |
| 52 | + |
| 53 | +- Using this command: |
| 54 | + |
| 55 | +```sh |
| 56 | + |
| 57 | +└─ python compiler_cli.py random_program/main.cpp |
| 58 | + |
| 59 | +0 some_var IDENTIFIER |
| 60 | +9 = ASSIGNMENT_OP |
| 61 | +11 "hello world" CONST_STR |
| 62 | +25 * MULTIPLY_OP |
| 63 | +27 123 CONST_NUMBER |
| 64 | +30 ; SEMICOLON |
| 65 | +33 if IF_KW |
| 66 | +36 ( LP |
| 67 | +37 some_var IDENTIFIER |
| 68 | +46 == EQUAL_OP |
| 69 | +49 2 CONST_NUMBER |
| 70 | +50 ) RP |
| 71 | +52 { LCB |
| 72 | +58 print IDENTIFIER |
| 73 | +63 ( LP |
| 74 | +64 "it's equal" CONST_STR |
| 75 | +76 ) RP |
| 76 | +77 ; SEMICOLON |
| 77 | +79 } RCB |
| 78 | +``` |
| 79 | + |
| 80 | +# compiler_cli.py |
| 81 | + |
| 82 | +This file reads the input file, scans it with the `Scanner` class implemented in `scanner.py`, and prints each token it produces. |
| 83 | + |
| 84 | +```py |
| 85 | +from typer import Typer |
| 86 | +from scanner import Scanner, UnknownTokenError |
| 87 | + |
| 88 | +app = Typer() |
| 89 | + |
| 90 | + |
| 91 | +@app.command() |
| 92 | +def compile(file_address): |
| 93 | + rules = [ |
| 94 | + ('IF_KW', r'if'), |
| 95 | + ('ELSE_KW', r'else'), |
| 96 | + ('FOR_KW', r'for'), |
| 97 | + ('CONST_STR', r'".*?"|\'.*?\''), |
| 98 | + ('CONST_NUMBER', r'\d+'), |
| 99 | + |
| 100 | + ('PLUS_OP', r'\+'), |
| 101 | + ('MINUS_OP', r'\-'), |
| 102 | + ('MULTIPLY_OP', r'\*'), |
| 103 | + ('DIVIDE_OP', r'\/'), |
| 104 | + ('LP', r'\('), |
| 105 | + ('LCB', r'\{'), |
| 106 | + ('RP', r'\)'), |
| 107 | + ('RCB', r'\}'), |
| 108 | + |
| 109 | + ('EQUAL_OP', r'=='), |
| 110 | + ('ASSIGNMENT_OP', r'='), |
| 111 | + ('SEMICOLON', r';'), |
| 112 | + ('IDENTIFIER', r'[a-zA-Z_]\w*'), |
| 113 | + ] |
| 114 | + |
| 115 | + scanner = Scanner(rules, open(file_address, 'r').read()) |
| 116 | + try: |
| 117 | + for token in scanner.token_generator(): |
| 118 | + print(token) |
| 119 | + except UnknownTokenError as error: |
| 120 | + print(error) |
| 121 | + |
| 122 | + |
| 123 | +if __name__ == '__main__': |
| 124 | + app() |
| 125 | + |
| 126 | +``` |
0 commit comments