Commit ae97813

committed

Adding skeletons for LLParsers and top-downBreadthFirstParser. Still a lot of working out to do.

1 parent 89b44f5 commit ae97813Copy full SHA for ae97813

File tree

2 files changed

+195

-29

lines changed

sudkampPython
- LLParsers.py
- breadthFirstParsers.py

2 files changed

+195

-29

lines changed

`‎sudkampPython/LLParsers.py‎`

Lines changed: 107 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,107 @@`
	`1`	`+from grammars.grammar import STARTSYMBOL`
	`2`	`+from grammars.contextFree import ContextFreeGrammar`
	`3`	`+`
	`4`	`+"""`
	`5`	`+ Algorithm 19.4.1`
	`6`	`+input: context-free grammar G = (V, Alphabet, P, S)`
	`7`	`+`
	`8`	`+1. for each a that is in Alphabet do F'(a) := {a}`
	`9`	`+2. for each A that is in V do F(A) := { {null} if A -> null is a rule in P`
	`10`	`+ { empty set otherwise`
	`11`	`+3. repeat`
	`12`	`+ 3.1 for each A that is in V do F'(A) := F(A)`
	`13`	`+ 3.2 for each rule A -> u<sub>1</sub>u<sub>2</sub>...u<sub>n</sub> with n > 0 do`
	`14`	`+ F(A) := F(A) UNION`
	`15`	`+ trunc<sub>k</sub>(F'(u<sub>1</sub>)F'(u<sub>2</sub>)...F'(u<sub>n</sub>)) until F(A) = F'(A) for all A in V`
	`16`	`+ 4. FIRST<sub>k</sub>(A) = F(A)`
	`17`	`+"""`
	`18`	`+def FIRSTk( k, G ):`
	`19`	`+ Fprime, F, first_k = {}, {}, {} # initialise dictionaries`
	`20`	`+ for a in G.terminals:`
	`21`	`+ Fprime[a] = {a}`
	`22`	`+ for A in G.vars:`
	`23`	`+ {None} if Rule(A, None) in G.rules else {}`
	`24`	`+ while True:`
	`25`	`+ for A in G.vars:`
	`26`	`+ Fprime[A] = F[A]`
	`27`	`+ for rule in [rule for rule in G.rules if len(rule.rhs) > 0]:`
	`28`	`+ F[A] = F[A] \| trunc_k( Fprime[] for all elements of rhs concatenated)`
	`29`	`+`
	`30`	`+ if all(F[A] == Fprime[A] for A in G.vars):`
	`31`	`+ break`
	`32`	`+ for A in G.vars: # Originally just FIRSTk[A] = F[A]`
	`33`	`+ first_k[A] = F[A]`
	`34`	`+ return first_k`
	`35`	`+`
	`36`	`+"""`
	`37`	`+Algorithm 19.5.1`
	`38`	`+`
	`39`	`+input: context-free grammar G = (V, Alphabet, P, S)`
	`40`	`+ FIRSTk(A) for every A in V`
	`41`	`+`
	`42`	`+1. FL(S) := {null}`
	`43`	`+2. for each A in V - {S} do FL(A) := empty`
	`44`	`+3. repeat`
	`45`	`+ 3.1 for each A in V do FL'(A) := FL(A)`
	`46`	`+ 3.2 for each rule A -> w = u<sub>1</sub>u<sub>2</sub>...u<sub>n</sub> with`
	`47`	`+ w NOT a terminal string do`
	`48`	`+ 3.2.1 L := FL'(A)`
	`49`	`+ 3.2.2 if u<sub>n</sub> in V then FL(u<sub>n</sub>) := FL(u<sub>n</sub>) UNION L`
	`50`	`+ 3.2.3 for i := n - 1 to 1 do`
	`51`	`+ 3.2.3.1 L := trunc<sub>k</sub>(FIRST<sub>k</sub>(u<sub>i+1</sub>)L)`
	`52`	`+ 3.2.3.2 if u<sub>i</sub> in V then FL(u<sub>i</sub>) := FL(u<sub>i</sub>) UNION L`
	`53`	`+ until FL(A) = FL'(A) for every A in V`
	`54`	`+4. FOLLOW<sub>k</sub>(A) := FL(A)`
	`55`	`+"""`
	`56`	`+def FOLLOWk(k, G, FIRSTk ):`
	`57`	`+ follow_k, FLprime = {}, {} # initialise`
	`58`	`+ FL = { STARTSYMBOL : {None} }`
	`59`	`+ for A in (G.vars - {STARTSYMBOL}): FL[A] = {}`
	`60`	`+ while True: # repeat`
	`61`	`+ for A in G.vars: FLprime[A] = FL[A]`
	`62`	`+ for rule in [r for r in G.rules if any(u.isupper() for u in r.rhs)]:`
	`63`	`+ A = rule.lhs[0]`
	`64`	`+ L = FLprime[A]`
	`65`	`+ n = len(rule.rhs)`
	`66`	`+ u = rule.rhs`
	`67`	`+ if u[n-1] in G.vars: FL[u[n-1]] = FL[u[n-1]] \| L`
	`68`	`+ for i in range(len(r.rhs)-2, -1, -1): # equiv to n-1, n-2, n-3, ..., 1`
	`69`	`+ L = trunc_k( k, FIRSTk( k, u[n+1]).update(L) ) # TODO: this could be wrong!!`
	`70`	`+ if u[i] in G.vars:`
	`71`	`+ FL[u[i]] = FL[u[i]] \| L`
	`72`	`+ if all(FL[A] = FLprime[A] for A in G.vars): # until`
	`73`	`+ break`
	`74`	`+ for A in G.vars:`
	`75`	`+ follow_k[A] = FL[A]`
	`76`	`+ return follow_k`
	`77`	`+`
	`78`	`+"""`
	`79`	`+Introduced to simplify the definition of the fixed-length lookahead sets,`
	`80`	`+trunc_k is a function from powerset(Σ*) to powerset(Σ*) defined by:`
	`81`	`+ trunc_k(X) = {u \| u ε X with len(u) <= k or uv ε X with len(u) = k}`
	`82`	`+"""`
	`83`	`+def trunc_k( k, X ):`
	`84`	`+ result = set()`
	`85`	`+ for uv in X:`
	`86`	`+ if len(uv) <= k:`
	`87`	`+ result.add( uv ) # uv is u ε X`
	`88`	`+ else:`
	`89`	`+ result.add( uv[:k] )`
	`90`	`+ return result`
	`91`	`+`
	`92`	`+def LA_k( k, A ):`
	`93`	`+ return trunc_k( k, LA(A))`
	`94`	`+`
	`95`	`+"""`
	`96`	`+The lookahead set of the variable A, LA(A), is defined by`
	`97`	`+ LA(A) = {x \| S *-> uAv *-> ux ε Σ*}`
	`98`	`+For each rule A -> w in P, the lookahead set of the rule A -> w is defined by`
	`99`	`+ LA(A->w) = {x \| wv *-> x where x ε Σ* and S *-> uAv}`
	`100`	`+"""`
	`101`	`+def LA( X ):`
	`102`	`+ if type(X) == str: # X is variable, A`
	`103`	`+ pass # TODO`
	`104`	`+ #return {x \| S -> uAv -> ux ε Σ*} # TODO: this is complicated`
	`105`	`+ else: # X is a rule A -> w`
	`106`	`+ pass # TODO`
	`107`	`+ #return {x \| wv -> x where x ε Σ and S *-> uAv}`

`‎sudkampPython/breadthFirstParsers.py‎`

Lines changed: 88 additions & 29 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,56 +1,115 @@`
	`1`	`+from collections import deque`
	`2`	`+`
	`3`	`+from grammars.grammar import STARTSYMBOL`
	`4`	`+`
`1`	`5`	`"""`
`2`	`6`	`Algorithm 18.2.1`
`3`	`7`	`Breadth-First Top-Down Parser`
`4`	`8`
`5`	`9`	`input: context free grammar G = (V, Alphabet, P, S)`
`6`		`- string p, where p is element of Alphabet*`
	`10`	`+ string p, where p is element of Alphabet*`
`7`	`11`	`data structure: queue Q`
`8`	`12`
`9`	`13`	`1. initialize T with root S`
`10`	`14`	`INSERT(S, Q)`
`11`	`15`	`2. repeat`
`12`		`-2.1 q := REMOVE(Q) (node to be expanded)`
`13`		`-2.2 i := 0 (number of last rule used)`
`14`		`-2.3 done := false (Boolean indicator of expansion completion)`
`15`		`-Let q = uAv where A is the leftmost variable in q`
`16`		`-2.4 repeat`
`17`		`-2.4.1 if there is no A rule numbered greater than i the done := true`
`18`		`-2.4.2 if not done then`
`19`		`-Let A -> w be the first A rule with number greater than i and`
`20`		`-let j be the number of this rule`
`21`		`-2.4.2.1 if uwv is not element of Alphabet* and the terminal prefix of`
`22`		`-uwv matches a prefix of p then`
`23`		`-2.4.2.1.1 INSERT(uwv, Q)`
`24`		`-2.4.2.1.2 Add node uwv to T. Set a pointer from uwv to q`
`25`		`-end if`
`26`		`-end if`
`27`		`-2.4.3 i := j`
`28`		`- until done or p = uwv`
	`16`	`+2.1 q := REMOVE(Q) (node to be expanded)`
	`17`	`+2.2 i := 0 (number of last rule used)`
	`18`	`+2.3 done := false (Boolean indicator of expansion completion)`
	`19`	`+Let q = uAv where A is the leftmost variable in q`
	`20`	`+2.4 repeat`
	`21`	`+2.4.1 if there is no A rule numbered greater than i then done := true`
	`22`	`+2.4.2 if not done then`
	`23`	`+Let A -> w be the first A rule with number greater than i and`
	`24`	`+let j be the number of this rule`
	`25`	`+2.4.2.1 if uwv is not element of Alphabet* and the terminal prefix of`
	`26`	`+uwv matches a prefix of p then`
	`27`	`+2.4.2.1.1 INSERT(uwv, Q)`
	`28`	`+2.4.2.1.2 Add node uwv to T. Set a pointer from uwv to q`
	`29`	`+end if`
	`30`	`+end if`
	`31`	`+2.4.3 i := j`
	`32`	`+ until done or p = uwv`
`29`	`33`	`until EMPTY(Q) or p = uwv`
`30`	`34`	`3. if p = uwv then accept else reject`
`31`	`35`	`"""`
`32`		`-def breadthFirstTopDownParse( contextFreeGrammar, p):`
	`36`	`+def breadthFirstTopDownParse( G, p, Q):`
	`37`	`+ S = Node( STARTSYMBOL )`
	`38`	`+ T = SearchTree( S )`
	`39`	`+ INSERT( S, Q )`
	`40`	`+ while True: # repeat`
	`41`	`+ q = REMOVE(Q)`
	`42`	`+ i = 0`
	`43`	`+ done = False`
	`44`	`+ u, A, v = uAv_structure( q.sentForm )`
	`45`	`+ while True: # repeat`
	`46`	`+ validRules = [(j, rule) for j, rule in enumerate(G.rules) if (rule.lhs == A) and (j > i)]`
	`47`	`+ if not validRules:`
	`48`	`+ done = True`
	`49`	`+ if not done:`
	`50`	`+ A, w = validRules[0][1].lhs, validRules[0][1].rhs # rule A -> w`
	`51`	`+ j = validRules[0][0]`
	`52`	`+ if (not all(x.islower() for x in u+w+v) and`
	`53`	`+ *the terminal prefix of uwv matches a prefix of p*):`
	`54`	`+ uwv = Node(u+w+v)`
	`55`	`+ INSERT( uwv, Q )`
	`56`	`+ uwv.parent = q # and add uwv to T`
	`57`	`+ i = j`
	`58`	`+ if done or p == uwv: break # until`
	`59`	`+ if EMPTY(Q) or p == uwv`
	`60`	`+ return True if p == uwv else False`
	`61`	`+`
	`62`	`+def uAv_structure( sentForm ):`
`33`	`63`	`raise NotImplementedError`
`34`	`64`
	`65`	`+def INSERT( x, Q ):`
	`66`	`+ """ places the string x at the rear of the queue """`
	`67`	`+ return Q.append(x)`
	`68`	`+`
	`69`	`+def REMOVE( Q ):`
	`70`	`+ """ returns the item at the front of Q and deletes it from Q. """`
	`71`	`+ return Q.popleft()`
	`72`	`+`
	`73`	`+def EMPTY( Q ):`
	`74`	`+ """ boolean function that returns true if queue is empty, false otherwise. """`
	`75`	`+ return len(Q) == 0`
	`76`	`+`
	`77`	`+class Node( object ):`
	`78`	`+ """ A node to be a part of the search tree T. Contains a sentential form. """`
	`79`	`+ def __init__(self, sentForm, parent=None ):`
	`80`	`+ self.parent = parent`
	`81`	`+ self.sentForm = sentForm`
	`82`	`+`
	`83`	`+class SearchTree( object ):`
	`84`	`+ """`
	`85`	`+ The 'implicit tree' of a grammar is the tree of possible derivation`
	`86`	`+ paths in that grammar. This 'search tree' is the portion of the implicit`
	`87`	`+ tree that is examined during the parse.`
	`88`	`+ """`
	`89`	`+ def __init__(self, root=None):`
	`90`	`+ self.root = root`
	`91`	`+ # TODO: expand class to fulfil its function within the parsers`
	`92`	`+`
	`93`	`+`
`35`	`94`	`"""`
`36`	`95`	`Algorithm 18.4.1`
`37`	`96`	`Breadth-First Bottom-Up Parser`
`38`	`97`
`39`	`98`	`input: context-free grammar G = (V, Alphabet, P, S)`
`40`		`- string p, where p is element of Alphabet*`
	`99`	`+ string p, where p is element of Alphabet*`
`41`	`100`	`data structure: queue Q`
`42`	`101`
`43`	`102`	`1. initialize T with root p`
`44`		`- INSERT(p, Q)`
	`103`	`+ INSERT(p, Q)`
`45`	`104`	`2. repeat`
`46`		`-q := REMOVE(Q)`
`47`		`-2.1 for each rule A -> w in P do`
`48`		`-2.2.1 for each decomposition uwv of q with v, where v is element of Alphabet* do`
`49`		`-2.1.1.1 INSERT(uAv, Q)`
`50`		`-2.1.1.2 Add node uAv to T. Set a pointer from uAv to q`
`51`		`- end for`
`52`		`-end for`
`53`		`- until q = S or EMPTY(Q)`
	`105`	`+q := REMOVE(Q)`
	`106`	`+2.1 for each rule A -> w in P do`
	`107`	`+2.1.1 for each decomposition uwv of q, where v is an element of Alphabet*, do`
	`108`	`+2.1.1.1 INSERT(uAv, Q)`
	`109`	`+2.1.1.2 Add node uAv to T. Set a pointer from uAv to q`
	`110`	`+ end for`
	`111`	`+end for`
	`112`	`+ until q = S or EMPTY(Q)`
`54`	`113`	`3. if q = S then accept else reject`
`55`	`114`	`"""`
`56`	`115`	`def breadthFirstBottomUpParser( contextFreeGrammar, p):`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit ae97813

File tree

2 files changed

2 files changed

`‎sudkampPython/LLParsers.py‎`

`‎sudkampPython/breadthFirstParsers.py‎`

0 commit comments