Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ae97813

Browse files
Adding skeletons for LLParsers and top-downBreadthFirstParser. Still a lot of working out to do.
1 parent 89b44f5 commit ae97813

File tree

2 files changed

+195
-29
lines changed

2 files changed

+195
-29
lines changed

‎sudkampPython/LLParsers.py‎

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
from grammars.grammar import STARTSYMBOL
2+
from grammars.contextFree import ContextFreeGrammar
3+
4+
"""
5+
Algorithm 19.4.1
6+
input: context-free grammar G = (V, Alphabet, P, S)
7+
8+
1. for each a that is in Alphabet do F`(a) := {a}
9+
2. for each A that is in V do F(A) := { {null} if A -> null is a rule in P
10+
{ empty set otherwise
11+
3. repeat
12+
3.1 for each A that is in V do F`(A) := F(A)
13+
3.2 for each rule A -> u<sub>1</sub>u<sub>2</sub>...u<sub>n</sub> with n > 0 do
14+
F(A) := F(A) UNION
15+
trunc<sub>k</sub>(F`(u<sub>1</sub>)F`(u<sub>2</sub>)...F`(u<sub>n</sub>)) until F(A) = F`(A) for all A in V
16+
4. FIRST<sub>k</sub>(A) = F(A)
17+
"""
18+
def FIRSTk( k, G ):
    """
    Compute FIRST_k(A) for every variable A of G (Algorithm 19.4.1).

    Strings are plain Python strings and the null string is "" (not None),
    so that len(), slicing and concatenation work inside trunc_k.

    k -- lookahead length, a non-negative int.
    G -- grammar object exposing `terminals`, `vars` and `rules`; every rule
         has `lhs` and `rhs`.
         NOTE(review): assumes each grammar symbol is a single-character
         string, that `rule.lhs[0]` is the rule's variable (as FOLLOWk
         already assumes) and that a null rule has an empty or None rhs --
         confirm against grammars.contextFree.

    Returns a dict mapping each variable in G.vars to its set of strings.
    """
    # Step 1: F(a) = {a} for every terminal; these entries never change.
    F = {a: {a} for a in G.terminals}
    # Step 2: F(A) = {null} if A -> null is a rule, otherwise the empty set.
    for A in G.vars:
        F[A] = set()
    for rule in G.rules:
        if not rule.rhs:
            F[rule.lhs[0]] = {""}

    while True:  # step 3: repeat ... until nothing changes
        # Step 3.1: snapshot.  A shallow copy is enough because the sets
        # below are replaced (F[A] | ...) and never mutated in place.
        Fprime = dict(F)
        # Step 3.2: every rule A -> u1 u2 ... un with n > 0.
        for rule in G.rules:
            if not rule.rhs:
                continue
            # Build trunc_k(F'(u1) F'(u2) ... F'(un)) one symbol at a time.
            # Truncating after each concatenation gives the same result as
            # truncating once at the end and keeps the intermediate sets small.
            prefixes = {""}
            for symbol in rule.rhs:
                prefixes = trunc_k(
                    k, {x + y for x in prefixes for y in Fprime[symbol]})
                if not prefixes:
                    break  # an empty factor makes the whole product empty
            A = rule.lhs[0]
            F[A] = F[A] | prefixes
        if all(F[A] == Fprime[A] for A in G.vars):  # until
            break

    # Step 4: FIRST_k(A) = F(A), reported for the variables only.
    return {A: F[A] for A in G.vars}
35+
36+
"""
37+
Algorithm 19.5.1
38+
39+
input: context-free grammar G = (V, Alphabet, P, S)
40+
FIRSTk(A) for every A in V
41+
42+
1. FL(S) := {null}
43+
2. for each A in V - {S} do FL(A) := empty
44+
3. repeat
45+
3.1 for each A in V do FL`(A) := FL(A)
46+
3.2 for each rule A -> w = u<sub>1</sub>u<sub>2</sub>...u<sub>n</sub> with
47+
w NOT a terminal string do
48+
3.2.1 L := FL`(A)
49+
3.2.2 if u<sub>n</sub> in V then FL(u<sub>n</sub>) := FL(u<sub>n</sub>) UNION L
50+
3.2.3 for i := n - 1 to 1 do
51+
3.2.3.1 L := trunc<sub>k</sub>(FIRST<sub>k</sub>(u<sub>i+1</sub>)L)
52+
3.2.3.2 if u<sub>i</sub> in V then FL(u<sub>i</sub>) := FL(u<sub>i</sub>) UNION L
53+
until FL(A) = FL`(A) for every A in V
54+
4. FOLLOW<sub>k</sub>(A) := FL(A)
55+
"""
56+
def FOLLOWk(k, G, FIRSTk ):
    """
    Compute FOLLOW_k(A) for every variable A of G (Algorithm 19.5.1).

    Strings are plain Python strings and the null string is "" (not None),
    so that trunc_k can take len() and slices of them.

    k      -- lookahead length, a non-negative int.
    G      -- grammar object exposing `vars` and `rules`; every rule has
              `lhs` and `rhs`.
              NOTE(review): assumes each grammar symbol is a single-character
              string and that a null rule has an empty or None rhs -- confirm
              against grammars.contextFree.
    FIRSTk -- mapping from each variable to its FIRST_k set (it is indexed,
              not called).  Terminals need not be present: FIRST_k(a) is
              taken to be {a}.

    Returns a dict mapping each variable in G.vars to its set of strings.
    """
    # Steps 1 and 2: FL(S) = {null}, FL(A) = empty set for every other variable.
    FL = {A: set() for A in G.vars}
    FL[STARTSYMBOL] = {""}

    while True:  # step 3: repeat ... until nothing changes
        # Step 3.1: snapshot.  A shallow copy is enough because the sets
        # below are replaced (FL[X] | L) and never mutated in place.
        FLprime = dict(FL)
        # Step 3.2: every rule A -> u1 ... un whose rhs is not a terminal string.
        for rule in G.rules:
            u = rule.rhs
            if not u or not any(symbol in G.vars for symbol in u):
                continue
            n = len(u)
            L = FLprime[rule.lhs[0]]  # 3.2.1
            if u[n-1] in G.vars:  # 3.2.2
                FL[u[n-1]] = FL[u[n-1]] | L
            # 3.2.3: the book's i = n-1 .. 1 is index n-2 .. 0 here.
            for i in range(n-2, -1, -1):
                successor = u[i+1]
                if successor in G.vars:
                    first = FIRSTk[successor]
                else:
                    first = {successor}
                # 3.2.3.1: L = trunc_k(FIRST_k(u_{i+1}) L), a concatenation
                # of the two string sets, not a union.
                L = trunc_k( k, {x + y for x in first for y in L} )
                if u[i] in G.vars:  # 3.2.3.2
                    FL[u[i]] = FL[u[i]] | L
        if all(FL[A] == FLprime[A] for A in G.vars):  # until
            break

    # Step 4: FOLLOW_k(A) = FL(A).
    return {A: FL[A] for A in G.vars}
77+
78+
"""
79+
Introduced to simplify the definition of the fixed-length lookahead sets,
80+
trunc_k is a function from powerset(Σ*) to powerset(Σ*) defined by:
81+
trunc_k(X) = {u | u ε X with len(u) <= k or uv ε X with len(u) = k}
82+
"""
83+
def trunc_k( k, X ):
    """
    Return the set of strings of X, each cut down to at most k symbols.

    k -- maximum length to keep; must be non-negative.
    X -- iterable of sliceable, hashable strings (e.g. str).

    Raises ValueError if k is negative, because a negative slice bound would
    silently drop symbols from the end instead of keeping a prefix.
    """
    if k < 0:
        raise ValueError("k must be non-negative, got %r" % (k,))
    # uv[:k] is uv itself when len(uv) <= k and its length-k prefix otherwise,
    # which covers both cases of the definition.
    return {uv[:k] for uv in X}
91+
92+
def LA_k( k, A ):
    """
    Return the length-k lookahead set of A, i.e. trunc_k applied to LA(A).

    k -- lookahead length passed straight to trunc_k.
    A -- argument passed straight to LA (a variable or a rule).
    """
    return trunc_k( k, LA( A ) )
94+
95+
"""
96+
The lookahead set of the variable A, LA(A), is defined by
97+
LA(A) = {x | S *-> uAv *-> ux ε Σ*}
98+
For each rule A -> w in P, the lookahead set of the rule A -> w is defined by
99+
LA(A->w) = {x | wv *-> x where x ε Σ* and S *-> uAv}
100+
"""
101+
def LA( X ):
    """
    Lookahead set of a variable or of a rule -- not implemented yet.

    X -- a str is treated as a variable A; anything else is treated as a
         rule A -> w.

    Raises NotImplementedError in both cases.  Failing loudly here is
    deliberate: returning None would only surface later as an obscure
    TypeError inside trunc_k when called through LA_k.
    """
    if isinstance(X, str):  # X is a variable, A
        # TODO: return {x | S *-> uAv *-> ux ε Σ*}  (this is complicated)
        raise NotImplementedError("LA(A) for a variable is not implemented yet")
    # X is a rule A -> w
    # TODO: return {x | wv *-> x where x ε Σ* and S *-> uAv}
    raise NotImplementedError("LA(A -> w) for a rule is not implemented yet")

‎sudkampPython/breadthFirstParsers.py‎

Lines changed: 88 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,115 @@
1+
from collections import deque
2+
3+
from grammars.grammar import STARTSYMBOL
4+
15
"""
26
Algorithm 18.2.1
37
Breadth-First Top-Down Parser
48
59
input: context free grammar G = (V, Alphabet, P, S)
6-
string p, where p is element of Alphabet*
10+
string p, where p is element of Alphabet*
711
data structure: queue Q
812
913
1. initialize T with root S
1014
INSERT(S, Q)
1115
2. repeat
12-
2.1 q := REMOVE(Q) (node to be expanded)
13-
2.2 i := 0 (number of last rule used)
14-
2.3 done := false (Boolean indicator of expansion completion)
15-
Let q = uAv where A is the leftmost variable in q
16-
2.4 repeat
17-
2.4.1 if there is no A rule numbered greater than i the done := true
18-
2.4.2 if not done then
19-
Let A -> w be the first A rule with number greater than i and
20-
let j be the number of this rule
21-
2.4.2.1 if uwv is not element of Alphabet* and the terminal prefix of
22-
uwv matches a prefix of p then
23-
2.4.2.1.1 INSERT(uwv, Q)
24-
2.4.2.1.2 Add node uwv to T. Set a pointer from uwv to q
25-
end if
26-
end if
27-
2.4.3 i := j
28-
until done or p = uwv
16+
2.1 q := REMOVE(Q) (node to be expanded)
17+
2.2 i := 0 (number of last rule used)
18+
2.3 done := false (Boolean indicator of expansion completion)
19+
Let q = uAv where A is the leftmost variable in q
20+
2.4 repeat
21+
2.4.1 if there is no A rule numbered greater than i then done := true
22+
2.4.2 if not done then
23+
Let A -> w be the first A rule with number greater than i and
24+
let j be the number of this rule
25+
2.4.2.1 if uwv is not element of Alphabet* and the terminal prefix of
26+
uwv matches a prefix of p then
27+
2.4.2.1.1 INSERT(uwv, Q)
28+
2.4.2.1.2 Add node uwv to T. Set a pointer from uwv to q
29+
end if
30+
end if
31+
2.4.3 i := j
32+
until done or p = uwv
2933
until EMPTY(Q) or p = uwv
3034
3. if p = uwv then accept else reject
3135
"""
32-
def breadthFirstTopDownParse( contextFreeGrammar, p):
36+
def breadthFirstTopDownParse( G, p, Q):
    """
    Breadth-first top-down parse of p (Algorithm 18.2.1).

    G -- grammar object exposing `rules`; every rule has `lhs` and `rhs`.
         NOTE(review): assumes sentential forms and right-hand sides are
         strings of single-character symbols with uppercase letters as the
         variables (the convention uAv_structure relies on), and that a null
         rule has an empty or None rhs -- confirm against the grammar classes.
    p -- the string to parse.
    Q -- queue used for the search (e.g. an empty collections.deque); it is
         driven through INSERT / REMOVE / EMPTY and should start out empty.

    Returns True if a derivation of p is found, False if the queue runs dry.
    NOTE: like the textbook algorithm, the search need not terminate when p
    is not derivable (e.g. a left-recursive rule keeps producing forms whose
    terminal prefix is empty and therefore always matches).
    """
    # Step 1: initialise T with root S and put S on the queue.
    root = Node( STARTSYMBOL )
    T = SearchTree( root )  # further nodes hang off the tree via Node.parent
    INSERT( root, Q )

    # Step 2: expand nodes in first-in, first-out order.
    while not EMPTY( Q ):
        q = REMOVE( Q )
        u, A, v = uAv_structure( q.sentForm )  # A is the leftmost variable
        # Walking G.rules in order visits the A rules by increasing rule
        # number, which replaces the i/j/done bookkeeping of steps 2.2-2.4.
        for rule in G.rules:
            if rule.lhs != A:
                continue
            uwv = u + (rule.rhs or "") + v
            if uwv == p:
                return True  # step 3: accept
            leftmostVar = next(
                (i for i, x in enumerate(uwv) if x.isupper()), None)
            if leftmostVar is None:
                continue  # a terminal string other than p is a dead end
            # Step 2.4.2.1: keep uwv only while its terminal prefix still
            # matches a prefix of p.
            if p.startswith( uwv[:leftmostVar] ):
                INSERT( Node( uwv, parent=q ), Q )  # pointer from uwv to q
    return False  # step 3: reject
61+
62+
def uAv_structure( sentForm ):
    """
    Split a sentential form around its leftmost variable.

    sentForm -- string of single-character symbols; a symbol is treated as a
                variable when it is uppercase (the convention the parsers in
                this code base use to tell variables from terminals).

    Returns the tuple (u, A, v) with sentForm == u + A + v, where A is the
    leftmost variable and u therefore contains terminals only.

    Raises ValueError if sentForm contains no variable.
    """
    for i, symbol in enumerate(sentForm):
        if symbol.isupper():
            return sentForm[:i], symbol, sentForm[i+1:]
    raise ValueError("sentential form %r contains no variable" % (sentForm,))
3464

65+
def INSERT( x, Q ):
    """
    Place the item x at the rear of the queue Q.

    Returns whatever Q.append returns (None for list and collections.deque).
    """
    return Q.append(x)
68+
69+
def REMOVE( Q ):
    """
    Return the item at the front of Q and delete it from Q.

    Q must provide popleft() (e.g. collections.deque).
    """
    return Q.popleft()
72+
73+
def EMPTY( Q ):
    """ Return True if the queue Q holds no items, False otherwise. """
    return len(Q) == 0
76+
77+
class Node( object ):
    """ A node of the search tree T. Contains a sentential form. """

    def __init__(self, sentForm, parent=None ):
        # parent: the node this one was derived from (None for the root).
        self.parent = parent
        # sentForm: the sentential form stored at this node.
        self.sentForm = sentForm
82+
83+
class SearchTree( object ):
    """
    The 'implicit tree' of a grammar is the tree of possible derivation
    paths in that grammar. This 'search tree' is the portion of the implicit
    tree that is examined during the parse.
    """

    def __init__(self, root=None):
        # root: the node the tree starts from (None for an empty tree).
        self.root = root
        # TODO: expand class to fulfil its function within the parsers
92+
93+
3594
"""
3695
Algorithm 18.4.1
3796
Breadth-First Bottom-Up Parser
3897
3998
input: context-free grammar G = (V, Alphabet, P, S)
40-
string p, where p is element of Alphabet*
99+
string p, where p is element of Alphabet*
41100
data structure: queue Q
42101
43102
1. initialize T with root p
44-
INSERT(p, Q)
103+
INSERT(p, Q)
45104
2. repeat
46-
q := REMOVE(Q)
47-
2.1 for each rule A -> w in P do
48-
2.2.1 for each decomposition uwv of q with v, where v is element of Alphabet* do
49-
2.1.1.1 INSERT(uAv, Q)
50-
2.1.1.2 Add node uAv to T. Set a pointer from uAv to q
51-
end for
52-
end for
53-
until q = S or EMPTY(Q)
105+
q := REMOVE(Q)
106+
2.1 for each rule A -> w in P do
107+
2.1.1 for each decomposition uwv of q, where v is an element of Alphabet*, do
108+
2.1.1.1 INSERT(uAv, Q)
109+
2.1.1.2 Add node uAv to T. Set a pointer from uAv to q
110+
end for
111+
end for
112+
until q = S or EMPTY(Q)
54113
3. if q = S then accept else reject
55114
"""
56115
def breadthFirstBottomUpParser( contextFreeGrammar, p):

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /