def __new__(cls, input, value=None, start=0, cur=0, stop=None):
    """Create a parser state, checking the index invariant.

    input -- the input being parsed
    value -- arbitrary value constructed by the parser
    start -- index in input where parsing started
    cur -- index in input from where parsing should continue
    stop -- index in input where parsing must stop; None means len(input)

    Raises AssertionError unless 0 <= start <= cur <= stop <= len(input).
    """
    if stop is None:
        stop = len(input)
    assert 0 <= start <= cur <= stop <= len(input)
    return super().__new__(cls, input, value, start, cur, stop)
def __new__(cls, input, value=None, start=0, cur=0, stop=None):
    """Create a parser state, checking the index invariant.

    input -- the input being parsed
    value -- arbitrary value constructed by the parser
    start -- index in input where parsing started
    cur -- index in input from where parsing should continue
    stop -- index in input where parsing must stop; None means len(input)

    Raises AssertionError unless 0 <= start <= cur <= stop <= len(input).
    """
    if stop is None:
        stop = len(input)
    # The parameter must be called `start`: the invariant check reads that
    # name, so any other parameter name makes every call fail with NameError.
    assert 0 <= start <= cur <= stop <= len(input)
    return super().__new__(cls, input, value, start, cur, stop)
def __new__(cls, input, value=None, start=0, cur=0, stop=None):
    """Build a new state; an omitted stop means "the end of input".

    The indices are asserted to satisfy
    0 <= start <= cur <= stop <= len(input) before the tuple is created.
    """
    stop = len(input) if stop is None else stop
    assert 0 <= start <= cur <= stop <= len(input)
    fields = (input, value, start, cur, stop)
    return super().__new__(cls, *fields)
class State(namedtuple('State', 'input value start cur stop')):
    """State of a parser, with attributes:

    input: str -- the input being parsed
    value -- arbitrary value constructed by parser
    start: int -- index in input where parsing started
    cur: int -- index in input from where parsing should continue
    stop: int -- index in input where parsing must stop

    It must be the case that 0 <= start <= cur <= stop <= len(input).
    This means that parsing input[start:cur] produced value, and next
    the parser must go on to parse input[cur:stop].
    """

    def __new__(cls, input, value=None, start=0, cur=0, stop=None):
        """Create a state, enforcing the index invariant.

        stop defaults to len(input) when it is None. Raises AssertionError
        unless 0 <= start <= cur <= stop <= len(input).
        """
        if stop is None:
            stop = len(input)
        # The parameter is named after the field it fills, so the check
        # below refers to a name that actually exists.
        assert 0 <= start <= cur <= stop <= len(input)
        return super().__new__(cls, input, value, start, cur, stop)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain
you won't need to join the pieces, you can just copy the stop
field from the last state in the chain.
class State(namedtuple('State', 'input value start cur end')):
    """State of a parser, with attributes:

    input: str -- the input being parsed
    value -- arbitrary value constructed by parser
    start: int -- index in input where parsing started
    cur: int -- index in input from where parsing should continue
    end: int -- index in input where parsing must stop

    It must be the case that 0 <= start <= cur <= end <= len(input).
    This means that parsing input[start:cur] produced value, and next
    the parser must go on to parse input[cur:end].
    """

    def __new__(cls, input, value=None, start=0, cur=0, end=None):
        """Create a state, enforcing the index invariant.

        end defaults to len(input) when it is None. Raises AssertionError
        unless 0 <= start <= cur <= end <= len(input).
        """
        if end is None:
            end = len(input)
        # The parameter must be called `start`: the check below reads that
        # name, and with any other parameter name it raises NameError.
        assert 0 <= start <= cur <= end <= len(input)
        return super().__new__(cls, input, value, start, cur, end)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain
you won't need to join the pieces, you can just copy the end
field from the last state in the chain.
class State(namedtuple('State', 'input value start cur stop')):
    """State of a parser, with attributes:

    input: str -- the input being parsed
    value -- arbitrary value constructed by parser
    start: int -- index in input where parsing started
    cur: int -- index in input from where parsing should continue
    stop: int -- index in input where parsing must stop

    It must be the case that 0 <= start <= cur <= stop <= len(input).
    This means that parsing input[start:cur] produced value, and next
    the parser must go on to parse input[cur:stop].
    """

    def __new__(cls, input, value=None, start=0, cur=0, stop=None):
        """Create a state, enforcing the index invariant.

        stop defaults to len(input) when it is None. Raises AssertionError
        unless 0 <= start <= cur <= stop <= len(input).
        """
        if stop is None:
            stop = len(input)
        # The parameter must be called `start`: the check below reads that
        # name, and with any other parameter name it raises NameError.
        assert 0 <= start <= cur <= stop <= len(input)
        return super().__new__(cls, input, value, start, cur, stop)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain
you won't need to join the pieces, you can just copy the stop
field from the last state in the chain.
- There are two possible designs for the use of state objects. In the first approach, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second approach, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.
The first design saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second design makes many state objects, but alternation is easy (we make separate objects for each alternative).
class State(namedtuple("State", ["input", "value", "start", "cur", "end"])):
    """Immutable snapshot of a parser's progress.

    Fields:
    input: str -- the text being parsed
    value -- whatever value the parser has built so far
    start: int -- offset in input at which parsing began
    cur: int -- offset in input at which parsing resumes
    end: int -- offset in input beyond which parsing must not go

    Invariant: 0 <= start <= cur <= end <= len(input).
    In other words, input[start:cur] has been parsed to give value, and
    input[cur:end] is what remains for the parser to consume next.
    """
The invariant can be enforced by the __new__
method:
def __new__(cls, input, value=None, start=0, cur=0, end=None):
    """Create a parser state, checking the index invariant.

    input -- the input being parsed
    value -- arbitrary value constructed by the parser
    start -- index in input where parsing started
    cur -- index in input from where parsing should continue
    end -- index in input where parsing must stop; None means len(input)

    Raises AssertionError unless 0 <= start <= cur <= end <= len(input).
    """
    if end is None:
        end = len(input)
    # The parameter must be called `start`: the invariant check reads that
    # name, so any other parameter name makes every call fail with NameError.
    assert 0 <= start <= cur <= end <= len(input)
    return super().__new__(cls, input, value, start, cur, end)
cur =return self._replace(cur=self.cur + how_many
assert cur <= self.end
return self._replace(cur=cur)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain
you won't need to join the pieces, you can just copy the end
field from the last state in the chain.
- There are two possible designs for the use of state objects. In the first approach, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second approach, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.
The first approach saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second approach makes many state objects, but alternation is easy (we make separate objects for each alternative).
class State(namedtuple("State", ["input", "value", "start", "cur", "end"])):
    """Immutable snapshot of a parser's progress.

    Fields:
    input: str -- the text being parsed
    value -- whatever value the parser has built so far
    start: int -- offset in input at which parsing began
    cur: int -- offset in input at which parsing resumes
    end: int -- offset in input beyond which parsing must not go

    Invariant: 0 <= start <= cur <= end <= len(input).
    In other words, input[start:cur] has been parsed to give value, and
    input[cur:end] is what the parser has to continue with.
    """
cur = self.cur + how_many
assert cur <= self.end
return self._replace(cur=cur)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but there might turn out to be simplifications — for example in chain
you wouldn't need to join the pieces, you could just set the end
field to the end
field from the last state in the chain.
- There are two possible designs for the use of state objects. In the first, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.
The first design saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second design makes many state objects, but alternation is easy (we make separate objects for each alternative).
class State(namedtuple("State", ["input", "value", "start", "cur", "end"])):
    """Immutable snapshot of a parser's progress.

    Fields:
    input: str -- the text being parsed
    value -- whatever value the parser has built so far
    start: int -- offset in input at which parsing began
    cur: int -- offset in input at which parsing resumes
    end: int -- offset in input beyond which parsing must not go

    Invariant: 0 <= start <= cur <= end <= len(input).
    In other words, input[start:cur] has been parsed to give value, and
    input[cur:end] is what remains for the parser to consume next.
    """
The invariant can be enforced by the __new__
method:
def __new__(cls, input, value=None, start=0, cur=0, end=None):
    """Create a parser state, checking the index invariant.

    input -- the input being parsed
    value -- arbitrary value constructed by the parser
    start -- index in input where parsing started
    cur -- index in input from where parsing should continue
    end -- index in input where parsing must stop; None means len(input)

    Raises AssertionError unless 0 <= start <= cur <= end <= len(input).
    """
    if end is None:
        end = len(input)
    # The parameter must be called `start`: the invariant check reads that
    # name, so any other parameter name makes every call fail with NameError.
    assert 0 <= start <= cur <= end <= len(input)
    return super().__new__(cls, input, value, start, cur, end)
return self._replace(cur=self.cur + how_many)
which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain
you won't need to join the pieces, you can just copy the end
field from the last state in the chain.