Return to Answer

added 2 characters in body

edited Feb 4, 2018 at 17:25

50.1k
3
130
210

 def __new__(cls, input, value=None, start=0, cur=0, stop=None):
     """Build the tuple after checking 0 <= start <= cur <= stop <= len(input).

     input -- the input being parsed
     value -- arbitrary value constructed by the parser
     start -- index in input where parsing started
     cur -- index in input from where parsing should continue
     stop -- index in input where parsing must stop; None means len(input)

     Raises AssertionError when the indices are out of order (only while
     assertions are enabled).
     """
     if stop is None:
         stop = len(input)
     assert 0 <= start <= cur <= stop <= len(input)
     return super().__new__(cls, input, value, start, cur, stop)

 def __new__(cls, input, value=None, left=0, cur=0, stop=None):
     """Build the tuple after checking 0 <= left <= cur <= stop <= len(input).

     input -- the input being parsed
     value -- arbitrary value constructed by the parser
     left -- index in input where parsing started (passed on as the third
         positional field)
     cur -- index in input from where parsing should continue
     stop -- index in input where parsing must stop; None means len(input)

     Raises AssertionError when the indices are out of order (only while
     assertions are enabled).
     """
     if stop is None:
         stop = len(input)
     # Check the parameter that was actually received: there is no local
     # called `start` in this method, so asserting on it raised NameError.
     assert 0 <= left <= cur <= stop <= len(input)
     return super().__new__(cls, input, value, left, cur, stop)

 def __new__(cls, input, value=None, start=0, cur=0, stop=None):
     """Construct the tuple, defaulting stop to len(input) when it is None.

     The indices must satisfy 0 <= start <= cur <= stop <= len(input);
     this is asserted before the tuple is created.
     """
     stop = len(input) if stop is None else stop
     assert 0 <= start <= cur <= stop <= len(input)
     return super().__new__(cls, input, value, start, cur, stop)

added 10 characters in body

Source Link

edited Feb 4, 2018 at 17:18

Gareth Rees

edited Feb 4, 2018 at 17:18

Gareth Rees

50.1k
3
130
210

 class State(namedtuple('State', 'input value start cur stop')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     stop: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= stop <= len(input).
     This means that parsing input[start:cur] produced value, and next
     the parser must go on to parse input[cur:stop].
     """

     def __new__(cls, input, value=None, left=0, cur=0, stop=None):
         """Create a State, asserting the index invariant.

         left is stored as the start field. stop defaults to len(input)
         when it is None.
         """
         if stop is None:
             stop = len(input)
         # Assert on the `left` parameter; no local named `start` exists
         # here, so referring to it raised NameError.
         assert 0 <= left <= cur <= stop <= len(input)
         return super().__new__(cls, input, value, left, cur, stop)

which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain you won't need to join the pieces, you can just copy the stop field from the last state in the chain.

 class State(namedtuple('State', 'input value start cur end')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     end: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= end <= len(input).
     This means that parsing input[start:cur] produced value, and next
     the parser must go on to parse input[cur:end].
     """

     def __new__(cls, input, value=None, left=0, cur=0, end=None):
         """Create a State, asserting the index invariant.

         left is stored as the start field. end defaults to len(input)
         when it is None.
         """
         if end is None:
             end = len(input)
         # Assert on the `left` parameter; no local named `start` exists
         # here, so referring to it raised NameError.
         assert 0 <= left <= cur <= end <= len(input)
         return super().__new__(cls, input, value, left, cur, end)

 class State(namedtuple('State', 'input value start cur stop')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     stop: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= stop <= len(input).
     This means that parsing input[start:cur] produced value, and next
     the parser must go on to parse input[cur:stop].
     """

     def __new__(cls, input, value=None, left=0, cur=0, stop=None):
         """Create a State, asserting the index invariant.

         left is stored as the start field. stop defaults to len(input)
         when it is None.
         """
         if stop is None:
             stop = len(input)
         # Assert on the `left` parameter; no local named `start` exists
         # here, so referring to it raised NameError.
         assert 0 <= left <= cur <= stop <= len(input)
         return super().__new__(cls, input, value, left, cur, stop)

added 258 characters in body

Source Link

edited Feb 4, 2018 at 17:12

Gareth Rees

edited Feb 4, 2018 at 17:12

Gareth Rees

50.1k
3
130
210

There are two possible designs for the use of state objects. In the first approach, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second approach, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.

The first design saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second design makes many state objects, but alternation is easy (we make separate objects for each alternative).

 class State(namedtuple('State', 'input value start cur end')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     end: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= end <= len(input).
     This means that parsing input[start:cur] produced value, and next
     the parser must go on to parse input[cur:end].
     """

The invariant can be enforced by the __new__ method:

 def __new__(cls, input, value=None, left=0, cur=0, end=None):
     """Build the tuple after checking 0 <= left <= cur <= end <= len(input).

     input -- the input being parsed
     value -- arbitrary value constructed by the parser
     left -- index in input where parsing started (passed on as the third
         positional field)
     cur -- index in input from where parsing should continue
     end -- index in input where parsing must stop; None means len(input)

     Raises AssertionError when the indices are out of order (only while
     assertions are enabled).
     """
     if end is None:
         end = len(input)
     # Check the parameter that was actually received: there is no local
     # called `start` in this method, so asserting on it raised NameError.
     assert 0 <= left <= cur <= end <= len(input)
     return super().__new__(cls, input, value, left, cur, end)

 cur =return self._replace(cur=self.cur + how_many
 assert cur <= self.end
 return self._replace(cur=cur)

which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but I suspect there will turn out to be simplifications — for example in chain you won't need to join the pieces, you can just copy the end field from the last state in the chain.

There are two possible designs for the use of state objects. In the first approach, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second approach, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.

The first approach saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second approach makes many state objects, but alternation is easy (we make separate objects for each alternative).

 class State(namedtuple('State', 'input value start cur end')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     end: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= end <= len(input).
     This means that parsing input[start:cur] produced value, and now
     the parser must continue to parse input[cur:end].
     """

 cur = self.cur + how_many
 assert cur <= self.end
 return self._replace(cur=cur)

which runs in constant time. Of course, the rest of the code would have to be updated to use the new representation, but there might turn out to be simplifications — for example in chain you wouldn't need to join the pieces, you could just set the end field to the end field from the last state in the chain.

There are two possible designs for the use of state objects. In the first, we have a single object representing the current state, and as each piece of the input is parsed, the state is updated. In the second, we treat the state objects as immutable, and make a new one each time we parse a piece of the input.

The first design saves memory (we only need one state object) but alternation is tricky (after exploring one alternative we have to reverse the updates to the state object before exploring the next alternative). The second design makes many state objects, but alternation is easy (we make separate objects for each alternative).

 class State(namedtuple('State', 'input value start cur end')):
     """State of a parser, with attributes:

     input: str -- the input being parsed
     value -- arbitrary value constructed by parser
     start: int -- index in input where parsing started
     cur: int -- index in input from where parsing should continue
     end: int -- index in input where parsing must stop

     It must be the case that 0 <= start <= cur <= end <= len(input).
     This means that parsing input[start:cur] produced value, and next
     the parser must go on to parse input[cur:end].
     """

The invariant can be enforced by the __new__ method:

 def __new__(cls, input, value=None, left=0, cur=0, end=None):
     """Build the tuple after checking 0 <= left <= cur <= end <= len(input).

     input -- the input being parsed
     value -- arbitrary value constructed by the parser
     left -- index in input where parsing started (passed on as the third
         positional field)
     cur -- index in input from where parsing should continue
     end -- index in input where parsing must stop; None means len(input)

     Raises AssertionError when the indices are out of order (only while
     assertions are enabled).
     """
     if end is None:
         end = len(input)
     # Check the parameter that was actually received: there is no local
     # called `start` in this method, so asserting on it raised NameError.
     assert 0 <= left <= cur <= end <= len(input)
     return super().__new__(cls, input, value, left, cur, end)

 return self._replace(cur=self.cur + how_many)

added 377 characters in body

Source Link

edited Feb 4, 2018 at 17:05

Gareth Rees

edited Feb 4, 2018 at 17:05

Gareth Rees

50.1k
3
130
210

Source Link

answered Feb 4, 2018 at 16:49

Gareth Rees

answered Feb 4, 2018 at 16:49

Gareth Rees

50.1k
3
130
210

lang-py