[Python-checkins] CVS: python/dist/src/Lib/dos-8x3 test_has.py,NONE,1.1 sre_comp.py,1.2,1.3 sre_cons.py,1.2,1.3 sre_pars.py,1.2,1.3
Guido van Rossum
python-dev@python.org
2000年6月30日 09:13:40 -0700
Update of /cvsroot/python/python/dist/src/Lib/dos-8x3
In directory slayer.i.sourceforge.net:/tmp/cvs-serv31620
Modified Files:
sre_comp.py sre_cons.py sre_pars.py
Added Files:
test_has.py
Log Message:
the usual
--- NEW FILE ---
# test the invariant that
# if a==b then hash(a)==hash(b)
#
import test_support
def same_hash(*objlist):
# hash each object given an raise TestFailed if
# the hash values are not all the same
hashed = map(hash, objlist)
for h in hashed[1:]:
if h != hashed[0]:
raise TestFailed, "hashed values differ: %s" % `objlist`
# the value one written as int, long, float and complex literals
same_hash(1, 1L, 1.0, 1.0+0.0j)
# the same value built through the explicit type constructors
same_hash(int(1), long(1), float(1), complex(1))
# a magnitude far outside the plain int range: long versus float
same_hash(long(1.23e300), float(1.23e300))
# a non-integral value: float versus complex with zero imaginary part
same_hash(float(0.5), complex(0.5, 0.0))
Index: sre_comp.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/dos-8x3/sre_comp.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** sre_comp.py 2000年06月29日 19:35:29 1.2
--- sre_comp.py 2000年06月30日 16:13:37 1.3
***************
*** 19,174 ****
for WORDSIZE in "BHil":
if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
! break
else:
raise RuntimeError, "cannot find a useable array type"
def _compile(code, pattern, flags):
emit = code.append
for op, av in pattern:
! if op is ANY:
! if flags & SRE_FLAG_DOTALL:
! emit(OPCODES[op])
! else:
! emit(OPCODES[CATEGORY])
! emit(CHCODES[CATEGORY_NOT_LINEBREAK])
! elif op in (SUCCESS, FAILURE):
! emit(OPCODES[op])
! elif op is AT:
! emit(OPCODES[op])
! if flags & SRE_FLAG_MULTILINE:
! emit(ATCODES[AT_MULTILINE[av]])
! else:
! emit(ATCODES[av])
! elif op is BRANCH:
! emit(OPCODES[op])
! tail = []
! for av in av[1]:
! skip = len(code); emit(0)
! _compile(code, av, flags)
! emit(OPCODES[JUMP])
! tail.append(len(code)); emit(0)
! code[skip] = len(code) - skip
! emit(0) # end of branch
! for tail in tail:
! code[tail] = len(code) - tail
! elif op is CALL:
! emit(OPCODES[op])
! skip = len(code); emit(0)
! _compile(code, av, flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! elif op is CATEGORY:
! emit(OPCODES[op])
! if flags & SRE_FLAG_LOCALE:
! emit(CH_LOCALE[CHCODES[av]])
! elif flags & SRE_FLAG_UNICODE:
! emit(CH_UNICODE[CHCODES[av]])
! else:
! emit(CHCODES[av])
! elif op is GROUP:
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! else:
! emit(OPCODES[op])
! emit(av-1)
! elif op is IN:
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! def fixup(literal, flags=flags):
! return _sre.getlower(ord(literal), flags)
! else:
! emit(OPCODES[op])
! fixup = ord
! skip = len(code); emit(0)
! for op, av in av:
! emit(OPCODES[op])
! if op is NEGATE:
! pass
! elif op is LITERAL:
! emit(fixup(av))
! elif op is RANGE:
! emit(fixup(av[0]))
! emit(fixup(av[1]))
! elif op is CATEGORY:
! if flags & SRE_FLAG_LOCALE:
! emit(CH_LOCALE[CHCODES[av]])
! elif flags & SRE_FLAG_UNICODE:
! emit(CH_UNICODE[CHCODES[av]])
! else:
! emit(CHCODES[av])
! else:
! raise error, "internal: unsupported set operator"
! emit(OPCODES[FAILURE])
! code[skip] = len(code) - skip
! elif op in (LITERAL, NOT_LITERAL):
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! else:
! emit(OPCODES[op])
! emit(ord(av))
! elif op is MARK:
! emit(OPCODES[op])
! emit(av)
! elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
! if flags & SRE_FLAG_TEMPLATE:
! emit(OPCODES[REPEAT])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! else:
! lo, hi = av[2].getwidth()
! if lo == 0:
! raise error, "nothing to repeat"
! if 0 and lo == hi == 1 and op is MAX_REPEAT:
! # FIXME: <fl> need a better way to figure out when
! # it's safe to use this one (in the parser, probably)
! emit(OPCODES[MAX_REPEAT_ONE])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! else:
! emit(OPCODES[op])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! elif op is SUBPATTERN:
! group = av[0]
! if group:
! emit(OPCODES[MARK])
! emit((group-1)*2)
! _compile(code, av[1], flags)
! if group:
! emit(OPCODES[MARK])
! emit((group-1)*2+1)
! else:
! raise ValueError, ("unsupported operand type", op)
def compile(p, flags=0):
# internal: convert pattern list to internal format
if type(p) in (type(""), type(u"")):
! import sre_parse
! pattern = p
! p = sre_parse.parse(p)
else:
! pattern = None
flags = p.pattern.flags | flags
code = []
_compile(code, p.data, flags)
code.append(OPCODES[SUCCESS])
! # FIXME: <fl> get rid of this limitation
assert p.pattern.groups <= 100,\
! "sorry, but this version only supports 100 named groups"
return _sre.compile(
! pattern, flags,
! array.array(WORDSIZE, code).tostring(),
! p.pattern.groups-1, p.pattern.groupdict
! )
--- 19,229 ----
for WORDSIZE in "BHil":
if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
! break
else:
raise RuntimeError, "cannot find a useable array type"
def _compile(code, pattern, flags):
+ # internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
! if op in (LITERAL, NOT_LITERAL):
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! else:
! emit(OPCODES[op])
! emit(av)
! elif op is IN:
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! def fixup(literal, flags=flags):
! return _sre.getlower(literal, flags)
! else:
! emit(OPCODES[op])
! fixup = lambda x: x
! skip = len(code); emit(0)
! for op, av in av:
! emit(OPCODES[op])
! if op is NEGATE:
! pass
! elif op is LITERAL:
! emit(fixup(av))
! elif op is RANGE:
! emit(fixup(av[0]))
! emit(fixup(av[1]))
! elif op is CATEGORY:
! if flags & SRE_FLAG_LOCALE:
! emit(CHCODES[CH_LOCALE[av]])
! elif flags & SRE_FLAG_UNICODE:
! emit(CHCODES[CH_UNICODE[av]])
! else:
! emit(CHCODES[av])
! else:
! raise error, "internal: unsupported set operator"
! emit(OPCODES[FAILURE])
! code[skip] = len(code) - skip
! elif op is ANY:
! if flags & SRE_FLAG_DOTALL:
! emit(OPCODES[op])
! else:
! emit(OPCODES[CATEGORY])
! emit(CHCODES[CATEGORY_NOT_LINEBREAK])
! elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
! if flags & SRE_FLAG_TEMPLATE:
! emit(OPCODES[REPEAT])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! else:
! lo, hi = av[2].getwidth()
! if lo == 0:
! raise error, "nothing to repeat"
! if 0 and lo == hi == 1 and op is MAX_REPEAT:
! # FIXME: <fl> need a better way to figure out when
! # it's safe to use this one (in the parser, probably)
! emit(OPCODES[MAX_REPEAT_ONE])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! else:
! emit(OPCODES[op])
! skip = len(code); emit(0)
! emit(av[0])
! emit(av[1])
! _compile(code, av[2], flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! elif op is SUBPATTERN:
! group = av[0]
! if group:
! emit(OPCODES[MARK])
! emit((group-1)*2)
! _compile(code, av[1], flags)
! if group:
! emit(OPCODES[MARK])
! emit((group-1)*2+1)
! elif op in (SUCCESS, FAILURE):
! emit(OPCODES[op])
! elif op in (ASSERT, ASSERT_NOT, CALL):
! emit(OPCODES[op])
! skip = len(code); emit(0)
! _compile(code, av, flags)
! emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! elif op is AT:
! emit(OPCODES[op])
! if flags & SRE_FLAG_MULTILINE:
! emit(ATCODES[AT_MULTILINE[av]])
! else:
! emit(ATCODES[av])
! elif op is BRANCH:
! emit(OPCODES[op])
! tail = []
! for av in av[1]:
! skip = len(code); emit(0)
! _compile(code, av, flags)
! emit(OPCODES[JUMP])
! tail.append(len(code)); emit(0)
! code[skip] = len(code) - skip
! emit(0) # end of branch
! for tail in tail:
! code[tail] = len(code) - tail
! elif op is CATEGORY:
! emit(OPCODES[op])
! if flags & SRE_FLAG_LOCALE:
! emit(CHCODES[CH_LOCALE[av]])
! elif flags & SRE_FLAG_UNICODE:
! emit(CHCODES[CH_UNICODE[av]])
! else:
! emit(CHCODES[av])
! elif op is GROUP:
! if flags & SRE_FLAG_IGNORECASE:
! emit(OPCODES[OP_IGNORE[op]])
! else:
! emit(OPCODES[op])
! emit(av-1)
! elif op is MARK:
! emit(OPCODES[op])
! emit(av)
! else:
! raise ValueError, ("unsupported operand type", op)
+ def _compile_info(code, pattern, flags):
+ # internal: compile an info block. in the current version,
+ # this contains min/max pattern width and a literal prefix,
+ # if any
+ lo, hi = pattern.getwidth()
+ if lo == 0:
+ return # not worth it
+ # look for a literal prefix
+ prefix = []
+ if not (flags & SRE_FLAG_IGNORECASE):
+ for op, av in pattern.data:
+ if op is LITERAL:
+ prefix.append(av)
+ else:
+ break
+ # add an info block
+ emit = code.append
+ emit(OPCODES[INFO])
+ skip = len(code); emit(0)
+ # literal flag
+ mask = 0
+ if len(prefix) == len(pattern.data):
+ mask = 1
+ emit(mask)
+ # pattern length
+ emit(lo)
+ if hi < 32768:
+ emit(hi)
+ else:
+ emit(0)
+ # add literal prefix
+ emit(len(prefix))
+ if prefix:
+ code.extend(prefix)
+ # generate overlap table
+ table = [-1] + ([0]*len(prefix))
+ for i in range(len(prefix)):
+ table[i+1] = table[i]+1
+ while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
+ table[i+1] = table[table[i+1]-1]+1
+ code.extend(table[1:]) # don't store first entry
+ code[skip] = len(code) - skip
+
def compile(p, flags=0):
# internal: convert pattern list to internal format
+
+ # compile, as necessary
if type(p) in (type(""), type(u"")):
! import sre_parse
! pattern = p
! p = sre_parse.parse(p)
else:
! pattern = None
!
flags = p.pattern.flags | flags
code = []
+
+ # compile info block
+ _compile_info(code, p, flags)
+
+ # compile the pattern
_compile(code, p.data, flags)
+
code.append(OPCODES[SUCCESS])
!
! # FIXME: <fl> get rid of this limitation!
assert p.pattern.groups <= 100,\
! "sorry, but this version only supports 100 named groups"
!
return _sre.compile(
! pattern, flags,
! array.array(WORDSIZE, code).tostring(),
! p.pattern.groups-1, p.pattern.groupdict
! )
Index: sre_cons.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/dos-8x3/sre_cons.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** sre_cons.py 2000年06月29日 19:35:29 1.2
--- sre_cons.py 2000年06月30日 16:13:37 1.3
***************
*** 24,27 ****
--- 24,28 ----
ANY = "any"
ASSERT = "assert"
+ ASSERT_NOT = "assert_not"
AT = "at"
BRANCH = "branch"
***************
*** 82,86 ****
ANY,
! ASSERT,
AT,
BRANCH,
--- 83,87 ----
ANY,
! ASSERT, ASSERT_NOT,
AT,
BRANCH,
***************
*** 122,127 ****
i = 0
for item in list:
! d[item] = i
! i = i + 1
return d
--- 123,128 ----
i = 0
for item in list:
! d[item] = i
! i = i + 1
return d
***************
*** 177,186 ****
import string
def dump(f, d, prefix):
! items = d.items()
! items.sort(lambda a, b: cmp(a[1], b[1]))
! for k, v in items:
! f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
! f.write("/* generated from sre_constants.py */\n")
dump(f, OPCODES, "SRE_OP")
dump(f, ATCODES, "SRE")
--- 178,202 ----
import string
def dump(f, d, prefix):
! items = d.items()
! items.sort(lambda a, b: cmp(a[1], b[1]))
! for k, v in items:
! f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
! f.write("""\
! /*
! * Secret Labs' Regular Expression Engine
! *
! * regular expression matching engine
! *
! * NOTE: This file is generated by sre_constants.py. If you need
! * to change anything in here, edit sre_constants.py and run it.
! *
! * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
! *
! * See the _sre.c file for information on usage and redistribution.
! */
!
! """)
!
dump(f, OPCODES, "SRE_OP")
dump(f, ATCODES, "SRE")
Index: sre_pars.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/dos-8x3/sre_pars.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -r1.2 -r1.3
*** sre_pars.py 2000年06月29日 19:35:29 1.2
--- sre_pars.py 2000年06月30日 16:13:37 1.3
***************
*** 20,23 ****
--- 20,26 ----
MAXREPEAT = 32767
+ # FIXME: same here
+ CHARMASK = 0x7fff
+
SPECIAL_CHARS = ".\\[{()*+?^$|"
REPEAT_CHARS = "*+?{"
***************
*** 31,54 ****
[...1073 lines suppressed...]
! raise error, "empty group"
! a(s)
! return match.string[:0].join(p)
--- 615,630 ----
p = []
a = p.append
+ sep = match.string[:0]
+ if type(sep) is type(""):
+ char = chr
+ else:
+ char = unichr
for c, s in template:
! if c is LITERAL:
! a(char(s))
! elif c is MARK:
! s = match.group(s)
! if s is None:
! raise error, "empty group"
! a(s)
! return sep.join(p)