[Python-checkins] CVS: python/dist/src/Lib sre.py,1.30,1.31 sre_compile.py,1.36,1.37 sre_parse.py,1.45,1.46 sre_constants.py,1.27,1.28
Fredrik Lundh
effbot@users.sourceforge.net
Thu, 22 Mar 2001 07:50:12 -0800
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv29581/Lib
Modified Files:
sre.py sre_compile.py sre_parse.py sre_constants.py
Log Message:
sre 2.1b2 update:
- take locale into account for word boundary anchors (#410271)
- restored 2.0's *? behaviour (#233283, #408936 and others)
- speed up re.sub/re.subn
Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -r1.30 -r1.31
*** sre.py 2001/02/18 12:05:16 1.30
--- sre.py 2001/03/22 15:50:10 1.31
***************
*** 24,27 ****
--- 24,29 ----
"UNICODE", "error" ]
+ __version__ = "2.1b2"
+
# this module works under 1.5.2 and later. don't use string methods
import string
***************
*** 91,94 ****
--- 93,97 ----
"Clear the regular expression cache"
_cache.clear()
+ _cache_repl.clear()
def template(pattern, flags=0):
***************
*** 112,115 ****
--- 115,120 ----
_cache = {}
+ _cache_repl = {}
+
_MAXCACHE = 100
***************
*** 135,138 ****
--- 140,158 ----
return p
+ def _compile_repl(*key):
+ # internal: compile replacement pattern
+ p = _cache_repl.get(key)
+ if p is not None:
+ return p
+ repl, pattern = key
+ try:
+ p = sre_parse.parse_template(repl, pattern)
+ except error, v:
+ raise error, v # invalid expression
+ if len(_cache_repl) >= _MAXCACHE:
+ _cache_repl.clear()
+ _cache_repl[key] = p
+ return p
+
def _expand(pattern, match, template):
# internal: match.expand implementation hook
***************
*** 149,153 ****
filter = template
else:
! template = sre_parse.parse_template(template, pattern)
def filter(match, template=template):
return sre_parse.expand_template(template, match)
--- 169,173 ----
filter = template
else:
! template = _compile_repl(template, pattern)
def filter(match, template=template):
return sre_parse.expand_template(template, match)
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.36
retrieving revision 1.37
diff -C2 -r1.36 -r1.37
*** sre_compile.py 2001/02/18 12:05:16 1.36
--- sre_compile.py 2001/03/22 15:50:10 1.37
***************
*** 106,112 ****
emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE:
! emit(ATCODES[AT_MULTILINE.get(av, av)])
! else:
! emit(ATCODES[av])
elif op is BRANCH:
emit(OPCODES[op])
--- 106,115 ----
emit(OPCODES[op])
if flags & SRE_FLAG_MULTILINE:
! av = AT_MULTILINE.get(av, av)
! if flags & SRE_FLAG_LOCALE:
! av = AT_LOCALE.get(av, av)
! elif flags & SRE_FLAG_UNICODE:
! av = AT_UNICODE.get(av, av)
! emit(ATCODES[av])
elif op is BRANCH:
emit(OPCODES[op])
***************
*** 125,133 ****
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
! emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE:
! emit(CHCODES[CH_UNICODE[av]])
! else:
! emit(CHCODES[av])
elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE:
--- 128,135 ----
emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE:
! av = CH_LOCALE[av]
elif flags & SRE_FLAG_UNICODE:
! av = CH_UNICODE[av]
! emit(CHCODES[av])
elif op is GROUPREF:
if flags & SRE_FLAG_IGNORECASE:
Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.45
retrieving revision 1.46
diff -C2 -r1.45 -r1.46
*** sre_parse.py 2001/02/18 21:04:48 1.45
--- sre_parse.py 2001/03/22 15:50:10 1.46
***************
*** 639,642 ****
--- 639,652 ----
p = []
a = p.append
+ def literal(literal, p=p):
+ if p and p[-1][0] is LITERAL:
+ p[-1] = LITERAL, p[-1][1] + literal
+ else:
+ p.append((LITERAL, literal))
+ sep = source[:0]
+ if type(sep) is type(""):
+ char = chr
+ else:
+ char = unichr
while 1:
this = s.get()
***************
*** 682,713 ****
if not code:
this = this[1:]
! code = LITERAL, atoi(this[-6:], 8) & 0xff
! a(code)
else:
try:
! a(ESCAPES[this])
except KeyError:
! for c in this:
! a((LITERAL, ord(c)))
else:
! a((LITERAL, ord(this)))
! return p
def expand_template(template, match):
! # XXX: <fl> this is sooooo slow. drop in the slicelist code instead
! p = []
! a = p.append
sep = match.string[:0]
! if type(sep) is type(""):
! char = chr
! else:
! char = unichr
! for c, s in template:
! if c is LITERAL:
! a(char(s))
! elif c is MARK:
! s = match.group(s)
if s is None:
! raise error, "empty group"
! a(s)
! return string.join(p, sep)
--- 692,732 ----
if not code:
this = this[1:]
! code = LITERAL, char(atoi(this[-6:], 8) & 0xff)
! if code[0] is LITERAL:
! literal(code[1])
! else:
! a(code)
else:
try:
! this = char(ESCAPES[this][1])
except KeyError:
! pass
! literal(this)
else:
! literal(this)
! # convert template to groups and literals lists
! i = 0
! groups = []
! literals = []
! for c, s in p:
! if c is MARK:
! groups.append((i, s))
! literals.append(None)
! else:
! literals.append(s)
! i = i + 1
! return groups, literals
def expand_template(template, match):
! g = match.group
sep = match.string[:0]
! groups, literals = template
! literals = literals[:]
! try:
! for index, group in groups:
! literals[index] = s = g(group)
if s is None:
! raise IndexError
! except IndexError:
! raise error, "empty group"
! return string.join(literals, sep)
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.27
retrieving revision 1.28
diff -C2 -r1.27 -r1.28
*** sre_constants.py 2001/02/18 12:05:16 1.27
--- sre_constants.py 2001/03/22 15:50:10 1.28
***************
*** 12,16 ****
# update when constants are added or removed
! MAGIC = 20010115
# max code word in this release
--- 12,16 ----
# update when constants are added or removed
! MAGIC = 20010320
# max code word in this release
***************
*** 68,71 ****
--- 68,75 ----
AT_END_LINE = "at_end_line"
AT_END_STRING = "at_end_string"
+ AT_LOC_BOUNDARY = "at_loc_boundary"
+ AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
+ AT_UNI_BOUNDARY = "at_uni_boundary"
+ AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
# categories
***************
*** 120,124 ****
ATCODES = [
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
! AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING
]
--- 124,130 ----
ATCODES = [
AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
! AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
! AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
! AT_UNI_NON_BOUNDARY
]
***************
*** 156,159 ****
--- 162,175 ----
AT_BEGINNING: AT_BEGINNING_LINE,
AT_END: AT_END_LINE
+ }
+
+ AT_LOCALE = {
+ AT_BOUNDARY: AT_LOC_BOUNDARY,
+ AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
+ }
+
+ AT_UNICODE = {
+ AT_BOUNDARY: AT_UNI_BOUNDARY,
+ AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}