homepage

This issue tracker has been migrated to GitHub , and is currently read-only.
For more information, see the GitHub FAQs in the Python's Developer Guide.

Author alegrigoriev
Recipients alegrigoriev
Date 2021年04月01日.03:11:36
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1617246696.22.0.947636964181.issue43686@roundup.psfhosted.org>
In-reply-to
Content
Certain patterns and input strings cause re.match to take exponentially longer time. This can be expected because of recursive nature of matching some patterns, but it can take surprisingly long time with some combination of a pattern and input data of moderate length. For example:
import re
import time
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
Does:
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.2190000000409782 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.4529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.9529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 1.797000000020489 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 3.6090000001713634 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 7.125 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 14.890999999828637 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 29.688000000081956 s
Perhaps re.match needs to be optimized and/or rewritten in low level language (C)
History
Date User Action Args
2021年04月01日 03:11:36alegrigorievsetrecipients: + alegrigoriev
2021年04月01日 03:11:36alegrigorievsetmessageid: <1617246696.22.0.947636964181.issue43686@roundup.psfhosted.org>
2021年04月01日 03:11:36alegrigorievlinkissue43686 messages
2021年04月01日 03:11:36alegrigorievcreate

AltStyle によって変換されたページ (->オリジナル) /