Message389949
| Author |
alegrigoriev |
| Recipients |
alegrigoriev |
| Date |
2021年04月01日.03:11:36 |
| SpamBayes Score |
-1.0 |
| Marked as misclassified |
Yes |
| Message-id |
<1617246696.22.0.947636964181.issue43686@roundup.psfhosted.org> |
| In-reply-to |
| Content |
Certain patterns and input strings cause re.match to take exponentially longer time. This can be expected because of recursive nature of matching some patterns, but it can take surprisingly long time with some combination of a pattern and input data of moderate length. For example:
import re
import time
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
t1 = time.monotonic()
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n')
t2 = time.monotonic()
print(r"re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): %s s" % (t2-t1))
Does:
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_ {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.2190000000409782 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_D {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.4529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DI {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 0.9529999999795109 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DIS {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 1.797000000020489 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISP {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 3.6090000001713634 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPL {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 7.125 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLA {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 14.890999999828637 s
re.match(rb'((?: |\t)*)((?:.*?(?:\\(?: |\t)+)?)*)(\s*)$', b'#define EVENT_DISPLAY {\t\\\r\n\tif {\t\\\r\n\t\tmnt_disp;\t\\\r\n\t} }\r\n'): 29.688000000081956 s
Perhaps re.match needs to be optimized and/or rewritten in low level language (C) |
|
History
|
|---|
| Date |
User |
Action |
Args |
| 2021年04月01日 03:11:36 | alegrigoriev | set | recipients:
+ alegrigoriev |
| 2021年04月01日 03:11:36 | alegrigoriev | set | messageid: <1617246696.22.0.947636964181.issue43686@roundup.psfhosted.org> |
| 2021年04月01日 03:11:36 | alegrigoriev | link | issue43686 messages |
| 2021年04月01日 03:11:36 | alegrigoriev | create |
|