musl - an implementation of the standard library for Linux-based systems

index : musl
musl - an implementation of the standard library for Linux-based systems
summary refs log tree commit diff
path: root/src/regex/regcomp.c
diff options
context:
space:
mode:
author: Szabolcs Nagy <nsz@port70.net> 2015年04月18日 15:51:16 +0000
committer: Rich Felker <dalias@aerifal.cx> 2016年01月30日 20:53:04 -0500
commit: 7eaa76fc2e7993582989d3838b1ac32dd8abac09 (patch)
tree: 8d3c0772c1c85b0ba9017fbf919a654a821d8b2d /src/regex/regcomp.c
parent: a8cc2253843e30dfbdf0bb2954439d9f2b2e8704 (diff)
download: musl-7eaa76fc2e7993582989d3838b1ac32dd8abac09.tar.gz
regex: reject repetitions in some cases with REG_BADRPT
Previously repetitions were accepted after empty expressions like in (*|?)|{2}, but in BRE the handling of * and \{\} was not consistent: they were accepted as literals in some cases and as repetitions in others. It is better to treat repetitions after an empty expression as an error (this is allowed by the standard, and glibc mostly does the same). This is hard to do consistently with the current logic, so the new rule is: reject repetitions after empty expressions, except after assertions ^*, $? and empty groups ()+, and never treat them as literals. Empty alternation (|a) is undefined by the standard, but it can be useful, so it should be accepted.
Diffstat (limited to 'src/regex/regcomp.c')
-rw-r--r-- src/regex/regcomp.c 15
1 file changed, 12 insertions, 3 deletions
diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index ac207c89..078f657c 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -837,6 +837,10 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
s--;
break;
+ case '{':
+ /* reject repetitions after empty expression in BRE */
+ if (!ere)
+ return REG_BADRPT;
default:
if (!ere && (unsigned)*s-'1' < 9) {
/* back reference */
@@ -880,10 +884,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
s++;
break;
case '*':
- case '|':
+ return REG_BADPAT;
case '{':
case '+':
case '?':
+ /* reject repetitions after empty expression in ERE */
+ if (ere)
+ return REG_BADRPT;
+ case '|':
if (!ere)
goto parse_literal;
case 0:
@@ -964,8 +972,9 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
}
parse_iter:
- /* extension: repetitions are accepted after an empty node
- eg. (+), ^*, a$?, a|{2} */
+ /* extension: repetitions are rejected after an empty node
+ eg. (+), |*, {2}, but assertions are not treated as empty
+ so ^* or $? are accepted currently. */
switch (*s) {
case '+':
case '?':
generated by cgit v1.2.1 (git 2.18.0) at 2025年09月05日 19:49:31 +0000

AltStyle によって変換されたページ (->オリジナル) /