git.postgresql.org Git - postgresql.git/commitdiff

git projects / postgresql.git / commitdiff
? search:
summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: c876fb4)
Fix regexp_matches() handling of zero-length matches.
2013年7月31日 15:31:22 +0000 (11:31 -0400)
2013年7月31日 15:31:22 +0000 (11:31 -0400)
We'd find the same match twice if it was of zero length and not immediately
adjacent to the previous match. replace_text_regexp() got similar cases
right, so adjust this search logic to match that. Note that even though
the regexp_split_to_xxx() functions share this code, they did not display
equivalent misbehavior, because the second match would be considered
degenerate and ignored.

Jeevan Chalke, with some cosmetic changes by me.


diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 6a89fabf4bf25f71587b485c7bec1bb1a7f8285b..ee37dfe991a7a3c3895ffa3d5fa8afd19de0a7e3 100644 (file)
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -957,14 +957,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
break;
/*
- * Advance search position. Normally we start just after the end of
- * the previous match, but always advance at least one character (the
- * special case can occur if the pattern matches zero characters just
- * after the prior match or at the end of the string).
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
*/
- if (start_search < pmatch[0].rm_eo)
- start_search = pmatch[0].rm_eo;
- else
+ start_search = prev_match_end;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
start_search++;
if (start_search > wide_len)
break;
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4aa344896f9f704b8a1681a8978c0218ee49b510..5e2c2ddc532c604a05f365f0cf6761033a35be76 100644 (file)
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3083,7 +3083,10 @@ replace_text_regexp(text *src_text, void *regexp,
break;
/*
- * Search from next character when the matching text is zero width.
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
*/
search_start = data_pos;
if (pmatch[0].rm_so == pmatch[0].rm_eo)
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 281c69528aa986a57642b65fb7244de688cf50e1..19708c32fdd30b9566050b55ef82843b72989923 100644 (file)
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -440,6 +440,64 @@ SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
{barbeque}
(1 row)
+-- start/end-of-line matches are of zero length
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {""}
+ {""}
+ {""}
+(4 rows)
+
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {""}
+ {""}
+ {""}
+(4 rows)
+
+SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+ regexp_matches
+----------------
+ {1}
+ {2}
+ {3}
+ {4}
+ {""}
+(5 rows)
+
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+ {""}
+(10 rows)
+
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
+ regexp_matches
+----------------
+ {""}
+ {1}
+ {""}
+ {2}
+ {""}
+ {3}
+ {""}
+ {4}
+ {""}
+(9 rows)
+
-- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re,ドル 'gz');
ERROR: invalid regexp option: "z"
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index e7841aa20d77e315b7bdf056d7da4a8d46578fcb..f9cfaeb44ac2f55d8f5c859a0951aaba9e1dd381 100644 (file)
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -158,6 +158,13 @@ SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$);
-- no capture groups
SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$);
+-- start/end-of-line matches are of zero length
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '^', 'mg');
+SELECT regexp_matches('foo' || chr(10) || 'bar' || chr(10) || 'bequq' || chr(10) || 'baz', '$', 'mg');
+SELECT regexp_matches('1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '^.?', 'mg');
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4' || chr(10), '.?$', 'mg');
+SELECT regexp_matches(chr(10) || '1' || chr(10) || '2' || chr(10) || '3' || chr(10) || '4', '.?$', 'mg');
+
-- give me errors
SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re,ドル 'gz');
SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$);
This is the main PostgreSQL git repository.
RSS Atom

AltStyle によって変換されたページ (->オリジナル) /