Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 08d21d7

Browse files
ptth222 and jorisvandenbossche authored
BUG: fix bug in str.fullmatch for Arrow backend with optional groups (#61073)
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent 2b25842 commit 08d21d7

File tree

4 files changed

+54
-6
lines changed

4 files changed

+54
-6
lines changed

‎doc/source/whatsnew/v2.3.3.rst‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Bug fixes
3535
- Fix bug in :meth:`Series.str.replace` using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
3636
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
3737
with a compiled regex and custom flags (:issue:`62240`)
38+
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
3839

3940
.. ---------------------------------------------------------------------------
4041
.. _whatsnew_233.contributors:

‎pandas/core/arrays/_arrow_string_mixins.py‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,12 @@ def _str_fullmatch(
326326
flags: int = 0,
327327
na: Scalar | lib.NoDefault = lib.no_default,
328328
):
329-
if not pat.endswith("$") or pat.endswith("\\$"):
330-
pat = f"{pat}$"
329+
if (not pat.endswith("$") or pat.endswith("\\$")) and not pat.startswith("^"):
330+
pat = f"^({pat})$"
331+
elif not pat.endswith("$") or pat.endswith("\\$"):
332+
pat = f"^({pat[1:]})$"
333+
elif not pat.startswith("^"):
334+
pat = f"^({pat[0:-1]})$"
331335
return self._str_match(pat, case, flags, na)
332336

333337
def _str_find(self, sub: str, start: int = 0, end: int | None = None):

‎pandas/tests/extension/test_arrow.py‎

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1870,23 +1870,28 @@ def test_str_match(pat, case, na, exp):
18701870

18711871
@pytest.mark.parametrize(
18721872
"pat, case, na, exp",
1873+
# Note: keep cases in sync with
1874+
# pandas/tests/strings/test_find_replace.py::test_str_fullmatch_extra_cases
18731875
[
1874-
["abc", False, None, [True, True, False, None]],
1876+
["abc", False, None, [True, False, False, None]],
18751877
["Abc", True, None, [False, False, False, None]],
18761878
["bc", True, None, [False, False, False, None]],
1877-
["ab", False, None, [True, True, False, None]],
1878-
["a[a-z]{2}", False, None, [True, True, False, None]],
1879+
["ab", False, None, [False, False, False, None]],
1880+
["a[a-z]{2}", False, None, [True, False, False, None]],
18791881
["A[a-z]{1}", True, None, [False, False, False, None]],
18801882
# GH Issue: #56652
18811883
["abc$", False, None, [True, False, False, None]],
18821884
["abc\\$", False, None, [False, True, False, None]],
18831885
["Abc$", True, None, [False, False, False, None]],
18841886
["Abc\\$", True, None, [False, False, False, None]],
1887+
# https://github.com/pandas-dev/pandas/issues/61072
1888+
["(abc)|(abx)", True, None, [True, False, False, None]],
1889+
["((abc)|(abx))", True, None, [True, False, False, None]],
18851890
],
18861891
)
18871892
def test_str_fullmatch(pat, case, na, exp):
18881893
ser = pd.Series(["abc", "abc$", "$abc", None], dtype=ArrowDtype(pa.string()))
1889-
result = ser.str.match(pat, case=case, na=na)
1894+
result = ser.str.fullmatch(pat, case=case, na=na)
18901895
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
18911896
tm.assert_series_equal(result, expected)
18921897

‎pandas/tests/strings/test_find_replace.py‎

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1075,6 +1075,44 @@ def test_fullmatch_compiled_regex(any_string_dtype):
10751075
values.str.fullmatch(re.compile("ab"), flags=re.IGNORECASE)
10761076

10771077

1078+
@pytest.mark.parametrize(
1079+
"pat, case, na, exp",
1080+
# Note: keep cases in sync with
1081+
# pandas/tests/extension/test_arrow.py::test_str_fullmatch
1082+
[
1083+
["abc", False, None, [True, False, False, None]],
1084+
["Abc", True, None, [False, False, False, None]],
1085+
["bc", True, None, [False, False, False, None]],
1086+
["ab", False, None, [False, False, False, None]],
1087+
["a[a-z]{2}", False, None, [True, False, False, None]],
1088+
["A[a-z]{1}", True, None, [False, False, False, None]],
1089+
# GH Issue: #56652
1090+
["abc$", False, None, [True, False, False, None]],
1091+
["abc\\$", False, None, [False, True, False, None]],
1092+
["Abc$", True, None, [False, False, False, None]],
1093+
["Abc\\$", True, None, [False, False, False, None]],
1094+
# https://github.com/pandas-dev/pandas/issues/61072
1095+
["(abc)|(abx)", True, None, [True, False, False, None]],
1096+
["((abc)|(abx))", True, None, [True, False, False, None]],
1097+
],
1098+
)
1099+
def test_str_fullmatch_extra_cases(any_string_dtype, pat, case, na, exp):
1100+
ser = Series(["abc", "abc$", "$abc", None], dtype=any_string_dtype)
1101+
result = ser.str.fullmatch(pat, case=case, na=na)
1102+
1103+
if any_string_dtype == "str":
1104+
# NaN propagates as False
1105+
exp[-1] = False
1106+
expected_dtype = bool
1107+
else:
1108+
expected_dtype = (
1109+
"object" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
1110+
)
1111+
expected = Series([True, False, np.nan, False], dtype=expected_dtype)
1112+
expected = Series(exp, dtype=expected_dtype)
1113+
tm.assert_series_equal(result, expected)
1114+
1115+
10781116
# --------------------------------------------------------------------------------------
10791117
# str.findall
10801118
# --------------------------------------------------------------------------------------

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /