diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 91ce855f03b08..253c7cdede336 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -794,6 +794,7 @@ Other Removals - Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`) - Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_) - Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`) +- Enforced deprecation disallowing non-``bool``, non-NA values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` (:issue:`59615`) - Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, :func:`interval_range`, (:issue:`56036`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 9046d83dcc09f..4545d35ba5dae 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -6,7 +6,6 @@ TYPE_CHECKING, Self, ) -import warnings import numpy as np @@ -19,7 +18,7 @@ PYARROW_MIN_VERSION, pa_version_under16p0, ) -from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_na_arg from pandas.core.dtypes.common import ( is_scalar, @@ -242,17 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray: return super().insert(loc, item) def _convert_bool_result(self, values, 
na=lib.no_default, method_name=None): - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) - na = bool(na) - + validate_na_arg(na, name="na") if self.dtype.na_value is np.nan: if na is lib.no_default or isna(na): # NaN propagates as False diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index ba35542b7f112..21975cc1e0b5e 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -9,14 +9,13 @@ cast, ) import unicodedata -import warnings import numpy as np from pandas._libs import lib import pandas._libs.missing as libmissing import pandas._libs.ops as libops -from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_na_arg from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.missing import isna @@ -145,6 +144,7 @@ def _str_contains( na=lib.no_default, regex: bool = True, ): + validate_na_arg(na, name="na") if regex: if not case: flags |= re.IGNORECASE @@ -158,41 +158,16 @@ def _str_contains( else: upper_pat = pat.upper() f = lambda x: upper_pat in x.upper() - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.contains is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na, dtype=np.dtype("bool")) def _str_startswith(self, pat, na=lib.no_default): + validate_na_arg(na, name="na") f = lambda x: x.startswith(pat) - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 
(#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.startswith is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_endswith(self, pat, na=lib.no_default): + validate_na_arg(na, name="na") f = lambda x: x.endswith(pat) - if na is not lib.no_default and not isna(na) and not isinstance(na, bool): - # TODO: Enforce in 3.0 (#59615) - # GH#59561 - warnings.warn( - "Allowing a non-bool 'na' in obj.str.endswith is deprecated " - "and will raise in a future version.", - FutureWarning, # pdlint: ignore[warning_class] - stacklevel=find_stack_level(), - ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) def _str_replace( diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index a53b8475aa379..f75492a84042c 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -175,17 +175,14 @@ def test_contains_na_kwarg_for_nullable_string_dtype( values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype) - msg = ( - "Allowing a non-bool 'na' in obj.str.contains is deprecated and " - "will raise in a future version" - ) - warn = None - if not pd.isna(na) and not isinstance(na, bool): - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg): + if na in [0, 3] and na is not False: + msg = f"na must be an NA value, True, or False; got {na}" + with pytest.raises(ValueError, match=msg): + values.str.contains("a", na=na, regex=regex) + else: result = values.str.contains("a", na=na, regex=regex) - expected = Series([True, False, False, True, expected], dtype="boolean") - tm.assert_series_equal(result, expected) + expected = Series([True, False, False, True, expected], dtype="boolean") + tm.assert_series_equal(result, expected) def test_contains_moar(any_string_dtype): @@ -255,19 
+252,9 @@ def test_contains_nan(any_string_dtype): expected = Series([True, True, True], dtype=expected_dtype) tm.assert_series_equal(result, expected) - msg = ( - "Allowing a non-bool 'na' in obj.str.contains is deprecated and " - "will raise in a future version" - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = s.str.contains("foo", na="foo") - if any_string_dtype == "object": - expected = Series(["foo", "foo", "foo"], dtype=np.object_) - elif any_string_dtype.na_value is np.nan: - expected = Series([True, True, True], dtype=np.bool_) - else: - expected = Series([True, True, True], dtype="boolean") - tm.assert_series_equal(result, expected) + msg = "na must be an NA value, True, or False; got foo" + with pytest.raises(ValueError, match=msg): + s.str.contains("foo", na="foo") result = s.str.contains("foo") if any_string_dtype == "str": @@ -352,12 +339,10 @@ def test_startswith_endswith_validate_na(any_string_dtype): ["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"], dtype=any_string_dtype, ) - - msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + msg = "na must be an NA value, True, or False; got baz" + with pytest.raises(ValueError, match=msg): ser.str.startswith("kapow", na="baz") - msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): ser.str.endswith("bar", na="baz") diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 9aab19fe340ec..6e543bbcf1e45 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -22,6 +22,7 @@ is_bool, is_integer, ) +from pandas.core.dtypes.missing import isna BoolishT = TypeVar("BoolishT", bool, int) BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None) @@ -269,6 +270,27 @@ def validate_bool_kwarg( return value +def validate_na_arg(value, name: str): + """ + 
Validate na arguments. + + Parameters + ---------- + value : object + Value to validate. + name : str + Name of the argument, used to raise an informative error message. + + Raises + ------ + ValueError + When ``value`` is determined to be invalid. + """ + if value is lib.no_default or isinstance(value, bool) or isna(value): + return + raise ValueError(f"{name} must be an NA value, True, or False; got {value}") + + def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'.

AltStyle によって変換されたページ (->オリジナル) /