Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

CLN: Enforce deprecation of not validating na argument to string methods #62399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
rhshadrach wants to merge 3 commits into pandas-dev:main
base: main
Choose a base branch
Loading
from rhshadrach:cln_enforce_na_validation
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,7 @@ Other Removals
- Enforce deprecation in :func:`testing.assert_series_equal` and :func:`testing.assert_frame_equal` with object dtype and mismatched null-like values, which are now considered not-equal (:issue:`18463`)
- Enforce banning of upcasting in in-place setitem-like operations (:issue:`59007`) (see `PDEP6 <https://pandas.pydata.org/pdeps/0006-ban-upcasting.html>`_)
- Enforced deprecation ``all`` and ``any`` reductions with ``datetime64``, :class:`DatetimeTZDtype`, and :class:`PeriodDtype` dtypes (:issue:`58029`)
- Enforced deprecation of allowing values other than ``bool`` or NA for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith`; such values now raise ``ValueError`` (:issue:`59615`)
- Enforced deprecation disallowing ``float`` "periods" in :func:`date_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range` (:issue:`56036`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced deprecation in :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype performing dtype inference on the ``.index`` of the result (:issue:`56161`)
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/arrays/string_arrow.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
TYPE_CHECKING,
Self,
)
import warnings

import numpy as np

Expand All @@ -19,7 +18,7 @@
PYARROW_MIN_VERSION,
pa_version_under16p0,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_na_arg

from pandas.core.dtypes.common import (
is_scalar,
Expand Down Expand Up @@ -242,17 +241,7 @@ def insert(self, loc: int, item) -> ArrowStringArray:
return super().insert(loc, item)

def _convert_bool_result(self, values, na=lib.no_default, method_name=None):
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
na = bool(na)

validate_na_arg(na, name="na")
if self.dtype.na_value is np.nan:
if na is lib.no_default or isna(na):
# NaN propagates as False
Expand Down
33 changes: 4 additions & 29 deletions pandas/core/strings/object_array.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@
cast,
)
import unicodedata
import warnings

import numpy as np

from pandas._libs import lib
import pandas._libs.missing as libmissing
import pandas._libs.ops as libops
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_na_arg

from pandas.core.dtypes.common import pandas_dtype
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -145,6 +144,7 @@ def _str_contains(
na=lib.no_default,
regex: bool = True,
):
validate_na_arg(na, name="na")
if regex:
if not case:
flags |= re.IGNORECASE
Expand All @@ -158,41 +158,16 @@ def _str_contains(
else:
upper_pat = pat.upper()
f = lambda x: upper_pat in x.upper()
if na is not lib.no_default and not isna(na) and not isinstance(na, bool):
# TODO: Enforce in 3.0 (#59615)
# GH#59561
warnings.warn(
"Allowing a non-bool 'na' in obj.str.contains is deprecated "
"and will raise in a future version.",
FutureWarning, # pdlint: ignore[warning_class]
stacklevel=find_stack_level(),
)
return self._str_map(f, na, dtype=np.dtype("bool"))

def _str_startswith(self, pat, na=lib.no_default):
    """
    Test whether each element starts with ``pat``.

    Parameters
    ----------
    pat : object
        Forwarded to each element's ``startswith`` method.
    na : object, default lib.no_default
        Checked with ``validate_na_arg`` and then forwarded to
        ``self._str_map`` as ``na_value``.

    Returns
    -------
    object
        Whatever ``self._str_map`` returns when asked for a ``bool`` dtype.

    Raises
    ------
    ValueError
        Raised by ``validate_na_arg`` when ``na`` is not acceptable.
    """
    # Invalid ``na`` values are rejected up front, so no further checks on
    # ``na`` (such as the former deprecation warning) are needed afterwards.
    validate_na_arg(na, name="na")
    f = lambda x: x.startswith(pat)
    return self._str_map(f, na_value=na, dtype=np.dtype(bool))

def _str_endswith(self, pat, na=lib.no_default):
    """
    Test whether each element ends with ``pat``.

    Parameters
    ----------
    pat : object
        Forwarded to each element's ``endswith`` method.
    na : object, default lib.no_default
        Checked with ``validate_na_arg`` and then forwarded to
        ``self._str_map`` as ``na_value``.

    Returns
    -------
    object
        Whatever ``self._str_map`` returns when asked for a ``bool`` dtype.

    Raises
    ------
    ValueError
        Raised by ``validate_na_arg`` when ``na`` is not acceptable.
    """
    # Invalid ``na`` values are rejected up front, so no further checks on
    # ``na`` (such as the former deprecation warning) are needed afterwards.
    validate_na_arg(na, name="na")
    f = lambda x: x.endswith(pat)
    return self._str_map(f, na_value=na, dtype=np.dtype(bool))

def _str_replace(
Expand Down
41 changes: 13 additions & 28 deletions pandas/tests/strings/test_find_replace.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -175,17 +175,14 @@ def test_contains_na_kwarg_for_nullable_string_dtype(

values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype)

msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
warn = None
if not pd.isna(na) and not isinstance(na, bool):
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg):
if na in [0, 3] and na is not False:
msg = f"na must be an NA value, True, or False; got {na}"
with pytest.raises(ValueError, match=msg):
values.str.contains("a", na=na, regex=regex)
else:
result = values.str.contains("a", na=na, regex=regex)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)
expected = Series([True, False, False, True, expected], dtype="boolean")
tm.assert_series_equal(result, expected)


def test_contains_moar(any_string_dtype):
Expand Down Expand Up @@ -255,19 +252,9 @@ def test_contains_nan(any_string_dtype):
expected = Series([True, True, True], dtype=expected_dtype)
tm.assert_series_equal(result, expected)

msg = (
"Allowing a non-bool 'na' in obj.str.contains is deprecated and "
"will raise in a future version"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = s.str.contains("foo", na="foo")
if any_string_dtype == "object":
expected = Series(["foo", "foo", "foo"], dtype=np.object_)
elif any_string_dtype.na_value is np.nan:
expected = Series([True, True, True], dtype=np.bool_)
else:
expected = Series([True, True, True], dtype="boolean")
tm.assert_series_equal(result, expected)
msg = "na must be an NA value, True, or False; got foo"
with pytest.raises(ValueError, match=msg):
s.str.contains("foo", na="foo")

result = s.str.contains("foo")
if any_string_dtype == "str":
Expand Down Expand Up @@ -352,12 +339,10 @@ def test_startswith_endswith_validate_na(any_string_dtype):
["om", np.nan, "foo_nom", "nom", "bar_foo", np.nan, "foo"],
dtype=any_string_dtype,
)

msg = "Allowing a non-bool 'na' in obj.str.startswith is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
msg = "na must be an NA value, True, or False; got baz"
with pytest.raises(ValueError, match=msg):
ser.str.startswith("kapow", na="baz")
msg = "Allowing a non-bool 'na' in obj.str.endswith is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
with pytest.raises(ValueError, match=msg):
ser.str.endswith("bar", na="baz")


Expand Down
22 changes: 22 additions & 0 deletions pandas/util/_validators.py
View file Open in desktop
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
is_bool,
is_integer,
)
from pandas.core.dtypes.missing import isna

BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
Expand Down Expand Up @@ -269,6 +270,27 @@ def validate_bool_kwarg(
return value


def validate_na_arg(value, name: str) -> None:
    """
    Validate an ``na`` argument.

    A value is accepted when it is ``lib.no_default``, a ``bool``, or a
    scalar for which ``isna`` returns True. Anything else is rejected.

    Parameters
    ----------
    value : object
        Value to validate.
    name : str
        Name of the argument, used to raise an informative error message.

    Raises
    ------
    ValueError
        When ``value`` is determined to be invalid.
    """
    if value is lib.no_default or isinstance(value, bool):
        return
    # Only scalars can be NA values. ``isna`` on a list-like returns an array:
    # its truthiness is ambiguous for several elements (raising an unrelated
    # error) and wrongly True for a single-element container like ``[nan]``.
    if lib.is_scalar(value) and isna(value):
        return
    raise ValueError(f"{name} must be an NA value, True, or False; got {value}")


def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
"""
Validate the keyword arguments to 'fillna'.
Expand Down
Loading

AltStyle によって変換されたページ (->オリジナル) /