Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 009e548

Browse files
BUG: Series.replace NA->NaN (#62487)
1 parent dfe6dc8 commit 009e548

File tree

6 files changed

+129
-4
lines changed

6 files changed

+129
-4
lines changed

‎doc/source/whatsnew/v3.0.0.rst‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1219,10 +1219,11 @@ Other
12191219
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
12201220
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
12211221
- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
1222+
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`)
1223+
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace ``np.nan`` values in an :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`)
12221224
- Fixed bug in :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
12231225
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
12241226
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
1225-
-
12261227

12271228
.. ***DO NOT USE THIS SECTION***
12281229

‎pandas/core/arrays/masked.py‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,9 @@ def __setitem__(self, key, value) -> None:
312312
key = check_array_indexer(self, key)
313313

314314
if is_scalar(value):
315-
if is_valid_na_for_dtype(value, self.dtype):
315+
if is_valid_na_for_dtype(value, self.dtype) and not (
316+
lib.is_float(value) and not is_nan_na()
317+
):
316318
self._mask[key] = True
317319
else:
318320
value = self._validate_setitem_value(value)

‎pandas/core/missing.py‎

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
import numpy as np
1717

18+
from pandas._config import is_nan_na
19+
1820
from pandas._libs import (
1921
NaT,
2022
algos,
@@ -37,7 +39,11 @@
3739
is_object_dtype,
3840
needs_i8_conversion,
3941
)
40-
from pandas.core.dtypes.dtypes import DatetimeTZDtype
42+
from pandas.core.dtypes.dtypes import (
43+
ArrowDtype,
44+
BaseMaskedDtype,
45+
DatetimeTZDtype,
46+
)
4147
from pandas.core.dtypes.missing import (
4248
is_valid_na_for_dtype,
4349
isna,
@@ -86,6 +92,31 @@ def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
8692
"""
8793
dtype, value = infer_dtype_from(value)
8894

95+
if (
96+
isinstance(arr.dtype, (BaseMaskedDtype, ArrowDtype))
97+
and lib.is_float(value)
98+
and np.isnan(value)
99+
and not is_nan_na()
100+
):
101+
# TODO: this should be done in an EA method?
102+
if arr.dtype.kind == "f":
103+
# GH#55127
104+
if isinstance(arr.dtype, BaseMaskedDtype):
105+
# error: "ExtensionArray" has no attribute "_data" [attr-defined]
106+
mask = np.isnan(arr._data) & ~arr.isna() # type: ignore[attr-defined,operator]
107+
return mask
108+
else:
109+
# error: "ExtensionArray" has no attribute "_pa_array" [attr-defined]
110+
import pyarrow.compute as pc
111+
112+
mask = pc.is_nan(arr._pa_array).fill_null(False).to_numpy() # type: ignore[attr-defined]
113+
return mask
114+
115+
elif arr.dtype.kind in "iu":
116+
# GH#51237
117+
mask = np.zeros(arr.shape, dtype=bool)
118+
return mask
119+
89120
if isna(value):
90121
return isna(arr)
91122

‎pandas/tests/arrays/masked/test_indexing.py‎

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79

810

@@ -58,3 +60,47 @@ def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype):
5860
def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype):
5961
arr = pd.array([1, 2, None], dtype=float_ea_dtype)
6062
self._check_setitem_invalid(arr, invalid)
63+
64+
65+
@pytest.mark.parametrize(
66+
"dtype",
67+
[
68+
"Float64",
69+
pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
70+
],
71+
)
72+
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
73+
def test_setitem_nan_in_float64_array(dtype, indexer, using_nan_is_na):
74+
arr = pd.array([0, pd.NA, 1], dtype=dtype)
75+
76+
arr[indexer] = np.nan
77+
if not using_nan_is_na:
78+
assert np.isnan(arr[1])
79+
else:
80+
assert arr[1] is pd.NA
81+
82+
83+
@pytest.mark.parametrize(
84+
"dtype",
85+
[
86+
"Int64",
87+
pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
88+
],
89+
)
90+
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
91+
def test_setitem_nan_in_int64_array(dtype, indexer, using_nan_is_na):
92+
arr = pd.array([0, 1, 2], dtype=dtype)
93+
if not using_nan_is_na:
94+
err = TypeError
95+
msg = "Invalid value 'nan' for dtype 'Int64'"
96+
if dtype == "int64[pyarrow]":
97+
import pyarrow as pa
98+
99+
err = pa.lib.ArrowInvalid
100+
msg = "Could not convert nan with type float"
101+
with pytest.raises(err, match=msg):
102+
arr[indexer] = np.nan
103+
assert arr[1] == 1
104+
else:
105+
arr[indexer] = np.nan
106+
assert arr[1] is pd.NA

‎pandas/tests/frame/methods/test_replace.py‎

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
import pandas.util._test_decorators as td
10+
911
import pandas as pd
1012
from pandas import (
1113
DataFrame,
@@ -1430,6 +1432,49 @@ def test_replace_with_nil_na(self):
14301432
result = ser.replace("nil", "anything else")
14311433
tm.assert_frame_equal(expected, result)
14321434

1435+
@pytest.mark.parametrize(
1436+
"dtype",
1437+
[
1438+
"Float64",
1439+
pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
1440+
],
1441+
)
1442+
def test_replace_na_to_nan_nullable_floats(self, dtype, using_nan_is_na):
1443+
# GH#55127
1444+
df = DataFrame({0: [1, np.nan, 1], 1: Series([0, pd.NA, 1], dtype=dtype)})
1445+
1446+
result = df.replace(pd.NA, np.nan)
1447+
1448+
if using_nan_is_na:
1449+
expected = result
1450+
else:
1451+
expected = DataFrame(
1452+
{0: [1, np.nan, 1], 1: Series([0, np.nan, 1], dtype=dtype)}
1453+
)
1454+
assert np.isnan(expected.loc[1, 1])
1455+
1456+
tm.assert_frame_equal(result, expected)
1457+
1458+
@pytest.mark.parametrize(
1459+
"dtype",
1460+
[
1461+
"Int64",
1462+
pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
1463+
],
1464+
)
1465+
def test_replace_nan_nullable_ints(self, dtype, using_nan_is_na):
1466+
# GH#51237 with nan_is_na=False, replacing NaN should be a no-op here
1467+
ser = Series([1, 2, None], dtype=dtype)
1468+
1469+
result = ser.replace(np.nan, -1)
1470+
1471+
if using_nan_is_na:
1472+
# np.nan is equivalent to pd.NA here
1473+
expected = Series([1, 2, -1], dtype=dtype)
1474+
else:
1475+
expected = ser
1476+
tm.assert_series_equal(result, expected)
1477+
14331478

14341479
class TestDataFrameReplaceRegex:
14351480
@pytest.mark.parametrize(

‎pandas/tests/series/methods/test_convert_dtypes.py‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def test_convert_dtypes(
246246
with pytest.raises(TypeError, match="Invalid value"):
247247
result[result.notna()] = np.nan
248248
else:
249-
result[result.notna()] = np.nan
249+
result[result.notna()] = pd.NA
250250

251251
# Make sure original not changed
252252
tm.assert_series_equal(series, copy)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /