From b9afcf52ca6c74a46556a70811d72ea08a423364 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 19:15:00 +0000 Subject: [PATCH 1/9] BUG: Fix cast_pointwise_result with all-NA values --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/arrays/masked.py | 2 ++ pandas/tests/extension/test_masked.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f91d40c4d9ea9..433be9cc25565 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -942,6 +942,7 @@ Numeric Conversion ^^^^^^^^^^ +- Bug in :meth:`BaseMaskedArray._cast_pointwise_result` with all-NA values results returned ``object`` dtype instead of preserving the original dtype (:issue:`62344`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index b00e362e1309a..bddca5bed6ff8 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -149,6 +149,8 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: return cls(values, mask) def _cast_pointwise_result(self, values) -> ArrayLike: + if isna(values).all(): + return type(self)._from_sequence(values, dtype=self.dtype) values = np.asarray(values, dtype=object) result = lib.maybe_convert_objects(values, convert_to_nullable_dtype=True) lkind = self.dtype.kind diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index e3764b2514680..1c52ae42f4224 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -366,3 +366,20 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request mark = pytest.mark.xfail(reason="GH#62344 incorrectly casts to object") request.applymarker(mark) super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) + + @pytest.mark.parametrize( + "arr, values", + [ + (pd.array([True, False]), [pd.NA, pd.NA]), + (pd.array([1, 2]), [pd.NA, pd.NA]), + ], + ) + def test_cast_pointwise_result_all_na_respects_dtype(self, arr, values): + """ + GH#62344 + Ensure that _cast_pointwise_result respects the original dtype + even when the result consists entirely of NA values. + """ + result = arr._cast_pointwise_result(values) + assert result.dtype == arr.dtype + assert all(x is pd.NA for x in result) From 7fe3e03a6ca27327495b90f6a23d609ff09257ef Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 19:18:18 +0000 Subject: [PATCH 2/9] Remove xfail mark --- pandas/tests/extension/test_masked.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 1c52ae42f4224..ca9040df1a7f6 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -362,9 +362,6 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): tm.assert_series_equal(result, expected) def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request): - if data.dtype.kind == "b": - mark = pytest.mark.xfail(reason="GH#62344 incorrectly casts to object") - request.applymarker(mark) super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) @pytest.mark.parametrize( From da8e156ef5d69bc4b196d6e9b53f7efd0c5cc79f Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 23:46:37 +0000 Subject: [PATCH 3/9] Replace misused docstring with comment in test --- pandas/tests/extension/test_masked.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index ca9040df1a7f6..72b47947a6858 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -371,12 +371,8 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request (pd.array([1, 2]), [pd.NA, pd.NA]), ], ) - def test_cast_pointwise_result_all_na_respects_dtype(self, arr, values): - """ - GH#62344 - Ensure that _cast_pointwise_result respects the original dtype - even when the result consists entirely of NA values. - """ + def test_cast_pointwise_result_all_na_respects_original_dtype(self, arr, values): + # GH#62344 result = arr._cast_pointwise_result(values) assert result.dtype == arr.dtype assert all(x is pd.NA for x in result) From 08a5416d29baafede18703cb3d03e4ca183ef1f9 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 23:51:20 +0000 Subject: [PATCH 4/9] Separate independent test function --- pandas/tests/extension/test_masked.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 72b47947a6858..cfe37f8f7df0e 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -364,15 +364,16 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool): def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request): super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) - @pytest.mark.parametrize( - "arr, values", - [ - (pd.array([True, False]), [pd.NA, pd.NA]), - (pd.array([1, 2]), [pd.NA, pd.NA]), - ], - ) - def test_cast_pointwise_result_all_na_respects_original_dtype(self, arr, values): - # GH#62344 - result = arr._cast_pointwise_result(values) - assert result.dtype == arr.dtype - assert all(x is pd.NA for x in result) + +@pytest.mark.parametrize( + "arr, values", + [ + (pd.array([True, False]), [pd.NA, pd.NA]), + (pd.array([1, 2]), [pd.NA, pd.NA]), + ], +) +def test_cast_pointwise_result_all_na_respects_original_dtype(arr, values): + # GH#62344 + result = arr._cast_pointwise_result(values) + assert result.dtype == arr.dtype + assert all(x is pd.NA for x in result) From 945f1aab576c4acb0fd2f599962eaae4a583b398 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 23:56:52 +0000 Subject: [PATCH 5/9] Move fixed parameter to local variable --- pandas/tests/extension/test_masked.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index cfe37f8f7df0e..960dd4dbc5086 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -365,15 +365,10 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) -@pytest.mark.parametrize( - "arr, values", - [ - (pd.array([True, False]), [pd.NA, pd.NA]), - (pd.array([1, 2]), [pd.NA, pd.NA]), - ], -) -def test_cast_pointwise_result_all_na_respects_original_dtype(arr, values): +@pytest.mark.parametrize("arr", [pd.array([True, False]), pd.array([1, 2])]) +def test_cast_pointwise_result_all_na_respects_original_dtype(arr): # GH#62344 + values = [pd.NA, pd.NA] result = arr._cast_pointwise_result(values) assert result.dtype == arr.dtype assert all(x is pd.NA for x in result) From e636ce212400206b95b2b6db303d3dc287d36a39 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月16日 23:59:54 +0000 Subject: [PATCH 6/9] Add test case for Float64 input --- pandas/tests/extension/test_masked.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 960dd4dbc5086..44a77fec5fc4e 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -365,7 +365,9 @@ def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data) -@pytest.mark.parametrize("arr", [pd.array([True, False]), pd.array([1, 2])]) +@pytest.mark.parametrize( + "arr", [pd.array([True, False]), pd.array([1, 2]), pd.array([1.0, 2.0])] +) def test_cast_pointwise_result_all_na_respects_original_dtype(arr): # GH#62344 values = [pd.NA, pd.NA] From 242dbdffbc1dbe9e2f217023eb4bc83fc22b05a7 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月17日 00:04:01 +0000 Subject: [PATCH 7/9] Remove unnecessary whatsnew note --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 433be9cc25565..f91d40c4d9ea9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -942,7 +942,6 @@ Numeric Conversion ^^^^^^^^^^ -- Bug in :meth:`BaseMaskedArray._cast_pointwise_result` with all-NA values results returned ``object`` dtype instead of preserving the original dtype (:issue:`62344`) - Bug in :meth:`DataFrame.astype` not casting ``values`` for Arrow-based dictionary dtype correctly (:issue:`58479`) - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) From d5c6cfb09c1855fe09b34266f27956256275a377 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月18日 16:20:08 +0000 Subject: [PATCH 8/9] Move all-NA handling logic to maybe_convert_objects --- pandas/_libs/lib.pyi | 3 +++ pandas/_libs/lib.pyx | 13 +++++++++++++ pandas/core/arrays/masked.py | 6 +++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index e50b301c34868..8ea18a31fa4ca 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -94,6 +94,7 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: Literal[False] = ..., convert_to_nullable_dtype: Literal[False] = ..., + dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> npt.NDArray[np.object_ | np.number]: ... @overload @@ -105,6 +106,7 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: bool = ..., convert_to_nullable_dtype: Literal[True] = ..., + dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... @overload @@ -116,6 +118,7 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: bool = ..., convert_to_nullable_dtype: bool = ..., + dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... @overload diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 83a1b09f00a11..814790ff0ba15 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2545,6 +2545,7 @@ def maybe_convert_objects(ndarray[object] objects, bint convert_numeric=True, # NB: different default! bint convert_to_nullable_dtype=False, bint convert_non_numeric=False, + object dtype_if_all_na=None, object dtype_if_all_nat=None) -> "ArrayLike": """ Type inference function-- convert object array to proper dtype @@ -2566,6 +2567,8 @@ def maybe_convert_objects(ndarray[object] objects, encountered, whether to convert and return an Boolean/IntegerArray. convert_non_numeric : bool, default False Whether to convert datetime, timedelta, period, interval types. + dtype_if_all_na : np.dtype, ExtensionDtype, or None, default None + Dtype to cast to if we have all-NA or all-None. dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None Dtype to cast to if we have all-NaT. @@ -2838,6 +2841,16 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.object_ = True + elif seen.null_: + if not seen.object_ and not seen.numeric_ and not seen.bool_: + # all NaT, None, or nan (at least one NA or None) + dtype = dtype_if_all_na + if dtype is not None: + cls = dtype.construct_array_type() + obj = cls._from_sequence([], dtype=dtype) + taker = -np.ones((objects).shape, dtype=np.intp) + return obj.take(taker, allow_fill=True) + if not convert_numeric: # Note: we count "bool" as numeric here. This is because # np.array(list_of_items) will convert bools just like it will numeric diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index bddca5bed6ff8..112e3599f42fb 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -149,10 +149,10 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: return cls(values, mask) def _cast_pointwise_result(self, values) -> ArrayLike: - if isna(values).all(): - return type(self)._from_sequence(values, dtype=self.dtype) values = np.asarray(values, dtype=object) - result = lib.maybe_convert_objects(values, convert_to_nullable_dtype=True) + result = lib.maybe_convert_objects( + values, convert_to_nullable_dtype=True, dtype_if_all_na=self.dtype + ) lkind = self.dtype.kind rkind = result.dtype.kind if (lkind in "iu" and rkind in "iu") or (lkind == rkind == "f"): From 825591adcabe06b20c822b1a53fb3438dc295263 Mon Sep 17 00:00:00 2001 From: heoh Date: 2025年9月20日 02:44:43 +0000 Subject: [PATCH 9/9] Revert "Move all-NA handling logic to maybe_convert_objects" This reverts commit d5c6cfb09c1855fe09b34266f27956256275a377. --- pandas/_libs/lib.pyi | 3 --- pandas/_libs/lib.pyx | 13 ------------- pandas/core/arrays/masked.py | 6 +++--- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index 8ea18a31fa4ca..e50b301c34868 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -94,7 +94,6 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: Literal[False] = ..., convert_to_nullable_dtype: Literal[False] = ..., - dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> npt.NDArray[np.object_ | np.number]: ... @overload @@ -106,7 +105,6 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: bool = ..., convert_to_nullable_dtype: Literal[True] = ..., - dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... @overload @@ -118,7 +116,6 @@ def maybe_convert_objects( convert_numeric: bool = ..., convert_non_numeric: bool = ..., convert_to_nullable_dtype: bool = ..., - dtype_if_all_na: DtypeObj | None = ..., dtype_if_all_nat: DtypeObj | None = ..., ) -> ArrayLike: ... @overload diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 814790ff0ba15..83a1b09f00a11 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2545,7 +2545,6 @@ def maybe_convert_objects(ndarray[object] objects, bint convert_numeric=True, # NB: different default! bint convert_to_nullable_dtype=False, bint convert_non_numeric=False, - object dtype_if_all_na=None, object dtype_if_all_nat=None) -> "ArrayLike": """ Type inference function-- convert object array to proper dtype @@ -2567,8 +2566,6 @@ def maybe_convert_objects(ndarray[object] objects, encountered, whether to convert and return an Boolean/IntegerArray. convert_non_numeric : bool, default False Whether to convert datetime, timedelta, period, interval types. - dtype_if_all_na : np.dtype, ExtensionDtype, or None, default None - Dtype to cast to if we have all-NA or all-None. dtype_if_all_nat : np.dtype, ExtensionDtype, or None, default None Dtype to cast to if we have all-NaT. @@ -2841,16 +2838,6 @@ def maybe_convert_objects(ndarray[object] objects, else: seen.object_ = True - elif seen.null_: - if not seen.object_ and not seen.numeric_ and not seen.bool_: - # all NaT, None, or nan (at least one NA or None) - dtype = dtype_if_all_na - if dtype is not None: - cls = dtype.construct_array_type() - obj = cls._from_sequence([], dtype=dtype) - taker = -np.ones((objects).shape, dtype=np.intp) - return obj.take(taker, allow_fill=True) - if not convert_numeric: # Note: we count "bool" as numeric here. This is because # np.array(list_of_items) will convert bools just like it will numeric diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 112e3599f42fb..bddca5bed6ff8 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -149,10 +149,10 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: return cls(values, mask) def _cast_pointwise_result(self, values) -> ArrayLike: + if isna(values).all(): + return type(self)._from_sequence(values, dtype=self.dtype) values = np.asarray(values, dtype=object) - result = lib.maybe_convert_objects( - values, convert_to_nullable_dtype=True, dtype_if_all_na=self.dtype - ) + result = lib.maybe_convert_objects(values, convert_to_nullable_dtype=True) lkind = self.dtype.kind rkind = result.dtype.kind if (lkind in "iu" and rkind in "iu") or (lkind == rkind == "f"):

AltStyle によって変換されたページ (->オリジナル) /