Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bc500f7

Browse files
jbrockmendel and mroeschke authored
BUG: adding row to DataFrame with EADtype index loses dtype (#62345)
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
1 parent 324b986 commit bc500f7

File tree

13 files changed

+92
-19
lines changed

13 files changed

+92
-19
lines changed

‎doc/source/whatsnew/v3.0.0.rst‎

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,7 @@ Other API changes
603603
an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining
604604
the dtype of the resulting Index (:issue:`60797`)
605605
- :class:`IncompatibleFrequency` now subclasses ``TypeError`` instead of ``ValueError``. As a result, joins with mismatched frequencies now cast to object like other non-comparable joins, and arithmetic with indexes with mismatched frequencies align (:issue:`55782`)
606+
- :meth:`CategoricalIndex.append` no longer attempts to cast different-dtype indexes to the caller's dtype (:issue:`41626`)
606607
- :meth:`ExtensionDtype.construct_array_type` is now a regular method instead of a ``classmethod`` (:issue:`58663`)
607608
- Comparison operations between :class:`Index` and :class:`Series` now consistently return :class:`Series` regardless of which object is on the left or right (:issue:`36759`)
608609
- Numpy functions like ``np.isinf`` that return a bool dtype when called on a :class:`Index` object now return a bool-dtype :class:`Index` instead of ``np.ndarray`` (:issue:`52676`)
@@ -974,8 +975,8 @@ Indexing
974975
- Bug in reindexing of :class:`DataFrame` with :class:`PeriodDtype` columns in case of consolidated block (:issue:`60980`, :issue:`60273`)
975976
- Bug in :meth:`DataFrame.loc.__getitem__` and :meth:`DataFrame.iloc.__getitem__` with a :class:`CategoricalDtype` column with integer categories raising when trying to index a row containing a ``NaN`` entry (:issue:`58954`)
976977
- Bug in :meth:`Index.__getitem__` incorrectly raising with a 0-dim ``np.ndarray`` key (:issue:`55601`)
978+
- Bug in adding new rows with :meth:`DataFrame.loc.__setitem__` or :meth:`Series.loc.__setitem__` which failed to retain dtype on the object's index in some cases (:issue:`41626`)
977979
- Bug in indexing on a :class:`DatetimeIndex` with a ``timestamp[pyarrow]`` dtype or on a :class:`TimedeltaIndex` with a ``duration[pyarrow]`` dtype (:issue:`62277`)
978-
-
979980

980981
Missing
981982
^^^^^^^
@@ -1094,7 +1095,7 @@ Reshaping
10941095
- Bug in :func:`melt` where calling with duplicate column names in ``id_vars`` raised a misleading ``AttributeError`` (:issue:`61475`)
10951096
- Bug in :meth:`DataFrame.merge` where user-provided suffixes could result in duplicate column names if the resulting names matched existing columns. Now raises a :class:`MergeError` in such cases. (:issue:`61402`)
10961097
- Bug in :meth:`DataFrame.merge` with :class:`CategoricalDtype` columns incorrectly raising ``RecursionError`` (:issue:`56376`)
1097-
-
1098+
- Bug in :meth:`DataFrame.merge` with a ``float32`` index incorrectly casting the index to ``float64`` (:issue:`41626`)
10981099

10991100
Sparse
11001101
^^^^^^

‎pandas/core/frame.py‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10918,6 +10918,13 @@ def _append_internal(
1091810918
),
1091910919
)
1092010920
row_df = other.to_frame().T
10921+
if isinstance(self.index.dtype, ExtensionDtype):
10922+
# GH#41626 retain e.g. CategoricalDtype if reached via
10923+
# df.loc[key] = item
10924+
row_df.index = self.index.array._cast_pointwise_result(
10925+
row_df.index._values
10926+
)
10927+
1092110928
# infer_objects is needed for
1092210929
# test_append_empty_frame_to_series_with_dateutil_tz
1092310930
other = row_df.infer_objects().rename_axis(index.names)

‎pandas/core/indexes/category.py‎

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
)
1919

2020
from pandas.core.dtypes.common import is_scalar
21-
from pandas.core.dtypes.concat import concat_compat
2221
from pandas.core.dtypes.dtypes import CategoricalDtype
2322
from pandas.core.dtypes.missing import (
2423
is_valid_na_for_dtype,
@@ -519,17 +518,3 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
519518
"""
520519
mapped = self._values.map(mapper, na_action=na_action)
521520
return Index(mapped, name=self.name)
522-
523-
def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
524-
# if calling index is category, don't check dtype of others
525-
try:
526-
cat = Categorical._concat_same_type(
527-
[self._is_dtype_compat(c) for c in to_concat]
528-
)
529-
except TypeError:
530-
# not all to_concat elements are among our categories (or NA)
531-
532-
res = concat_compat([x._values for x in to_concat])
533-
return Index(res, name=name)
534-
else:
535-
return type(self)._simple_new(cat, name=name)

‎pandas/core/reshape/merge.py‎

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1483,7 +1483,11 @@ def _create_join_index(
14831483
mask = indexer == -1
14841484
if np.any(mask):
14851485
fill_value = na_value_for_dtype(index.dtype, compat=False)
1486-
index = index.append(Index([fill_value]))
1486+
if not index._can_hold_na:
1487+
new_index = Index([fill_value])
1488+
else:
1489+
new_index = Index([fill_value], dtype=index.dtype)
1490+
index = index.append(new_index)
14871491
if indexer is None:
14881492
return index.copy()
14891493
return index.take(indexer)

‎pandas/tests/extension/base/setitem.py‎

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.core.dtypes.common import is_hashable
5+
46
import pandas as pd
57
import pandas._testing as tm
68

@@ -310,6 +312,22 @@ def test_setitem_expand_with_extension(self, data):
310312
result.loc[:, "B"] = data
311313
tm.assert_frame_equal(result, expected)
312314

315+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
316+
# GH#41626 retain index.dtype in setitem-with-expansion
317+
if not is_hashable(data[0]):
318+
pytest.skip("Test does not apply to non-hashable data.")
319+
data = data.unique()
320+
expected = pd.DataFrame({"A": range(len(data))}, index=data)
321+
df = expected.iloc[:-1]
322+
ser = df["A"]
323+
item = data[-1]
324+
325+
df.loc[item] = len(data) - 1
326+
tm.assert_frame_equal(df, expected)
327+
328+
ser.loc[item] = len(data) - 1
329+
tm.assert_series_equal(ser, expected["A"])
330+
313331
def test_setitem_frame_invalid_length(self, data):
314332
df = pd.DataFrame({"A": [1] * len(data)})
315333
xpr = (

‎pandas/tests/extension/test_arrow.py‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,6 +1067,15 @@ def test_comp_masked_numpy(self, masked_dtype, comparison_op):
10671067
expected = pd.Series(exp, dtype=ArrowDtype(pa.bool_()))
10681068
tm.assert_series_equal(result, expected)
10691069

1070+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
1071+
pa_dtype = data.dtype.pyarrow_dtype
1072+
if pa.types.is_date(pa_dtype):
1073+
mark = pytest.mark.xfail(
1074+
reason="GH#62343 incorrectly casts to timestamp[ms][pyarrow]"
1075+
)
1076+
request.applymarker(mark)
1077+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
1078+
10701079

10711080
class TestLogicalOps:
10721081
"""Various Series and DataFrame logical ops methods."""

‎pandas/tests/extension/test_interval.py‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,13 @@ def test_EA_types(self, engine, data, request):
126126
def test_astype_str(self, data):
127127
super().test_astype_str(data)
128128

129+
@pytest.mark.xfail(
130+
reason="Test is invalid for IntervalDtype, needs to be adapted for "
131+
"this dtype with an index with index._index_as_unique."
132+
)
133+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data):
134+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
135+
129136

130137
# TODO: either belongs in tests.arrays.interval or move into base tests.
131138
def test_fillna_non_scalar_raises(data_missing):

‎pandas/tests/extension/test_masked.py‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,3 +360,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
360360
)
361361
)
362362
tm.assert_series_equal(result, expected)
363+
364+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
365+
if data.dtype.kind == "b":
366+
mark = pytest.mark.xfail(reason="GH#62344 incorrectly casts to object")
367+
request.applymarker(mark)
368+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)

‎pandas/tests/extension/test_numpy.py‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,12 @@ def test_index_from_listlike_with_dtype(self, data):
421421
def test_EA_types(self, engine, data, request):
422422
super().test_EA_types(engine, data, request)
423423

424+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(self, data, request):
425+
if isinstance(data[-1], tuple):
426+
mark = pytest.mark.xfail(reason="Unpacks tuple")
427+
request.applymarker(mark)
428+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
429+
424430

425431
class Test2DCompat(base.NDArrayBacked2DTests):
426432
pass

‎pandas/tests/extension/test_string.py‎

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,14 @@ def test_arith_series_with_array(
257257
request.applymarker(mark)
258258
super().test_arith_series_with_array(data, all_arithmetic_operators)
259259

260+
def test_loc_setitem_with_expansion_preserves_ea_index_dtype(
261+
self, data, request, using_infer_string
262+
):
263+
if not using_infer_string and data.dtype.storage == "python":
264+
mark = pytest.mark.xfail(reason="Casts to object")
265+
request.applymarker(mark)
266+
super().test_loc_setitem_with_expansion_preserves_ea_index_dtype(data)
267+
260268

261269
class Test2DCompat(base.Dim2CompatTests):
262270
@pytest.fixture(autouse=True)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /