Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a160ae6

Browse files
jorisvandenbossche and jbrockmendel authored
[backport 2.3.x] BUG[string]: incorrect index downcast in DataFrame.join (#61771) (#61800)
* BUG[string]: incorrect index downcast in DataFrame.join (#61771)

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>

* avoid warning

---------

Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>
1 parent cf5db68 commit a160ae6

File tree

3 files changed

+10
-16
lines changed

3 files changed

+10
-16
lines changed

‎doc/source/whatsnew/v2.3.1.rst‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ correctly, rather than defaulting to ``object`` dtype. For example:
5757
Bug fixes
5858
^^^^^^^^^
5959
- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
60+
- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`)
6061
- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
6162
- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
6263
- Fixed bug in unpickling objects pickled in pandas versions pre-2.3.0 that used :class:`StringDtype` (:issue:`61763`).

‎pandas/core/reshape/merge.py‎

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,13 +1076,13 @@ def _maybe_add_join_keys(
10761076
# if we have an all missing left_indexer
10771077
# make sure to just use the right values or vice-versa
10781078
if left_indexer is not None and (left_indexer == -1).all():
1079-
key_col = Index(rvals)
1079+
key_col = Index(rvals, dtype=rvals.dtype, copy=False)
10801080
result_dtype = rvals.dtype
10811081
elif right_indexer is not None and (right_indexer == -1).all():
1082-
key_col = Index(lvals)
1082+
key_col = Index(lvals, dtype=lvals.dtype, copy=False)
10831083
result_dtype = lvals.dtype
10841084
else:
1085-
key_col = Index(lvals)
1085+
key_col = Index(lvals, dtype=lvals.dtype, copy=False)
10861086
if left_indexer is not None:
10871087
mask_left = left_indexer == -1
10881088
key_col = key_col.where(~mask_left, rvals)
@@ -1112,7 +1112,8 @@ def _maybe_add_join_keys(
11121112

11131113
result.set_index(idx_list, inplace=True)
11141114
else:
1115-
result.index = Index(key_col, name=name)
1115+
key_col.name = name
1116+
result.index = key_col
11161117
else:
11171118
result.insert(i, name or f"key_{i}", key_col)
11181119

‎pandas/tests/copy_view/test_functions.py‎

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
6-
from pandas.compat import HAS_PYARROW
7-
84
from pandas import (
95
DataFrame,
106
Index,
@@ -317,13 +313,9 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
317313
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
318314

319315

320-
@pytest.mark.xfail(
321-
using_string_dtype() and HAS_PYARROW,
322-
reason="TODO(infer_string); result.index infers str dtype while both "
323-
"df1 and df2 index are object.",
324-
)
325-
def test_join_on_key(using_copy_on_write):
326-
df_index = Index(["a", "b", "c"], name="key", dtype=object)
316+
@pytest.mark.parametrize("dtype", [object, "str"])
317+
def test_join_on_key(dtype, using_copy_on_write):
318+
df_index = Index(["a", "b", "c"], name="key", dtype=dtype)
327319

328320
df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
329321
df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
@@ -336,7 +328,7 @@ def test_join_on_key(using_copy_on_write):
336328
if using_copy_on_write:
337329
assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
338330
assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
339-
assert np.shares_memory(get_array(result.index), get_array(df1.index))
331+
assert tm.shares_memory(get_array(result.index), get_array(df1.index))
340332
assert not np.shares_memory(get_array(result.index), get_array(df2.index))
341333
else:
342334
assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /