Commit a160ae6

jorisvandenbossche and jbrockmendel authored

[backport 2.3.x] BUG[string]: incorrect index downcast in DataFrame.join (#61771) (#61800)

* BUG[string]: incorrect index downcast in DataFrame.join (#61771)

  Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>

* avoid warning

---------

Co-authored-by: jbrockmendel <jbrockmendel@gmail.com>

1 parent cf5db68, commit a160ae6 (Copy full SHA for a160ae6)

File tree

3 files changed

+10

-16

lines changed

doc/source/whatsnew
- v2.3.1.rst
pandas
- core/reshape
  - merge.py
- tests/copy_view
  - test_functions.py

3 files changed

+10

-16

lines changed

`‎doc/source/whatsnew/v2.3.1.rst‎`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
@@ -57,6 +57,7 @@ correctly, rather than defaulting to ``object`` dtype. For example:
`57`	`57`	`Bug fixes`
`58`	`58`	`^^^^^^^^^`
`59`	`59`	- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.DataFrameGroupBy.max`, :meth:`.Resampler.min`, :meth:`.Resampler.max` where all NA values of string dtype would return float instead of string dtype (:issue:`60810`)
	`60`	+- Bug in :meth:`DataFrame.join` incorrectly downcasting object-dtype indexes (:issue:`61771`)
`60`	`61`	- Bug in :meth:`DataFrame.sum` with ``axis=1``, :meth:`.DataFrameGroupBy.sum` or :meth:`.SeriesGroupBy.sum` with ``skipna=True``, and :meth:`.Resampler.sum` with all NA values of :class:`StringDtype` resulted in ``0`` instead of the empty string ``""`` (:issue:`60229`)
`61`	`62`	- Fixed bug in :meth:`DataFrame.explode` and :meth:`Series.explode` where methods would fail with ``dtype="str"`` (:issue:`61623`)
`62`	`63`	- Fixed bug in unpickling objects pickled in pandas versions pre-2.3.0 that used :class:`StringDtype` (:issue:`61763`).

`‎pandas/core/reshape/merge.py‎`

Lines changed: 5 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -1076,13 +1076,13 @@ def _maybe_add_join_keys(`
`1076`	`1076`	`# if we have an all missing left_indexer`
`1077`	`1077`	`# make sure to just use the right values or vice-versa`
`1078`	`1078`	`if left_indexer is not None and (left_indexer == -1).all():`
`1079`		`- key_col = Index(rvals)`
	`1079`	`+ key_col = Index(rvals, dtype=rvals.dtype, copy=False)`
`1080`	`1080`	`result_dtype = rvals.dtype`
`1081`	`1081`	`elif right_indexer is not None and (right_indexer == -1).all():`
`1082`		`- key_col = Index(lvals)`
	`1082`	`+ key_col = Index(lvals, dtype=lvals.dtype, copy=False)`
`1083`	`1083`	`result_dtype = lvals.dtype`
`1084`	`1084`	`else:`
`1085`		`- key_col = Index(lvals)`
	`1085`	`+ key_col = Index(lvals, dtype=lvals.dtype, copy=False)`
`1086`	`1086`	`if left_indexer is not None:`
`1087`	`1087`	`mask_left = left_indexer == -1`
`1088`	`1088`	`key_col = key_col.where(~mask_left, rvals)`
`@@ -1112,7 +1112,8 @@ def _maybe_add_join_keys(`
`1112`	`1112`
`1113`	`1113`	`result.set_index(idx_list, inplace=True)`
`1114`	`1114`	`else:`
`1115`		`- result.index = Index(key_col, name=name)`
	`1115`	`+ key_col.name = name`
	`1116`	`+ result.index = key_col`
`1116`	`1117`	`else:`
`1117`	`1118`	`result.insert(i, name or f"key_{i}", key_col)`
`1118`	`1119`

`‎pandas/tests/copy_view/test_functions.py‎`

Lines changed: 4 additions & 12 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,6 @@`
`1`	`1`	`import numpy as np`
`2`	`2`	`import pytest`
`3`	`3`
`4`		`-from pandas._config import using_string_dtype`
`5`		`-`
`6`		`-from pandas.compat import HAS_PYARROW`
`7`		`-`
`8`	`4`	`from pandas import (`
`9`	`5`	`DataFrame,`
`10`	`6`	`Index,`
`@@ -317,13 +313,9 @@ def test_merge_copy_keyword(using_copy_on_write, copy):`
`317`	`313`	`assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))`
`318`	`314`
`319`	`315`
`320`		`-@pytest.mark.xfail(`
`321`		`- using_string_dtype() and HAS_PYARROW,`
`322`		`- reason="TODO(infer_string); result.index infers str dtype while both "`
`323`		`- "df1 and df2 index are object.",`
`324`		`-)`
`325`		`-def test_join_on_key(using_copy_on_write):`
`326`		`- df_index = Index(["a", "b", "c"], name="key", dtype=object)`
	`316`	`+@pytest.mark.parametrize("dtype", [object, "str"])`
	`317`	`+def test_join_on_key(dtype, using_copy_on_write):`
	`318`	`+ df_index = Index(["a", "b", "c"], name="key", dtype=dtype)`
`327`	`319`
`328`	`320`	`df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))`
`329`	`321`	`df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))`
`@@ -336,7 +328,7 @@ def test_join_on_key(using_copy_on_write):`
`336`	`328`	`if using_copy_on_write:`
`337`	`329`	`assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))`
`338`	`330`	`assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))`
`339`		`- assert np.shares_memory(get_array(result.index), get_array(df1.index))`
	`331`	`+ assert tm.shares_memory(get_array(result.index), get_array(df1.index))`
`340`	`332`	`assert not np.shares_memory(get_array(result.index), get_array(df2.index))`
`341`	`333`	`else:`
`342`	`334`	`assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit a160ae6

File tree

3 files changed

3 files changed

`‎doc/source/whatsnew/v2.3.1.rst‎`

`‎pandas/core/reshape/merge.py‎`

`‎pandas/tests/copy_view/test_functions.py‎`

0 commit comments