Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2b25842

Browse files
String dtype: keep select_dtypes(include=object) selecting string columns (#62323)
1 parent e817930 commit 2b25842

File tree

4 files changed

+31
-13
lines changed

4 files changed

+31
-13
lines changed

‎doc/source/whatsnew/v2.3.3.rst‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,16 @@ Most changes in this release are related to :class:`StringDtype` which will
1818
become the default string dtype in pandas 3.0. See
1919
:ref:`whatsnew_230.upcoming_changes` for more details.
2020

21+
.. _whatsnew_233.string_fixes.improvements:
22+
23+
Improvements
24+
^^^^^^^^^^^^
25+
- Update :meth:`DataFrame.select_dtypes` to keep selecting ``str`` columns when
26+
specifying ``include=["object"]`` for backwards compatibility. In a future
27+
release, this will be deprecated and code for pandas 3+ should be updated to
28+
use ``include=["str"]`` (:issue:`61916`)
29+
30+
2131
.. _whatsnew_233.string_fixes.bugs:
2232

2333
Bug fixes

‎pandas/core/dtypes/cast.py‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,9 @@ def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
865865
np.dtype("<U").type, # type: ignore[arg-type]
866866
}
867867
if non_string_dtypes != dtype_set:
868-
raise TypeError("string dtypes are not allowed, use 'object' instead")
868+
raise TypeError(
869+
"numpy string dtypes are not allowed, use 'str' or 'object' instead"
870+
)
869871

870872

871873
def coerce_indexer_dtype(indexer, categories) -> np.ndarray:

‎pandas/core/frame.py‎

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5150,10 +5150,14 @@ def check_int_infer_dtype(dtypes):
51505150
def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
51515151
# GH 46870: BooleanDtype._is_numeric == True but should be excluded
51525152
dtype = dtype if not isinstance(dtype, ArrowDtype) else dtype.numpy_dtype
5153-
return issubclass(dtype.type, tuple(dtypes_set)) or (
5154-
np.number in dtypes_set
5155-
and getattr(dtype, "_is_numeric", False)
5156-
and not is_bool_dtype(dtype)
5153+
return (
5154+
issubclass(dtype.type, tuple(dtypes_set))
5155+
or (
5156+
np.number in dtypes_set
5157+
and getattr(dtype, "_is_numeric", False)
5158+
and not is_bool_dtype(dtype)
5159+
)
5160+
or (dtype.type is str and np.object_ in dtypes_set)
51575161
)
51585162

51595163
def predicate(arr: ArrayLike) -> bool:

‎pandas/tests/frame/methods/test_select_dtypes.py‎

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,10 @@ def test_select_dtypes_include_using_list_like(self, using_infer_string):
102102
ri = df.select_dtypes(include=[str])
103103
tm.assert_frame_equal(ri, ei)
104104

105+
ri = df.select_dtypes(include=["object"])
106+
ei = df[["a"]]
107+
tm.assert_frame_equal(ri, ei)
108+
105109
def test_select_dtypes_exclude_using_list_like(self):
106110
df = DataFrame(
107111
{
@@ -309,17 +313,15 @@ def test_select_dtypes_not_an_attr_but_still_valid_dtype(self, using_infer_strin
309313
df["g"] = df.f.diff()
310314
assert not hasattr(np, "u8")
311315
r = df.select_dtypes(include=["i8", "O"], exclude=["timedelta"])
312-
if using_infer_string:
313-
e = df[["b"]]
314-
else:
315-
e = df[["a", "b"]]
316+
# if using_infer_string:
317+
# TODO warn
318+
e = df[["a", "b"]]
316319
tm.assert_frame_equal(r, e)
317320

318321
r = df.select_dtypes(include=["i8", "O", "timedelta64[ns]"])
319-
if using_infer_string:
320-
e = df[["b", "g"]]
321-
else:
322-
e = df[["a", "b", "g"]]
322+
# if using_infer_string:
323+
# TODO warn
324+
e = df[["a", "b", "g"]]
323325
tm.assert_frame_equal(r, e)
324326

325327
def test_select_dtypes_empty(self):

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /