Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 4ee17b3

Browse files
jbrockmendel and jorisvandenbossche authored
BUG: String[pyarrow] comparison with mixed object (#62424)
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent e97a56e commit 4ee17b3

File tree

3 files changed

+35
-14
lines changed

3 files changed

+35
-14
lines changed

‎doc/source/whatsnew/v2.3.3.rst‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Bug fixes
4747
- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
4848
with a compiled regex and custom flags (:issue:`62240`)
4949
- Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
50-
50+
- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
5151

5252
Improvements and fixes for Copy-on-Write
5353
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

‎pandas/core/arrays/arrow/array.py‎

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -883,22 +883,27 @@ def _cmp_method(self, other, op) -> ArrowExtensionArray:
883883
ltype = self._pa_array.type
884884

885885
if isinstance(other, (ExtensionArray, np.ndarray, list)):
886-
boxed = self._box_pa(other)
887-
rtype = boxed.type
888-
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
889-
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
890-
):
891-
# GH#62157 match non-pyarrow behavior
892-
result = ops.invalid_comparison(self, other, op)
893-
result = pa.array(result, type=pa.bool_())
886+
try:
887+
boxed = self._box_pa(other)
888+
except pa.lib.ArrowInvalid:
889+
# e.g. GH#60228 [1, "b"] we have to operate pointwise
890+
res_values = [op(x, y) for x, y in zip(self, other)]
891+
result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
894892
else:
895-
try:
896-
result = pc_func(self._pa_array, boxed)
897-
except pa.ArrowNotImplementedError:
898-
# TODO: could this be wrong if other is object dtype?
899-
# in which case we need to operate pointwise?
893+
rtype = boxed.type
894+
if (pa.types.is_timestamp(ltype) and pa.types.is_date(rtype)) or (
895+
pa.types.is_timestamp(rtype) and pa.types.is_date(ltype)
896+
):
897+
# GH#62157 match non-pyarrow behavior
900898
result = ops.invalid_comparison(self, other, op)
901899
result = pa.array(result, type=pa.bool_())
900+
else:
901+
try:
902+
result = pc_func(self._pa_array, boxed)
903+
except pa.ArrowNotImplementedError:
904+
result = ops.invalid_comparison(self, other, op)
905+
result = pa.array(result, type=pa.bool_())
906+
902907
elif is_scalar(other):
903908
if (isinstance(other, datetime) and pa.types.is_date(ltype)) or (
904909
type(other) is date and pa.types.is_timestamp(ltype)

‎pandas/tests/extension/test_string.py‎

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
288288
)
289289
with pytest.raises(ValueError, match=msg):
290290
arr.searchsorted(b)
291+
292+
293+
def test_mixed_object_comparison(dtype):
294+
# GH#60228
295+
ser = pd.Series(["a", "b"], dtype=dtype)
296+
297+
mixed = pd.Series([1, "b"], dtype=object)
298+
299+
result = ser == mixed
300+
expected = pd.Series([False, True], dtype=bool)
301+
if dtype.storage == "python" and dtype.na_value is pd.NA:
302+
expected = expected.astype("boolean")
303+
elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
304+
expected = expected.astype("bool[pyarrow]")
305+
306+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /