Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f307a0a

Browse files
ENH (string dtype): convert string_view columns to future string dtype instead of object dtype in Parquet/Feather IO (#60235)
* ENH (string dtype): convert string_view columns to future string dtype instead of object dtype in Parquet IO
* move test to feather
* fixup
1 parent cccf1e6 commit f307a0a

File tree

2 files changed

+27
-2
lines changed

2 files changed

+27
-2
lines changed

‎pandas/io/_util.py‎

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import numpy as np
66

7+
from pandas.compat import pa_version_under18p0
78
from pandas.compat._optional import import_optional_dependency
89

910
import pandas as pd
@@ -35,7 +36,11 @@ def _arrow_dtype_mapping() -> dict:
3536
def arrow_string_types_mapper() -> Callable:
3637
pa = import_optional_dependency("pyarrow")
3738

38-
return {
39+
mapping= {
3940
pa.string(): pd.StringDtype(na_value=np.nan),
4041
pa.large_string(): pd.StringDtype(na_value=np.nan),
41-
}.get
42+
}
43+
if not pa_version_under18p0:
44+
mapping[pa.string_view()] = pd.StringDtype(na_value=np.nan)
45+
46+
return mapping.get

‎pandas/tests/io/test_feather.py‎

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
from pandas.compat.pyarrow import pa_version_under18p0
10+
911
import pandas as pd
1012
import pandas._testing as tm
1113

@@ -249,6 +251,24 @@ def test_string_inference(self, tmp_path):
249251
)
250252
tm.assert_frame_equal(result, expected)
251253

254+
@pytest.mark.skipif(pa_version_under18p0, reason="not supported before 18.0")
255+
def test_string_inference_string_view_type(self, tmp_path):
256+
# GH#54798
257+
import pyarrow as pa
258+
from pyarrow import feather
259+
260+
path = tmp_path / "string_view.parquet"
261+
table = pa.table({"a": pa.array([None, "b", "c"], pa.string_view())})
262+
feather.write_feather(table, path)
263+
264+
with pd.option_context("future.infer_string", True):
265+
result = read_feather(path)
266+
267+
expected = pd.DataFrame(
268+
data={"a": [None, "b", "c"]}, dtype=pd.StringDtype(na_value=np.nan)
269+
)
270+
tm.assert_frame_equal(result, expected)
271+
252272
def test_out_of_bounds_datetime_to_feather(self):
253273
# GH#47832
254274
df = pd.DataFrame(

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /