Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 86d45aa

Browse files
DOC: whatsnew entry for on_bad_lines regression fix (GH#61837)
1 parent b7d555a commit 86d45aa

File tree

3 files changed

+21
-25
lines changed

3 files changed

+21
-25
lines changed

‎doc/source/whatsnew/v3.0.0.rst‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,8 @@ MultiIndex
10031003
I/O
10041004
^^^
10051005
- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
1006+
- Bug in :func:`read_csv` when an ``on_bad_lines`` callable returns too many fields: it now emits a
1007+
``ParserWarning`` and truncates the extra fields regardless of ``index_col`` (:issue:`61837`)
10061008
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
10071009
- Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
10081010
- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)

‎pandas/io/parsers/python_parser.py‎

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import numpy as np
2222

2323
from pandas._libs import lib
24+
from pandas._typing import Scalar
2425
from pandas.errors import (
2526
EmptyDataError,
2627
ParserError,
@@ -77,7 +78,6 @@
7778
ArrayLike,
7879
DtypeObj,
7980
ReadCsvBuffer,
80-
Scalar,
8181
T,
8282
)
8383

@@ -954,7 +954,9 @@ def _alert_malformed(self, msg: str, row_num: int) -> None:
954954
"""
955955
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
956956
raise ParserError(msg)
957-
if self.on_bad_lines == self.BadLineHandleMethod.WARN:
957+
if self.on_bad_lines == self.BadLineHandleMethod.WARN or callable(
958+
self.on_bad_lines
959+
):
958960
warnings.warn(
959961
f"Skipping line {row_num}: {msg}\n",
960962
ParserWarning,
@@ -1193,34 +1195,31 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
11931195
if callable(self.on_bad_lines):
11941196
new_l = self.on_bad_lines(_content)
11951197
if new_l is not None:
1196-
# Truncate extra elements and warn.
1198+
new_l=cast(list[Scalar], new_l)
11971199
if len(new_l) > col_len:
1198-
warnings.warn(
1199-
"Header/names length != data length. "
1200-
"Extra fields dropped.",
1201-
ParserWarning,
1202-
stacklevel=find_stack_level(),
1203-
)
1200+
row_num = self.pos - (content_len - i + footers)
1201+
bad_lines.append((row_num, len(new_l), "callable"))
12041202
new_l = new_l[:col_len]
1205-
content.append(new_l) # pyright: ignore[reportArgumentType]
1203+
content.append(new_l)
1204+
12061205
elif self.on_bad_lines in (
12071206
self.BadLineHandleMethod.ERROR,
12081207
self.BadLineHandleMethod.WARN,
12091208
):
12101209
row_num = self.pos - (content_len - i + footers)
1211-
bad_lines.append((row_num, actual_len))
1210+
bad_lines.append((row_num, actual_len, "normal"))
12121211
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
12131212
break
1214-
else:
1215-
content.append(_content)
12161213
else:
12171214
content.append(_content)
12181215

1219-
for row_num, actual_len in bad_lines:
1216+
for row_num, actual_len, source in bad_lines:
12201217
msg = (
12211218
f"Expected {col_len} fields in line {row_num + 1}, saw {actual_len}"
12221219
)
1223-
if (
1220+
if source == "callable":
1221+
msg += " from bad_lines callable"
1222+
elif (
12241223
self.delimiter
12251224
and len(self.delimiter) > 1
12261225
and self.quoting != csv.QUOTE_NONE

‎pandas/tests/io/parser/test_python_parser_only.py‎

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ def test_on_bad_lines_callable_not_expected_length(python_parser_only):
432432
bad_sio = StringIO(data)
433433

434434
result = parser.read_csv_check_warnings(
435-
ParserWarning, "Length of header or names", bad_sio, on_bad_lines=lambda x: x
435+
ParserWarning, "from bad_lines callable", bad_sio, on_bad_lines=lambda x: x
436436
)
437437
expected = DataFrame({"a": [1, 2, 3], "b": [2, 3, 4]})
438438
tm.assert_frame_equal(result, expected)
@@ -568,21 +568,16 @@ def test_no_thousand_convert_for_non_numeric_cols(python_parser_only, dtype, exp
568568
def test_on_bad_lines_callable_warns_and_truncates_with_index_col(
569569
python_parser_only, index_col
570570
):
571-
"""
572-
GH#61837 regression: callable on_bad_lines returning extra fields must emit a
573-
ParserWarning and drop extras regardless of index_col. [2][3]
574-
"""
571+
# GH#61837
575572
parser = python_parser_only
576573
data = "id,field_1,field_2\n101,A,B\n102,C,D,E\n103,F,G\n"
577574

578575
def fixer(bad_line):
579-
# Over-return to trigger truncation + warning
580576
return list(bad_line) + ["EXTRA1", "EXTRA2"]
581577

582-
# Assert ParserWarning is emitted using module helper
583-
df = parser.read_csv_check_warnings(
578+
result = parser.read_csv_check_warnings(
584579
ParserWarning,
585-
"Length of header or names",
580+
"from bad_lines callable",
586581
StringIO(data),
587582
on_bad_lines=fixer,
588583
index_col=index_col,
@@ -602,4 +597,4 @@ def fixer(bad_line):
602597
index=Index([101, 102, 103], name="id"),
603598
)
604599

605-
tm.assert_frame_equal(df, expected)
600+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /