Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7f303f7

Browse files
DOC: whatsnew entry for on_bad_lines regression fix (GH#61837)
1 parent 4db84df commit 7f303f7

File tree

3 files changed

+23
-13
lines changed

3 files changed

+23
-13
lines changed

‎doc/source/whatsnew/v2.3.3.rst‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ Bug fixes
2525
- Fix bug in :meth:`Series.str.replace` using named capture groups (e.g., ``\g<name>``) with the Arrow-backed dtype would raise an error (:issue:`57636`)
2626
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
2727
with a compiled regex and custom flags (:issue:`62240`)
28+
- Fix regression in :func:`read_csv` when an ``on_bad_lines`` callable returns too many fields: now emits
29+
``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
2830

2931
.. ---------------------------------------------------------------------------
3032
.. _whatsnew_233.contributors:

‎pandas/io/parsers/base_parser.py‎

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -621,12 +621,15 @@ def _check_data_length(
621621
empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator]
622622
if len(columns) == len(data) - 1 and np.all(empty_str_or_na):
623623
return
624-
warnings.warn(
625-
"Length of header or names does not match length of data. This leads "
626-
"to a loss of data with index_col=False.",
627-
ParserWarning,
628-
stacklevel=find_stack_level(),
629-
)
624+
# Don't warn if on_bad_lines is set to handle bad lines
625+
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
626+
warnings.warn(
627+
"Length of header or names does not match length of data. "
628+
"This leads "
629+
"to a loss of data with index_col=False.",
630+
ParserWarning,
631+
stacklevel=find_stack_level(),
632+
)
630633

631634
@final
632635
def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:

‎pandas/io/parsers/python_parser.py‎

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import numpy as np
2222

2323
from pandas._libs import lib
24+
from pandas._typing import Scalar
2425
from pandas.errors import (
2526
EmptyDataError,
2627
ParserError,
@@ -77,7 +78,6 @@
7778
ArrayLike,
7879
DtypeObj,
7980
ReadCsvBuffer,
80-
Scalar,
8181
T,
8282
)
8383

@@ -1194,23 +1194,28 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
11941194
new_l = self.on_bad_lines(_content)
11951195
if new_l is not None:
11961196
# Truncate extra elements and warn.
1197+
new_l = cast(list[Scalar], new_l)
11971198
if len(new_l) > col_len:
11981199
warnings.warn(
1199-
"Header/names length != data length. "
1200-
"Extra fields dropped.",
1200+
"Length of header or names does not match length "
1201+
"of data. This leads "
1202+
"to a loss of data with index_col=False.",
12011203
ParserWarning,
12021204
stacklevel=find_stack_level(),
12031205
)
12041206
new_l = new_l[:col_len]
1205-
content.append(new_l) # pyright: ignore[reportArgumentType]
1207+
content.append(new_l)
1208+
elif self.on_bad_lines == self.BadLineHandleMethod.ERROR:
1209+
row_num = self.pos - (content_len - i + footers)
1210+
bad_lines.append((row_num, actual_len))
1211+
break
12061212
elif self.on_bad_lines in (
1207-
self.BadLineHandleMethod.ERROR,
12081213
self.BadLineHandleMethod.WARN,
1214+
self.BadLineHandleMethod.SKIP,
12091215
):
12101216
row_num = self.pos - (content_len - i + footers)
12111217
bad_lines.append((row_num, actual_len))
1212-
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
1213-
break
1218+
# For WARN and SKIP, don't append the bad content
12141219
else:
12151220
content.append(_content)
12161221
else:

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /