Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0729267

Browse files
DOC: whatsnew entry for on_bad_lines regression fix (GH#61837)
1 parent c167d06 commit 0729267

File tree

3 files changed

+20
-11
lines changed

3 files changed

+20
-11
lines changed

‎doc/source/whatsnew/v2.3.3.rst‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ Bug fixes
2424
^^^^^^^^^
2525
- Fix regression in :meth:`~Series.str.contains`, :meth:`~Series.str.match` and :meth:`~Series.str.fullmatch`
2626
with a compiled regex and custom flags (:issue:`62240`)
27+
- Fix regression in ``on_bad_lines`` callable when returning too many fields: now emits
28+
``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
2729

2830
.. ---------------------------------------------------------------------------
2931
.. _whatsnew_233.contributors:

‎pandas/io/parsers/base_parser.py‎

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -621,12 +621,15 @@ def _check_data_length(
621621
empty_str_or_na = empty_str | isna(data[-1]) # type: ignore[operator]
622622
if len(columns) == len(data) - 1 and np.all(empty_str_or_na):
623623
return
624-
warnings.warn(
625-
"Length of header or names does not match length of data. This leads "
626-
"to a loss of data with index_col=False.",
627-
ParserWarning,
628-
stacklevel=find_stack_level(),
629-
)
624+
# Warn only when on_bad_lines is ERROR; in any other mode the bad line is handled by on_bad_lines, so stay silent
625+
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
626+
warnings.warn(
627+
"Length of header or names does not match length of data. "
628+
"This leads "
629+
"to a loss of data with index_col=False.",
630+
ParserWarning,
631+
stacklevel=find_stack_level(),
632+
)
630633

631634
@final
632635
def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:

‎pandas/io/parsers/python_parser.py‎

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,21 +1196,25 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:
11961196
# Truncate extra elements and warn.
11971197
if len(new_l) > col_len:
11981198
warnings.warn(
1199-
"Header/names length != data length. "
1200-
"Extra fields dropped.",
1199+
"Length of header or names does not match length "
1200+
"of data. This leads "
1201+
"to a loss of data with index_col=False.",
12011202
ParserWarning,
12021203
stacklevel=find_stack_level(),
12031204
)
12041205
new_l = new_l[:col_len]
12051206
content.append(new_l) # pyright: ignore[reportArgumentType]
1207+
elif self.on_bad_lines == self.BadLineHandleMethod.ERROR:
1208+
row_num = self.pos - (content_len - i + footers)
1209+
bad_lines.append((row_num, actual_len))
1210+
break
12061211
elif self.on_bad_lines in (
1207-
self.BadLineHandleMethod.ERROR,
12081212
self.BadLineHandleMethod.WARN,
1213+
self.BadLineHandleMethod.SKIP,
12091214
):
12101215
row_num = self.pos - (content_len - i + footers)
12111216
bad_lines.append((row_num, actual_len))
1212-
if self.on_bad_lines == self.BadLineHandleMethod.ERROR:
1213-
break
1217+
# For WARN and SKIP, don't append the bad content
12141218
else:
12151219
content.append(_content)
12161220
else:

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /