Commit 0729267

committed

DOC: whatsnew entry for on_bad_lines regression fix (GH#61837)

1 parent c167d06 commit 0729267 · Copy full SHA for 0729267

File tree

+20

-11

lines changed

+20

-11

lines changed

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,8 @@ Bug fixes`
`24`	`24`	`^^^^^^^^^`
`25`	`25`	- Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
`26`	`26`	with a compiled regex and custom flags (:issue:`62240`)
	`27`	+- Fix regression in :func:`read_csv` when an ``on_bad_lines`` callable returns too many fields:
	`28`	+ a ``ParserWarning`` is now emitted and the extra fields are truncated regardless of ``index_col`` (:issue:`61837`)
`27`	`29`
`28`	`30`	`.. ---------------------------------------------------------------------------`
`29`	`31`	`.. _whatsnew_233.contributors:`

Lines changed: 9 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -621,12 +621,15 @@ def _check_data_length(`
`621`	`621`	`empty_str_or_na = empty_str \| isna(data[-1]) # type: ignore[operator]`
`622`	`622`	`if len(columns) == len(data) - 1 and np.all(empty_str_or_na):`
`623`	`623`	`return`
`624`		`- warnings.warn(`
`625`		`- "Length of header or names does not match length of data. This leads "`
`626`		`- "to a loss of data with index_col=False.",`
`627`		`- ParserWarning,`
`628`		`- stacklevel=find_stack_level(),`
`629`		`- )`
	`624`	`+ # Don't warn if on_bad_lines is set to handle bad lines`
	`625`	`+ if self.on_bad_lines == self.BadLineHandleMethod.ERROR:`
	`626`	`+ warnings.warn(`
	`627`	`+ "Length of header or names does not match length of data. "`
	`628`	`+ "This leads "`
	`629`	`+ "to a loss of data with index_col=False.",`
	`630`	`+ ParserWarning,`
	`631`	`+ stacklevel=find_stack_level(),`
	`632`	`+ )`
`630`	`633`
`631`	`634`	`@final`
`632`	`635`	`def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT:`

Lines changed: 9 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -1196,21 +1196,25 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]:`
`1196`	`1196`	`# Truncate extra elements and warn.`
`1197`	`1197`	`if len(new_l) > col_len:`
`1198`	`1198`	`warnings.warn(`
`1199`		`- "Header/names length != data length. "`
`1200`		`- "Extra fields dropped.",`
	`1199`	`+ "Length of header or names does not match length "`
	`1200`	`+ "of data. This leads "`
	`1201`	`+ "to a loss of data with index_col=False.",`
`1201`	`1202`	`ParserWarning,`
`1202`	`1203`	`stacklevel=find_stack_level(),`
`1203`	`1204`	`)`
`1204`	`1205`	`new_l = new_l[:col_len]`
`1205`	`1206`	`content.append(new_l) # pyright: ignore[reportArgumentType]`
	`1207`	`+ elif self.on_bad_lines == self.BadLineHandleMethod.ERROR:`
	`1208`	`+ row_num = self.pos - (content_len - i + footers)`
	`1209`	`+ bad_lines.append((row_num, actual_len))`
	`1210`	`+ break`
`1206`	`1211`	`elif self.on_bad_lines in (`
`1207`		`- self.BadLineHandleMethod.ERROR,`
`1208`	`1212`	`self.BadLineHandleMethod.WARN,`
	`1213`	`+ self.BadLineHandleMethod.SKIP,`
`1209`	`1214`	`):`
`1210`	`1215`	`row_num = self.pos - (content_len - i + footers)`
`1211`	`1216`	`bad_lines.append((row_num, actual_len))`
`1212`		`- if self.on_bad_lines == self.BadLineHandleMethod.ERROR:`
`1213`		`- break`
	`1217`	`+ # For WARN and SKIP, don't append the bad content`
`1214`	`1218`	`else:`
`1215`	`1219`	`content.append(_content)`
`1216`	`1220`	`else:`

Comments

(0)