Commit 452c7fb

authored

ENH: add value_counts to EA interface (#62254)

1 parent: bcc60a4 · commit: 452c7fb

File tree

7 files changed

+29

-59

lines changed

ci
- code_checks.sh
pandas
- core/arrays
- tests/extension
  - decimal
    - array.py
    - test_decimal.py
  - json
    - test_json.py

7 files changed

+29

-59

lines changed

`‎ci/code_checks.sh`

Lines changed: 1 addition & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,7 @@ if [[ -z "$CHECK" \|\| "$CHECK" == "docstrings" ]]; then`
`73`	`73`	`-i "pandas.Period.freq GL08" \`
`74`	`74`	`-i "pandas.Period.ordinal GL08" \`
`75`	`75`	`-i "pandas.errors.IncompatibleFrequency SA01,SS06,EX01" \`
	`76`	`+ -i "pandas.api.extensions.ExtensionArray.value_counts EX01,RT03,SA01" \`
`76`	`77`	`-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \`
`77`	`78`	`-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \`
`78`	`79`	`-i "pandas.core.resample.Resampler.quantile PR01,PR07" \`

`‎pandas/core/arrays/base.py`

Lines changed: 23 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -99,7 +99,10 @@`
`99`	`99`	`npt,`
`100`	`100`	`)`
`101`	`101`
`102`		`- from pandas import Index`
	`102`	`+ from pandas import (`
	`103`	`+ Index,`
	`104`	`+ Series,`
	`105`	`+ )`
`103`	`106`
`104`	`107`	`_extension_array_shared_docs: dict[str, str] = {}`
`105`	`108`
`@@ -1673,6 +1676,25 @@ def repeat(self, repeats: int \| Sequence[int], axis: AxisInt \| None = None) -> S`
`1673`	`1676`	`ind = np.arange(len(self)).repeat(repeats)`
`1674`	`1677`	`return self.take(ind)`
`1675`	`1678`
	`1679`	`+ def value_counts(self, dropna: bool = True) -> Series:`
	`1680`	`+ """`
	`1681`	`+ Return a Series containing counts of unique values.`
	`1682`	`+`
	`1683`	`+ Parameters`
	`1684`	`+ ----------`
	`1685`	`+ dropna : bool, default True`
	`1686`	`+ Don't include counts of NA values.`
	`1687`	`+`
	`1688`	`+ Returns`
	`1689`	`+ -------`
	`1690`	`+ Series`
	`1691`	`+ """`
	`1692`	`+ from pandas.core.algorithms import value_counts_internal as value_counts`
	`1693`	`+`
	`1694`	`+ result = value_counts(self.to_numpy(copy=False), sort=False, dropna=dropna)`
	`1695`	`+ result.index = result.index.astype(self.dtype)`
	`1696`	`+ return result`
	`1697`	`+`
`1676`	`1698`	`# ------------------------------------------------------------------------`
`1677`	`1699`	`# Indexing methods`
`1678`	`1700`	`# ------------------------------------------------------------------------`

`‎pandas/core/arrays/interval.py`

Lines changed: 0 additions & 24 deletions

Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,6 @@`
`75`	`75`	`isin,`
`76`	`76`	`take,`
`77`	`77`	`unique,`
`78`		`- value_counts_internal as value_counts,`
`79`	`78`	`)`
`80`	`79`	`from pandas.core.arrays import ArrowExtensionArray`
`81`	`80`	`from pandas.core.arrays.base import (`
`@@ -105,7 +104,6 @@`
`105`	`104`
`106`	`105`	`from pandas import (`
`107`	`106`	`Index,`
`108`		`- Series,`
`109`	`107`	`)`
`110`	`108`
`111`	`109`
`@@ -1197,28 +1195,6 @@ def _validate_setitem_value(self, value):`
`1197`	`1195`
`1198`	`1196`	`return value_left, value_right`
`1199`	`1197`
`1200`		`- def value_counts(self, dropna: bool = True) -> Series:`
`1201`		`- """`
`1202`		`- Returns a Series containing counts of each interval.`
`1203`		`-`
`1204`		`- Parameters`
`1205`		`- ----------`
`1206`		`- dropna : bool, default True`
`1207`		`- Don't include counts of NaN.`
`1208`		`-`
`1209`		`- Returns`
`1210`		`- -------`
`1211`		`- counts : Series`
`1212`		`-`
`1213`		`- See Also`
`1214`		`- --------`
`1215`		`- Series.value_counts`
`1216`		`- """`
`1217`		`- # TODO: implement this is a non-naive way!`
`1218`		`- result = value_counts(np.asarray(self), dropna=dropna)`
`1219`		`- result.index = result.index.astype(self.dtype)`
`1220`		`- return result`
`1221`		`-`
`1222`	`1198`	`# ---------------------------------------------------------------------`
`1223`	`1199`	`# Rendering Methods`
`1224`	`1200`

`‎pandas/core/arrays/string_.py`

Lines changed: 1 addition & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -1037,10 +1037,7 @@ def sum(`
`1037`	`1037`	`return self._wrap_reduction_result(axis, result)`
`1038`	`1038`
`1039`	`1039`	`def value_counts(self, dropna: bool = True) -> Series:`
`1040`		`- from pandas.core.algorithms import value_counts_internal as value_counts`
`1041`		`-`
`1042`		`- result = value_counts(self._ndarray, sort=False, dropna=dropna)`
`1043`		`- result.index = result.index.astype(self.dtype)`
	`1040`	`+ result = super().value_counts(dropna=dropna)`
`1044`	`1041`
`1045`	`1042`	`if self.dtype.na_value is libmissing.NA:`
`1046`	`1043`	`result = result.astype("Int64")`

`‎pandas/tests/extension/decimal/array.py`

Lines changed: 0 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,6 @@`
`25`	`25`	`is_scalar,`
`26`	`26`	`)`
`27`	`27`	`from pandas.core import arraylike`
`28`		`-from pandas.core.algorithms import value_counts_internal as value_counts`
`29`	`28`	`from pandas.core.arraylike import OpsMixin`
`30`	`29`	`from pandas.core.arrays import (`
`31`	`30`	`ExtensionArray,`
`@@ -291,9 +290,6 @@ def convert_values(param):`
`291`	`290`
`292`	`291`	`return np.asarray(res, dtype=bool)`
`293`	`292`
`294`		`- def value_counts(self, dropna: bool = True):`
`295`		`- return value_counts(self.to_numpy(), dropna=dropna)`
`296`		`-`
`297`	`293`	`# We override fillna here to simulate a 3rd party EA that has done so. This`
`298`	`294`	`# lets us test a 3rd-party EA that has not yet updated to include a "copy"`
`299`	`295`	`# keyword in its fillna method.`

`‎pandas/tests/extension/decimal/test_decimal.py`

Lines changed: 0 additions & 20 deletions

Original file line number	Diff line number	Diff line change
`@@ -171,26 +171,6 @@ def test_fillna_limit_series(self, data_missing):`
`171`	`171`	`):`
`172`	`172`	`super().test_fillna_limit_series(data_missing)`
`173`	`173`
`174`		`- @pytest.mark.parametrize("dropna", [True, False])`
`175`		`- def test_value_counts(self, all_data, dropna):`
`176`		`- all_data = all_data[:10]`
`177`		`- if dropna:`
`178`		`- other = np.array(all_data[~all_data.isna()])`
`179`		`- else:`
`180`		`- other = all_data`
`181`		`-`
`182`		`- vcs = pd.Series(all_data).value_counts(dropna=dropna)`
`183`		`- vcs_ex = pd.Series(other).value_counts(dropna=dropna)`
`184`		`-`
`185`		`- with decimal.localcontext() as ctx:`
`186`		`- # avoid raising when comparing Decimal("NAN") < Decimal(2)`
`187`		`- ctx.traps[decimal.InvalidOperation] = False`
`188`		`-`
`189`		`- result = vcs.sort_index()`
`190`		`- expected = vcs_ex.sort_index()`
`191`		`-`
`192`		`- tm.assert_series_equal(result, expected)`
`193`		`-`
`194`	`174`	`def test_series_repr(self, data):`
`195`	`175`	`# Overriding this base test to explicitly test that`
`196`	`176`	`# the custom _formatter is used`

`‎pandas/tests/extension/json/test_json.py`

Lines changed: 4 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -189,14 +189,12 @@ def test_ffill_limit_area(`
`189`	`189`	`data_missing, limit_area, input_ilocs, expected_ilocs`
`190`	`190`	`)`
`191`	`191`
`192`		`- @unhashable`
`193`		`- def test_value_counts(self, all_data, dropna):`
	`192`	`+ def test_value_counts(self, all_data, dropna, request):`
	`193`	`+ if len(all_data) == 100 or dropna:`
	`194`	`+ mark = pytest.mark.xfail(reason="unhashable")`
	`195`	`+ request.applymarker(mark)`
`194`	`196`	`super().test_value_counts(all_data, dropna)`
`195`	`197`
`196`		`- @unhashable`
`197`		`- def test_value_counts_with_normalize(self, data):`
`198`		`- super().test_value_counts_with_normalize(data)`
`199`		`-`
`200`	`198`	`@unhashable`
`201`	`199`	`def test_sort_values_frame(self):`
`202`	`200`	`# TODO (EA.factorize): see if _values_for_factorize allows this.`

0 commit comments

Comments

(0)

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit 452c7fb

File tree

7 files changed

7 files changed

`‎ci/code_checks.sh`

`‎pandas/core/arrays/base.py`

`‎pandas/core/arrays/interval.py`

`‎pandas/core/arrays/string_.py`

`‎pandas/tests/extension/decimal/array.py`

`‎pandas/tests/extension/decimal/test_decimal.py`

`‎pandas/tests/extension/json/test_json.py`

0 commit comments