diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 4b5d2acf008a8..a61f9498d52c7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1585,6 +1585,22 @@ def map( >>> cat.map({"a": "first", "b": "second"}, na_action=None) Index(['first', 'second', nan], dtype='str') + + The mapping function is applied to categories, not to each value. It is + therefore only called once per unique category, and the result is reused for + all occurrences: + +>>> cat = pd.Categorical(["a", "a", "b"]) +>>> calls = [] +>>> def f(x): + ... calls.append(x) + ... return x.upper() +>>> result = cat.map(f) +>>> result + ['A', 'A', 'B'] + Categories (2, str): ['A', 'B'] +>>> calls + ['a', 'b'] """ assert callable(mapper) or is_dict_like(mapper) diff --git a/pandas/core/series.py b/pandas/core/series.py index 11a59f261de5c..3029296dad887 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4419,6 +4419,34 @@ def map( 2 NaN 3 I am a rabbit dtype: object + + For categorical data, the function is only applied to the categories: + +>>> s = pd.Series(list("cabaa")) +>>> s.map(print) + c + a + b + a + a + 0 None + 1 None + 2 None + 3 None + 4 None + dtype: object + +>>> s_cat = s.astype("category") +>>> s_cat.map(print) # function called once per unique category + a + b + c + 0 None + 1 None + 2 None + 3 None + 4 None + dtype: object """ if func is None: if "arg" in kwargs: