Skip to content

Commit d66a960

Browse files
DOC: add storage and na_value to StringDtype reference page (#63104)
1 parent 1c08a93 commit d66a960

File tree

5 files changed

+51
-7
lines changed

5 files changed

+51
-7
lines changed

‎ci/code_checks.sh‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
7979
-i "pandas.api.typing.DataFrameGroupBy.plot PR02" \
8080
-i "pandas.api.typing.SeriesGroupBy.plot PR02" \
8181
-i "pandas.api.typing.Resampler.quantile PR01,PR07" \
82+
-i "pandas.StringDtype.storage SA01" \
83+
-i "pandas.StringDtype.na_value SA01" \
8284
-i "pandas.tseries.offsets.BDay PR02,SA01" \
8385
-i "pandas.tseries.offsets.BHalfYearBegin.is_on_offset GL08" \
8486
-i "pandas.tseries.offsets.BHalfYearBegin.n GL08" \

‎doc/source/reference/arrays.rst‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,8 @@ with a bool :class:`numpy.ndarray`.
637637
DatetimeTZDtype.tz
638638
PeriodDtype.freq
639639
IntervalDtype.subtype
640+
StringDtype.storage
641+
StringDtype.na_value
640642

641643
*********
642644
Utilities

‎doc/source/user_guide/text.rst‎

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,10 @@ Differences in behavior will be primarily due to the kind of NA value.
753753
The four :class:`StringDtype` variants
754754
======================================
755755

756-
There are four :class:`StringDtype` variants that are available to users.
756+
There are four :class:`StringDtype` variants that are available to users,
757+
controlled by the ``storage`` and ``na_value`` parameters of :class:`StringDtype`.
758+
At runtime, these can be checked via the :attr:`StringDtype.storage`
759+
and :attr:`StringDtype.na_value` attributes.
757760

758761
Python storage with ``np.nan`` values
759762
-------------------------------------

‎pandas/core/arrays/string_.py‎

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ class StringDtype(StorageExtensionDtype):
119119
120120
Attributes
121121
----------
122-
None
122+
storage
123+
na_value
123124
124125
Methods
125126
-------
@@ -149,8 +150,40 @@ def name(self) -> str: # type: ignore[override]
149150
# follows NumPy semantics, which uses nan.
150151
@property
151152
def na_value(self) -> libmissing.NAType | float:  # type: ignore[override]
153+
"""
154+
The missing value representation for this dtype.
155+
156+
This value indicates which missing value semantics are used by this dtype.
157+
Returns ``np.nan`` for the default string dtype with NumPy semantics,
158+
and ``pd.NA`` for the opt-in string dtype with pandas NA semantics.
159+
160+
Examples
161+
--------
162+
>>> ser = pd.Series(["a", "b"])
163+
>>> ser.dtype
164+
<StringDtype(na_value=nan)>
165+
>>> ser.dtype.na_value
166+
nan
167+
"""
152168
return self._na_value
153169

170+
@property
171+
def storage(self) -> str:
172+
"""
173+
The storage backend for this dtype.
174+
175+
Can be either "pyarrow" or "python".
176+
177+
Examples
178+
--------
179+
>>> ser = pd.Series(["a", "b"])
180+
>>> ser.dtype
181+
<StringDtype(na_value=nan)>
182+
>>> ser.dtype.storage
183+
'pyarrow'
184+
"""
185+
return self._storage
186+
154187
_metadata = ("storage", "_na_value")  # type: ignore[assignment]
155188

156189
def __init__(
@@ -185,7 +218,7 @@ def __init__(
185218
elif na_value is not libmissing.NA:
186219
raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}")
187220

188-
self.storage = cast(str, storage)
221+
self._storage = cast(str, storage)
189222
self._na_value = na_value
190223

191224
def __repr__(self) -> str:
@@ -211,7 +244,7 @@ def __eq__(self, other: object) -> bool:
211244

212245
def __setstate__(self, state: MutableMapping[str, Any]) -> None:
213246
# back-compat for pandas < 2.3, where na_value did not yet exist
214-
self.storage = state.pop("storage", "python")
247+
self._storage = state.pop("storage", "python")
215248
self._na_value = state.pop("_na_value", libmissing.NA)
216249

217250
def __hash__(self) -> int:
@@ -306,7 +339,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
306339
# if both python and pyarrow storage -> priority to pyarrow
307340
storage = "pyarrow"
308341
else:
309-
storage = next(iter(storages))  # type: ignore[assignment]
342+
storage = next(iter(storages))
310343

311344
na_value: libmissing.NAType | float
312345
if len(na_values) == 2:

‎pandas/core/dtypes/base.py‎

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,8 @@ class StorageExtensionDtype(ExtensionDtype):
457457
name: str
458458
_metadata = ("storage",)
459459

460-
def __init__(self, storage: str | None = None) -> None:
461-
self.storage = storage
460+
def __init__(self, storage: str) -> None:
461+
self._storage = storage
462462

463463
def __repr__(self) -> str:
464464
return f"{self.name}[{self.storage}]"
@@ -479,6 +479,10 @@ def __hash__(self) -> int:
479479
def na_value(self) -> libmissing.NAType:
480480
return libmissing.NA
481481

482+
@property
483+
def storage(self) -> str:
484+
return self._storage
485+
482486

483487
@set_module("pandas.api.extensions")
484488
def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:

0 commit comments

Comments
(0)