Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/reference/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,8 @@ with a bool :class:`numpy.ndarray`.
DatetimeTZDtype.tz
PeriodDtype.freq
IntervalDtype.subtype
StringDtype.storage
StringDtype.na_value

*********
Utilities
Expand Down
5 changes: 4 additions & 1 deletion doc/source/user_guide/text.rst
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,10 @@ Differences in behavior will be primarily due to the kind of NA value.
The four :class:`StringDtype` variants
======================================

There are four :class:`StringDtype` variants that are available to users.
There are four :class:`StringDtype` variants that are available to users,
controlled by the ``storage`` and ``na_value`` parameters of :class:`StringDtype`.
At runtime, the storage and missing-value kind of a given dtype instance can be
inspected via the :attr:`StringDtype.storage` and :attr:`StringDtype.na_value` attributes.

Python storage with ``np.nan`` values
-------------------------------------
Expand Down
25 changes: 21 additions & 4 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ class StringDtype(StorageExtensionDtype):

Attributes
----------
None
storage
na_value

Methods
-------
Expand Down Expand Up @@ -149,8 +150,24 @@ def name(self) -> str: # type: ignore[override]
# follows NumPy semantics, which uses nan.
@property
def na_value(self) -> libmissing.NAType | float: # type: ignore[override]
"""
The missing value representation for this dtype.

This value indicates which missing value semantics are used by this dtype.
Returns ``np.nan`` for the default string dtype with NumPy semantics,
and ``pd.NA`` for the opt-in string dtype with pandas NA semantics.

The value is fixed when the dtype is constructed: ``__init__`` raises
``ValueError`` for an ``na_value`` that is neither ``np.nan`` nor ``pd.NA``
and otherwise stores it on the private ``_na_value`` attribute, which this
property returns unchanged.
"""
# Plain accessor: no conversion or copying, just the stored sentinel.
return self._na_value

@property
def storage(self) -> str:
"""
The storage backend for this dtype.

Can be either "pyarrow" or "python".

The value is the one assigned to the private ``_storage`` attribute by
``__init__`` (or by ``__setstate__`` when unpickling) and is returned
unchanged.
"""
# NOTE(review): only a getter is defined here, so ``dtype.storage = ...``
# presumably raises AttributeError now - confirm no caller still assigns it.
return self._storage

_metadata = ("storage", "_na_value") # type: ignore[assignment]

def __init__(
Expand Down Expand Up @@ -185,7 +202,7 @@ def __init__(
elif na_value is not libmissing.NA:
raise ValueError(f"'na_value' must be np.nan or pd.NA, got {na_value}")

self.storage = cast(str, storage)
self._storage = cast(str, storage)
self._na_value = na_value

def __repr__(self) -> str:
Expand All @@ -211,7 +228,7 @@ def __eq__(self, other: object) -> bool:

def __setstate__(self, state: MutableMapping[str, Any]) -> None:
# back-compat for pandas < 2.3, where na_value did not yet exist
self.storage = state.pop("storage", "python")
self._storage = state.pop("storage", "python")
self._na_value = state.pop("_na_value", libmissing.NA)

def __hash__(self) -> int:
Expand Down Expand Up @@ -306,7 +323,7 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
# if both python and pyarrow storage -> priority to pyarrow
storage = "pyarrow"
else:
storage = next(iter(storages)) # type: ignore[assignment]
storage = next(iter(storages))

na_value: libmissing.NAType | float
if len(na_values) == 2:
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,8 +458,8 @@ class StorageExtensionDtype(ExtensionDtype):
name: str
_metadata = ("storage",)

def __init__(self, storage: str | None = None) -> None:
self.storage = storage
def __init__(self, storage: str) -> None:
self._storage = storage

def __repr__(self) -> str:
# Rendered as "<name>[<storage>]": the dtype name followed by the storage
# backend in square brackets.
return f"{self.name}[{self.storage}]"
Expand All @@ -480,6 +480,10 @@ def __hash__(self) -> int:
def na_value(self) -> libmissing.NAType:
return libmissing.NA

@property
def storage(self) -> str:
"""
The storage backend name this dtype was constructed with.

Returns the value that ``__init__`` stored on the private ``_storage``
attribute, unchanged.
"""
return self._storage


@set_module("pandas.api.extensions")
def register_extension_dtype(cls: type_t[ExtensionDtypeT]) -> type_t[ExtensionDtypeT]:
Expand Down
Loading