@@ -903,8 +903,246 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
903903 result .index = default_index (len (result ))
904904 return result
905905
906- @doc (Series .describe )
def describe(self, percentiles=None, include=None, exclude=None) -> Series:
    """
    Generate descriptive statistics.

    Descriptive statistics include those that summarize the central
    tendency, dispersion and shape of a
    dataset's distribution, excluding ``NaN`` values.

    Analyzes both numeric and object series, as well
    as ``DataFrame`` column sets of mixed data types. The output
    will vary depending on what is provided. Refer to the notes
    below for more detail.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should
        fall between 0 and 1. The default, ``None``, will automatically
        return the 25th, 50th, and 75th percentiles.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored
        for ``Series``. Here are the options:

        - 'all' : All columns of the input will be included in the output.
        - A list-like of dtypes : Limits the results to the
          provided data types.
          To limit the result to numeric types submit
          ``numpy.number``. To limit it instead to object columns submit
          the builtin ``object`` type. Strings
          can also be used in the style of
          ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
          select pandas categorical columns, use ``'category'``
        - None (default) : The result will include all numeric columns.
    exclude : list-like of dtypes or None (default), optional
        A black list of data types to omit from the result. Ignored
        for ``Series``. Here are the options:

        - A list-like of dtypes : Excludes the provided data types
          from the result. To exclude numeric types submit
          ``numpy.number``. To exclude object columns submit the builtin
          ``object`` type. Strings can also be used in the style of
          ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
          exclude pandas categorical columns, use ``'category'``
        - None (default) : The result will exclude nothing.

    Returns
    -------
    Series or DataFrame
        Summary statistics of the Series or DataFrame provided.

    See Also
    --------
    DataFrame.count: Count number of non-NA/null observations.
    DataFrame.max: Maximum of the values in the object.
    DataFrame.min: Minimum of the values in the object.
    DataFrame.mean: Mean of the values.
    DataFrame.std: Standard deviation of the observations.
    DataFrame.select_dtypes: Subset of a DataFrame including/excluding
        columns based on their dtype.

    Notes
    -----
    For numeric data, the result's index will include ``count``,
    ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
    upper percentiles. By default the lower percentile is ``25`` and the
    upper percentile is ``75``. The ``50`` percentile is the
    same as the median.

    For object data (e.g. strings), the result's index
    will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
    is the most common value. The ``freq`` is the most common value's
    frequency.

    If multiple object values have the highest count, then the
    ``top`` result will be arbitrarily chosen from
    among those with the highest count.

    For mixed data types provided via a ``DataFrame``, the default is to
    return only an analysis of numeric columns. If the DataFrame consists
    only of object and categorical data without any numeric columns, the
    default is to return an analysis of both the object and categorical
    columns. If ``include='all'`` is provided as an option, the result
    will include a union of attributes of each type.

    The `include` and `exclude` parameters can be used to limit
    which columns in a ``DataFrame`` are analyzed for the output.
    The parameters are ignored when analyzing a ``Series``.

    Examples
    --------
    Describing a numeric ``Series``.

    >>> s = pd.Series([1, 2, 3])
    >>> s.describe()
    count    3.0
    mean     2.0
    std      1.0
    min      1.0
    25%      1.5
    50%      2.0
    75%      2.5
    max      3.0
    dtype: float64

    Describing a ``Series`` of strings.

    >>> s = pd.Series(["a", "a", "b", "c"])
    >>> s.describe()
    count     4
    unique    3
    top       a
    freq      2
    dtype: object

    Describing a timestamp ``Series``.

    >>> s = pd.Series(
    ...     [
    ...         np.datetime64("2000-01-01"),
    ...         np.datetime64("2010-01-01"),
    ...         np.datetime64("2010-01-01"),
    ...     ]
    ... )
    >>> s.describe()
    count                      3
    mean     2006-09-01 08:00:00
    min      2000-01-01 00:00:00
    25%      2004-12-31 12:00:00
    50%      2010-01-01 00:00:00
    75%      2010-01-01 00:00:00
    max      2010-01-01 00:00:00
    dtype: object

    Describing a ``DataFrame``. By default only numeric fields
    are returned.

    >>> df = pd.DataFrame(
    ...     {
    ...         "categorical": pd.Categorical(["d", "e", "f"]),
    ...         "numeric": [1, 2, 3],
    ...         "object": ["a", "b", "c"],
    ...     }
    ... )
    >>> df.describe()
           numeric
    count      3.0
    mean       2.0
    std        1.0
    min        1.0
    25%        1.5
    50%        2.0
    75%        2.5
    max        3.0

    Describing all columns of a ``DataFrame`` regardless of data type.

    >>> df.describe(include="all")  # doctest: +SKIP
           categorical  numeric object
    count            3      3.0      3
    unique           3      NaN      3
    top              f      NaN      a
    freq             1      NaN      1
    mean           NaN      2.0    NaN
    std            NaN      1.0    NaN
    min            NaN      1.0    NaN
    25%            NaN      1.5    NaN
    50%            NaN      2.0    NaN
    75%            NaN      2.5    NaN
    max            NaN      3.0    NaN

    Describing a column from a ``DataFrame`` by accessing it as
    an attribute.

    >>> df.numeric.describe()
    count    3.0
    mean     2.0
    std      1.0
    min      1.0
    25%      1.5
    50%      2.0
    75%      2.5
    max      3.0
    Name: numeric, dtype: float64

    Including only numeric columns in a ``DataFrame`` description.

    >>> df.describe(include=[np.number])
           numeric
    count      3.0
    mean       2.0
    std        1.0
    min        1.0
    25%        1.5
    50%        2.0
    75%        2.5
    max        3.0

    Including only string columns in a ``DataFrame`` description.

    >>> df.describe(include=[object])  # doctest: +SKIP
           object
    count       3
    unique      3
    top         a
    freq        1

    Including only categorical columns from a ``DataFrame`` description.

    >>> df.describe(include=["category"])
           categorical
    count            3
    unique           3
    top              d
    freq             1

    Excluding numeric columns from a ``DataFrame`` description.

    >>> df.describe(exclude=[np.number])  # doctest: +SKIP
           categorical object
    count            3      3
    unique           3      3
    top              f      a
    freq             1      1

    Excluding object columns from a ``DataFrame`` description.

    >>> df.describe(exclude=[object])  # doctest: +SKIP
           categorical  numeric
    count            3      3.0
    unique           3      NaN
    top              f      NaN
    freq             1      NaN
    mean           NaN      2.0
    std            NaN      1.0
    min            NaN      1.0
    25%            NaN      1.5
    50%            NaN      2.0
    75%            NaN      2.5
    max            NaN      3.0
    """
    # NOTE(review): the annotation says ``Series`` while the docstring
    # documents "Series or DataFrame" -- confirm which one the parent
    # implementation actually returns before tightening either.
    # The override only supplies the docstring; all work is delegated
    # unchanged to the parent class implementation.
    return super().describe(
        percentiles=percentiles, include=include, exclude=exclude
    )
0 commit comments