pandas-dev
diff --git a/‎pandas/_typing.py‎
Lines changed: 1 addition & 1 deletion b/‎pandas/_typing.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/array_algos/quantile.py‎
Lines changed: 1 addition & 1 deletion b/‎pandas/core/array_algos/quantile.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/arrays/_mixins.py‎
Lines changed: 2 additions & 5 deletions b/‎pandas/core/arrays/_mixins.py‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎pandas/core/arrays/arrow/_arrow_utils.py‎
Lines changed: 1 addition & 1 deletion b/‎pandas/core/arrays/arrow/_arrow_utils.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/arrays/datetimes.py‎
Lines changed: 1 addition & 1 deletion b/‎pandas/core/arrays/datetimes.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/nanops.py‎
Lines changed: 1 addition & 1 deletion b/‎pandas/core/nanops.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/reshape/encoding.py‎
Lines changed: 175 additions & 51 deletions b/‎pandas/core/reshape/encoding.py‎
Lines changed: 175 additions & 51 deletions
@@ -84,7 +84,7 @@
     # numpy compatible types
     NumpyValueArrayLike: TypeAlias = ScalarLike_co | npt.ArrayLike
     # Name "npt._ArrayLikeInt_co" is not defined  [name-defined]
-    NumpySorter: TypeAlias = npt._ArrayLikeInt_co | None  # type: ignore[name-defined]
+    NumpySorter: TypeAlias = npt._ArrayLikeInt_co | None
 
 
 P = ParamSpec("P")
 
@@ -102,7 +102,7 @@ def quantile_with_mask(
             interpolation=interpolation,
         )
 
-        result = np.asarray(result)  # type: ignore[assignment]
+        result = np.asarray(result)
         result = result.T
 
     return result
 
@@ -146,11 +146,8 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
 
             dt64_values = arr.view(dtype)
             return DatetimeArray._simple_new(dt64_values, dtype=dtype)
-        elif lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
-            from pandas.core.arrays import TimedeltaArray
-
-            td64_values = arr.view(dtype)
-            return TimedeltaArray._simple_new(td64_values, dtype=dtype)
+        elif isinstance(dtype, ExtensionDtype):
+            raise NotImplementedError(f"view not implemented for {dtype}")
         return arr.view(dtype=dtype)
 
     def take(
 
@@ -44,7 +44,7 @@ def pyarrow_array_to_numpy_and_mask(
         mask = pyarrow.BooleanArray.from_buffers(
             pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset
         )
-        mask = np.asarray(mask)  # type: ignore[assignment]
+        mask = np.asarray(mask)
     else:
         mask = np.ones(len(arr), dtype=bool)
     return data, mask
@@ -804,7 +804,7 @@ def _add_offset(self, offset: BaseOffset) -> Self:
         try:
             res_values = offset._apply_array(values._ndarray)
             if res_values.dtype.kind == "i":
-                res_values = res_values.view(values.dtype)
+                res_values = res_values.view(values._ndarray.dtype)
         except NotImplementedError:
             if get_option("performance_warnings"):
                 warnings.warn(
 
@@ -898,7 +898,7 @@ def _get_counts_nanvar(
             # error: Incompatible types in assignment (expression has type
             # "float", variable has type "Union[floating[Any], ndarray[Any,
             # dtype[floating[Any]]]]")
-            count = np.nan  # type: ignore[assignment]
+            count = np.nan
             d = np.nan
     else:
         # count is not narrowed by is_float check
 
@@ -51,53 +51,117 @@ def get_dummies(
     """
     Convert categorical variable into dummy/indicator variables.
 
-    Each variable is converted in as many 0/1 variables as there are different
-    values. Columns in the output are each named after a value; if the input is
-    a DataFrame, the name of the original variable is prepended to the value.
+    This function converts categorical data into binary (0/1) data, also known as
+    one-hot encoding or dummy variables. It's commonly used in statistical modeling
+    and machine learning. For more details, see the :ref:`reshaping` section in
+    the user guide.
 
     Parameters
     ----------
     data : array-like, Series, or DataFrame
-        Data of which to get dummy indicators.
+        Data to encode. If a DataFrame, can work on multiple columns.
     prefix : str, list of str, or dict of str, default None
-        A string to be prepended to DataFrame column names.
-        Pass a list with length equal to the number of columns
-        when calling get_dummies on a DataFrame. Alternatively, `prefix`
-        can be a dictionary mapping column names to prefixes.
+        String to prepend to column names.
+        * If a string, the same prefix is used for all columns
+        * If a list, it should have the same length as the number of columns
+        * If a dict, maps column names to prefixes
+        * If None, no prefix is used
     prefix_sep : str, list of str, or dict of str, default '_'
-        Should you choose to prepend DataFrame column names with a prefix, this
-        is the separator/delimiter to use between the two. Alternatively,
-        `prefix_sep` can be a list with length equal to the number of columns,
-        or a dictionary mapping column names to separators.
+        Separator between prefix and dummy column names.
+        * If a string, the same separator is used for all columns
+        * If a list, should have same length as number of columns
+        * If a dict, maps column names to separators
     dummy_na : bool, default False
-        If True, a NaN indicator column will be added even if no NaN values are present.
-        If False, NA values are encoded as all zero.
+        Add a column to indicate NaN values:
+        * If True, creates NA column even if no NaN values present
+        * If False, NA values are encoded as all zeros
     columns : list-like, default None
-        Column names in the DataFrame to be encoded.
-        If `columns` is None then all the columns with
-        `object`, `string`, or `category` dtype will be converted.
+        Which columns to encode:
+        * If None, encodes all object, string, and category columns
+        * If list-like, encodes only specified columns
     sparse : bool, default False
-        Whether the dummy-encoded columns should be backed by
-        a :class:`SparseArray` (True) or a regular NumPy array (False).
+        If True, return SparseArray (save memory for many zeros)
+        If False, return regular NumPy array
     drop_first : bool, default False
-        Whether to get k-1 dummies out of k categorical levels by removing the
-        first level.
+        Whether to drop the first category level:
+        * If True, drops first level (avoid collinearity in models)
+        * If False, keeps all levels
     dtype : dtype, default bool
-        Data type for new columns. Only a single dtype is allowed.
+        Data type for dummy columns. Must be a single dtype.
 
     Returns
     -------
     DataFrame
-        Dummy-coded data. If `data` contains other columns than the
-        dummy-coded one(s), these will be prepended, unaltered, to the result.
+        Dummy-coded data:
+        * Each categorical level becomes a new column of 1s and 0s
+        * Original non-encoded columns are included unchanged
+        * Each new column name combines the original column name,
+          prefix_sep, and the encoded level
 
     See Also
     --------
-    Series.str.get_dummies : Convert Series of strings to dummy codes.
-    :func:`~pandas.from_dummies` : Convert dummy codes to categorical ``DataFrame``.
+    Series.str.get_dummies : Convert string Series to dummy codes.
+    from_dummies : Convert dummy codes back to categorical DataFrame.
+    DataFrame.astype : Convert dtypes of DataFrame columns.
+    pandas.Categorical : Represent categorical data.
 
     Notes
     -----
+    * Dummy variables are commonly used in statistical models and machine
+      learning to convert categorical data into a format suitable for 
+      numerical processing.
+    * The `drop_first` option is useful for avoiding the "dummy variable trap"
+      where perfectly correlated dummy variables can cause problems in 
+      regression models.
+    * For sparse matrices, using `sparse=True` can significantly reduce 
+      memory usage when data has many zeros.
+
+    Examples
+    --------
+    Basic usage for a single column:
+
+    >>> s = pd.Series(list('abca'))
+    >>> pd.get_dummies(s)
+       a  b  c
+    0  1  0  0
+    1  0  1  0
+    2  0  0  1
+    3  1  0  0
+
+    With NaN values:
+
+    >>> s = pd.Series(list('abcaa'))
+    >>> s.loc[3] = np.nan
+    >>> pd.get_dummies(s, dummy_na=True)
+       a  b  c  NaN
+    0  1  0  0    0
+    1  0  1  0    0
+    2  0  0  1    0
+    3  0  0  0    1
+    4  1  0  0    0
+
+    With `drop_first=True`:
+
+    >>> pd.get_dummies(s, drop_first=True)
+       b  c
+    0  0  0
+    1  1  0
+    2  0  1
+    3  0  0
+    4  0  0
+
+    With DataFrame input and custom prefixes:
+
+    >>> df = pd.DataFrame({
+    ...     'A': ['a', 'b', 'a'],
+    ...     'B': ['b', 'a', 'c'],
+    ...     'C': [1, 2, 3]
+    ... })
+    >>> pd.get_dummies(df, prefix=['col1', 'col2'])
+       C  col1_a  col1_b  col2_a  col2_b  col2_c
+    0  1       1       0       0       1       0
+    1  2       0       1       1       0       0
+    2  3       1       0       0       0       1
     Reference :ref:`the user guide <reshaping.dummies>` for more examples.
 
     Examples
@@ -372,48 +436,108 @@ def from_dummies(
     """
     Create a categorical ``DataFrame`` from a ``DataFrame`` of dummy variables.
 
-    Inverts the operation performed by :func:`~pandas.get_dummies`.
+    This function converts dummy/indicator variables (typically 0s and 1s) back into
+    categorical variables, essentially inverting :func:`~pandas.get_dummies`. For
+    more details, see the :ref:`reshaping` section in the user guide.
 
     .. versionadded:: 1.5.0
 
     Parameters
     ----------
     data : DataFrame
-        Data which contains dummy-coded variables in form of integer columns of
-        1's and 0's.
+        DataFrame containing dummy-coded variables (columns of 0s and 1s).
+        Each group of dummy columns represents one original categorical variable.
     sep : str, default None
-        Separator used in the column names of the dummy categories they are
-        character indicating the separation of the categorical names from the prefixes.
-        For example, if your column names are 'prefix_A' and 'prefix_B',
-        you can strip the underscore by specifying sep='_'.
+        Separator used in the dummy column names between the prefix and category.
+        For example, if columns are 'color_red', 'color_blue', use sep='_' to
+        identify 'color' as the original variable name and ['red', 'blue'] as
+        the categories.
     default_category : None, Hashable or dict of Hashables, default None
-        The default category is the implied category when a value has none of the
-        listed categories specified with a one, i.e. if all dummies in a row are
-        zero. Can be a single value for all variables or a dict directly mapping
-        the default categories to a prefix of a variable. The default category
-        will be coerced to the dtype of ``data.columns`` if such coercion is
-        lossless, and will raise otherwise.
+        Category to use for rows where all dummy values are 0:
+        * If None, raises error when a row has all zeros
+        * If Hashable, uses this value for all variables
+        * If dict, maps each variable prefix to its default category
+        Value will be coerced to match column dtype if possible.
 
     Returns
     -------
     DataFrame
-        Categorical data decoded from the dummy input-data.
+        A DataFrame with categorical columns decoded from dummy variables.
+        Each group of dummy columns is converted back to a single
+        categorical column.
+
+    See Also
+    --------
+    get_dummies : Convert categorical variable(s) to dummy variables.
+    Categorical : Pandas Categorical type for categorical data.
+    DataFrame.astype : Convert DataFrame columns to different types.
+
+    Notes
+    -----
+    * The function assumes each group of dummy columns represents one
+      original categorical variable
+    * Column names must follow the pattern: prefix + sep + category
+    * Each row should have at most one 1 in each group of dummies
+      (unless using default_category)
+    * NA values are not allowed in the dummy columns
+
+    Examples
+    --------
+    Convert dummy columns back to a single categorical column:
+
+    >>> df = pd.DataFrame({
+    ...     'color_red': [1, 0, 0],
+    ...     'color_blue': [0, 1, 0],
+    ...     'color_green': [0, 0, 1],
+    ...     'size': [1, 2, 3]
+    ... })
+    >>> pd.from_dummies(df, sep='_')
+       size   color
+    0     1     red
+    1     2    blue
+    2     3   green
+
+    With a default category for rows of all zeros:
+
+    >>> df = pd.DataFrame({
+    ...     'color_red': [1, 0, 0, 0],
+    ...     'color_blue': [0, 1, 0, 0],
+    ...     'color_green': [0, 0, 1, 0]
+    ... })
+    >>> pd.from_dummies(df, sep='_', default_category='unknown')
+         color
+    0      red
+    1     blue
+    2    green
+    3  unknown
+
+    With different defaults for different variables:
+
+    >>> df = pd.DataFrame({
+    ...     'color_red': [1, 0, 0],
+    ...     'color_blue': [0, 0, 0],
+    ...     'size_S': [0, 1, 0],
+    ...     'size_M': [0, 0, 0]
+    ... })
+    >>> defaults = {'color': 'unknown', 'size': 'L'}
+    >>> pd.from_dummies(df, sep='_', default_category=defaults)
+         color size
+    0      red    L
+    1  unknown    S
+    2  unknown    L
 
     Raises
     ------
     ValueError
-        * When the input ``DataFrame`` ``data`` contains NA values.
-        * When the input ``DataFrame`` ``data`` contains column names with separators
-          that do not match the separator specified with ``sep``.
-        * When a ``dict`` passed to ``default_category`` does not include an implied
-          category for each prefix.
-        * When a value in ``data`` has more than one category assigned to it.
-        * When ``default_category=None`` and a value in ``data`` has no category
-          assigned to it.
+        * When input contains NA values
+        * When column names don't match the sep pattern
+        * When default_category dict is missing categories
+        * When a row has multiple 1s in one dummy group
+        * When a row has all 0s and no default_category
     TypeError
-        * When the input ``data`` is not of type ``DataFrame``.
-        * When the input ``DataFrame`` ``data`` contains non-dummy data.
-        * When the passed ``sep`` is of a wrong data type.
+        * When input is not a DataFrame
+        * When columns don't contain dummy data
+        * When sep is not a string
         * When the passed ``default_category`` is of a wrong data type.
 
     See Also
Original file line number	Diff line number	Diff line change
`@@ -102,7 +102,7 @@ def quantile_with_mask(`
`102`	`102`	`interpolation=interpolation,`
`103`	`103`	`)`
`104`	`104`
`105`		`- result = np.asarray(result) # type: ignore[assignment]`
	`105`	`+ result = np.asarray(result)`
`106`	`106`	`result = result.T`
`107`	`107`
`108`	`108`	`return result`
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ def pyarrow_array_to_numpy_and_mask(`
`44`	`44`	`mask = pyarrow.BooleanArray.from_buffers(`
`45`	`45`	`pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset`
`46`	`46`	`)`
`47`		`- mask = np.asarray(mask) # type: ignore[assignment]`
	`47`	`+ mask = np.asarray(mask)`
`48`	`48`	`else:`
`49`	`49`	`mask = np.ones(len(arr), dtype=bool)`
`50`	`50`	`return data, mask`