|
32 | 32 | SpecificationError, |
33 | 33 | ) |
34 | 34 | from pandas.util._decorators import ( |
35 | | - Appender, |
36 | | - Substitution, |
37 | 35 | doc, |
38 | 36 | set_module, |
39 | 37 | ) |
|
71 | 69 | from pandas.core.groupby.groupby import ( |
72 | 70 | GroupBy, |
73 | 71 | GroupByPlot, |
74 | | - _transform_template, |
75 | 72 | ) |
76 | 73 | from pandas.core.indexes.api import ( |
77 | 74 | Index, |
@@ -675,9 +672,143 @@ def _wrap_applied_output( |
675 | 672 | """ |
676 | 673 | ) |
677 | 674 |
|
678 | | - @Substitution(klass="Series", example=__examples_series_doc) |
679 | | - @Appender(_transform_template) |
680 | 675 | def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): |
| 676 | + """ |
| 677 | + Call function producing a same-indexed Series on each group. |
| 678 | +
|
| 679 | + Returns a Series having the same indexes as the original object |
| 680 | + filled with the transformed values. |
| 681 | +
|
| 682 | + Parameters |
| 683 | + ---------- |
| 684 | + func : function, str |
| 685 | + Function to apply to each group. See the Notes section below for |
| 686 | + requirements. |
| 687 | +
|
| 688 | + Accepted inputs are: |
| 689 | +
|
| 690 | + - String |
| 691 | + - Python function |
| 692 | + - Numba JIT function with ``engine='numba'`` specified. |
| 693 | +
|
| 694 | + Only passing a single function is supported with this engine. |
| 695 | + If the ``'numba'`` engine is chosen, the function must be |
| 696 | + a user defined function with ``values`` and ``index`` as the |
| 697 | + first and second arguments respectively in the function signature. |
| 698 | + Each group's index will be passed to the user defined function |
| 699 | + and optionally available for use. |
| 700 | +
|
| 701 | + If a string is chosen, then it needs to be the name |
| 702 | + of the groupby method you want to use. |
| 703 | + *args |
| 704 | + Positional arguments to pass to func. |
| 705 | + engine : str, default None |
| 706 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 707 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 708 | + * ``None`` : Defaults to ``'cython'`` or the global setting |
| 709 | + ``compute.use_numba`` |
| 710 | +
|
| 711 | + engine_kwargs : dict, default None |
| 712 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` |
| 713 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 714 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 715 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 716 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 717 | + applied to the function. |
| 718 | +
|
| 719 | + **kwargs |
| 720 | + Keyword arguments to be passed into func. |
| 721 | +
|
| 722 | + Returns |
| 723 | + ------- |
| 724 | + Series |
| 725 | + Series with the same indexes as the original object filled |
| 726 | + with transformed values. |
| 727 | +
|
| 728 | + See Also |
| 729 | + -------- |
| 730 | + Series.groupby.apply : Apply function ``func`` group-wise and combine |
| 731 | + the results together. |
| 732 | + Series.groupby.aggregate : Aggregate using one or more operations. |
| 733 | + Series.transform : Call ``func`` on self producing a Series with the |
| 734 | + same axis shape as self. |
| 735 | +
|
| 736 | + Notes |
| 737 | + ----- |
| 738 | + Each group is endowed with the attribute 'name' in case you need to know |
| 739 | + which group you are working on. |
| 740 | +
|
| 741 | + The current implementation imposes three requirements on f: |
| 742 | +
|
| 743 | + * f must return a value that either has the same shape as the input |
| 744 | + subframe or can be broadcast to the shape of the input subframe. |
| 745 | + For example, if `f` returns a scalar it will be broadcast to have the |
| 746 | + same shape as the input subframe. |
| 747 | + * if this is a DataFrame, f must support application column-by-column |
| 748 | + in the subframe. If f also supports application to the entire subframe, |
| 749 | + then a fast path is used starting from the second chunk. |
| 750 | + * f must not mutate groups. Mutation is not supported and may |
| 751 | + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. |
| 752 | +
|
| 753 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 754 | + The group data and group index will be passed as numpy arrays to the JITed |
| 755 | + user defined function, and no alternative execution attempts will be tried. |
| 756 | +
|
| 757 | + .. versionchanged:: 1.3.0 |
| 758 | +
|
| 759 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 760 | + see the examples below. |
| 761 | +
|
| 762 | + .. versionchanged:: 2.0.0 |
| 763 | +
|
| 764 | + When using ``.transform`` on a grouped DataFrame and the |
| 765 | + transformation function returns a DataFrame, pandas now aligns the |
| 766 | + result's index with the input's index. You can call ``.to_numpy()`` |
| 767 | + on the result of the transformation function to avoid alignment. |
| 768 | +
|
| 769 | + Examples |
| 770 | + -------- |
| 771 | + >>> ser = pd.Series( |
| 772 | + ... [390.0, 350.0, 30.0, 20.0], |
| 773 | + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], |
| 774 | + ... name="Max Speed", |
| 775 | + ... ) |
| 776 | + >>> grouped = ser.groupby([1, 1, 2, 2]) |
| 777 | + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) |
| 778 | + Falcon 0.707107 |
| 779 | + Falcon -0.707107 |
| 780 | + Parrot 0.707107 |
| 781 | + Parrot -0.707107 |
| 782 | + Name: Max Speed, dtype: float64 |
| 783 | +
|
| 784 | + Broadcast result of the transformation |
| 785 | +
|
| 786 | + >>> grouped.transform(lambda x: x.max() - x.min()) |
| 787 | + Falcon 40.0 |
| 788 | + Falcon 40.0 |
| 789 | + Parrot 10.0 |
| 790 | + Parrot 10.0 |
| 791 | + Name: Max Speed, dtype: float64 |
| 792 | +
|
| 793 | + >>> grouped.transform("mean") |
| 794 | + Falcon 370.0 |
| 795 | + Falcon 370.0 |
| 796 | + Parrot 25.0 |
| 797 | + Parrot 25.0 |
| 798 | + Name: Max Speed, dtype: float64 |
| 799 | +
|
| 800 | + .. versionchanged:: 1.3.0 |
| 801 | +
|
| 802 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 803 | + for example: |
| 804 | +
|
| 805 | + >>> grouped.transform(lambda x: x.astype(int).max()) |
| 806 | + Falcon 390 |
| 807 | + Falcon 390 |
| 808 | + Parrot 30 |
| 809 | + Parrot 30 |
| 810 | + Name: Max Speed, dtype: int64 |
| 811 | + """ |
681 | 812 | return self._transform( |
682 | 813 | func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs |
683 | 814 | ) |
@@ -2298,9 +2429,154 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): |
2298 | 2429 | """ |
2299 | 2430 | ) |
2300 | 2431 |
|
2301 | | - @Substitution(klass="DataFrame", example=__examples_dataframe_doc) |
2302 | | - @Appender(_transform_template) |
2303 | 2432 | def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): |
| 2433 | + """ |
| 2434 | + Call function producing a same-indexed DataFrame on each group. |
| 2435 | +
|
| 2436 | + Returns a DataFrame having the same indexes as the original object |
| 2437 | + filled with the transformed values. |
| 2438 | +
|
| 2439 | + Parameters |
| 2440 | + ---------- |
| 2441 | + func : function, str |
| 2442 | + Function to apply to each group. See the Notes section below for |
| 2443 | + requirements. |
| 2444 | +
|
| 2445 | + Accepted inputs are: |
| 2446 | +
|
| 2447 | + - String |
| 2448 | + - Python function |
| 2449 | + - Numba JIT function with ``engine='numba'`` specified. |
| 2450 | +
|
| 2451 | + Only passing a single function is supported with this engine. |
| 2452 | + If the ``'numba'`` engine is chosen, the function must be |
| 2453 | + a user defined function with ``values`` and ``index`` as the |
| 2454 | + first and second arguments respectively in the function signature. |
| 2455 | + Each group's index will be passed to the user defined function |
| 2456 | + and optionally available for use. |
| 2457 | +
|
| 2458 | + If a string is chosen, then it needs to be the name |
| 2459 | + of the groupby method you want to use. |
| 2460 | + *args |
| 2461 | + Positional arguments to pass to func. |
| 2462 | + engine : str, default None |
| 2463 | + * ``'cython'`` : Runs the function through C-extensions from cython. |
| 2464 | + * ``'numba'`` : Runs the function through JIT compiled code from numba. |
| 2465 | + * ``None`` : Defaults to ``'cython'`` or the global setting |
| 2466 | + ``compute.use_numba`` |
| 2467 | +
|
| 2468 | + engine_kwargs : dict, default None |
| 2469 | + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` |
| 2470 | + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` |
| 2471 | + and ``parallel`` dictionary keys. The values must either be ``True`` or |
| 2472 | + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is |
| 2473 | + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be |
| 2474 | + applied to the function. |
| 2475 | +
|
| 2476 | + **kwargs |
| 2477 | + Keyword arguments to be passed into func. |
| 2478 | +
|
| 2479 | + Returns |
| 2480 | + ------- |
| 2481 | + DataFrame |
| 2482 | + DataFrame with the same indexes as the original object filled |
| 2483 | + with transformed values. |
| 2484 | +
|
| 2485 | + See Also |
| 2486 | + -------- |
| 2487 | + DataFrame.groupby.apply : Apply function ``func`` group-wise and combine |
| 2488 | + the results together. |
| 2489 | + DataFrame.groupby.aggregate : Aggregate using one or more operations. |
| 2490 | + DataFrame.transform : Call ``func`` on self producing a DataFrame with the |
| 2491 | + same axis shape as self. |
| 2492 | +
|
| 2493 | + Notes |
| 2494 | + ----- |
| 2495 | + Each group is endowed with the attribute 'name' in case you need to know |
| 2496 | + which group you are working on. |
| 2497 | +
|
| 2498 | + The current implementation imposes three requirements on f: |
| 2499 | +
|
| 2500 | + * f must return a value that either has the same shape as the input |
| 2501 | + subframe or can be broadcast to the shape of the input subframe. |
| 2502 | + For example, if `f` returns a scalar it will be broadcast to have the |
| 2503 | + same shape as the input subframe. |
| 2504 | + * if this is a DataFrame, f must support application column-by-column |
| 2505 | + in the subframe. If f also supports application to the entire subframe, |
| 2506 | + then a fast path is used starting from the second chunk. |
| 2507 | + * f must not mutate groups. Mutation is not supported and may |
| 2508 | + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. |
| 2509 | +
|
| 2510 | + When using ``engine='numba'``, there will be no "fall back" behavior internally. |
| 2511 | + The group data and group index will be passed as numpy arrays to the JITed |
| 2512 | + user defined function, and no alternative execution attempts will be tried. |
| 2513 | +
|
| 2514 | + .. versionchanged:: 1.3.0 |
| 2515 | +
|
| 2516 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 2517 | + see the examples below. |
| 2518 | +
|
| 2519 | + .. versionchanged:: 2.0.0 |
| 2520 | +
|
| 2521 | + When using ``.transform`` on a grouped DataFrame and the transformation |
| 2522 | + function returns a DataFrame, pandas now aligns the result's index |
| 2523 | + with the input's index. You can call ``.to_numpy()`` on the |
| 2524 | + result of the transformation function to avoid alignment. |
| 2525 | +
|
| 2526 | + Examples |
| 2527 | + -------- |
| 2528 | + >>> df = pd.DataFrame( |
| 2529 | + ... { |
| 2530 | + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], |
| 2531 | + ... "B": ["one", "one", "two", "three", "two", "two"], |
| 2532 | + ... "C": [1, 5, 5, 2, 5, 5], |
| 2533 | + ... "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], |
| 2534 | + ... } |
| 2535 | + ... ) |
| 2536 | + >>> grouped = df.groupby("A")[["C", "D"]] |
| 2537 | + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) |
| 2538 | + C D |
| 2539 | + 0 -1.154701 -0.577350 |
| 2540 | + 1 0.577350 0.000000 |
| 2541 | + 2 0.577350 1.154701 |
| 2542 | + 3 -1.154701 -1.000000 |
| 2543 | + 4 0.577350 -0.577350 |
| 2544 | + 5 0.577350 1.000000 |
| 2545 | +
|
| 2546 | + Broadcast result of the transformation |
| 2547 | +
|
| 2548 | + >>> grouped.transform(lambda x: x.max() - x.min()) |
| 2549 | + C D |
| 2550 | + 0 4.0 6.0 |
| 2551 | + 1 3.0 8.0 |
| 2552 | + 2 4.0 6.0 |
| 2553 | + 3 3.0 8.0 |
| 2554 | + 4 4.0 6.0 |
| 2555 | + 5 3.0 8.0 |
| 2556 | +
|
| 2557 | + >>> grouped.transform("mean") |
| 2558 | + C D |
| 2559 | + 0 3.666667 4.0 |
| 2560 | + 1 4.000000 5.0 |
| 2561 | + 2 3.666667 4.0 |
| 2562 | + 3 4.000000 5.0 |
| 2563 | + 4 3.666667 4.0 |
| 2564 | + 5 4.000000 5.0 |
| 2565 | +
|
| 2566 | + .. versionchanged:: 1.3.0 |
| 2567 | +
|
| 2568 | + The resulting dtype will reflect the return value of the passed ``func``, |
| 2569 | + for example: |
| 2570 | +
|
| 2571 | + >>> grouped.transform(lambda x: x.astype(int).max()) |
| 2572 | + C D |
| 2573 | + 0 5 8 |
| 2574 | + 1 5 9 |
| 2575 | + 2 5 8 |
| 2576 | + 3 5 9 |
| 2577 | + 4 5 8 |
| 2578 | + 5 5 9 |
| 2579 | + """ |
2304 | 2580 | return self._transform( |
2305 | 2581 | func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs |
2306 | 2582 | ) |
|
0 commit comments