Return a list of ``(result_column, func, input_column)`` tuples. Spec can be - a function - a list of functions - a dictionary that maps input-columns to functions - a dictionary that maps input-columns to lists of functions - a dictionary that maps input-columns to dictionaries that map output-columns to functions.
(spec, non_group_columns)
| 611 | |
| 612 | |
| 613 | def _normalize_spec(spec, non_group_columns): |
| 614 | """ |
| 615 | Return a list of ``(result_column, func, input_column)`` tuples. |
| 616 | |
| 617 | Spec can be |
| 618 | |
| 619 | - a function |
| 620 | - a list of functions |
| 621 | - a dictionary that maps input-columns to functions |
| 622 | - a dictionary that maps input-columns to a lists of functions |
| 623 | - a dictionary that maps input-columns to a dictionaries that map |
| 624 | output-columns to functions. |
| 625 | |
| 626 | The non-group columns are a list of all column names that are not used in |
| 627 | the groupby operation. |
| 628 | |
| 629 | Usually, the result columns are multi-level names, returned as tuples. |
| 630 | If only a single function is supplied or dictionary mapping columns |
| 631 | to single functions, simple names are returned as strings (see the first |
| 632 | two examples below). |
| 633 | |
| 634 | Examples |
| 635 | -------- |
| 636 | >>> _normalize_spec('mean', ['a', 'b', 'c']) |
| 637 | [('a', 'mean', 'a'), ('b', 'mean', 'b'), ('c', 'mean', 'c')] |
| 638 | |
| 639 | >>> spec = collections.OrderedDict([('a', 'mean'), ('b', 'count')]) |
| 640 | >>> _normalize_spec(spec, ['a', 'b', 'c']) |
| 641 | [('a', 'mean', 'a'), ('b', 'count', 'b')] |
| 642 | |
| 643 | >>> _normalize_spec(['var', 'mean'], ['a', 'b', 'c']) |
| 644 | ... # doctest: +NORMALIZE_WHITESPACE |
| 645 | [(('a', 'var'), 'var', 'a'), (('a', 'mean'), 'mean', 'a'), \ |
| 646 | (('b', 'var'), 'var', 'b'), (('b', 'mean'), 'mean', 'b'), \ |
| 647 | (('c', 'var'), 'var', 'c'), (('c', 'mean'), 'mean', 'c')] |
| 648 | |
| 649 | >>> spec = collections.OrderedDict([('a', 'mean'), ('b', ['sum', 'count'])]) |
| 650 | >>> _normalize_spec(spec, ['a', 'b', 'c']) |
| 651 | ... # doctest: +NORMALIZE_WHITESPACE |
| 652 | [(('a', 'mean'), 'mean', 'a'), (('b', 'sum'), 'sum', 'b'), \ |
| 653 | (('b', 'count'), 'count', 'b')] |
| 654 | |
| 655 | >>> spec = collections.OrderedDict() |
| 656 | >>> spec['a'] = ['mean', 'size'] |
| 657 | >>> spec['b'] = collections.OrderedDict([('e', 'count'), ('f', 'var')]) |
| 658 | >>> _normalize_spec(spec, ['a', 'b', 'c']) |
| 659 | ... # doctest: +NORMALIZE_WHITESPACE |
| 660 | [(('a', 'mean'), 'mean', 'a'), (('a', 'size'), 'size', 'a'), \ |
| 661 | (('b', 'e'), 'count', 'b'), (('b', 'f'), 'var', 'b')] |
| 662 | """ |
| 663 | if not isinstance(spec, dict): |
| 664 | spec = collections.OrderedDict(zip(non_group_columns, it.repeat(spec))) |
| 665 | |
| 666 | res = [] |
| 667 | |
| 668 | if isinstance(spec, dict): |
| 669 | for input_column, subspec in spec.items(): |
| 670 | if isinstance(subspec, dict): |