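Groups table by columns from args. Note: Usually followed by `.reduce()` that aggregates the result and returns a table. Args: args: columns to group by. id: if provided, the column used to set the ids of the rows of the result. sort_by: if provided, column values are used as sorting keys for particular reducers. instance: optional argument describing partitioning of the data into separate instances.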
(
self,
*args: expr.ColumnReference,
id: expr.ColumnReference | None = None,
sort_by: expr.ColumnReference | None = None,
_filter_out_results_of_forgetting: bool = False,
instance: expr.ColumnReference | None = None,
_skip_errors: bool = True,
_is_window: bool = False,
)
| 1190 | @arg_handler(handler=groupby_handler) |
| 1191 | @check_arg_types |
| 1192 | def groupby( |
| 1193 | self, |
| 1194 | *args: expr.ColumnReference, |
| 1195 | id: expr.ColumnReference | None = None, |
| 1196 | sort_by: expr.ColumnReference | None = None, |
| 1197 | _filter_out_results_of_forgetting: bool = False, |
| 1198 | instance: expr.ColumnReference | None = None, |
| 1199 | _skip_errors: bool = True, |
| 1200 | _is_window: bool = False, |
| 1201 | ) -> groupbys.GroupedTable: |
| 1202 | """Groups table by columns from args. |
| 1203 | |
| 1204 | Note: |
| 1205 | Usually followed by `.reduce()` that aggregates the result and returns a table. |
| 1206 | |
| 1207 | Args: |
| 1208 | args: columns to group by. |
| 1209 | id: if provided, is the column used to set id's of the rows of the result |
| 1210 | sort_by: if provided, column values are used as sorting keys for particular reducers |
| 1211 | instance: optional argument describing partitioning of the data into separate instances |
| 1212 | |
| 1213 | Returns: |
| 1214 | GroupedTable: Groupby object. |
| 1215 | |
| 1216 | Example: |
| 1217 | |
| 1218 | >>> import pathway as pw |
| 1219 | >>> t1 = pw.debug.table_from_markdown(''' |
| 1220 | ... age | owner | pet |
| 1221 | ... 10 | Alice | dog |
| 1222 | ... 9 | Bob | dog |
| 1223 | ... 8 | Alice | cat |
| 1224 | ... 7 | Bob | dog |
| 1225 | ... ''') |
| 1226 | >>> t2 = t1.groupby(t1.pet, t1.owner).reduce(t1.owner, t1.pet, ageagg=pw.reducers.sum(t1.age)) |
| 1227 | >>> pw.debug.compute_and_print(t2, include_id=False) |
| 1228 | owner | pet | ageagg |
| 1229 | Alice | cat | 8 |
| 1230 | Alice | dog | 10 |
| 1231 | Bob | dog | 16 |
| 1232 | """ |
| 1233 | if instance is not None: |
| 1234 | args = (*args, instance) |
| 1235 | if id is not None: |
| 1236 | if len(args) == 0: |
| 1237 | args = (id,) |
| 1238 | elif len(args) > 1: |
| 1239 | raise ValueError( |
| 1240 | "Table.groupby() cannot have id argument when grouping by multiple columns." |
| 1241 | ) |
| 1242 | elif args[0]._column != id._column: |
| 1243 | raise ValueError( |
| 1244 | "Table.groupby() received id argument and is grouped by a single column," |
| 1245 | + " but the arguments are not equal.\n" |
| 1246 | + "Consider using <table>.groupby(id=...), skipping the positional argument." |
| 1247 | ) |
| 1248 | |
| 1249 | for arg in args: |