Apply LSH bucketer for each row and flatten the table.
lsh(data: pw.Table, bucketer, origin_id="origin_id", include_data=True) -> pw.Table
| 80 | |
| 81 | |
def lsh(data: pw.Table, bucketer, origin_id="origin_id", include_data=True) -> pw.Table:
    """Bucket every row of ``data`` with ``bucketer`` and emit one row per bucket.

    Args:
        data: Table exposing a ``data`` column; ``bucketer`` is called on each
            of its values.
        bucketer: Callable returning an iterable of buckets for a single value.
        origin_id: Name of the output column that points back at the source row.
        include_data: When true, the source row's ``data`` value is attached to
            every flattened row.

    Returns:
        The flattened table holding the ``origin_id`` column, the ``bucketing``
        and ``band`` columns and, if requested, ``data``.
    """

    # Deliberately un-annotated, like the lambda it stands in for.
    def _indexed_buckets(value):
        return list(enumerate(bucketer(value)))

    bucketed = data.select(buckets=pw.apply(_indexed_buckets, data.data))
    flattened = bucketed.flatten(pw.this.buckets, origin_id=origin_id)

    origin_only = flattened.select(flattened[origin_id])
    # Presumably splits each (index, bucket) pair into the two named columns,
    # in that order -- confirm against unpack_col.
    pair_columns = unpack_col(
        flattened.buckets,
        pw.this.bucketing,
        pw.this.band,
    )
    result = origin_only + pair_columns

    if not include_data:
        return result

    # Fetch the original payload through the back-reference to the source row.
    result += result.select(
        data.ix(result[origin_id]).data,
    )
    return result