Verifies that points close to each other are mapped to the same bucket and distant points are kept apart.
()
| 38 | |
| 39 | |
def test_lsh():
    """Check that LSH groups nearby points together and keeps a distant point apart."""
    num_ors = 3  # number of ORs
    points_df = pd.DataFrame(
        {"data": [[1, 2, 3], [1.02, 2.01, 3.03], [4, 5, 6]]}
    )
    points = T(points_df, format="pandas", unsafe_trusted_ids=True)

    euclidean_bucketer = generate_euclidean_lsh_bucketer(d=3, M=5, L=num_ors, A=3)
    flattened = lsh(points, euclidean_bucketer, origin_id="data_id")

    # Collect, per (bucketing, band) group, the sorted tuple of point ids that
    # landed in that group.
    grouped = flattened.groupby(flattened.bucketing, flattened.band)
    # TODO: replace apply(int, ...) with cast(int, ...) once a cast from
    # Pointer to int is available.
    ids_as_int = apply(int, flattened.data_id)
    buckets = grouped.reduce(data_ids=reducers.sorted_tuple(ids_as_int))

    buckets_pd = table_to_pandas(buckets)
    observed_groups = np.unique(buckets_pd["data_ids"])
    expected_groups = np.array([(0, 1), (2,)], dtype=object)
    # Points 0 and 1 are close together, point 2 is further away.
    assert np.array_equal(observed_groups, expected_groups)
| 57 | |
| 58 | |
| 59 | def test_lsh_bucketing(): |
nothing calls this directly
no test coverage detected