(obj, b)
| 26 | |
| 27 | |
def compute_hll_array(obj, b):
    """Build a dense per-bucket array of maximum first-bit values for ``obj``.

    The values of ``obj`` are hashed with ``hash_pandas_object`` (index
    excluded), cast to 32-bit unsigned integers, and grouped by the bucket
    id obtained from shifting each hash right by ``32 - b`` bits. For each
    bucket the maximum of ``compute_first_bit`` over its hashes is kept.

    NOTE(review): the name suggests these are HyperLogLog registers, with
    ``compute_first_bit`` giving the position of the first nonzero bit --
    confirm against that helper's definition.

    Parameters
    ----------
    obj
        Object accepted by ``hash_pandas_object``.
    b
        Number of bits used for the bucket id; must satisfy ``8 <= b <= 16``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of length ``1 << b``; buckets that received no
        hash hold 0.

    Raises
    ------
    ValueError
        If ``b`` is not between 8 and 16 (inclusive).
    """
    # b is the number of bits
    if not 8 <= b <= 16:
        raise ValueError("b should be between 8 and 16")
    shift = 32 - b
    num_buckets = 1 << b

    # Hash the input and unwrap to the underlying array when a Series
    # comes back, then narrow to 32-bit unsigned integers
    hashed = hash_pandas_object(obj, index=False)
    raw = hashed._values if isinstance(hashed, pd.Series) else hashed
    hash_values = raw.astype(np.uint32)

    # Bucket id comes from the high bits of each hash; the first-bit value
    # is computed from the full hash
    bucket_ids = hash_values >> shift
    first_bits = compute_first_bit(hash_values)

    # Let pandas take the per-bucket maximum
    frame = pd.DataFrame({"j": bucket_ids, "first_bit": first_bits})
    bucket_maxima = frame.groupby("j").max()
    observed = bucket_maxima["first_bit"]

    # Densify over every possible bucket so results from separate calls
    # can be concatenated and handled uniformly
    dense = observed.reindex(np.arange(num_buckets), fill_value=0)
    return dense.values.astype(np.uint8)
| 53 | |
| 54 | |
| 55 | def reduce_state(Ms, b): |
nothing calls this directly
no test coverage detected
searching dependent graphs…