Mine the FP-Tree recursively to discover frequent itemsets. Args: in_tree: The FP-Tree to mine. header_table: The header table dictionary with item information. min_sup: The minimum support threshold. pre_fix: A set of items as a prefix for the itemsets bein
(
in_tree: TreeNode, # noqa: ARG001
header_table: dict,
min_sup: int,
pre_fix: set,
freq_item_list: list,
)
| 277 | |
| 278 | |
| 279 | def mine_tree( |
| 280 | in_tree: TreeNode, # noqa: ARG001 |
| 281 | header_table: dict, |
| 282 | min_sup: int, |
| 283 | pre_fix: set, |
| 284 | freq_item_list: list, |
| 285 | ) -> None: |
| 286 | """ |
| 287 | Mine the FP-Tree recursively to discover frequent itemsets. |
| 288 | |
| 289 | Args: |
| 290 | in_tree: The FP-Tree to mine. |
| 291 | header_table: The header table dictionary with item information. |
| 292 | min_sup: The minimum support threshold. |
| 293 | pre_fix: A set of items as a prefix for the itemsets being mined. |
| 294 | freq_item_list: A list to store the frequent itemsets. |
| 295 | |
| 296 | Example: |
| 297 | >>> data_set = [ |
| 298 | ... ['A', 'B', 'C'], |
| 299 | ... ['A', 'C'], |
| 300 | ... ['A', 'B', 'E'], |
| 301 | ... ['A', 'B', 'C', 'E'], |
| 302 | ... ['B', 'E'] |
| 303 | ... ] |
| 304 | >>> min_sup = 2 |
| 305 | >>> fp_tree, header_table = create_tree(data_set, min_sup) |
| 306 | >>> fp_tree |
| 307 | TreeNode('Null Set', 1, None) |
| 308 | >>> frequent_itemsets = [] |
| 309 | >>> mine_tree(fp_tree, header_table, min_sup, set([]), frequent_itemsets) |
| 310 | >>> expe_itm = [{'C'}, {'C', 'A'}, {'E'}, {'A', 'E'}, {'E', 'B'}, {'A'}, {'B'}] |
| 311 | >>> all(expected in frequent_itemsets for expected in expe_itm) |
| 312 | True |
| 313 | """ |
| 314 | sorted_items = sorted(header_table.items(), key=lambda item_info: item_info[1][0]) |
| 315 | big_l = [item[0] for item in sorted_items] |
| 316 | for base_pat in big_l: |
| 317 | new_freq_set = pre_fix.copy() |
| 318 | new_freq_set.add(base_pat) |
| 319 | freq_item_list.append(new_freq_set) |
| 320 | cond_patt_bases = find_prefix_path(base_pat, header_table[base_pat][1]) |
| 321 | my_cond_tree, my_head = create_tree(list(cond_patt_bases), min_sup) |
| 322 | if my_head is not None: |
| 323 | # Pass header_table[base_pat][1] as node_to_test to update_header |
| 324 | header_table[base_pat][1] = update_header( |
| 325 | header_table[base_pat][1], my_cond_tree |
| 326 | ) |
| 327 | mine_tree(my_cond_tree, my_head, min_sup, new_freq_set, freq_item_list) |
| 328 | |
| 329 | |
| 330 | if __name__ == "__main__": |
no test coverage detected