Create Frequent Pattern tree Args: data_set: A list of transactions, where each transaction is a list of items. min_sup: The minimum support threshold. Items with support less than this will be pruned. Default is 1. Returns: The root of the FP-Tree.
(data_set: list, min_sup: int = 1)
| 53 | |
| 54 | |
| 55 | def create_tree(data_set: list, min_sup: int = 1) -> tuple[TreeNode, dict]: |
| 56 | """ |
| 57 | Create Frequent Pattern tree |
| 58 | |
| 59 | Args: |
| 60 | data_set: A list of transactions, where each transaction is a list of items. |
| 61 | min_sup: The minimum support threshold. |
| 62 | Items with support less than this will be pruned. Default is 1. |
| 63 | |
| 64 | Returns: |
| 65 | The root of the FP-Tree. |
| 66 | header_table: The header table dictionary with item information. |
| 67 | |
| 68 | Example: |
| 69 | >>> data_set = [ |
| 70 | ... ['A', 'B', 'C'], |
| 71 | ... ['A', 'C'], |
| 72 | ... ['A', 'B', 'E'], |
| 73 | ... ['A', 'B', 'C', 'E'], |
| 74 | ... ['B', 'E'] |
| 75 | ... ] |
| 76 | >>> min_sup = 2 |
| 77 | >>> fp_tree, header_table = create_tree(data_set, min_sup) |
| 78 | >>> fp_tree |
| 79 | TreeNode('Null Set', 1, None) |
| 80 | >>> len(header_table) |
| 81 | 4 |
| 82 | >>> header_table["A"] |
| 83 | [[4, None], TreeNode('A', 4, TreeNode('Null Set', 1, None))] |
| 84 | >>> header_table["E"][1] # doctest: +NORMALIZE_WHITESPACE |
| 85 | TreeNode('E', 1, TreeNode('B', 3, TreeNode('A', 4, TreeNode('Null Set', 1, None)))) |
| 86 | >>> sorted(header_table) |
| 87 | ['A', 'B', 'C', 'E'] |
| 88 | >>> fp_tree.name |
| 89 | 'Null Set' |
| 90 | >>> sorted(fp_tree.children) |
| 91 | ['A', 'B'] |
| 92 | >>> fp_tree.children['A'].name |
| 93 | 'A' |
| 94 | >>> sorted(fp_tree.children['A'].children) |
| 95 | ['B', 'C'] |
| 96 | """ |
| 97 | header_table: dict = {} |
| 98 | for trans in data_set: |
| 99 | for item in trans: |
| 100 | header_table[item] = header_table.get(item, [0, None]) |
| 101 | header_table[item][0] += 1 |
| 102 | |
| 103 | for k in list(header_table): |
| 104 | if header_table[k][0] < min_sup: |
| 105 | del header_table[k] |
| 106 | |
| 107 | if not (freq_item_set := set(header_table)): |
| 108 | return TreeNode("Null Set", 1, None), {} |
| 109 | |
| 110 | for key, value in header_table.items(): |
| 111 | header_table[key] = [value, None] |
| 112 |
no test coverage detected