(file, seqlen)
| 17 | import collections |
| 18 | |
| 19 | def parse(file, seqlen): |
| 20 | # example: |
| 21 | # pc = 00, sp = 0, curpos = 0, curchar = 0000000a ..., PushBacktrack, label: e4 |
| 22 | rx = re.compile( |
| 23 | r'pc = (?P<pc>[0-9a-f]+), sp = (?P<sp>\d+), ' |
| 24 | r'curpos = (?P<curpos>\d+), curchar = (?P<char_hex>[0-9a-f]+) ' |
| 25 | r'(:?\.|\()(?P<char>\.|\w)(:?\.|\)), (?P<bc>\w+), .*') |
| 26 | total = 0 |
| 27 | |
| 28 | # stats[len][seq_tuple] = {'count': 0, 'loops': 0} |
| 29 | # Using tuple as key for easier rotation |
| 30 | stats = [{} for _ in range(seqlen)] |
| 31 | |
| 32 | # last[len] = deque of (bc, pc_int) |
| 33 | last = [collections.deque(maxlen=i + 1) for i in range(seqlen)] |
| 34 | |
| 35 | with open(file) as f: |
| 36 | l = f.readline() |
| 37 | while l: |
| 38 | l = l.strip() |
| 39 | if l.startswith("Start bytecode interpreter"): |
| 40 | for i in range(seqlen): |
| 41 | last[i].clear() |
| 42 | |
| 43 | match = rx.search(l) |
| 44 | if match: |
| 45 | total += 1 |
| 46 | bc = match.group('bc') |
| 47 | pc = int(match.group('pc'), 16) |
| 48 | |
| 49 | current_node = (bc, pc) |
| 50 | |
| 51 | for i in range(seqlen): |
| 52 | deque = last[i] |
| 53 | |
| 54 | if len(deque) == i + 1: |
| 55 | # Canonicalize based on PC: find the rotation that starts with the |
| 56 | # lowest PC. This aligns loops to their header. |
| 57 | min_pc = float('inf') |
| 58 | rotation_start = 0 |
| 59 | |
| 60 | # Find the index with the minimum PC |
| 61 | for idx, item in enumerate(deque): |
| 62 | if item[1] < min_pc: |
| 63 | min_pc = item[1] |
| 64 | rotation_start = idx |
| 65 | elif item[1] == min_pc: |
| 66 | # Tie-break. If the same sequence appears twice (A->B->A->B), |
| 67 | # we prefer the first. |
| 68 | pass |
| 69 | |
| 70 | # Rotate to start at rotation_start |
| 71 | canonical_seq = [] |
| 72 | for k in range(i + 1): |
| 73 | idx = (rotation_start + k) % (i + 1) |
| 74 | canonical_seq.append(deque[idx][0]) |
| 75 | |
| 76 | seq_key = tuple(canonical_seq) |
no test coverage detected
searching dependent graphs…