Generate comparison results for a same-tagged range.
_fancy_replace(a, alo, ahi, b, blo, bhi)
| 50 | |
| 51 | |
| 52 | def _fancy_replace(a, alo, ahi, b, blo, bhi): |
| 53 | """Generate comparison results for a same-tagged range.""" |
| 54 | |
| 55 | # don't synch up unless the lines have a similarity score of at |
| 56 | # least cutoff; best_ratio tracks the best score seen so far |
| 57 | best_ratio, cutoff = 0.54, 0.55 |
| 58 | cruncher = difflib.SequenceMatcher() |
| 59 | eqi, eqj = None, None # 1st indices of equal lines (if any) |
| 60 | |
| 61 | # search for the pair that matches best without being identical |
| 62 | # (identical lines must be junk lines, & we don't want to synch up |
| 63 | # on junk -- unless we have to) |
| 64 | for j in range(blo, bhi): |
| 65 | bj = b[j] |
| 66 | cruncher.set_seq2(bj) |
| 67 | for i in range(alo, ahi): |
| 68 | ai = a[i] |
| 69 | if ai == bj: |
| 70 | if eqi is None: |
| 71 | eqi, eqj = i, j |
| 72 | continue |
| 73 | cruncher.set_seq1(ai) |
| 74 | # computing similarity is expensive, so use the quick |
| 75 | # upper bounds first -- have seen this speed up messy |
| 76 | # compares by a factor of 3. |
| 77 | # note that ratio() is only expensive to compute the first |
| 78 | # time it's called on a sequence pair; the expensive part |
| 79 | # of the computation is cached by cruncher |
| 80 | if cruncher.real_quick_ratio() > best_ratio and \ |
| 81 | cruncher.quick_ratio() > best_ratio and \ |
| 82 | cruncher.ratio() > best_ratio: |
| 83 | best_ratio, best_i, best_j = cruncher.ratio(), i, j |
| 84 | if best_ratio < cutoff: |
| 85 | # no non-identical "pretty close" pair |
| 86 | if eqi is None: |
| 87 | # no identical pair either -- treat it as a straight replace |
| 88 | yield from _plain_replace(a, alo, ahi, b, blo, bhi) |
| 89 | return |
| 90 | # no close pair, but an identical pair -- synch up on that |
| 91 | best_i, best_j, best_ratio = eqi, eqj, 1.0 |
| 92 | else: |
| 93 | # there's a close pair, so forget the identical pair (if any) |
| 94 | eqi = None |
| 95 | |
| 96 | # a[best_i] very similar to b[best_j]; eqi is None if they're not |
| 97 | # identical |
| 98 | |
| 99 | # pump out diffs from before the synch point |
| 100 | yield from _fancy_helper(a, alo, best_i, b, blo, best_j) |
| 101 | |
| 102 | # pump out the synch point |
| 103 | yield '<p>' |
| 104 | if eqi is None: |
| 105 | aelt, belt = a[best_i], b[best_j] |
| 106 | cruncher.set_seqs(aelt, belt) |
| 107 | for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes(): |
| 108 | if tag == 'replace': |
| 109 | yield from _dump_chunk('chg-del', aelt[ai1:ai2]) |
no test coverage detected