(T, m, max_motifs, max_matches, T_subseq_isconstant=None)
| 10 | |
| 11 | |
| 12 | def naive_motifs(T, m, max_motifs, max_matches, T_subseq_isconstant=None): |
| 13 | # To avoid complexity, this naive function is written |
| 14 | # such that each array in the ouput has shape |
| 15 | # (max_motif, max_matches). |
| 16 | |
| 17 | # To this end, the following items are considered: |
| 18 | # 1. `max_distance` and `cutoff` are both hardcoded and |
| 19 | # set to np.inf |
| 20 | # 2. If the number of subsequence, i.e. `len(T)-m+1`, is |
| 21 | # not less than `m * max_motifs * max_matches`, then the |
| 22 | # output definitely has the shape (max_motif, max_matches). |
| 23 | |
| 24 | l = len(T) - m + 1 |
| 25 | excl_zone = int(np.ceil(m / 4)) |
| 26 | T_subseq_isconstant = naive.rolling_isconstant(T, m, T_subseq_isconstant) |
| 27 | |
| 28 | output_shape = (max_motifs, max_matches) |
| 29 | motif_distances = np.full(output_shape, -np.inf, dtype=np.float64) |
| 30 | motif_indices = np.full(output_shape, -1, dtype=np.int64) |
| 31 | |
| 32 | D = naive.distance_matrix(T, T, m) |
| 33 | D[np.isnan(D)] = np.inf |
| 34 | for i in range(D.shape[0]): |
| 35 | for j in range(D.shape[1]): |
| 36 | if np.isfinite(D[i, j]): |
| 37 | if T_subseq_isconstant[i] and T_subseq_isconstant[j]: |
| 38 | D[i, j] = 0.0 |
| 39 | elif T_subseq_isconstant[i] or T_subseq_isconstant[j]: |
| 40 | D[i, j] = np.sqrt(m) |
| 41 | else: # pragma: no cover |
| 42 | pass |
| 43 | |
| 44 | for i in range(D.shape[0]): |
| 45 | naive.apply_exclusion_zone(D[i], i, excl_zone, np.inf) |
| 46 | |
| 47 | P = np.min(D, axis=1) |
| 48 | for i in range(max_motifs): |
| 49 | distances = [] |
| 50 | indices = [] |
| 51 | |
| 52 | idx = np.argmin(P) |
| 53 | |
| 54 | # self match |
| 55 | distances.append(0) |
| 56 | indices.append(idx) |
| 57 | naive.apply_exclusion_zone(P, idx, excl_zone, np.inf) |
| 58 | |
| 59 | # Explore distance profile D[idx] till `max_matches` are found. |
| 60 | naive.apply_exclusion_zone(D[idx], idx, excl_zone, np.inf) |
| 61 | for _ in range(l): |
| 62 | if len(distances) >= max_matches: |
| 63 | break |
| 64 | |
| 65 | nn = np.argmin(D[idx]) |
| 66 | distances.append(D[idx, nn]) |
| 67 | indices.append(nn) |
| 68 | |
| 69 | # Update D[idx] to avoid finding matches that are trivial to |
no outgoing calls
no test coverage detected