Compute Word Error Rate. [Reference] https://martin-thoma.com/word-error-rate-calculation/ Args: ref (list): words in the reference transcript hyp (list): words in the predicted transcript normalize (bool, optional): if True, divide by the length of ref
(ref, hyp, normalize=False)
| 107 | |
| 108 | # Decoding |
| 109 | def compute_wer(ref, hyp, normalize=False): |
| 110 | """Compute Word Error Rate. |
| 111 | [Reference] |
| 112 | https://martin-thoma.com/word-error-rate-calculation/ |
| 113 | Args: |
| 114 | ref (list): words in the reference transcript |
| 115 | hyp (list): words in the predicted transcript |
| 116 | normalize (bool, optional): if True, divide by the length of ref |
| 117 | Returns: |
| 118 | wer (float): Word Error Rate between ref and hyp |
| 119 | n_sub (int): the number of substitution |
| 120 | n_ins (int): the number of insertion |
| 121 | n_del (int): the number of deletion |
| 122 | """ |
| 123 | # Initialisation |
| 124 | d = np.zeros((len(ref) + 1) * (len(hyp) + 1), dtype=np.uint16) |
| 125 | d = d.reshape((len(ref) + 1, len(hyp) + 1)) |
| 126 | for i in range(len(ref) + 1): |
| 127 | for j in range(len(hyp) + 1): |
| 128 | if i == 0: |
| 129 | d[0][j] = j |
| 130 | elif j == 0: |
| 131 | d[i][0] = i |
| 132 | |
| 133 | # Computation |
| 134 | for i in range(1, len(ref) + 1): |
| 135 | for j in range(1, len(hyp) + 1): |
| 136 | if ref[i - 1] == hyp[j - 1]: |
| 137 | d[i][j] = d[i - 1][j - 1] |
| 138 | else: |
| 139 | sub_tmp = d[i - 1][j - 1] + 1 |
| 140 | ins_tmp = d[i][j - 1] + 1 |
| 141 | del_tmp = d[i - 1][j] + 1 |
| 142 | d[i][j] = min(sub_tmp, ins_tmp, del_tmp) |
| 143 | |
| 144 | wer = d[len(ref)][len(hyp)] |
| 145 | |
| 146 | # Find out the manipulation steps |
| 147 | x = len(ref) |
| 148 | y = len(hyp) |
| 149 | error_list = [] |
| 150 | while True: |
| 151 | if x == 0 and y == 0: |
| 152 | break |
| 153 | else: |
| 154 | if x > 0 and y > 0: |
| 155 | if d[x][y] == d[x - 1][y - 1] and ref[x - 1] == hyp[y - 1]: |
| 156 | error_list.append("C") |
| 157 | x = x - 1 |
| 158 | y = y - 1 |
| 159 | elif d[x][y] == d[x][y - 1] + 1: |
| 160 | error_list.append("I") |
| 161 | y = y - 1 |
| 162 | elif d[x][y] == d[x - 1][y - 1] + 1: |
| 163 | error_list.append("S") |
| 164 | x = x - 1 |
| 165 | y = y - 1 |
| 166 | else: |
no test coverage detected