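Sentence postprocess. Args: words: list of recognized tokens, each either a str or bytes (bytes are decoded as UTF-8); the special tokens <s>, </s>, <unk> and <OOV> are dropped before further processing. time_stamp: optional list of [begin, end] pairs, indexed by token position after the special tokens are removed; defaults to None (no timestamps).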
(words: List[Any], time_stamp: List[List] = None)
| 163 | |
| 164 | |
| 165 | def sentence_postprocess(words: List[Any], time_stamp: List[List] = None): |
| 166 | """Sentence postprocess. |
| 167 | |
| 168 | Args: |
| 169 | words: TODO. |
| 170 | time_stamp: TODO. |
| 171 | """ |
| 172 | middle_lists = [] |
| 173 | word_lists = [] |
| 174 | word_item = "" |
| 175 | ts_lists = [] |
| 176 | |
| 177 | # wash words lists |
| 178 | for i in words: |
| 179 | word = "" |
| 180 | if isinstance(i, str): |
| 181 | word = i |
| 182 | else: |
| 183 | word = i.decode("utf-8") |
| 184 | |
| 185 | if word in ["<s>", "</s>", "<unk>", "<OOV>"]: |
| 186 | continue |
| 187 | else: |
| 188 | middle_lists.append(word) |
| 189 | |
| 190 | # all chinese characters |
| 191 | if isAllChinese(middle_lists): |
| 192 | for i, ch in enumerate(middle_lists): |
| 193 | word_lists.append(ch.replace(" ", "")) |
| 194 | if time_stamp is not None: |
| 195 | ts_lists = time_stamp |
| 196 | |
| 197 | # all alpha characters |
| 198 | elif isAllAlpha(middle_lists): |
| 199 | ts_flag = True |
| 200 | for i, ch in enumerate(middle_lists): |
| 201 | if ts_flag and time_stamp is not None: |
| 202 | begin = time_stamp[i][0] |
| 203 | end = time_stamp[i][1] |
| 204 | word = "" |
| 205 | if "@@" in ch: |
| 206 | word = ch.replace("@@", "") |
| 207 | word_item += word |
| 208 | if time_stamp is not None: |
| 209 | ts_flag = False |
| 210 | end = time_stamp[i][1] |
| 211 | else: |
| 212 | word_item += ch |
| 213 | word_lists.append(word_item) |
| 214 | word_lists.append(" ") |
| 215 | word_item = "" |
| 216 | if time_stamp is not None: |
| 217 | ts_flag = True |
| 218 | end = time_stamp[i][1] |
| 219 | ts_lists.append([begin, end]) |
| 220 | begin = end |
| 221 | |
| 222 | # mix characters |
nothing calls this directly
no test coverage detected
searching dependent graphs…