Function used in Word Spotting that checks whether the Ground Truth transcription meets the rules for entering the dictionary. If it does not, the transcription is treated as "don't care".
(transcription)
| 180 | |
| 181 | |
def include_in_dictionary(transcription):
    """
    Tell whether a Ground Truth transcription qualifies as a dictionary word
    for Word Spotting. Transcriptions that do not qualify are meant to be
    treated as "don't care" by the caller.

    The transcription is normalised first: a trailing 's / 'S is dropped,
    leading and trailing hyphens are removed, a fixed set of punctuation
    characters is turned into spaces, and surrounding whitespace is trimmed.
    The normalised word is accepted only if it

    * contains no space,
    * has at least ``evaluationParams['MIN_LENGTH_CARE_WORD']`` characters
      (``evaluationParams`` is read from the surrounding scope), and
    * consists solely of ASCII letters, letters from the accepted Latin and
      Greek code-point ranges listed below, or hyphens.

    :param transcription: the word to check; text, or a UTF-8 byte string.
    :return: True if the word may enter the dictionary, False otherwise.
    """
    # Work on text throughout so that per-character code points are compared
    # (a UTF-8 byte string would otherwise be inspected byte by byte).
    if isinstance(transcription, bytes):
        transcription = transcription.decode("utf-8")

    # Special case: possessive 's at the end of the word.
    if transcription.endswith((u"'s", u"'S")):
        transcription = transcription[:-2]

    # Hyphens at the beginning or end of the word.
    transcription = transcription.strip(u"-")

    # Punctuation becomes a space, so punctuation *inside* a word splits it
    # and gets it rejected below, while punctuation at the edges is trimmed.
    # \u00b7 is the middle dot.
    special_characters = u"'!?.:,*\"()\u00b7[]/"
    for character in special_characters:
        transcription = transcription.replace(character, u" ")

    transcription = transcription.strip()

    # Any remaining space means the transcription is more than one word.
    if u" " in transcription:
        return False

    if len(transcription) < evaluationParams['MIN_LENGTH_CARE_WORD']:
        return False

    # Multiplication sign, division sign and middle dot. The first two lie
    # inside the U+00C0..U+01BF range accepted below, so they have to be
    # excluded explicitly.
    not_allowed = u"\u00d7\u00f7\u00b7"

    # Inclusive code-point ranges of accepted characters. Written as numeric
    # code points so the check does not depend on the source file encoding.
    allowed_ranges = (
        (ord(u"a"), ord(u"z")),
        (ord(u"A"), ord(u"Z")),
        (0x00C0, 0x01BF),  # U+00C0 (A with grave) .. U+01BF
        (0x01C4, 0x027F),  # U+01C4 (DZ with caron digraph) .. U+027F
        (0x0386, 0x03FF),  # U+0386 (Greek Alpha with tonos) .. U+03FF
        (ord(u"-"), ord(u"-")),
    )

    for char in transcription:
        if char in not_allowed:
            return False

        char_code = ord(char)
        if not any(low <= char_code <= high for low, high in allowed_ranges):
            return False

    return True
| 224 | |
| 225 | def include_in_dictionary_transcription(transcription): |
| 226 | """ |