Create a dictionary and a mapping of characters, sorted by frequency.
(sentences, lower)
| 63 | |
| 64 | |
def char_mapping(sentences, lower):
    """
    Create a dictionary and the two-way mappings for characters.

    For every sentence, the first field (index 0) of each item is collected,
    lowercased when `lower` is truthy. The collected characters are handed to
    `create_dico`, the special "<PAD>" and "<UNK>" entries are added, and
    `create_mapping` builds the lookup tables from the result.

    Returns the tuple (dico, char_to_id, id_to_char).
    """
    # NOTE(review): each item is presumably a (char, tag, ...) row -- only
    # index 0 is read here; confirm against the data loader.
    chars = []
    for sentence in sentences:
        if lower:
            row = [item[0].lower() for item in sentence]
        else:
            row = [item[0] for item in sentence]
        chars.append(row)

    dico = create_dico(chars)
    # Very large artificial counts for the special symbols; presumably this
    # places them first when create_mapping orders entries by frequency --
    # TODO confirm in create_mapping.
    dico["<PAD>"] = 10000001
    dico['<UNK>'] = 10000000
    char_to_id, id_to_char = create_mapping(dico)

    # The unique count includes the two special entries added above.
    unique_count = len(dico)
    total_count = sum(len(row) for row in chars)
    print("Found %i unique words (%i in total)" % (unique_count, total_count))

    return dico, char_to_id, id_to_char
| 78 | |
| 79 | |
| 80 | def tag_mapping(sentences): |
no test coverage detected