Convert unicode emojis into their text form
(text)
| 2110 | |
| 2111 | |
def deform_emojis(text):
    """Replace the emojis found in ``text`` with their textual names.

    The input is split into extended grapheme clusters. A cluster that
    contains at least one character present in ``UNICODE_EMOJI`` is treated
    as an emoji; its name is taken from ``emoji.demojize`` with the colons
    removed and underscores turned into spaces.

    Returns a ``(new_text, emojiless_text)`` tuple:

    * ``new_text`` -- the first occurrence of each emoji name is written as
      ``" (name) "``; later occurrences of the same name become one space.
    * ``emojiless_text`` -- every emoji cluster is replaced by one space.

    Both strings are passed through ``remove_extra_spaces`` before being
    returned.
    """
    described_parts = []  # pieces of the text with emojis spelled out
    stripped_parts = []   # pieces of the text with emojis blanked out
    seen_names = set()    # emoji names already written to described_parts

    for cluster in regex.findall(r"\X", text):
        # Ordinary text is copied unchanged into both outputs.
        if not any(char in UNICODE_EMOJI for char in cluster):
            described_parts.append(cluster)
            stripped_parts.append(cluster)
            continue

        name = emoji.demojize(cluster).replace(":", "").replace("_", " ")

        # The emoji-free variant gets a space whether or not the emoji
        # has been seen before.
        stripped_parts.append(" ")

        if name in seen_names:
            # Repeated emoji: emit a space instead of describing it again.
            described_parts.append(" ")
        else:
            seen_names.add(name)
            described_parts.append(" ({}) ".format(name))

    emojiless_text = remove_extra_spaces("".join(stripped_parts))
    new_text = remove_extra_spaces("".join(described_parts))

    return new_text, emojiless_text
| 2140 | |
| 2141 | |
| 2142 | def extract_text_from_element(elem): |
no test coverage detected