(self, raw_text: str, file: File)
| 1852 | return new_temp_filename |
| 1853 | |
def save_text_tokens(self, raw_text: str, file: File) -> None:
    """Tokenize ``raw_text`` and upload the result as a JSON blob.

    The text is split into word tokens and sentence tokens using the
    tokenizer named by ``file.text_tokenizer``.  Both lists are stored
    under that tokenizer name in a JSON document which is uploaded to
    ``<PROJECT_TEXT_FILES_BASE_DIR><project_id>/<new_text_file.id>_tokens.json``.

    Side effects on ``self.new_text_file``:
        * ``tokens_url_signed_blob_path`` is set to the blob path above.
        * ``tokens_url_signed`` is set to the value returned by
          ``data_tools.build_secure_url`` for that path and the file's
          existing ``tokens_url_signed_expiry``.

    :param raw_text: the full text to tokenize.
    :param file: the File whose ``text_tokenizer`` selects the tokenizer
        and keys the stored JSON document.
    """
    # Per the original author's note, a file uses the NLTK tokenizer by default.
    text_tokenizer = TextTokenizer(type=file.text_tokenizer)
    words = text_tokenizer.tokenize_words(raw_text)
    sentences = text_tokenizer.tokenize_sentences(raw_text)

    # Tokens are nested under the tokenizer name in the stored document.
    payload = json.dumps({
        file.text_tokenizer: {
            'words': words,
            'sentences': sentences,
        },
    })

    path_parts = (
        settings.PROJECT_TEXT_FILES_BASE_DIR,
        str(self.project_id),
        str(self.new_text_file.id),
    )
    text_file = self.new_text_file
    text_file.tokens_url_signed_blob_path = '{}{}/{}_tokens.json'.format(*path_parts)
    logger.debug(f'Blob path Tokens: {text_file.tokens_url_signed_blob_path}')

    data_tools.upload_from_string(
        text_file.tokens_url_signed_blob_path,
        payload,
        content_type='application/json',
    )

    # Record the signed URL on the file once the upload call has returned.
    text_file.tokens_url_signed = data_tools.build_secure_url(
        text_file.tokens_url_signed_blob_path,
        text_file.tokens_url_signed_expiry,
    )
    logger.info(f"Saved Tokens on: {text_file.tokens_url_signed_blob_path}")
| 1880 | |
| 1881 | def save_raw_audio_file(self): |
| 1882 | new_offset_in_seconds = settings.SIGNED_URL_CACHE_NEW_OFFSET_SECONDS_VALID |
no test coverage detected