Process with non-LLM model (SenseVoice/Paraformer).
(model_name, audio_path, language=None)
| 164 | } |
| 165 | |
def _process_fallback(model_name, audio_path, language=None):
    """Process with non-LLM model (SenseVoice/Paraformer).

    Args:
        model_name: Name handed to ``_load_fallback`` to obtain the model.
        audio_path: Forwarded to ``model.generate`` as its ``input`` argument.
        language: Optional language hint; forwarded as ``language`` only
            when truthy, otherwise omitted from the call entirely.

    Returns:
        A dict with two keys:
        ``"text"``: the first result's ``"text"`` with ``<|...|>`` tags
        removed and surrounding whitespace stripped.
        ``"segments"``: one dict per entry of the first result's
        ``"sentence_info"`` (empty list when that key is absent), each with
        ``"start"``, ``"end"``, ``"text"`` and ``"speaker"``.
        When the model returns no results at all, the text is ``""`` and the
        segment list is empty.
    """
    model = _load_fallback(model_name)
    kwargs = {"input": audio_path, "batch_size": 1}
    if language:
        kwargs["language"] = language
    result = model.generate(**kwargs)

    # The model may yield nothing (e.g. no usable output for the input);
    # report an empty transcription instead of failing on result[0].
    if not result:
        return {"text": "", "segments": []}

    first = result[0]
    # Compile once: the same pattern is applied to the full text and to
    # every sentence below.
    tag_pattern = re.compile(r'<\|[^|]*\|>')

    text = tag_pattern.sub('', first["text"]).strip()
    segments = [
        {
            # Divided by 1000 -- presumably milliseconds to seconds;
            # NOTE(review): confirm the unit against the model's output.
            "start": s.get("start", 0) / 1000,
            "end": s.get("end", 0) / 1000,
            "text": tag_pattern.sub('', s.get("text", "")).strip(),
            "speaker": s.get("spk"),
        }
        for s in first.get("sentence_info", [])
    ]
    return {"text": text, "segments": segments}
| 184 | |
| 185 | # Pre-load |
| 186 | if preload_model == "fun-asr-nano": |
no test coverage detected
searching dependent graphs…