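Best-effort BPM and key detection. On failure, returns (None, None) and leaves the job's analysis-result fields untouched -- the chips stay as placeholders. Note that the job's status, progress, and stage are still set to the "analyzing" state on entry, before any failure can occur.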
(job: Job, source: Path)
| 247 | |
| 248 | |
| 249 | def analyze(job: Job, source: Path) -> tuple[int | None, str | None]: |
| 250 | """Best-effort BPM and key detection. On failure, returns (None, None) |
| 251 | and leaves job fields untouched -- the chips stay as placeholders.""" |
| 252 | logger.info("analyze: entering for job %s, source=%s", job.id, source) |
| 253 | _set(job, status="analyzing", progress=0.0, stage="Analyzing audio...") |
| 254 | try: |
| 255 | import librosa |
| 256 | except ImportError: |
| 257 | logger.warning("librosa not installed -- skipping BPM/key analysis") |
| 258 | return None, None |
| 259 | |
| 260 | try: |
| 261 | # Analyse the first 180 s. Decode via ffmpeg directly into numpy |
| 262 | # to avoid librosa's deprecated audioread fallback for |
| 263 | # .webm/.m4a/.opus inputs. |
| 264 | loaded = _load_audio_ffmpeg(source, sr=22050, duration=180.0) |
| 265 | if loaded is None: |
| 266 | return None, None |
| 267 | y, sr = loaded |
| 268 | |
| 269 | # Harmonic / percussive separation. Beat tracking sees a cleaner |
| 270 | # onset envelope on the percussive component; chroma sees a |
| 271 | # cleaner pitch profile on the harmonic component (no cymbal |
| 272 | # smear, no kick fundamentals leaking in). |
| 273 | y_harmonic, y_percussive = librosa.effects.hpss(y) |
| 274 | |
| 275 | tempo_arr, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr) |
| 276 | try: |
| 277 | tempo = float(tempo_arr[0]) # type: ignore[index] |
| 278 | except (TypeError, IndexError): |
| 279 | tempo = float(tempo_arr) |
| 280 | bpm = int(round(tempo)) if tempo > 0 else None |
| 281 | |
| 282 | # chroma_cqt is constant-Q based — better pitch resolution than |
| 283 | # chroma_stft, especially in the bass register where the open |
| 284 | # strings of a guitar live. |
| 285 | chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr) |
| 286 | chroma_mean = chroma.mean(axis=1).tolist() |
| 287 | if any(chroma_mean): |
| 288 | key, scale, key_confidence = _detect_key(chroma_mean) |
| 289 | else: |
| 290 | key, scale, key_confidence = None, None, None |
| 291 | |
| 292 | # LUFS / peak. Computed on the same 22 kHz mono buffer; this |
| 293 | # loses a few dB of accuracy vs full-sample-rate stereo, but |
| 294 | # it's good enough for a UI display and adds ~50 ms to analyze. |
| 295 | lufs, peak_db = _measure_loudness(y, sr) |
| 296 | |
| 297 | dynamic_range: float | None = None |
| 298 | if lufs is not None and peak_db is not None: |
| 299 | dynamic_range = round(peak_db - lufs, 1) |
| 300 | |
| 301 | # Beat interval coefficient of variation → stability 0-100. |
| 302 | # CV = std/mean of inter-beat intervals; CV=0 is perfectly metronomic. |
| 303 | tempo_stability: int | None = None |
| 304 | import numpy as np |
| 305 | |
| 306 | beat_times = librosa.frames_to_time(beat_frames, sr=sr) |
no test coverage detected