| 38 | } |
| 39 | |
/**
 * Builds the model-specific request payload for an audio generation call.
 *
 * The payload field names depend on the audio `type` and, for music, on which
 * model family the `model` id contains.
 *
 * @param type - Kind of audio requested; anything other than 'speech' or 'sfx' is handled as music.
 * @param params - Caller-supplied generation parameters (prompt, voice, duration, lyrics, instrumental).
 * @param model - Model identifier; matched by substring to choose the music payload shape.
 * @returns A plain object containing only the fields relevant to the chosen type/model.
 */
function buildInput(
  type: AudioType,
  params: GenerateFalAudioParams,
  model: string
): Record<string, unknown> {
  const { prompt, voice, duration, lyrics, instrumental } = params

  // Speech: Gemini 3.1 Flash TTS reads the text (inline tags allowed) from `prompt`.
  if (type === 'speech') {
    return voice ? { prompt, voice } : { prompt }
  }

  // Sound effects: ElevenLabs sound-effects read the description from `text`.
  if (type === 'sfx') {
    const sfxPayload: Record<string, unknown> = { text: prompt }
    if (duration !== undefined) sfxPayload.duration_seconds = duration
    return sfxPayload
  }

  // Music comes in two flavours:
  //   - instrumental bed (the default): no vocals and no lyrics needed
  //   - song with vocals: either explicit `lyrics` or lyrics auto-written from the prompt
  const hasLyrics = Boolean(lyrics)
  const vocals = hasLyrics || instrumental === false
  const musicPayload: Record<string, unknown> = { prompt }

  if (model.includes('minimax')) {
    // MiniMax Music 2.6 demands `lyrics` unless is_instrumental=true and rejects a
    // top-level `duration` (sending both was the 422 seen on every call), so the
    // duration is intentionally not forwarded here.
    musicPayload.is_instrumental = !vocals
    if (vocals) {
      if (hasLyrics) musicPayload.lyrics = lyrics
      else musicPayload.lyrics_optimizer = true
    }
    return musicPayload
  }

  if (model.includes('elevenlabs/music')) {
    if (!vocals) musicPayload.force_instrumental = true
    // Lyrics are appended to the prompt text rather than sent as a separate field.
    if (hasLyrics) musicPayload.prompt = `${prompt}\n\nLyrics:\n${lyrics}`
    // Duration is given in seconds and sent as whole milliseconds.
    if (duration !== undefined) musicPayload.music_length_ms = Math.round(duration * 1000)
    return musicPayload
  }

  // Any other music model: forward whatever was provided, best effort.
  if (instrumental !== undefined) musicPayload.instrumental = instrumental
  if (hasLyrics) musicPayload.lyrics = lyrics
  if (duration !== undefined) musicPayload.duration = duration
  return musicPayload
}
| 83 | |
| 84 | export async function generateFalAudio(params: GenerateFalAudioParams): Promise<GeneratedAudio> { |
| 85 | const type: AudioType = params.type || 'speech' |