( params: Partial<AzureTtsParams> )
| 693 | } |
| 694 | |
| 695 | async function synthesizeWithAzure( |
| 696 | params: Partial<AzureTtsParams> |
| 697 | ): Promise<{ audioBuffer: Buffer; format: string; mimeType: string }> { |
| 698 | const { |
| 699 | text, |
| 700 | apiKey, |
| 701 | voiceId = 'en-US-JennyNeural', |
| 702 | region = 'eastus', |
| 703 | outputFormat = 'audio-24khz-96kbitrate-mono-mp3', |
| 704 | rate, |
| 705 | pitch, |
| 706 | style, |
| 707 | styleDegree, |
| 708 | role, |
| 709 | } = params |
| 710 | |
| 711 | if (!text || !apiKey) { |
| 712 | throw new Error('text and apiKey are required for Azure TTS') |
| 713 | } |
| 714 | |
| 715 | const AZURE_REGION_RE = /^[a-z][a-z0-9-]{1,30}[a-z0-9]$/ |
| 716 | if (!AZURE_REGION_RE.test(region)) { |
| 717 | throw new Error( |
| 718 | 'Invalid Azure region: must match /^[a-z][a-z0-9-]{1,30}[a-z0-9]$/ (e.g. eastus, westeurope)' |
| 719 | ) |
| 720 | } |
| 721 | |
| 722 | let ssml = `<speak version='1.0' xml:lang='en-US' xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts"><voice name='${voiceId}'>` |
| 723 | |
| 724 | if (style) { |
| 725 | ssml += `<mstts:express-as style='${style}'` |
| 726 | if (styleDegree) ssml += ` styledegree='${styleDegree}'` |
| 727 | if (role) ssml += ` role='${role}'` |
| 728 | ssml += '>' |
| 729 | } |
| 730 | |
| 731 | if (rate || pitch) { |
| 732 | ssml += '<prosody' |
| 733 | if (rate) ssml += ` rate='${rate}'` |
| 734 | if (pitch) ssml += ` pitch='${pitch}'` |
| 735 | ssml += '>' |
| 736 | } |
| 737 | |
| 738 | ssml += text |
| 739 | |
| 740 | if (rate || pitch) { |
| 741 | ssml += '</prosody>' |
| 742 | } |
| 743 | |
| 744 | if (style) { |
| 745 | ssml += '</mstts:express-as>' |
| 746 | } |
| 747 | |
| 748 | ssml += '</voice></speak>' |
| 749 | |
| 750 | const response = await fetch(`https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`, { |
| 751 | method: 'POST', |
| 752 | headers: { |
no test coverage detected