Skip to content

Commit daadcb4

Browse files
authored
fix elevenlabs auto mode (#820)
1 parent 21b25dc commit daadcb4

File tree

2 files changed

+71
-22
lines changed

2 files changed

+71
-22
lines changed

.changeset/fuzzy-boxes-follow.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@livekit/agents-plugin-elevenlabs': patch
3+
---
4+
5+
fix setting autoMode for elevenlabs

plugins/elevenlabs/src/tts.ts

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -53,21 +53,20 @@ export interface TTSOptions {
5353
baseURL: string;
5454
encoding: TTSEncoding;
5555
streamingLatency?: number;
56-
wordTokenizer: tokenize.WordTokenizer;
56+
wordTokenizer: tokenize.WordTokenizer | tokenize.SentenceTokenizer;
5757
chunkLengthSchedule?: number[];
5858
enableSsmlParsing: boolean;
5959
inactivityTimeout: number;
6060
syncAlignment: boolean;
6161
autoMode?: boolean;
6262
}
6363

64-
const defaultTTSOptions: TTSOptions = {
64+
const defaultTTSOptionsBase = {
6565
apiKey: process.env.ELEVEN_API_KEY,
6666
voice: DEFAULT_VOICE,
6767
modelID: 'eleven_turbo_v2_5',
6868
baseURL: API_BASE_URL_V1,
69-
encoding: 'pcm_22050',
70-
wordTokenizer: new tokenize.basic.WordTokenizer(false),
69+
encoding: 'pcm_22050' as TTSEncoding,
7170
enableSsmlParsing: false,
7271
inactivityTimeout: DEFAULT_INACTIVITY_TIMEOUT,
7372
syncAlignment: true,
@@ -78,13 +77,33 @@ export class TTS extends tts.TTS {
7877
label = 'elevenlabs.TTS';
7978

8079
constructor(opts: Partial<TTSOptions> = {}) {
81-
super(sampleRateFromFormat(opts.encoding || defaultTTSOptions.encoding), 1, {
80+
super(sampleRateFromFormat(opts.encoding || defaultTTSOptionsBase.encoding), 1, {
8281
streaming: true,
8382
});
8483

84+
// The Python implementation defaults autoMode to true when it is not provided,
85+
// but to keep this change non-breaking, the TypeScript implementation defaults to false.
86+
const autoMode = opts.autoMode !== undefined ? opts.autoMode : false;
87+
88+
// Set default tokenizer based on autoMode if not provided
89+
let wordTokenizer = opts.wordTokenizer;
90+
if (!wordTokenizer) {
91+
wordTokenizer = autoMode
92+
? new tokenize.basic.SentenceTokenizer()
93+
: new tokenize.basic.WordTokenizer(false);
94+
} else if (autoMode && !(wordTokenizer instanceof tokenize.SentenceTokenizer)) {
95+
// Warn if autoMode is enabled but a WordTokenizer was provided
96+
log().warn(
97+
'autoMode is enabled, it expects full sentences or phrases. ' +
98+
'Please provide a SentenceTokenizer instead of a WordTokenizer.',
99+
);
100+
}
101+
85102
this.#opts = {
86-
...defaultTTSOptions,
103+
...defaultTTSOptionsBase,
87104
...opts,
105+
autoMode,
106+
wordTokenizer,
88107
};
89108

90109
if (this.#opts.apiKey === undefined) {
@@ -156,10 +175,10 @@ export class SynthesizeStream extends tts.SynthesizeStream {
156175
}
157176

158177
protected async run() {
159-
const segments = new AsyncIterableQueue<tokenize.WordStream>();
178+
const segments = new AsyncIterableQueue<tokenize.WordStream | tokenize.SentenceStream>();
160179

161180
const tokenizeInput = async () => {
162-
let stream: tokenize.WordStream | null = null;
181+
let stream: tokenize.WordStream | tokenize.SentenceStream | null = null;
163182
for await (const text of this.input) {
164183
if (this.abortController.signal.aborted) {
165184
break;
@@ -191,7 +210,7 @@ export class SynthesizeStream extends tts.SynthesizeStream {
191210
await Promise.all([tokenizeInput(), runStream()]);
192211
}
193212

194-
async #runWS(stream: tokenize.WordStream, maxRetry = 3) {
213+
async #runWS(stream: tokenize.WordStream | tokenize.SentenceStream, maxRetry = 3) {
195214
let retries = 0;
196215
let ws: WebSocket;
197216
while (true) {
@@ -229,20 +248,40 @@ export class SynthesizeStream extends tts.SynthesizeStream {
229248
const requestId = shortuuid();
230249
const segmentId = shortuuid();
231250

232-
ws.send(
233-
JSON.stringify({
234-
text: ' ',
235-
voice_settings: this.#opts.voice.settings,
236-
...(this.#opts.chunkLengthSchedule && {
237-
generation_config: {
238-
chunk_length_schedule: this.#opts.chunkLengthSchedule,
239-
},
240-
}),
251+
// simple helper that type-checks the payload we pass to ws.send
252+
const wsSend = (data: {
253+
// (SynthesizeContent from python)
254+
text: string;
255+
// setting flush somehow never finishes the current speech generation
256+
// https://github.com/livekit/agents-js/pull/820#issuecomment-3517138706
257+
// flush?: boolean;
258+
// initialization
259+
voice_settings?: VoiceSettings;
260+
generation_config?: {
261+
chunk_length_schedule: number[];
262+
};
263+
}) => {
264+
ws.send(JSON.stringify(data));
265+
};
266+
267+
wsSend({
268+
text: ' ',
269+
voice_settings: this.#opts.voice.settings,
270+
...(this.#opts.chunkLengthSchedule && {
271+
generation_config: {
272+
chunk_length_schedule: this.#opts.chunkLengthSchedule,
273+
},
241274
}),
242-
);
275+
});
243276
let eosSent = false;
244277

245278
const sendTask = async () => {
279+
// Determine if we should flush on each chunk (sentence)
280+
/*const flushOnChunk =
281+
this.#opts.wordTokenizer instanceof tokenize.SentenceTokenizer &&
282+
this.#opts.autoMode !== undefined &&
283+
this.#opts.autoMode;*/
284+
246285
let xmlContent: string[] = [];
247286
for await (const data of stream) {
248287
if (this.abortController.signal.aborted) {
@@ -260,15 +299,20 @@ export class SynthesizeStream extends tts.SynthesizeStream {
260299
}
261300
}
262301

263-
ws.send(JSON.stringify({ text: text + ' ' })); // must always end with a space
302+
wsSend({
303+
text: text + ' ', // must always end with a space
304+
// ...(flushOnChunk && { flush: true }),
305+
});
264306
}
265307

266308
if (xmlContent.length) {
267309
this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
268310
}
269311

270-
// no more tokens, mark eos
271-
ws.send(JSON.stringify({ text: '' }));
312+
// no more tokens, mark eos (without flush)
313+
// setting flush somehow never finishes the current speech generation
314+
// wsSend({ text: '', flush: true });
315+
wsSend({ text: '' });
272316
eosSent = true;
273317
};
274318

0 commit comments

Comments
 (0)