feat: support stream response

This commit is contained in:
WJG 2024-02-24 20:13:25 +08:00
parent e948261e4e
commit 631876d2ac
No known key found for this signature in database
GPG Key ID: 258474EF8590014A
6 changed files with 319 additions and 13 deletions

View File

@ -8,6 +8,7 @@ import {
} from "mi-service-lite";
import { sleep } from "../../utils/base";
import { Http } from "../http";
import { ResponseStream } from "./stream";
/** Identifiers of the text-to-speech backends a response can be spoken with. */
export type TTSProvider = "xiaoai" | "doubao";
@ -49,6 +50,7 @@ export class BaseSpeaker {
}
/**
 * Takes the speaker out of its woken-up state by toggling MiIOT property
 * (4, 1) to `true` and then back to `false`, one call after the other.
 */
async unWakeUp() {
  // FIXME: newer Xiaoai speaker firmware plays a prompt tone whenever the
  // microphone is switched off/on.
  // Per the original notes: `true` turns the microphone off, `false` turns it
  // back on. The order matters, so the calls are awaited sequentially.
  for (const value of [true, false]) {
    await this.MiIOT!.setProperty(4, 1, value);
  }
}
@ -57,6 +59,7 @@ export class BaseSpeaker {
async response(options: {
tts?: TTSProvider;
text?: string;
stream?: ResponseStream;
audio?: string;
speaker?: string;
keepAlive?: boolean;
@ -65,16 +68,98 @@ export class BaseSpeaker {
let {
text,
audio,
stream,
playSFX = true,
keepAlive = false,
tts = this.tts,
} = options ?? {};
const ttsNotXiaoai = (!!stream || !!text) && !audio && tts !== "xiaoai";
playSFX = ttsNotXiaoai && playSFX;
if (ttsNotXiaoai && !stream) {
// 长文本 TTS 转化成 stream 分段模式
stream = ResponseStream.createResponseStream(text!);
}
let res;
this.responding = true;
// 开始响应
if (stream) {
let _response = "";
while (true) {
const { nextSentence, noMore } = stream.getNextResponse();
if (nextSentence) {
if (_response.length < 1) {
// 播放开始提示音
if (playSFX) {
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
}
// 在播放 TTS 语音之前,先取消小爱音箱的唤醒状态,防止将 TTS 语音识别成用户指令
if (ttsNotXiaoai) {
await this.unWakeUp();
}
}
res = await this._response({
...options,
text: nextSentence,
playSFX: false,
keepAlive: false,
});
if (res === "break") {
// 终止回复
stream.cancel();
break;
}
_response += nextSentence;
}
if (noMore) {
if (_response.length > 0) {
// 播放结束提示音
if (playSFX) {
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
}
}
// 保持唤醒状态
if (keepAlive) {
await this.wakeUp();
}
// 播放完毕
break;
}
await sleep(this.interval);
}
} else {
res = await this._response(options);
}
this.responding = false;
return res;
}
private async _response(options: {
tts?: TTSProvider;
text?: string;
stream?: ResponseStream;
audio?: string;
speaker?: string;
keepAlive?: boolean;
playSFX?: boolean;
}) {
let {
text,
audio,
stream,
playSFX = true,
keepAlive = false,
tts = this.tts,
speaker = this._defaultSpeaker,
} = options ?? {};
const ttsNotXiaoai = !stream && !!text && !audio && tts !== "xiaoai";
playSFX = ttsNotXiaoai && playSFX;
// 播放回复
const play = async (args?: { tts?: string; url?: string }) => {
const ttsNotXiaoai = !audio && tts !== "xiaoai";
playSFX = ttsNotXiaoai && playSFX;
// 播放开始提示音
if (playSFX) {
await this.MiNA!.play({ url: process.env.AUDIO_BEEP });
@ -112,7 +197,6 @@ export class BaseSpeaker {
// 开始响应
let res;
this.responding = true;
if (audio) {
// 音频回复
res = await play({ url: audio });
@ -120,18 +204,18 @@ export class BaseSpeaker {
// 文字回复
switch (tts) {
case "doubao":
text = encodeURIComponent(text);
const _text = encodeURIComponent(text);
const doubaoTTS = process.env.TTS_DOUBAO;
const url = `${doubaoTTS}?speaker=${speaker}&text=${text}`;
const url = `${doubaoTTS}?speaker=${speaker}&text=${_text}`;
res = await play({ url });
break;
case "xiaoai":
default:
res = await play({ tts: text });
break;
}
this.responding = false;
return res;
}
return res;
}
private _doubaoSpeakers?: Speaker[];

View File

@ -266,7 +266,7 @@ export class Speaker extends BaseSpeaker {
const ttsAnswer = e.answers.find((e) => e.type === "TTS") as any;
return {
text: e.query,
answer: ttsAnswer?.tts?.text,
answer: ttsAnswer?.tts?.text?.trim(),
timestamp: e.time,
};
});

View File

@ -0,0 +1,190 @@
/** Lifecycle states of a {@link ResponseStream}. */
type ResponseStatus = "idle" | "responding" | "finished" | "canceled";

/** Tuning options for a {@link ResponseStream}. */
interface ResponseStreamOptions {
  /**
   * Maximum length (in UTF-16 code units) of a single sentence chunk.
   * Buffered text longer than this is also flushed without waiting for a
   * timeout.
   *
   * Defaults to 100.
   */
  maxSentenceLength?: number;
  /**
   * Milliseconds the very first piece of incoming text may stay buffered
   * before it is submitted for chunking.
   *
   * Defaults to 200. When this value or `batchSubmitTimeout` is below 100,
   * every piece of text is submitted immediately (no batching).
   */
  firstSubmitTimeout?: number;
  /**
   * Milliseconds subsequent pieces of text may stay buffered before they are
   * submitted for chunking.
   *
   * Defaults to 1000. When this value or `firstSubmitTimeout` is below 100,
   * every piece of text is submitted immediately (no batching).
   */
  batchSubmitTimeout?: number;
}

/**
 * Buffers incrementally produced text and splits it into sentence-sized
 * chunks that a consumer polls with {@link ResponseStream.getNextResponse}.
 */
export class ResponseStream {
  /** Characters after which a chunk may end. */
  private static readonly _punctuations = ",。?!:;……,.?!:;…";

  /**
   * Turns an already complete, long text into a finished stream.
   *
   * @param text The full reply text.
   * @param options Stream options; only texts longer than
   *   `maxSentenceLength` (default 100) are converted.
   * @returns A finished stream holding the chunked text, or `undefined` when
   *   the text is short enough to be spoken in one go.
   */
  static createResponseStream(
    text: string,
    options?: ResponseStreamOptions
  ): ResponseStream | undefined {
    const { maxSentenceLength = 100 } = options ?? {};
    if (text.length > maxSentenceLength) {
      const stream = new ResponseStream(options);
      // The whole text is already known: submit it right away instead of
      // going through the timer-based batching (which would leave a pending
      // timer behind for no benefit).
      stream._batchSubmit(text, true);
      stream.finish();
      return stream;
    }
    return undefined;
  }

  maxSentenceLength: number;
  firstSubmitTimeout: number;
  batchSubmitTimeout: number;

  /** Current lifecycle state; a new stream starts out as "responding". */
  status: ResponseStatus = "responding";

  /** Index of the next chunk handed out by `getNextResponse`. */
  private _nextChunkIdx = 0;
  /** Sentence chunks ready to be consumed. */
  private _chunks: string[] = [];
  /** Text received but not yet submitted for chunking. */
  private _tempText = "";
  /** Submitted text that does not yet form a complete chunk. */
  private _remainingText: string = "";
  /** Time (ms since epoch) of the last submit; 0 before the first one. */
  private _preSubmitTimestamp = 0;

  constructor(options?: ResponseStreamOptions) {
    const {
      maxSentenceLength = 100,
      firstSubmitTimeout = 200,
      batchSubmitTimeout = 1000,
    } = options ?? {};
    this.maxSentenceLength = maxSentenceLength;
    this.firstSubmitTimeout = firstSubmitTimeout;
    this.batchSubmitTimeout = batchSubmitTimeout;
  }

  /**
   * Cancels a stream that is still idle or responding.
   *
   * @returns `true` when the stream is in the "canceled" state afterwards.
   */
  cancel() {
    if (["idle", "responding"].includes(this.status)) {
      this.status = "canceled";
    }
    return this.status === "canceled";
  }

  /**
   * Appends newly produced text. Ignored once the stream is finished or
   * canceled.
   */
  addResponse(text: string) {
    if (this.status === "idle") {
      this.status = "responding";
    }
    if (this.status !== "responding") {
      return;
    }
    this._batchSubmit(text);
  }

  /**
   * Hands out the next unread chunk, if any.
   *
   * @returns `nextSentence` is the next chunk (`undefined` when none is ready
   *   yet); `noMore` is `true` once every chunk has been handed out and the
   *   stream is finished or canceled.
   */
  getNextResponse() {
    const nextSentence = this._chunks[this._nextChunkIdx];
    if (nextSentence) {
      this._nextChunkIdx++;
    }
    const noMore =
      this._nextChunkIdx > this._chunks.length - 1 &&
      ["finished", "canceled"].includes(this.status);
    return { nextSentence, noMore };
  }

  /**
   * Flushes all buffered text into chunks and marks the stream as finished.
   * Has no effect on a stream that is already finished or canceled.
   *
   * @returns `true` when the stream is in the "finished" state afterwards.
   */
  finish() {
    if (["idle", "responding"].includes(this.status)) {
      if (this._tempText) {
        // Submit the text that is still waiting in the batch buffer.
        this._addResponse(this._tempText);
        this._tempText = "";
      }
      if (this._remainingText) {
        // Whatever is left (no trailing punctuation) becomes the last chunk.
        this._chunks.push(this._remainingText);
        this._remainingText = "";
      }
      this.status = "finished";
    }
    return this.status === "finished";
  }

  /**
   * Buffers incoming text and submits it for chunking in batches, so that
   * many tiny stream deltas are merged before being split into sentences.
   *
   * @param text Newly received text.
   * @param immediately Forces (or suppresses) an immediate submit; when
   *   omitted, submits immediately if either timeout is below 100 ms.
   */
  private _batchSubmit(text: string, immediately?: boolean) {
    this._tempText += text;
    const submitImmediately = () => {
      if (this._tempText) {
        this._addResponse(this._tempText);
        this._tempText = "";
      }
      this._preSubmitTimestamp = Date.now();
    };
    immediately =
      immediately ??
      (this.firstSubmitTimeout < 100 || this.batchSubmitTimeout < 100);
    if (immediately) {
      return submitImmediately();
    }
    const isFirstSubmit = this._preSubmitTimestamp === 0;
    const batchSubmit = (timeout: number) => {
      // Submit once enough time has passed since the last submit, or once
      // the buffered text has grown beyond one sentence.
      if (
        Date.now() - this._preSubmitTimestamp > timeout ||
        this._tempText.length > this.maxSentenceLength
      ) {
        submitImmediately();
      }
    };
    const submit = (timeout: number) => {
      batchSubmit(timeout);
      setTimeout(() => {
        // A stream that was finished or canceled in the meantime must not
        // receive further chunks from a stale timer.
        if (this.status === "finished" || this.status === "canceled") {
          return;
        }
        batchSubmit(timeout);
      }, timeout);
    };
    if (isFirstSubmit) {
      this._preSubmitTimestamp = Date.now();
      submit(this.firstSubmitTimeout);
    } else {
      submit(this.batchSubmitTimeout);
    }
  }

  /**
   * Appends submitted text to the unchunked remainder and moves every
   * complete sentence from its front into the chunk list.
   */
  private _addResponse(text: string) {
    this._remainingText += text;
    while (this._remainingText.length > 0) {
      const lastCutIndex = this._findLastCutIndex(this._remainingText);
      if (lastCutIndex > 0) {
        this._chunks.push(this._remainingText.substring(0, lastCutIndex));
        this._remainingText = this._remainingText.substring(lastCutIndex);
      } else {
        // No cut position yet: wait for more text (or for finish()).
        break;
      }
    }
  }

  /**
   * Finds where the next chunk should end.
   *
   * @returns The index just after the last punctuation mark within the first
   *   `maxSentenceLength` characters. If that window holds no punctuation but
   *   the text already fills it, a hard cut at `maxSentenceLength` is
   *   returned so chunking cannot stall on long unpunctuated text. Returns
   *   -1 when no cut is possible yet.
   */
  private _findLastCutIndex(text: string): number {
    const windowLength = Math.min(text.length, this.maxSentenceLength);
    let lastCutIndex = -1;
    for (let i = 0; i < windowLength; i++) {
      if (ResponseStream._punctuations.includes(text[i])) {
        lastCutIndex = i + 1;
      }
    }
    if (
      lastCutIndex < 0 &&
      this.maxSentenceLength > 0 &&
      text.length >= this.maxSentenceLength
    ) {
      // Later punctuation lies outside the search window and would never be
      // found, so force a cut at the length limit.
      let hardCut = this.maxSentenceLength;
      const lastUnit = text.charCodeAt(hardCut - 1);
      if (hardCut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        // Do not split a surrogate pair in half.
        hardCut -= 1;
      }
      lastCutIndex = hardCut;
    }
    return lastCutIndex;
  }
}
// Usage sketch: forwarding text from an AI client's streaming callback into a
// ResponseStream.
// NOTE(review): this module-level instance is not exported and, in the code
// visible here, is only referenced by the commented-out example below.
const stream = new ResponseStream();
// Example `onNewText` handler of an AI client (pseudo-code; `finished` is
// assumed to be provided by that client):
// {
//   onNewText(text:string){
//     if(stream.status==='canceled'){
//       return 'canceled';
//     }
//     if(finished){
//       stream.finish()
//     }else{
//       stream.addResponse(text)
//     }
//   }
// }

View File

@ -3,12 +3,14 @@ import { println } from "../src/utils/base";
import { kBannerASCII } from "../src/utils/string";
import { runWithDB } from "../src/services/db";
import { testDB } from "./db";
import { testSpeaker } from "./speaker";
dotenv.config();
// Entry point handed to runWithDB: prints the banner, then starts the test routines.
async function main() {
println(kBannerASCII);
testDB();
// testDB();
// NOTE(review): testSpeaker is declared async but is not awaited here, so
// main() can resolve before it completes — confirm this is intended.
testSpeaker();
}
runWithDB(main);

View File

@ -1,7 +1,8 @@
import { AISpeaker } from "../src/services/speaker/ai";
import { ResponseStream } from "../src/services/speaker/stream";
import { sleep } from "../src/utils/base";
export async function main() {
export async function testSpeaker() {
const config: any = {
userId: process.env.MI_USER!,
password: process.env.MI_PASS!,
@ -12,10 +13,11 @@ export async function main() {
const speaker = new AISpeaker(config);
await speaker.initMiServices();
// await testSpeakerResponse(speaker);
await testSpeakerStreamResponse(speaker);
// await testSpeakerGetMessages(speaker);
// await testSwitchSpeaker(speaker);
// await testSpeakerUnWakeUp(speaker);
await testAISpeaker(speaker);
// await testAISpeaker(speaker);
}
async function testAISpeaker(speaker: AISpeaker) {
@ -51,8 +53,34 @@ async function testSpeakerGetMessages(speaker: AISpeaker) {
/**
 * Manual check of a plain text response: logs the player status, speaks a
 * fixed greeting, waits one second, then logs the player status again.
 *
 * @param speaker An initialized speaker instance.
 */
async function testSpeakerResponse(speaker: AISpeaker) {
  let status = await speaker.MiNA!.getStatus();
  console.log("curent status", status);
  // Issue the response exactly once and wait for it; an additional
  // un-awaited call would race with this one.
  await speaker.response({ text: "你好,我是豆包,很高兴认识你!" });
  // sleep() returns a promise, so it must be awaited for the pause to happen.
  await sleep(1000);
  status = await speaker.MiNA!.getStatus();
  console.log("tts status", status);
}
/**
 * Manual check of a streamed response: a background task feeds a long answer
 * into a ResponseStream piece by piece while the speaker plays the stream.
 *
 * @param speaker An initialized speaker instance.
 */
async function testSpeakerStreamResponse(speaker: AISpeaker) {
  const stream = new ResponseStream();
  // Pieces of the answer, in the order they are fed into the stream.
  const segments = [
    `地球是圆的主要原因`,
    `是由于地球的引力和自转。`,
    `地球的引力使得地球在形成过程中变得更加圆滑,因为引力会使得地球`,
    `的物质向地心靠拢,从而使得地球的形状更接近于一个球体。此外,`,
    `地球的自转也会导致地球呈现出圆形,因为地球自转会使得地球的物质在赤道附近向外扩散,从而使得`,
    `地球在赤道处稍微膨胀,而在极地处稍微收缩,最终形成一个近似于球体的形状。因此,地球是圆的`,
    `主要原因是由于地球的引力和自转共同作用所致。`,
  ];
  // Producer: adds one piece every 100 ms, then waits 10 s before finishing.
  const feed = async () => {
    for (const segment of segments) {
      stream.addResponse(segment);
      await sleep(100);
    }
    await sleep(10 * 1000);
    console.log("finished!");
    stream.finish();
  };
  // Start the producer on a later tick so it runs alongside the response below.
  setTimeout(feed);
  await speaker.response({ stream });
  console.log("hello!");
}

View File

@ -874,8 +874,10 @@ merge2@^1.3.0, merge2@^1.4.1:
resolved "https://registry.yarnpkg.com/merge2/-/merge2-1.4.1.tgz#4368892f885e907455a6fd7dc55c0c9d404990ae"
integrity sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==
"mi-service-lite@file:../mi-service-lite":
mi-service-lite@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/mi-service-lite/-/mi-service-lite-2.0.0.tgz#c043a931574011c154a3113ecabe4fc2a61b328a"
integrity sha512-PqMWtvEHQ7a6mhKee9RAnT6Xh+rqf+RvhlCki/8VsSTnjREAzl/kxZh3U0ogFhN5iQzwlK4YC8Is0rnSljl2og==
dependencies:
axios "^1.6.5"
pako "^2.1.0"