kenken999 committed on
Commit
026f880
·
1 Parent(s): c2710ed
Files changed (1) hide show
  1. staticfiles/did.html +465 -1
staticfiles/did.html CHANGED
@@ -3,7 +3,6 @@
3
  <meta charset="UTF-8">
4
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
5
  <title>Miibo Avatar Sample</title>
6
- <script src="https://cdn.jsdelivr.net/gh/miibo-ai/prototypes@0.0.1/miibo-avatar/miibo-avatar.min.js"></script>
7
  <script src="https://cdnjs.cloudflare.com/ajax/libs/recorderjs/0.1.0/recorder.min.js" integrity="sha512-Dc8aBUPSsnAiEtyqTYZrldxDfs2FnS8cU7BVHIJ1m5atjKrtQCoPRIn3gsVbKm2qY8NwjpTVTnawoC4XBvEZiQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
8
  <style>
9
  <!--
@@ -18,6 +17,471 @@
18
  <video id="my-video" autoplay></video>
19
 
20
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  const miiboAvatar = new MiiboAvatar({
22
  container: "my-video",
23
  option: {
 
3
  <meta charset="UTF-8">
4
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
5
  <title>Miibo Avatar Sample</title>
 
6
  <script src="https://cdnjs.cloudflare.com/ajax/libs/recorderjs/0.1.0/recorder.min.js" integrity="sha512-Dc8aBUPSsnAiEtyqTYZrldxDfs2FnS8cU7BVHIJ1m5atjKrtQCoPRIn3gsVbKm2qY8NwjpTVTnawoC4XBvEZiQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
7
  <style>
8
  <!--
 
17
  <video id="my-video" autoplay></video>
18
 
19
  <script>
20
+
21
+ 'use strict';
22
/**
 * Drives a talking avatar inside a <video> element.
 *
 * The class wires three remote services together, as visible from the URLs
 * it calls:
 *   - Google Speech-to-Text (`speech.googleapis.com`) or the browser's
 *     speech recognition object to turn the user's voice into text,
 *   - the miibo chat endpoint (`api-mebo.dev`) to obtain a reply, and
 *   - the D-ID streaming API (`api.d-id.com`) to render the reply as a
 *     WebRTC video stream.
 *
 * Method and property names (including the historical spellings
 * `destoryTalk`, `checkSpeaching` and `speaching`) are kept unchanged so
 * existing callers continue to work.
 */
class MiiboAvatar {
  /**
   * @param {Object} config
   * @param {string} config.container - id of the <video> element to drive.
   * @param {Object} config.option
   * @param {Object} config.option.speech_to_text - read for `api_key`.
   * @param {Object} config.option.miibo - read for `api_key`, `agent_id`, `user_id`.
   * @param {Object} config.option.d_id - read for `key`, `service`, `priority`
   *   and `presenter` (`image_url`, `gender`, `voice_id`, `idle_movie`,
   *   `loading_movie`).
   */
  constructor(config) {
    this.container = config.container;
    this.speechToTextOptions = config.option.speech_to_text;
    this.miiboOptions = config.option.miibo;
    this.didOptions = config.option.d_id;
    this.initialize();
  }

  /**
   * Sets up state, the video element, speech recognition and the D-ID stream.
   */
  initialize() {
    // Initialise state before createNewStream() starts running.
    this.speaching = false;
    this.processing = false;
    this.streams = [];

    this.videoElement = document.getElementById(this.container);
    this.videoElement.setAttribute('playsinline', '');
    this.playIdleVideo();

    // Speech recognition is not available in every browser; a missing
    // implementation must not abort construction of the avatar.
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    this.rec = SpeechRecognition ? new SpeechRecognition() : null;

    // Fire-and-forget: createNewStream() handles and logs its own errors.
    this.createNewStream();
  }

  /**
   * Opens a new D-ID stream session and answers its WebRTC offer.
   * Any previously attached stream and peer connection are released first.
   * Errors are logged, never thrown.
   */
  async createNewStream() {
    try {
      this.stopAllStreams();
      this.closePC();

      const presenter = { source_url: this.didOptions.presenter.image_url };
      const sessionResponse = await this.fetchWithRetries(
        `https://api.d-id.com/${this.didOptions.service}/streams`,
        {
          method: 'POST',
          headers: {
            Authorization: `Basic ${this.didOptions.key}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(presenter),
        },
      );
      if (!sessionResponse.ok) {
        throw new Error(`D-ID stream creation failed with HTTP ${sessionResponse.status}`);
      }

      const {
        id: newStreamId,
        offer,
        ice_servers: iceServers,
        session_id: newSessionId,
      } = await sessionResponse.json();
      this.streamId = newStreamId;
      this.sessionId = newSessionId;

      try {
        this.sessionClientAnswer = await this.createPeerConnection(offer, iceServers);
      } catch (e) {
        console.log('Error creating peer connection:', e);
        this.stopAllStreams();
        this.closePC();
        return;
      }

      const sdpResponse = await fetch(
        `https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}/sdp`,
        {
          method: 'POST',
          headers: {
            Authorization: `Basic ${this.didOptions.key}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify({
            answer: this.sessionClientAnswer,
            session_id: this.sessionId,
          }),
        },
      );
      if (!sdpResponse.ok) {
        console.log('Error sending SDP answer: HTTP', sdpResponse.status);
      }
    } catch (error) {
      console.log('Error creating new stream:', error);
    }
  }

  /**
   * Requests microphone access and prepares `this.recorder` (Recorder.js).
   * Does nothing when `getUserMedia` is unavailable.
   */
  speechRecogInit() {
    this.audioContext = new (window.AudioContext || window.webkitAudioContext)();

    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      navigator.mediaDevices
        .getUserMedia({ audio: true })
        .then((stream) => {
          const input = this.audioContext.createMediaStreamSource(stream);
          this.audioContext.resume();
          this.recorder = new Recorder(input);
        })
        .catch((error) => {
          // e.g. the user denied microphone permission.
          console.error('Error accessing microphone:', error);
        });
    }
  }

  /** Starts capturing audio if the recorder has been initialised. */
  startRecording() {
    this.recorder && this.recorder.record();
  }

  /**
   * Stops capturing audio, sends the recording for recognition and clears
   * the recorder buffer. A no-op when the recorder was never initialised.
   */
  stopRecording() {
    if (!this.recorder) return;

    this.playLoadingVideo();
    this.recorder.stop();
    this.audioRecognize();
    // NOTE(review): assumes Recorder handles the export requested above
    // before this clear takes effect — confirm against Recorder.js.
    this.recorder.clear();
  }

  /**
   * Exports the recording as WAV, sends it to Google Speech-to-Text and
   * forwards the best transcript to `ask()`. The detected language code is
   * stored in `this.languageCode`. When nothing is recognised or the request
   * fails, the idle video is restored.
   */
  audioRecognize() {
    if (!this.recorder) return;

    this.recorder.exportWAV((blob) => {
      const reader = new FileReader();
      reader.onload = () => {
        const data = {
          "config": {
            "encoding": "LINEAR16",
            "languageCode": "ja-JP",
            "alternativeLanguageCodes": ["en-US"], // ,"cmn-CN","ko-KR"
            "audio_channel_count": 2
          },
          "audio": {
            "content": this.arrayBufferToBase64(reader.result)
          }
        };

        fetch(`https://speech.googleapis.com/v1/speech:recognize?key=${this.speechToTextOptions.api_key}`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json; charset=utf-8'
          },
          body: JSON.stringify(data)
        })
          .then((response) => response.json())
          .then((resultJson) => {
            // `results` is absent when no speech was detected.
            const best = resultJson.results?.[0];
            const transcript = best?.alternatives?.[0]?.transcript;
            if (!transcript) {
              console.log('No speech recognized:', resultJson);
              this.playIdleVideo();
              return;
            }
            this.languageCode = best.languageCode;
            this.ask(transcript);
          })
          .catch((error) => {
            console.error('Error recognizing speech:', error);
            this.playIdleVideo();
          });
      };
      reader.readAsArrayBuffer(blob);
    });
  }

  /**
   * Starts hands-free recognition using the browser's speech recognition
   * object (Chrome only, per the original note). Each final transcript is
   * passed to `ask()`; recognition restarts once processing has finished.
   */
  autoRecognize() {
    if (!this.rec) {
      console.warn('Speech recognition is not supported in this browser');
      return;
    }

    this.rec.continuous = false;
    this.rec.interimResults = false;
    this.rec.lang = 'ja-JP';

    this.rec.onresult = (e) => {
      this.processing = true;
      this.playLoadingVideo();

      this.rec.stop();

      for (let i = e.resultIndex; i < e.results.length; i++) {
        if (!e.results[i].isFinal) continue;

        const { transcript } = e.results[i][0];
        this.ask(transcript);
      }
    };

    this.rec.onend = () => { this.autoRecognizeRestart(); };
    this.rec.start();
  }

  /**
   * Restarts recognition, polling once per second while a reply is still
   * being processed (`this.processing`).
   */
  autoRecognizeRestart() {
    if (this.processing) {
      setTimeout(() => { this.autoRecognizeRestart(); }, 1000);
    } else {
      this.rec.start();
    }
  }

  /**
   * Encodes binary data as a base64 string.
   * @param {ArrayBuffer|Uint8Array} buffer
   * @returns {string}
   */
  arrayBufferToBase64(buffer) {
    const bytes = new Uint8Array(buffer);
    let binary = '';
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return window.btoa(binary);
  }

  /**
   * Sends a user message to the chat agent and speaks the reply.
   * @param {string} message
   * @returns {Promise<void>} settles when the reply has been handed to D-ID.
   */
  ask(message) {
    return this.getMiiboResponse(message);
  }

  /**
   * Streams a reply from the miibo API and passes it to `startTalk()`.
   *
   * The response body is read as newline-separated JSON; the last complete
   * line of each read is treated as the newest snapshot of the reply. When
   * `d_id.priority` is "speed", the first fragment is spoken as soon as a
   * second fragment exists, and the remainder is spoken once the stream ends.
   *
   * @param {string} utterance - text to send to the agent.
   */
  async getMiiboResponse(utterance) {
    const params = {
      api_key: this.miiboOptions.api_key,
      agent_id: this.miiboOptions.agent_id,
      uid: this.miiboOptions.user_id,
      stream: true,
      utterance: utterance
    };

    let sentences = [];
    let current_index = 0;

    try {
      const res = await fetch("https://api-mebo.dev/api", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(params),
      });
      if (!res.ok) {
        throw new Error(`miibo API responded with HTTP ${res.status}`);
      }

      const reader = res.body.getReader();
      const decoder = new TextDecoder();
      // Text received so far that does not yet form a parsable JSON line.
      let pending = "";

      try {
        for (;;) {
          const { done, value } = await reader.read();
          if (done) break;

          // `stream: true` keeps multi-byte characters that straddle two
          // network chunks intact instead of decoding them as garbage.
          pending += decoder.decode(value, { stream: true });

          const lines = pending.split("\n").filter((line) => line !== "");
          if (lines.length === 0) continue;

          const candidate = lines[lines.length - 1];
          let responseData;
          try {
            responseData = JSON.parse(candidate);
          } catch (e) {
            // Most likely an incomplete line; wait for the rest of it.
            pending = candidate;
            continue;
          }
          pending = "";

          try {
            const output = responseData.bestResponse.utterance
              .split("\n")
              .filter((x) => x.trim() != "")
              .join("\n");
            // NOTE(review): the pattern has no `g` flag, so only the first
            // punctuation mark is normalised before splitting. Kept as-is;
            // confirm whether splitting on every mark was intended.
            sentences = output.replace(/[。、\.\,\!\?!?]/,".").split(".");
            if (this.didOptions.priority == "speed" && current_index == 0 && current_index + 1 < sentences.length) {
              // Not awaited on purpose: keep reading while D-ID renders.
              this.startTalk(sentences[current_index++]);
            }
          } catch (e) {
            console.log(e);
          }
        }
      } finally {
        reader.releaseLock();
      }

      await this.startTalk(sentences.slice(current_index).join("。"));
    } catch (error) {
      console.log("Error fetching AI response: ", error);
      if (current_index === 0) {
        // Nothing was spoken, so no video status change will ever reset the
        // state; do it here so recognition can resume.
        this.processing = false;
        this.playIdleVideo();
      }
    }
  }

  /**
   * Asks D-ID to speak `input` on the current stream.
   *
   * The request is skipped when the peer connection is not ready or when
   * `input` is empty. The voice is `presenter.voice_id` when configured,
   * otherwise a default chosen from `this.languageCode` and
   * `presenter.gender` (Japanese when the language is unknown).
   * Errors are logged, never thrown.
   *
   * @param {string} input - text to speak.
   */
  async startTalk(input) {
    const pc = this.peerConnection;
    const isReady = pc?.signalingState === 'stable' || pc?.iceConnectionState === 'connected';
    if (!isReady) {
      console.warn('Skipping talk: peer connection is not ready');
      return;
    }
    if (typeof input !== 'string' || input.trim() === '') return;

    try {
      const isMale = this.didOptions.presenter.gender == "male";
      let voice_id = this.didOptions.presenter.voice_id || "";

      if (voice_id == "") {
        // Keys are lowercase; the language code is normalised before lookup
        // so that differently-cased codes select the same voice.
        // NOTE(review): 'cmn-hans-cn' is presumably what the recogniser
        // reports for Mandarin — confirm against real responses.
        const defaultVoices = {
          'en-us': { male: "en-US-GuyNeural", female: "en-US-AriaNeural" },
          'ko-kr': { male: "ko-KR-InJoonNeural", female: "ko-KR-YuJinNeural" },
          'cmn-cn': { male: "zh-CN-YunjianNeural", female: "zh-CN-XiaohanNeural" },
          'cmn-hans-cn': { male: "zh-CN-YunjianNeural", female: "zh-CN-XiaohanNeural" },
        };
        const fallback = { male: "ja-JP-KeitaNeural", female: "ja-JP-NanamiNeural" };
        const languageKey = String(this.languageCode ?? '').toLowerCase();
        const voices = Object.hasOwn(defaultVoices, languageKey) ? defaultVoices[languageKey] : fallback;
        voice_id = isMale ? voices.male : voices.female;
      }

      // Build the payload as an object and serialise it once at the end;
      // properties cannot be added to an already-serialised string.
      const payload = {
        script: {
          type: "text",
          subtitles: false,
          provider: {
            type: "microsoft",
            voice_id: voice_id
          },
          ssml: false,
          input: input
        },
        config: {
          fluent: false,
          pad_audio: 0,
          align_driver: false,
          stitch: false,
          auto_match: false,
          sharpen: false,
          normalization_factor: 0
        },
        session_id: this.sessionId,
      };

      if (this.didOptions.service === 'clips') {
        payload.background = { color: '#FFFFFF' };
      }

      await this.fetchWithRetries(
        `https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}`,
        {
          method: 'POST',
          headers: {
            Authorization: `Basic ${this.didOptions.key}`,
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(payload),
        },
      );
    } catch (error) {
      console.error('Error starting talk:', error);
    }
  }

  /**
   * Deletes the D-ID stream and releases local resources.
   * Local cleanup runs even when the DELETE request fails.
   * @param {*} [input] - unused; kept for backward compatibility.
   */
  async destoryTalk(input) {
    try {
      await fetch(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}`, {
        method: 'DELETE',
        headers: {
          Authorization: `Basic ${this.didOptions.key}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ session_id: this.sessionId }),
      });
    } catch (error) {
      console.error('Error destroying talk:', error);
    } finally {
      this.stopAllStreams();
      this.closePC();
    }
  }

  /**
   * Forwards a locally gathered ICE candidate to D-ID.
   * @param {RTCPeerConnectionIceEvent} event
   */
  onIceCandidate(event) {
    if (!event.candidate) return;

    const { candidate, sdpMid, sdpMLineIndex } = event.candidate;

    fetch(`https://api.d-id.com/${this.didOptions.service}/streams/${this.streamId}/ice`, {
      method: 'POST',
      headers: {
        Authorization: `Basic ${this.didOptions.key}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        candidate,
        sdpMid,
        sdpMLineIndex,
        session_id: this.sessionId,
      }),
    }).catch((error) => {
      console.error('Error sending ICE candidate:', error);
    });
  }

  /** Tears the connection down when ICE reports `failed` or `closed`. */
  onIceConnectionStateChange() {
    const state = this.peerConnection?.iceConnectionState;
    if (state === 'failed' || state === 'closed') {
      this.stopAllStreams();
      this.closePC();
    }
  }

  /**
   * Starts polling inbound video statistics every 100 ms to detect when the
   * remote video starts or stops delivering bytes, and reports each change
   * through `onVideoStatusChange()`.
   *
   * Only the video track is polled, and any previous poller is cleared
   * first, so that a single timer exists and `closePC()` can stop it.
   *
   * @param {RTCTrackEvent} event
   */
  onTrack(event) {
    if (!event.track || !event.streams || event.streams.length === 0) return;
    // The poller only ever looks at video reports.
    if (event.track.kind !== 'video') return;

    clearInterval(this.statsIntervalId);
    const [remoteStream] = event.streams;

    this.statsIntervalId = setInterval(async () => {
      const pc = this.peerConnection;
      if (!pc) return; // connection was closed between ticks

      try {
        const stats = await pc.getStats(event.track);
        stats.forEach((report) => {
          const kind = report.kind ?? report.mediaType;
          if (report.type !== 'inbound-rtp' || kind !== 'video') return;

          const isReceiving = report.bytesReceived > this.lastBytesReceived;
          if (this.videoIsPlaying !== isReceiving) {
            this.videoIsPlaying = isReceiving;
            this.onVideoStatusChange(this.videoIsPlaying, remoteStream);
          }
          this.lastBytesReceived = report.bytesReceived;
        });
      } catch (error) {
        console.error('Error reading stream stats:', error);
      }
    }, 100);
  }

  /**
   * Reacts to the remote video starting or stopping.
   * @param {boolean} videoIsPlaying - whether new video bytes are arriving.
   * @param {MediaStream} stream - the remote stream the track belongs to.
   */
  onVideoStatusChange(videoIsPlaying, stream) {
    if (videoIsPlaying) {
      this.streams.push(stream);
      this.checkSpeaching();
    } else {
      this.speaching = false;
      this.processing = false;
      this.playIdleVideo();
    }
  }

  /**
   * Shows the next queued stream, polling every 20 ms while `this.speaching`
   * is set.
   */
  checkSpeaching() {
    if (this.speaching) {
      setTimeout(() => { this.checkSpeaching(); }, 20);
    } else {
      this.setVideoElement(this.streams.shift());
    }
  }

  /**
   * Returns the peer-connection listeners bound to this instance.
   * They are created once so that the very same function objects can be
   * passed to both `addEventListener` and `removeEventListener`.
   * @returns {Object<string, Function>} listeners keyed by event type.
   */
  getPeerConnectionListeners() {
    if (!this.pcListeners) {
      this.pcListeners = {
        icecandidate: this.onIceCandidate.bind(this),
        iceconnectionstatechange: this.onIceConnectionStateChange.bind(this),
        track: this.onTrack.bind(this),
      };
    }
    return this.pcListeners;
  }

  /**
   * Creates the peer connection if needed and answers the remote offer.
   * @param {RTCSessionDescriptionInit} offer - offer returned by D-ID.
   * @param {RTCIceServer[]} iceServers - ICE servers returned by D-ID.
   * @returns {Promise<RTCSessionDescriptionInit>} the local answer.
   */
  async createPeerConnection(offer, iceServers) {
    if (!this.peerConnection) {
      const PeerConnection =
        window.RTCPeerConnection || window.webkitRTCPeerConnection || window.mozRTCPeerConnection;
      this.peerConnection = new PeerConnection({ iceServers });
      for (const [type, listener] of Object.entries(this.getPeerConnectionListeners())) {
        this.peerConnection.addEventListener(type, listener, true);
      }
    }

    await this.peerConnection.setRemoteDescription(offer);
    const sessionClientAnswer = await this.peerConnection.createAnswer();
    await this.peerConnection.setLocalDescription(sessionClientAnswer);
    return sessionClientAnswer;
  }

  /**
   * Shows a remote stream in the video element.
   * @param {MediaStream} [stream] - ignored when missing.
   */
  setVideoElement(stream) {
    if (!stream) return;
    this.videoElement.srcObject = stream;
    this.videoElement.loop = false;

    // safari hotfix
    if (this.videoElement.paused) {
      this.videoElement.play().catch((e) => {
        // Best effort only: playback may be refused by the browser.
        console.warn('Unable to start video playback:', e);
      });
    }
  }

  /** Loops the configured idle movie. */
  playIdleVideo() {
    this.videoElement.srcObject = null;
    this.videoElement.src = this.didOptions.presenter.idle_movie;
    this.videoElement.loop = true;
  }

  /** Plays the configured loading movie once. */
  playLoadingVideo() {
    this.videoElement.srcObject = null;
    this.videoElement.src = this.didOptions.presenter.loading_movie;
    this.videoElement.loop = false;
  }

  /** Stops every track of the stream currently attached to the video element. */
  stopAllStreams() {
    if (this.videoElement.srcObject) {
      this.videoElement.srcObject.getTracks().forEach((track) => track.stop());
      this.videoElement.srcObject = null;
    }
  }

  /**
   * Closes a peer connection, detaches its listeners and stops the stats
   * poller.
   * @param {RTCPeerConnection} [pc=this.peerConnection]
   */
  closePC(pc = this.peerConnection) {
    if (!pc) return;
    pc.close();
    for (const [type, listener] of Object.entries(this.getPeerConnectionListeners())) {
      pc.removeEventListener(type, listener, true);
    }
    clearInterval(this.statsIntervalId);
    this.statsIntervalId = undefined;
    if (pc === this.peerConnection) {
      this.peerConnection = null;
    }
  }

  /**
   * `fetch` with exponential back-off on network failure.
   * Retries up to three times, waiting 2^n / 4 seconds plus up to one second
   * of jitter, capped at four seconds. HTTP error statuses are not retried;
   * they resolve normally, as with plain `fetch`.
   *
   * @param {string} url
   * @param {RequestInit} options
   * @param {number} [retries=1] - current attempt number (internal).
   * @returns {Promise<Response>}
   * @throws {Error} when every attempt failed; the last failure is attached
   *   as `cause`.
   */
  async fetchWithRetries(url, options, retries = 1) {
    const maxRetryCount = 3;
    const maxDelaySec = 4;
    try {
      return await fetch(url, options);
    } catch (err) {
      if (retries > maxRetryCount) {
        throw new Error(`Max retries exceeded. error: ${err}`, { cause: err });
      }
      const delay = Math.min(Math.pow(2, retries) / 4 + Math.random(), maxDelaySec) * 1000;
      await new Promise((resolve) => setTimeout(resolve, delay));
      return this.fetchWithRetries(url, options, retries + 1);
    }
  }
}
484
+
485
  const miiboAvatar = new MiiboAvatar({
486
  container: "my-video",
487
  option: {