Update README.md

#2
opened by reach-vb (HF staff)
Dockerfile CHANGED
@@ -45,21 +45,10 @@ RUN apt-get update && \
     # gradio dependencies \
     ffmpeg \
     # fairseq2 dependencies \
-    libjpeg8-dev \
-    libpng-dev \
     libsndfile-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-USER root
-RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
-# install older versions libjpeg62-turbo and libpng15
-RUN wget http://ftp.us.debian.org/debian/pool/main/libj/libjpeg-turbo/libjpeg62-turbo_2.1.5-2_amd64.deb && \
-    dpkg -i libjpeg62-turbo_2.1.5-2_amd64.deb && \
-    rm libjpeg62-turbo_2.1.5-2_amd64.deb
-RUN wget https://master.dl.sourceforge.net/project/libpng/libpng15/1.5.30/libpng-1.5.30.tar.gz && \
-    tar -xvf libpng-1.5.30.tar.gz && cd libpng-1.5.30 && ./configure && make && make install && cd .. && rm -rf libpng-1.5.30.tar.gz libpng-1.5.30
-
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
@@ -76,18 +65,13 @@ RUN pyenv install $PYTHON_VERSION && \
 
 COPY --chown=user:user ./seamless_server ./seamless_server
 # change dir since pip needs to seed whl folder
-RUN cd seamless_server && \
-    pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118 && \
-    pip install --no-cache-dir --upgrade -r requirements.txt
+RUN cd seamless_server && pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --from=frontend /app/dist ./streaming-react-app/dist
 
 WORKDIR $HOME/app/seamless_server
-RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=false \
-    huggingface-cli login --token $(cat /run/secrets/HF_TOKEN) || echo "HF_TOKEN error" && \
-    huggingface-cli download meta-private/SeamlessExpressive pretssel_melhifigan_wm-final.pt --local-dir ./models/Seamless/ || echo "HF_TOKEN error" && \
-    ln -s $(readlink -f models/Seamless/pretssel_melhifigan_wm-final.pt) models/Seamless/pretssel_melhifigan_wm.pt || true;
-
+USER root
+RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
 USER user
-RUN ["chmod", "+x", "./run_docker.sh"]
-CMD ./run_docker.sh
+CMD [ "uvicorn", "app_pubsub:app", "--host", "0.0.0.0", "--port", "7860" ]
+
README.md CHANGED
@@ -5,9 +5,7 @@ colorFrom: blue
 colorTo: yellow
 sdk: docker
 pinned: false
-suggested_hardware: t4-small
-models:
-- facebook/seamless-streaming
+suggested_hardware: t4-medium
 ---
 
 # Seamless Streaming demo
@@ -17,21 +15,12 @@ You can simply duplicate the space to run it.
 ## Running locally
 ### Install backend seamless_server dependencies
 
-> [!NOTE]
-> Please note: we *do not* recommend running the model on CPU. CPU inference will be slow and introduce noticable delays in the simultaneous translation.
+`cd seamless-experiences/seamless_vc/seamless_server`
 
-> [!NOTE]
-> The example below is for PyTorch stable (2.1.1) and variant cu118.
-> Check [here](https://pytorch.org/get-started/locally/) to find the torch/torchaudio command for your variant.
-> Check [here](https://github.com/facebookresearch/fairseq2#variants) to find the fairseq2 command for your variant.
-
-If running for the first time, create conda environment and install the desired torch version. Then install the rest of the requirements:
+If running for the first time, create conda environment:
 ```
-cd seamless_server
-conda create --yes --name smlss_server python=3.8 libsndfile==1.0.31
+conda create --name smlss_server python=3.8
 conda activate smlss_server
-conda install --yes pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
-pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118
 pip install -r requirements.txt
 ```
 
@@ -39,9 +28,8 @@ pip install -r requirements.txt
 ```
 conda install -c conda-forge nodejs
 cd streaming-react-app
-npm install --global yarn
-yarn
-yarn build # this will create the dist/ folder
+npm install
+npm run build # this will create the dist/ folder
 ```
 
 
@@ -51,14 +39,12 @@ The server can be run locally with uvicorn below.
 Run the server in dev mode:
 
 ```
-cd seamless_server
 uvicorn app_pubsub:app --reload --host localhost
 ```
 
 Run the server in prod mode:
 
 ```
-cd seamless_server
 uvicorn app_pubsub:app --host 0.0.0.0
 ```
 
seamless_server/app_pubsub.py CHANGED
@@ -123,26 +123,8 @@ class ServerLock(TypedDict):
     client_id: str
     member_object: Member
 
-MAX_SPEAKERS = os.environ.get("MAX_SPEAKERS")
-
-if os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1":
-    logger.info("LOCK_SERVER_COMPLETELY is set. Server will be locked on startup.")
-if MAX_SPEAKERS is not None and int(MAX_SPEAKERS):
-    logger.info(f"MAX_SPEAKERS is set to: {MAX_SPEAKERS}")
-dummy_server_lock_member_object = Member(
-    client_id="seamless_user", session_id="dummy", name="Seamless User"
-)
-# Normally this would be an actual transcoder, but it's fine putting True here since currently we only check for the presence of the transcoder
-dummy_server_lock_member_object.transcoder = True
-server_lock: Optional[ServerLock] = (
-    {
-        "name": "Seamless User",
-        "client_id": "seamless_user",
-        "member_object": dummy_server_lock_member_object,
-    }
-    if os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1"
-    else None
-)
+
+server_lock: Optional[ServerLock] = None
 
 server_id = str(uuid4())
 
@@ -519,8 +501,6 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):
         server_lock is not None
         and config_dict.get("lockServerName")
         == ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME
-        # If we are locking the server completely we don't want someone to be able to unlock it
-        and not os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1"
     ):
         server_lock = None
         logger.info(
@@ -528,7 +508,7 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):
         )
 
     # If the server is not locked, set a lock. If it's already locked to this client, update the lock object
-    if server_lock is None or server_lock.get("client_id") == client_id:
+    elif server_lock is None or server_lock.get("client_id") == client_id:
         # TODO: Add some sort of timeout as a backstop in case someone leaves the browser tab open after locking the server
         server_lock = {
             "name": config_dict.get("lockServerName"),
@@ -559,12 +539,6 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):
 
     return {"roomsJoined": sio.rooms(sid), "roomID": room_id}
 
-def allow_speaker(room, client_id):
-    if MAX_SPEAKERS is not None and client_id in room.speakers:
-        room_statuses = {room_id: room.get_room_status_dict() for room_id, room in rooms.items()}
-        speakers = sum(room_status["activeTranscoders"] for room_status in room_statuses.values())
-        return speakers < int(MAX_SPEAKERS)
-    return True
 
 # TODO: Add code to prevent more than one speaker from connecting/streaming at a time
 @sio.event
@@ -585,12 +559,6 @@ async def configure_stream(sid, config):
         )
         return {"status": "error", "message": "member_or_room_is_none"}
 
-    if not allow_speaker(room, client_id):
-        logger.error(
-            f"In MAX_SPEAKERS mode we only allow one speaker at a time. Ignoring request to configure stream from client {client_id}."
-        )
-        return {"status": "error", "message": "max_speakers"}
-
     # If there is a server lock WITH an active transcoder session, prevent other users from configuring and starting a stream
     # If the server lock client does NOT have an active transcoder session allow this to proceed, knowing that
     # this stream will be interrupted if the server lock client starts streaming
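
Net effect of the app_pubsub.py changes above: the MAX_SPEAKERS cap and the LOCK_SERVER_COMPLETELY startup lock are removed, and the lock-release branch is now chained to the acquire/refresh branch with an `elif`. A minimal sketch of the resulting flow, assuming the dict-shaped lock and the escape-hatch constant visible in the diff (an illustration, not the app's actual code):

```python
# Sketch of the simplified server-lock flow; shapes and the constant's value
# are assumptions based on the diff above, not the app's actual code.
from typing import Optional

ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME = "unlockTheServer"  # assumed value


def update_server_lock(
    server_lock: Optional[dict], client_id: str, lock_name: Optional[str]
) -> Optional[dict]:
    if server_lock is not None and lock_name == ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME:
        # Escape hatch: release the lock.
        return None
    elif server_lock is None or server_lock.get("client_id") == client_id:
        # Acquire a free lock, or refresh a lock this client already holds.
        return {"name": lock_name, "client_id": client_id}
    # Locked by another client: leave it unchanged.
    return server_lock
```

Because of the `elif`, a request that matches the escape-hatch name releases the lock and does not immediately re-acquire it in the same pass, which the previous standalone `if` allowed.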
seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml DELETED
@@ -1,25 +0,0 @@
-agent_class: seamless_communication.streaming.agents.seamless_s2st.SeamlessS2STDualVocoderVADAgent
-monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
-unity_model_name: seamless_streaming_unity
-sentencepiece_model: spm_256k_nllb100.model
-
-task: s2st
-tgt_lang: "eng"
-min_unit_chunk_size: 50
-decision_threshold: 0.7
-no_early_stop: True
-block_ngrams: True
-vocoder_name: vocoder_v2
-expr_vocoder_name: vocoder_pretssel
-gated_model_dir: .
-expr_vocoder_gain: 3.0
-upstream_idx: 1
-wav2vec_yaml: wav2vec.yaml
-min_starting_wait_w2vbert: 192
-
-config_yaml: cfg_fbank_u2t.yaml
-upstream_idx: 1
-detokenize_only: True
-device: cuda:0
-max_len_a: 0
-max_len_b: 1000
seamless_server/models/SeamlessStreaming/{vad_s2st_sc_main.yaml → vad_s2st_sc_24khz_main.yaml} RENAMED
File without changes
seamless_server/requirements.txt CHANGED
@@ -1,6 +1,7 @@
+--pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.1/cu118
+simuleval==1.1.3
 # seamless_communication
-git+https://github.com/facebookresearch/seamless_communication.git
-# ./whl/seamless_communication-1.0.0-py3-none-any.whl
+./whl/seamless_communication-1.0.0-py3-none-any.whl
 Flask==2.1.3
 Flask_Sockets==0.2.1
 g2p_en==2.1.0
@@ -13,10 +14,10 @@ protobuf==4.24.2
 psola==0.0.1
 pydub==0.25.1
 silero==0.4.1
+# simuleval==1.1.1
 soundfile==0.11.0
 stable_ts==1.4.0
-# torch # to be installed by user for desired PyTorch version
-# simuleval # to be installed by seamless_communication
+torch # specific torch version depends on fairseq2 installation
 Werkzeug==2.0.3
 whisper==1.1.10
 colorlog==6.7.0
seamless_server/run_docker.sh DELETED
@@ -1,5 +0,0 @@
-# !/bin/bash
-if [ -f models/Seamless/pretssel_melhifigan_wm.pt ] ; then
-    export USE_EXPRESSIVE_MODEL=1;
-fi
-uvicorn app_pubsub:app --host 0.0.0.0 --port 7860
seamless_server/src/simuleval_agent_directory.py CHANGED
@@ -1,7 +1,6 @@
 # Creates a directory in which to look up available agents
 
-import os
-from typing import List, Optional
+from typing import List
 from src.simuleval_transcoder import SimulevalTranscoder
 import json
 import logging
@@ -34,10 +33,8 @@ class AgentWithInfo:
         # Supported dynamic params are defined in StreamingTypes.ts
         dynamic_params: List[str] = [],
         description="",
-        has_expressive: Optional[bool] = None,
     ):
         self.agent = agent
-        self.has_expressive = has_expressive
         self.name = name
         self.description = description
         self.modalities = modalities
@@ -78,7 +75,6 @@
 class SimulevalAgentDirectory:
     # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
     seamless_streaming_agent = "SeamlessStreaming"
-    seamless_agent = "Seamless"
 
     def __init__(self):
        self.agents = []
@@ -100,12 +96,7 @@
                 model_id,
             )
         except Exception as e:
-            from fairseq2.assets.error import AssetError
             logger.warning("Failed to build agent %s: %s" % (model_id, e))
-            if isinstance(e, AssetError):
-                logger.warning(
-                    "Please download gated assets and set `gated_model_dir` in the config"
-                )
             raise e
 
         return agent
@@ -119,32 +110,20 @@
             for agent_info in agent_infos:
                 self.add_agent(agent_info)
         else:
-            s2s_agent = None
-            if os.environ.get("USE_EXPRESSIVE_MODEL", "0") == "1":
-                logger.info("Building expressive model...")
-                s2s_agent = self.build_agent_if_available(
-                    SimulevalAgentDirectory.seamless_agent,
-                    config_name="vad_s2st_sc_24khz_main.yaml",
-                )
-                has_expressive = True
-            else:
-                logger.info("Building non-expressive model...")
-                s2s_agent = self.build_agent_if_available(
-                    SimulevalAgentDirectory.seamless_streaming_agent,
-                    config_name="vad_s2st_sc_main.yaml",
-                )
-                has_expressive = False
+            s2s_m4t_expr_agent = self.build_agent_if_available(
+                SimulevalAgentDirectory.seamless_streaming_agent,
+                config_name="vad_s2st_sc_24khz_main.yaml",
+            )
 
-            if s2s_agent:
+            if s2s_m4t_expr_agent:
                 self.add_agent(
                     AgentWithInfo(
-                        agent=s2s_agent,
+                        agent=s2s_m4t_expr_agent,
                         name=SimulevalAgentDirectory.seamless_streaming_agent,
                         modalities=["s2t", "s2s"],
                         target_langs=M4T_P0_LANGS,
                         dynamic_params=["expressive"],
                         description="multilingual expressive model that supports S2S and S2T",
-                        has_expressive=has_expressive,
                     )
                 )
 
@@ -158,7 +137,7 @@
     def get_agent(self, name):
         for agent in self.agents:
             if agent.name == name:
-                return agent
+                return agent.agent
         return None
 
     def get_agent_or_throw(self, name):
seamless_server/src/simuleval_transcoder.py CHANGED
@@ -119,8 +119,7 @@ class OutputSegments:
 
 class SimulevalTranscoder:
     def __init__(self, agent, sample_rate, debug, buffer_limit):
-        self.agent = agent.agent
-        self.has_expressive = agent.has_expressive
+        self.agent = agent
         self.input_queue = asyncio.Queue()
         self.output_queue = asyncio.Queue()
         self.states = self.agent.build_states()
@@ -186,7 +185,7 @@
         logger.info(*args)
 
     @classmethod
-    def build_agent(cls, model_path, config_name):
+    def build_agent(cls, model_path, config_name="vad_s2st_main.yaml"):
         logger.info(f"Building simuleval agent: {model_path}, {config_name}")
         agent = build_system_from_dir(
             Path(__file__).resolve().parent.parent / f"models/{model_path}",
@@ -209,10 +208,6 @@
             tgt_lang=dynamic_config.get("targetLanguage"),
             config=dynamic_config,
         )
-        if dynamic_config.get("expressive") is True and self.has_expressive is False:
-            logger.warning(
-                "Passing 'expressive' but the agent does not support expressive output!"
-            )
         # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
         self.input_queue.put_nowait(segment)
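Together with the simuleval_agent_directory.py change, `get_agent` now returns the underlying simuleval agent (`agent.agent`), so the transcoder stores what it is handed instead of unwrapping an `AgentWithInfo` itself. A hedged wiring sketch; `build_and_add_agents` and the argument values are placeholders for illustration, while the other names appear in the diffs above:

```python
# Illustrative wiring only; `build_and_add_agents` is a hypothetical setup call
# and the constructor arguments are example values.
from src.simuleval_agent_directory import SimulevalAgentDirectory
from src.simuleval_transcoder import SimulevalTranscoder

directory = SimulevalAgentDirectory()
directory.build_and_add_agents()  # hypothetical: build and register agents

# get_agent() now hands back the raw simuleval agent rather than the
# AgentWithInfo wrapper, matching the new SimulevalTranscoder.__init__.
agent = directory.get_agent(SimulevalAgentDirectory.seamless_streaming_agent)
transcoder = SimulevalTranscoder(agent, sample_rate=48000, debug=False, buffer_limit=1)
```
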
seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5b81add4d9917ac562c2e8a10bd5b3c88804b8bd94c56cef4e9a01ecde4a839
-size 204321
+oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
+size 201552
streaming-react-app/package.json CHANGED
@@ -1,11 +1,11 @@
 {
   "name": "streaming-react-app",
   "private": true,
-  "version": "0.0.14",
+  "version": "0.0.13",
   "type": "module",
   "scripts": {
     "dev": "vite --host --strictPort",
-    "build": "vite build",
+    "build": "tsc && vite build",
     "preview": "vite preview",
     "clean:node-modules": "rm -rf node_modules/",
     "ts-check": "tsc --noEmit",
@@ -24,6 +24,7 @@
     "amazon-cognito-identity-js": "^6.3.6",
     "audiobuffer-to-wav": "^1.0.0",
     "aws-sdk": "^2.1472.0",
+    "iso-639-1": "^3.1.0",
     "js-cookie": "^3.0.5",
     "lodash": "4.17.21",
     "react": "^18.2.0",
streaming-react-app/src/StreamingInterface.tsx CHANGED
@@ -57,12 +57,12 @@ import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
 import {getURLParams} from './URLParams';
 import debug from './debug';
 import DebugSection from './DebugSection';
-import Switch from '@mui/material/Switch';
-import Grid from '@mui/material/Grid';
+import {Grid} from '@mui/material';
 import {getLanguageFromThreeLetterCode} from './languageLookup';
-import HeadphonesIcon from '@mui/icons-material/Headphones';
 
-const AUDIO_STREAM_DEFAULTS = {
+const AUDIO_STREAM_DEFAULTS: {
+  [key in SupportedInputSource]: BrowserAudioStreamConfig;
+} = {
   userMedia: {
     echoCancellation: false,
     noiseSuppression: true,
@@ -71,10 +71,13 @@ const AUDIO_STREAM_DEFAULTS = {
     echoCancellation: false,
     noiseSuppression: false,
   },
-} as const;
+};
 
 async function requestUserMediaAudioStream(
-  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['userMedia'],
+  config: BrowserAudioStreamConfig = {
+    echoCancellation: false,
+    noiseSuppression: true,
+  },
 ) {
   const stream = await navigator.mediaDevices.getUserMedia({
     audio: {...config, channelCount: 1},
@@ -87,7 +90,10 @@
 }
 
 async function requestDisplayMediaAudioStream(
-  config: BrowserAudioStreamConfig = AUDIO_STREAM_DEFAULTS['displayMedia'],
+  config: BrowserAudioStreamConfig = {
+    echoCancellation: false,
+    noiseSuppression: false,
+  },
 ) {
   const stream = await navigator.mediaDevices.getDisplayMedia({
     audio: {...config, channelCount: 1},
@@ -152,7 +158,6 @@ export default function StreamingInterface() {
     useState<StreamingStatus>('stopped');
 
   const isStreamConfiguredRef = useRef<boolean>(false);
-  const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false);
 
   const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
   const [inputSource, setInputSource] =
@@ -166,9 +171,6 @@
 
   // Dynamic Params:
   const [targetLang, setTargetLang] = useState<string | null>(null);
-  const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
-    null,
-  );
 
   const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
     debugParam ?? false,
@@ -250,7 +252,6 @@
     setAgent((prevAgent) => {
       if (prevAgent?.name !== newAgent?.name) {
         setTargetLang(newAgent?.targetLangs[0] ?? null);
-        setEnableExpressive(null);
       }
       return newAgent;
     });
@@ -309,7 +310,6 @@
     console.log('[configureStreamAsync] sending config', config);
 
     socket.emit('configure_stream', config, (statusObject) => {
-      setHasMaxSpeakers(statusObject.message === 'max_speakers')
       if (statusObject.status === 'ok') {
         isStreamConfiguredRef.current = true;
         console.debug(
@@ -427,7 +427,6 @@
     // available before actually configuring and starting the stream
     const fullDynamicConfig: DynamicConfig = {
       targetLanguage: targetLang,
-      expressive: enableExpressive,
     };
 
     await onSetDynamicConfig(fullDynamicConfig);
@@ -757,23 +756,14 @@
           <div className="header-container-sra">
            <div>
              <Typography variant="body2" sx={{color: '#65676B'}}>
-                Welcome! This space is limited to one speaker at a time.
-                If using the live HF space, sharing room code to listeners on another
-                IP address may not work because it's running on different replicas.
-                Use headphones if you are both speaker and listener to prevent feedback.
-                <br/>
-                If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>.
-                In your duplicated space, join a room as speaker or listener (or both),
-                and share the room code to invite listeners.
+                Welcome! Join a room as speaker or listener (or both), and share the
+                room code to invite listeners.
                 <br/>
-                Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information.
-                <br/>
                 SeamlessStreaming model is a research model and is not released
-                for production deployment. It is important to use a microphone with
-                noise cancellation (for e.g. a smartphone), otherwise you may see model hallucination on noises.
-                It works best if you pause every couple of sentences, or you may wish adjust the VAD threshold
-                in the model config. The real-time performance will degrade
-                if you try streaming multiple speakers at the same time.
+                for production deployment. The streaming quality is closely
+                related to proper VAD segmentation. It works best if you pause
+                every couple of sentences, or you may wish adjust the VAD threshold
+                in the model config.
               </Typography>
            </div>
          </div>
@@ -917,28 +907,6 @@
                     spacing={1}
                     alignItems="flex-start"
                     sx={{flexGrow: 1}}>
-                    {currentAgent?.dynamicParams?.includes(
-                      'expressive',
-                    ) && (
-                      <FormControlLabel
-                        control={
-                          <Switch
-                            checked={enableExpressive ?? false}
-                            onChange={(
-                              event: React.ChangeEvent<HTMLInputElement>,
-                            ) => {
-                              const newValue = event.target.checked;
-                              setEnableExpressive(newValue);
-                              onSetDynamicConfig({
-                                expressive: newValue,
-                              });
-                            }}
-                          />
-                        }
-                        label="Expressive"
-                      />
-                    )}
-
                     {isListener && (
                       <Box
                         sx={{
@@ -955,6 +923,13 @@
                   </Grid>
                 </Stack>
 
+                <Typography variant="body2" sx={{color: '#65676B'}}>
+                  Note: we don't recommend echo cancellation, as it may distort
+                  the input audio (dropping words/sentences) if there is output
+                  audio playing. Instead, you should use headphones if you'd like
+                  to listen to the output audio while speaking.
+                </Typography>
+
                 <Stack
                   direction="row"
                   spacing={2}
@@ -984,9 +959,8 @@
                   </RadioGroup>
                 </FormControl>
               </Box>
-
-              <Box sx={{flex: 1, flexGrow: 2}}>
-                <FormControl disabled={streamFixedConfigOptionsDisabled}>
+              <Box sx={{flex: 1}}>
+                <FormControl disabled={streamFixedConfigOptionsDisabled}>
                   <FormLabel>Options</FormLabel>
                   <FormControlLabel
                     control={
@@ -1003,9 +977,9 @@
                       }
                      />
                    }
-                    label="Noise Suppression"
+                    label="Noise Suppression (Browser)"
                   />
-                  <FormControlLabel
+                  <FormControlLabel
                     control={
                       <Checkbox
                         checked={
@@ -1020,7 +994,7 @@
                       }
                      />
                    }
-                    label="Echo Cancellation (not recommended)"
+                    label="Echo Cancellation (Browser)"
                   />
                   <FormControlLabel
                     control={
@@ -1031,34 +1005,12 @@
                       ) => setServerDebugFlag(event.target.checked)}
                      />
                    }
-                    label="Enable Server Debugging"
+                    label="Server Debug Flag"
                   />
                 </FormControl>
               </Box>
             </Stack>
 
-            {isSpeaker &&
-              isListener &&
-              inputSource === 'userMedia' &&
-              !enableEchoCancellation &&
-              gain !== 0 && (
-                <div>
-                  <Alert severity="warning" icon={<HeadphonesIcon />}>
-                    Headphones required to prevent feedback.
-                  </Alert>
-                </div>
-              )}
-
-            {isSpeaker && enableEchoCancellation && (
-              <div>
-                <Alert severity="warning">
-                  We don't recommend using echo cancellation as it may
-                  distort the input audio. If possible, use headphones and
-                  disable echo cancellation instead.
-                </Alert>
-              </div>
-            )}
-
             <Stack direction="row" spacing={2}>
               {streamingStatus === 'stopped' ? (
                 <Button
@@ -1120,13 +1072,7 @@
                   </Alert>
                 </div>
               )}
-              {serverState != null && hasMaxSpeakers && (
-                <div>
-                  <Alert severity="error">
-                    {`Maximum number of speakers reached. Please try again at a later time.`}
-                  </Alert>
-                </div>
-              )}
+
               {serverState != null &&
                 serverState.totalActiveTranscoders >=
                   TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
@@ -1141,7 +1087,7 @@
                 serverState.serverLock.clientID !== clientID && (
                   <div>
                     <Alert severity="warning">
-                      {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
+                      {`The server is currently locked by "${serverState.serverLock.name}". Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
                     </Alert>
                   </div>
                 )}
streaming-react-app/src/languageLookup.ts CHANGED
@@ -1,110 +1,108 @@
-const LANG3_TO_NAME = {
-  afr: 'afrikaans',
-  amh: 'amharic',
-  arb: 'arabic',
-  asm: 'assamese',
-  azj: 'azerbaijani',
-  bak: 'bashkir',
-  bel: 'belarusian',
-  ben: 'bengali',
-  bod: 'tibetan',
-  bos: 'bosnian',
-  bre: 'breton',
-  bul: 'bulgarian',
-  cat: 'catalan',
-  ces: 'czech',
-  cmn: 'chinese',
-  cym: 'welsh',
-  dan: 'danish',
-  deu: 'german',
-  ell: 'greek',
-  eng: 'english',
-  est: 'estonian',
-  eus: 'basque',
-  fao: 'faroese',
-  fin: 'finnish',
-  fra: 'french',
-  glg: 'galician',
-  guj: 'gujarati',
-  hat: 'haitian creole',
-  hau: 'hausa',
-  haw: 'hawaiian',
-  heb: 'hebrew',
-  hin: 'hindi',
-  hrv: 'croatian',
-  hun: 'hungarian',
-  hye: 'armenian',
-  ind: 'indonesian',
-  isl: 'icelandic',
-  ita: 'italian',
-  jav: 'javanese',
-  jpn: 'japanese',
-  kan: 'kannada',
-  kat: 'georgian',
-  kaz: 'kazakh',
-  khk: 'mongolian',
-  khm: 'khmer',
-  kor: 'korean',
-  lao: 'lao',
-  lat: 'latin',
-  lin: 'lingala',
-  lit: 'lithuanian',
-  ltz: 'luxembourgish',
-  lvs: 'latvian',
-  mal: 'malayalam',
-  mar: 'marathi',
-  mkd: 'macedonian',
-  mlg: 'malagasy',
-  mlt: 'maltese',
-  mri: 'maori',
-  mya: 'myanmar',
-  nld: 'dutch',
-  nno: 'nynorsk',
-  nob: 'norwegian',
-  npi: 'nepali',
-  oci: 'occitan',
-  pan: 'punjabi',
-  pbt: 'pashto',
-  pes: 'persian',
-  pol: 'polish',
-  por: 'portuguese',
-  ron: 'romanian',
-  rus: 'russian',
-  san: 'sanskrit',
-  sin: 'sinhala',
-  slk: 'slovak',
-  slv: 'slovenian',
-  sna: 'shona',
-  snd: 'sindhi',
-  som: 'somali',
-  spa: 'spanish',
-  sqi: 'albanian',
-  srp: 'serbian',
-  sun: 'sundanese',
-  swe: 'swedish',
-  swh: 'swahili',
-  tam: 'tamil',
-  tat: 'tatar',
-  tel: 'telugu',
-  tgk: 'tajik',
-  tgl: 'tagalog',
-  tha: 'thai',
-  tuk: 'turkmen',
-  tur: 'turkish',
-  ukr: 'ukrainian',
-  urd: 'urdu',
-  uzn: 'uzbek',
-  vie: 'vietnamese',
-  yid: 'yiddish',
-  yor: 'yoruba',
-  zlm: 'malay',
+const LANG3_FULL = {
+  eng: 'english',
+  arb: 'arabic',
+  asm: 'assamese',
+  bel: 'belarusian',
+  bul: 'bulgarian',
+  ben: 'bengali',
+  cat: 'catalan',
+  ces: 'czech',
+  cym: 'welsh',
+  dan: 'danish',
+  deu: 'german',
+  ell: 'greek',
+  spa: 'spanish',
+  est: 'estonian',
+  pes: 'persian',
+  fin: 'finnish',
+  fra: 'french',
+  hin: 'hindi',
+  hun: 'hungarian',
+  ind: 'indonesian',
+  ita: 'italian',
+  jpn: 'japanese',
+  kat: 'georgian',
+  lit: 'lithuanian',
+  lvs: 'latvian',
+  khk: 'mongolian',
+  mar: 'marathi',
+  mlt: 'maltese',
+  nld: 'dutch',
+  pan: 'punjabi',
+  pol: 'polish',
+  por: 'portuguese',
+  ron: 'romanian',
+  rus: 'russian',
+  slk: 'slovak',
+  slv: 'slovenian',
+  swe: 'swedish',
+  swh: 'swahili',
+  tam: 'tamil',
+  tha: 'thai',
+  tur: 'turkish',
+  ukr: 'ukrainian',
+  urd: 'urdu',
+  uzn: 'uzbek',
+  vie: 'vietnamese',
+  cmn: 'chinese',
+  afr: 'afrikaans',
+  isl: 'icelandic',
+  ltz: 'luxembourgish',
+  nob: 'norwegian',
+  glg: 'galician',
+  bos: 'bosnian',
+  hrv: 'croatian',
+  mkd: 'macedonian',
+  srp: 'serbian',
+  hye: 'armenian',
+  azj: 'azerbaijani',
+  kaz: 'kazakh',
+  kor: 'korean',
+  guj: 'gujarati',
+  kan: 'kannada',
+  npi: 'nepali',
+  snd: 'sindhi',
+  tel: 'telugu',
+  jav: 'javanese',
+  zlm: 'malay',
+  mal: 'malayalam',
+  tgl: 'tagalog',
+  mya: 'myanmar',
+  khm: 'khmer',
+  lao: 'lao',
+  heb: 'hebrew',
+  pbt: 'pashto',
+  tgk: 'tajik',
+  amh: 'amharic',
+  lin: 'lingala',
+  som: 'somali',
+  yor: 'yoruba',
+  sna: 'shona',
+  mri: 'maori',
+  hau: 'hausa',
+  oci: 'occitan',
+  bak: 'bashkir',
+  bre: 'breton',
+  yid: 'yiddish',
+  hat: 'haitian creole',
+  mlg: 'malagasy',
+  sin: 'sinhala',
+  sqi: 'albanian',
+  sun: 'sundanese',
+  eus: 'basque',
+  nno: 'nynorsk',
+  tat: 'tatar',
+  bod: 'tibetan',
+  fao: 'faroese',
+  haw: 'hawaiian',
+  lat: 'latin',
+  san: 'sanskrit',
+  tuk: 'turkmen'
 };
 
-export function getLanguageFromThreeLetterCode(
-  lang3Code: string,
-): string | null {
+export function getLanguageFromThreeLetterCode(code: string): string | null {
   try {
-    const name = LANG3_TO_NAME[lang3Code] ?? null;
+    const name = LANG3_FULL[code] ?? null;
     if (name == null) {
       return null;
     }
@@ -113,7 +111,7 @@ export function getLanguageFromThreeLetterCode(
       .map((word: string) => word[0].toUpperCase() + word.slice(1));
     return capitalizedWords.join(' ');
   } catch (e) {
-    console.warn(`Unable to get language name for code ${lang3Code}: ${e}`);
+    console.warn(`Unable to get language name for code ${code}: ${e}`);
   }
   return null;
 }
streaming-react-app/src/react-xr/TextBlocks.tsx CHANGED
@@ -1,8 +1,9 @@
-import {useEffect, useRef, useState} from 'react';
+import {JSX, useEffect, useRef, useState} from 'react';
 import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
 import robotoFontTexture from '../assets/RobotoMono-Regular.png';
 import ThreeMeshUIText, {ThreeMeshUITextType} from './ThreeMeshUIText';
-import supportedCharSet from './supportedCharSet';
+import {getURLParams} from '../URLParams';
+import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';
 
 const NUM_LINES = 3;
 
@@ -21,80 +22,44 @@ const SCROLL_Y_DELTA = 0.001;
 const OFFSET = 0.01;
 const OFFSET_WIDTH = OFFSET * 3;
 
-const CHARS_PER_SECOND = 10;
-
-// The tick interval
-const RENDER_INTERVAL = 300;
-
-const CURSOR_BLINK_INTERVAL_MS = 1000;
-
-type TextBlockProps = {
+type Props = {
   content: string;
   // The actual position or end position when animating
   y: number;
   // The start position when animating
   startY: number;
+  width: number;
+  height: number;
   textOpacity: number;
   backgroundOpacity: number;
-  index: number;
-  isBottomLine: boolean;
-  // key: number;
-};
-
-type TranscriptState = {
-  textBlocksProps: TextBlockProps[];
-  lastTranslationStringIndex: number;
-  lastTranslationLineStartIndex: number;
-  transcriptLines: string[];
-  lastRenderTime: number;
+  // Use this to keep track of sentence + line position for animation
+  index: string;
+  enableAnimation: boolean;
 };
 
 function TextBlock({
   content,
   y,
   startY,
+  width,
+  height,
   textOpacity,
   backgroundOpacity,
   index,
-  isBottomLine,
-}: TextBlockProps) {
+  enableAnimation,
+}: Props) {
   const [scrollY, setScrollY] = useState<number>(y);
+
   // We are reusing text blocks so this keeps track of when we changed rows so we can restart animation
-  const lastIndex = useRef<number>(index);
+  const lastIndex = useRef<string>(index);
   useEffect(() => {
     if (index != lastIndex.current) {
       lastIndex.current = index;
-      !isBottomLine && setScrollY(startY);
+      enableAnimation && setScrollY(startY);
     } else if (scrollY < y) {
       setScrollY((prev) => prev + SCROLL_Y_DELTA);
     }
-  }, [isBottomLine, index, scrollY, setScrollY, startY, y]);
-
-  const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
-  useEffect(() => {
-    if (isBottomLine) {
-      const interval = setInterval(() => {
-        setCursorBlinkOn((prev) => !prev);
-      }, CURSOR_BLINK_INTERVAL_MS);
-
-      return () => clearInterval(interval);
-    } else {
-      setCursorBlinkOn(false);
-    }
-  }, [isBottomLine]);
-
-  const numChars = content.length;
-
-  if (cursorBlinkOn) {
-    content = content + '|';
-  }
-
-  // Accounting for potential cursor for block width (the +1)
-  const width =
-    (numChars + (isBottomLine ? 1.1 : 0) + (numChars < 10 ? 1 : 0)) *
-    CHAR_WIDTH;
-
-  const height = LINE_HEIGHT;
+  }, [enableAnimation, index, scrollY, setScrollY, startY, y]);
 
   // This is needed to update text content (doesn't work if we just update the content prop)
   const textRef = useRef<ThreeMeshUITextType>();
@@ -146,162 +111,125 @@ function TextBlock({
   );
 }
 
-function initialTextBlockProps(count: number): TextBlockProps[] {
-  return Array.from({length: count}).map(() => {
-    // Push in non display blocks because mesh UI crashes if elements are add / removed from screen.
-    return {
-      y: Y_COORD_START,
-      startY: 0,
-      index: 0,
-      textOpacity: 0,
-      backgroundOpacity: 0,
-      width: MAX_WIDTH,
-      height: LINE_HEIGHT,
-      content: '',
-      isBottomLine: true,
-    };
-  });
+// Background behind the text so it covers any missing spaces
+function TranscriptionPanel() {
+  const panelHeight = LINE_HEIGHT * NUM_LINES + 2 * BLOCK_SPACING + 2 * OFFSET;
+  const xPosition = OFFSET_WIDTH;
+  return (
+    <block
+      args={[
+        {
+          backgroundOpacity: 1,
+          width:
+            MAX_WIDTH * ((CHARS_PER_LINE + 2) / CHARS_PER_LINE) +
+            2 * OFFSET_WIDTH,
+          height: panelHeight,
+          borderRadius: 0,
+        },
+      ]}
+      position={[
+        -OFFSET + xPosition,
+        Y_COORD_START + panelHeight / 2 - 2 * OFFSET,
+        Z_COORD,
+      ]}></block>
+  );
 }
 
 export default function TextBlocks({
-  translationText,
+  sentences,
+  blinkCursor,
 }: {
-  translationText: string;
+  sentences: string[][];
+  blinkCursor: boolean;
 }) {
-  const transcriptStateRef = useRef<TranscriptState>({
-    textBlocksProps: initialTextBlockProps(NUM_LINES),
-    lastTranslationStringIndex: 0,
-    lastTranslationLineStartIndex: 0,
-    transcriptLines: [],
-    lastRenderTime: new Date().getTime(),
-  });
-
-  const transcriptState = transcriptStateRef.current;
-  const {textBlocksProps, lastTranslationStringIndex, lastRenderTime} =
-    transcriptState;
-
-  const [charsToRender, setCharsToRender] = useState<number>(0);
+  const showTranscriptionPanel =
+    getURLParams().ARTranscriptionType === 'lines_with_background';
+  const textBlocks: JSX.Element[] = [];
 
+  const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
   useEffect(() => {
-    const interval = setInterval(() => {
-      const currentTime = new Date().getTime();
-      const charsToRender = Math.round(
-        ((currentTime - lastRenderTime) * CHARS_PER_SECOND) / 1000,
-      );
-      setCharsToRender(charsToRender);
-    }, RENDER_INTERVAL);
-
-    return () => clearInterval(interval);
-  }, [lastRenderTime]);
-
-  const currentTime = new Date().getTime();
-  if (charsToRender < 1) {
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  const nextTranslationStringIndex = Math.min(
-    lastTranslationStringIndex + charsToRender,
-    translationText.length,
-  );
-  const newString = translationText.substring(
-    lastTranslationStringIndex,
-    nextTranslationStringIndex,
-  );
-  if (nextTranslationStringIndex === lastTranslationStringIndex) {
-    transcriptState.lastRenderTime = currentTime;
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  // Wait until more characters are accumulated if its just blankspace
-  if (/^\s*$/.test(newString)) {
-    transcriptState.lastRenderTime = currentTime;
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
-  const runAll = true;
-  const newSentences = runAll
-    ? translationText.substring(0, nextTranslationStringIndex).split('\n')
-    : newString.split('\n');
-  const transcriptLines = runAll ? [''] : transcriptState.transcriptLines;
-  newSentences.forEach((newSentence, sentenceIdx) => {
-    const words = newSentence.split(/\s+/);
-    words.forEach((word) => {
-      const filteredWord = [...word]
-        .filter((c) => {
-          if (supportedCharSet().has(c)) {
-            return true;
-          }
-          console.error(
-            `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
-          );
-          return false;
-        })
-        .join('');
-
-      const lastLineSoFar = transcriptLines[0];
-      const charCount = lastLineSoFar.length + filteredWord.length + 1;
-
-      if (charCount <= CHARS_PER_LINE) {
-        transcriptLines[0] = lastLineSoFar + ' ' + filteredWord;
-      } else {
-        transcriptLines.unshift(filteredWord);
-      }
-    });
+    if (blinkCursor) {
+      const interval = setInterval(() => {
+        setCursorBlinkOn((prev) => !prev);
+      }, CURSOR_BLINK_INTERVAL_MS);
 
-    if (sentenceIdx < newSentences.length - 1) {
-      transcriptLines.unshift('\n');
-      transcriptLines.unshift('');
+      return () => clearInterval(interval);
    } else {
+      setCursorBlinkOn(false);
     }
-  });
-
-  transcriptState.transcriptLines = transcriptLines;
-  transcriptState.lastTranslationStringIndex = nextTranslationStringIndex;
+  }, [blinkCursor]);
 
-  const newTextBlocksProps: TextBlockProps[] = [];
+  // Start from bottom and populate most recent sentences by line until we fill max lines.
   let currentY = Y_COORD_START;
+  for (let i = sentences.length - 1; i >= 0; i--) {
+    const sentenceLines = sentences[i];
+    for (let j = sentenceLines.length - 1; j >= 0; j--) {
+      if (textBlocks.length == NUM_LINES) {
+        if (showTranscriptionPanel) {
+          textBlocks.push(<TranscriptionPanel key={textBlocks.length} />);
+        }
+        return textBlocks;
+      }
 
-  transcriptLines.forEach((line, i) => {
-    if (newTextBlocksProps.length == NUM_LINES) {
-      return;
-    }
-
-    // const line = transcriptLines[i];
-    if (line === '\n') {
-      currentY += BLOCK_SPACING;
-      return;
-    }
-    const y = currentY + LINE_HEIGHT / 2;
-    const isBottomLine = newTextBlocksProps.length === 0;
-
-    const textOpacity = 1 - 0.1 * newTextBlocksProps.length;
-    newTextBlocksProps.push({
-      y,
-      startY: currentY,
-      index: i,
-      textOpacity,
-      backgroundOpacity: 0.98,
-      content: line,
-      isBottomLine,
-    });
-
-    currentY = y + LINE_HEIGHT / 2;
-  });
+      const isBottomSentence = i === sentences.length - 1;
+      const isBottomLine = isBottomSentence && textBlocks.length === 0;
+      const y = currentY + LINE_HEIGHT / 2;
+      let textBlockLine = sentenceLines[j];
+      const numChars = textBlockLine.length;
+
+      if (cursorBlinkOn && isBottomLine) {
+        textBlockLine = textBlockLine + '|';
+      }
+
+      // Accounting for potential cursor for block width (the +1)
+      const blockWidth =
+        (numChars + (isBottomLine ? 1.1 : 0) + (numChars < 10 ? 1 : 0)) *
+        CHAR_WIDTH;
+      const textOpacity = 1 - 0.1 * textBlocks.length;
+      textBlocks.push(
+        <TextBlock
+          key={textBlocks.length}
+          y={y}
+          startY={currentY}
+          index={`${sentences.length - i},${j}`}
+          textOpacity={textOpacity}
+          backgroundOpacity={0.98}
+          height={LINE_HEIGHT}
+          width={blockWidth}
+          // content={"BLOCK " + textBlocks.length + ": " + content}
+          content={textBlockLine}
+          enableAnimation={!isBottomLine}
+        />,
+      );
 
-  const numRemainingBlocks = NUM_LINES - newTextBlocksProps.length;
+      currentY = y + LINE_HEIGHT / 2;
+    }
+    currentY += showTranscriptionPanel ? BLOCK_SPACING / 3 : BLOCK_SPACING;
+  }
+
+  const numRemainingBlocks = textBlocks.length - NUM_LINES;
   if (numRemainingBlocks > 0) {
-    newTextBlocksProps.push(...initialTextBlockProps(numRemainingBlocks));
+    Array.from({length: numRemainingBlocks}).forEach(() => {
+      // Push in non display blocks because mesh UI crashes if elements are add / removed from screen.
+      textBlocks.push(
+        <TextBlock
+          key={textBlocks.length}
+          y={Y_COORD_START}
+          startY={0}
+          index="0,0"
+          textOpacity={0}
+          backgroundOpacity={0}
+          enableAnimation={false}
+          width={MAX_WIDTH}
+          height={LINE_HEIGHT}
+          content=""
+        />,
+      );
    });
   }
 
-  transcriptState.textBlocksProps = newTextBlocksProps;
-  transcriptState.lastRenderTime = currentTime;
-  return newTextBlocksProps.map((props, idx) => (
-    <TextBlock {...props} key={idx} />
-  ));
+  if (showTranscriptionPanel) {
+    textBlocks.push(<TranscriptionPanel key={textBlocks.length} />);
+  }
+  return textBlocks;
 }
streaming-react-app/src/react-xr/XRConfig.tsx CHANGED
@@ -25,15 +25,29 @@ import {BLACK, WHITE} from './Colors';
25
  import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
26
  import robotoFontTexture from '../assets/RobotoMono-Regular.png';
27
  import {getURLParams} from '../URLParams';
28
- import TextBlocks from './TextBlocks';
29
  import {BufferedSpeechPlayer} from '../createBufferedSpeechPlayer';
30
  import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';
31
- import supportedCharSet from './supportedCharSet';
32
 
33
  // Adds on react JSX for add-on libraries to react-three-fiber
34
  extend(ThreeMeshUI);
35
  extend({TextGeometry});

  // This component wraps any children so it is positioned relative to the camera, rather than from the origin
  function CameraLinkedObject({children}) {
    const camera = useThree((state) => state.camera);

  import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
  import robotoFontTexture from '../assets/RobotoMono-Regular.png';
  import {getURLParams} from '../URLParams';
+ import TextBlocks, {CHARS_PER_LINE} from './TextBlocks';
  import {BufferedSpeechPlayer} from '../createBufferedSpeechPlayer';
  import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';

  // Adds on react JSX for add-on libraries to react-three-fiber
  extend(ThreeMeshUI);
  extend({TextGeometry});

+ async function fetchSupportedCharSet(): Promise<Set<string>> {
+   try {
+     const response = await fetch(robotoFontFamilyJson);
+     const fontFamily = await response.json();
+
+     return new Set(fontFamily.info.charset);
+   } catch (e) {
+     console.error('Failed to fetch supported XR charset', e);
+     return new Set();
+   }
+ }
+
+ let supportedCharSet = new Set();
+ fetchSupportedCharSet().then((result) => (supportedCharSet = result));
+
  // This component wraps any children so it is positioned relative to the camera, rather than from the origin
  function CameraLinkedObject({children}) {
    const camera = useThree((state) => state.camera);
@@ -62,7 +76,10 @@ function ThreeMeshUIComponents({
        translationSentences={translationSentences}
      />
    ) : (
-     <TranscriptPanelBlocks translationSentences={translationSentences} />
+     <TranscriptPanelBlocks
+       animateTextDisplay={animateTextDisplay}
+       translationSentences={translationSentences}
+     />
    )}
    {skipARIntro ? null : (
      <IntroPanel started={started} setStarted={setStarted} />
@@ -136,7 +153,7 @@ function TranscriptPanelSingleBlock({
      (wordChunks, currentWord) => {
        const filteredWord = [...currentWord]
          .filter((c) => {
-           if (supportedCharSet().has(c)) {
+           if (supportedCharSet.has(c)) {
              return true;
            }
            console.error(
@@ -206,14 +223,59 @@ function TranscriptPanelSingleBlock({
  // Splits up the lines into separate blocks to treat each one separately.
  // This allows changing of opacity, animating per line, changing height / width per line etc
  function TranscriptPanelBlocks({
+   animateTextDisplay,
    translationSentences,
  }: {
+   animateTextDisplay: boolean;
    translationSentences: TranslationSentences;
  }) {
+   const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
+     // Currently causing issues with displaying dummy text, skip over
+     useState(false);
+
+   // Normally we don't setState in render, but here we need to for computed state, and this if statement assures it won't loop infinitely
+   if (!didReceiveTranslationSentences && translationSentences.length > 0) {
+     setDidReceiveTranslationSentences(true);
+   }
+
+   const initialPrompt = 'Listening...';
+   const transcriptSentences: string[] = didReceiveTranslationSentences
+     ? translationSentences
+     : [initialPrompt];
+
+   // The transcript is an array of sentences. For each sentence we break this down into an array of words per line.
+   // This is needed so we can "scroll" through without changing the order of words in the transcript
+   const sentenceLines = transcriptSentences.map((sentence) => {
+     const words = sentence.split(/\s+/);
+     // Here we break each sentence up with newlines so all words per line fit within the panel
+     return words.reduce(
+       (wordChunks, currentWord) => {
+         const filteredWord = [...currentWord]
+           .filter((c) => {
+             if (supportedCharSet.has(c)) {
+               return true;
+             }
+             console.error(
+               `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
+             );
+             return false;
+           })
+           .join('');
+         const lastLineSoFar = wordChunks[wordChunks.length - 1];
+         const charCount = lastLineSoFar.length + filteredWord.length + 1;
+         if (charCount <= CHARS_PER_LINE) {
+           wordChunks[wordChunks.length - 1] =
+             lastLineSoFar + ' ' + filteredWord;
+         } else {
+           wordChunks.push(filteredWord);
+         }
+         return wordChunks;
+       },
+       [''],
+     );
+   });
    return (
-     <TextBlocks
-       translationText={'Listening...\n' + translationSentences.join('\n')}
-     />
+     <TextBlocks sentences={sentenceLines} blinkCursor={animateTextDisplay} />
    );
  }

@@ -299,8 +361,6 @@ export type XRConfigProps = {
    startStreaming: () => Promise<void>;
    stopStreaming: () => Promise<void>;
    debugParam: boolean | null;
-   onARVisible?: () => void;
-   onARHidden?: () => void;
  };

  export default function XRConfig(props: XRConfigProps) {
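Review note on the wrapping logic added to `TranscriptPanelBlocks` above: each sentence is split on whitespace, characters missing from the MSDF font atlas are dropped, and words are packed greedily into lines of at most `CHARS_PER_LINE` characters. The sketch below restates that reducer as a standalone function; the `CHARS_PER_LINE` value of 37 is an assumption for illustration only, since the real constant is exported from `./TextBlocks` and is not visible in this diff.

```ts
// Standalone sketch of the greedy line-wrapping used in TranscriptPanelBlocks.
// Assumption: CHARS_PER_LINE = 37 is an illustrative value, not the real constant.
const CHARS_PER_LINE = 37;

function wrapSentence(sentence: string, supported: Set<string>): string[] {
  return sentence.split(/\s+/).reduce<string[]>(
    (wordChunks, currentWord) => {
      // Drop characters the MSDF font atlas cannot render.
      const filteredWord = [...currentWord]
        .filter((c) => supported.has(c))
        .join('');
      const lastLineSoFar = wordChunks[wordChunks.length - 1];
      // +1 accounts for the space that joins the word to the current line.
      if (lastLineSoFar.length + filteredWord.length + 1 <= CHARS_PER_LINE) {
        wordChunks[wordChunks.length - 1] = lastLineSoFar + ' ' + filteredWord;
      } else {
        wordChunks.push(filteredWord);
      }
      return wordChunks;
    },
    [''],
  );
}
```

Because the accumulator is seeded with `['']`, the first line always carries a leading space (with a 12-character limit, `'the quick brown fox'` wraps to `[' the quick', 'brown fox']`), exactly matching the reducer in the diff.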
streaming-react-app/src/react-xr/XRDialog.tsx CHANGED
@@ -8,12 +8,27 @@ import {
    Typography,
  } from '@mui/material';
  import CloseIcon from '@mui/icons-material/Close';
- import {XRConfigProps} from './XRConfig';
  import {useEffect, useRef, useState} from 'react';
  import './XRDialog.css';
  import {getRenderer, init, updatetranslationText} from './XRRendering';
  import ARButton from './ARButton';
  import {getURLParams} from '../URLParams';
+ import { BufferedSpeechPlayer } from '../createBufferedSpeechPlayer';
+ import { TranslationSentences } from '../types/StreamingTypes';
+ import { RoomState } from '../types/RoomState';
+
+ type XRConfigProps = {
+   animateTextDisplay: boolean;
+   bufferedSpeechPlayer: BufferedSpeechPlayer;
+   translationSentences: TranslationSentences;
+   roomState: RoomState | null;
+   roomID: string | null;
+   startStreaming: () => Promise<void>;
+   stopStreaming: () => Promise<void>;
+   debugParam: boolean | null;
+   onARVisible?: () => void;
+   onARHidden?: () => void;
+ };

  function XRContent(props: XRConfigProps) {
    const debugParam = getURLParams().debug;
streaming-react-app/src/types/StreamingTypes.ts CHANGED
@@ -55,9 +55,9 @@ export const SUPPORTED_INPUT_SOURCES: Array<{
    value: SupportedInputSource;
    label: string;
  }> = [
-   {value: 'userMedia', label: 'Microphone'},
-   {value: 'displayMedia', label: 'Browser Tab (Chrome only)'},
- ];
+   { value: 'userMedia', label: 'Microphone' },
+   { value: 'displayMedia', label: 'Browser Tab' },
+ ];

  export type StartStreamEventConfig = {
    event: 'config';
@@ -70,7 +70,6 @@ export type StartStreamEventConfig = {
  };

  export interface BrowserAudioStreamConfig {
-   echoCancellation: boolean;
    noiseSuppression: boolean;
    echoCancellation: boolean;
  }
@@ -113,7 +112,6 @@ export type TranslationSentences = Array<string>;
  export type DynamicConfig = {
    // targetLanguage: a 3-letter string representing the desired output language.
    targetLanguage: string;
-   expressive: boolean | null;
  };

  export type PartialDynamicConfig = Partial<DynamicConfig>;
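Note on `BrowserAudioStreamConfig`: the hunk above drops a duplicated `echoCancellation` field, leaving one `noiseSuppression` and one `echoCancellation` flag. The names line up with the standard `MediaTrackConstraints` options, so a consumer could forward them to `getUserMedia` directly; the sketch below is a hypothetical wiring, not code from this PR.

```ts
import {BrowserAudioStreamConfig} from './types/StreamingTypes';

// Hypothetical consumer: forwards the config as getUserMedia audio constraints.
async function openMicStream(
  config: BrowserAudioStreamConfig,
): Promise<MediaStream> {
  return navigator.mediaDevices.getUserMedia({
    audio: {
      noiseSuppression: config.noiseSuppression,
      echoCancellation: config.echoCancellation,
    },
    video: false,
  });
}
```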
streaming-react-app/vite.config.ts CHANGED
@@ -1,5 +1,10 @@
  import { defineConfig } from 'vite';
  import react from '@vitejs/plugin-react';
+ // import {resolve} from 'path';
+
+ // const rootDir = resolve(__dirname, 'src');
+ // const assetsDir = resolve(rootDir, 'assets');
+ // const typesDir = resolve(__dirname, 'types');

  // https://vitejs.dev/config/
  export default defineConfig(({ command }) => {
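The `vite.config.ts` change only adds commented-out `resolve` helpers, suggesting path aliases were considered and left disabled. If they were ever enabled, the usual Vite pattern would look like the sketch below (an illustration under that assumption; the alias name is invented):

```ts
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import { resolve } from 'path';

// Sketch only: wires the commented-out directory up as an import alias.
export default defineConfig({
  plugins: [react()],
  resolve: {
    alias: {
      '@assets': resolve(__dirname, 'src/assets'),
    },
  },
});
```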
streaming-react-app/yarn.lock CHANGED
@@ -1853,6 +1853,11 @@ isexe@^2.0.0:
    resolved "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz"
    integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==

+ iso-639-1@^3.1.0:
+   version "3.1.0"
+   resolved "https://registry.npmjs.org/iso-639-1/-/iso-639-1-3.1.0.tgz"
+   integrity sha512-rWcHp9dcNbxa5C8jA/cxFlWNFNwy5Vup0KcFvgA8sPQs9ZeJHj/Eq0Y8Yz2eL8XlWYpxw4iwh9FfTeVxyqdRMw==
+
  isomorphic-unfetch@^3.0.0:
    version "3.1.0"
    resolved "https://registry.npmjs.org/isomorphic-unfetch/-/isomorphic-unfetch-3.1.0.tgz"