Update README.md (#2)
Opened by reach-vb

Files changed:
- Dockerfile +5 -21
- README.md +6 -20
- seamless_server/app_pubsub.py +3 -35
- seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml +0 -25
- seamless_server/models/SeamlessStreaming/{vad_s2st_sc_main.yaml → vad_s2st_sc_24khz_main.yaml} +0 -0
- seamless_server/requirements.txt +5 -4
- seamless_server/run_docker.sh +0 -5
- seamless_server/src/simuleval_agent_directory.py +8 -29
- seamless_server/src/simuleval_transcoder.py +2 -7
- seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl +2 -2
- streaming-react-app/package.json +3 -2
- streaming-react-app/src/StreamingInterface.tsx +34 -88
- streaming-react-app/src/languageLookup.ts +103 -105
- streaming-react-app/src/react-xr/TextBlocks.tsx +119 -191
- streaming-react-app/src/react-xr/XRConfig.tsx +69 -9
- streaming-react-app/src/react-xr/XRDialog.tsx +16 -1
- streaming-react-app/src/types/StreamingTypes.ts +3 -5
- streaming-react-app/vite.config.ts +5 -0
- streaming-react-app/yarn.lock +5 -0
Dockerfile
CHANGED
````diff
@@ -45,21 +45,10 @@ RUN apt-get update && \
     # gradio dependencies \
     ffmpeg \
     # fairseq2 dependencies \
-    libjpeg8-dev \
-    libpng-dev \
     libsndfile-dev && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

-USER root
-RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
-# install older versions libjpeg62-turbo and libpng15
-RUN wget http://ftp.us.debian.org/debian/pool/main/libj/libjpeg-turbo/libjpeg62-turbo_2.1.5-2_amd64.deb && \
-    dpkg -i libjpeg62-turbo_2.1.5-2_amd64.deb && \
-    rm libjpeg62-turbo_2.1.5-2_amd64.deb
-RUN wget https://master.dl.sourceforge.net/project/libpng/libpng15/1.5.30/libpng-1.5.30.tar.gz && \
-    tar -xvf libpng-1.5.30.tar.gz && cd libpng-1.5.30 && ./configure && make && make install && cd .. && rm -rf libpng-1.5.30.tar.gz libpng-1.5.30
-
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
@@ -76,18 +65,13 @@ RUN pyenv install $PYTHON_VERSION && \

 COPY --chown=user:user ./seamless_server ./seamless_server
 # change dir since pip needs to seed whl folder
-RUN cd seamless_server && \
-    pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118 && \
-    pip install --no-cache-dir --upgrade -r requirements.txt
+RUN cd seamless_server && pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --from=frontend /app/dist ./streaming-react-app/dist

 WORKDIR $HOME/app/seamless_server
-
-…
-    huggingface-cli download meta-private/SeamlessExpressive pretssel_melhifigan_wm-final.pt --local-dir ./models/Seamless/ || echo "HF_TOKEN error" && \
-    ln -s $(readlink -f models/Seamless/pretssel_melhifigan_wm-final.pt) models/Seamless/pretssel_melhifigan_wm.pt || true;
-
+USER root
+RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
 USER user
-
-
+CMD [ "uvicorn", "app_pubsub:app", "--host", "0.0.0.0", "--port", "7860" ]
+
````
README.md
CHANGED
````diff
@@ -5,9 +5,7 @@ colorFrom: blue
 colorTo: yellow
 sdk: docker
 pinned: false
-suggested_hardware: t4-…
-models:
-  - facebook/seamless-streaming
+suggested_hardware: t4-medium
 ---

 # Seamless Streaming demo
@@ -17,21 +15,12 @@ You can simply duplicate the space to run it.
 ## Running locally
 ### Install backend seamless_server dependencies

-
-> Please note: we *do not* recommend running the model on CPU. CPU inference will be slow and introduce noticable delays in the simultaneous translation.
+`cd seamless-experiences/seamless_vc/seamless_server`

-
-> The example below is for PyTorch stable (2.1.1) and variant cu118.
-> Check [here](https://pytorch.org/get-started/locally/) to find the torch/torchaudio command for your variant.
-> Check [here](https://github.com/facebookresearch/fairseq2#variants) to find the fairseq2 command for your variant.
-
-If running for the first time, create conda environment and install the desired torch version. Then install the rest of the requirements:
+If running for the first time, create conda environment:
 ```
-
-conda create --yes --name smlss_server python=3.8 libsndfile==1.0.31
+conda create --name smlss_server python=3.8
 conda activate smlss_server
-conda install --yes pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
-pip install fairseq2 --pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/whl/nightly/pt2.1.1/cu118
 pip install -r requirements.txt
 ```
@@ -39,9 +28,8 @@ pip install -r requirements.txt
 ```
 conda install -c conda-forge nodejs
 cd streaming-react-app
 npm install
-
-yarn build # this will create the dist/ folder
+npm run build # this will create the dist/ folder
 ```
@@ -51,14 +39,12 @@ The server can be run locally with uvicorn below.
 Run the server in dev mode:

 ```
-cd seamless_server
 uvicorn app_pubsub:app --reload --host localhost
 ```

 Run the server in prod mode:

 ```
-cd seamless_server
 uvicorn app_pubsub:app --host 0.0.0.0
 ```
````
seamless_server/app_pubsub.py
CHANGED
````diff
@@ -123,26 +123,8 @@ class ServerLock(TypedDict):
     client_id: str
     member_object: Member

-if os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1":
-    logger.info("LOCK_SERVER_COMPLETELY is set. Server will be locked on startup.")
-if MAX_SPEAKERS is not None and int(MAX_SPEAKERS):
-    logger.info(f"MAX_SPEAKERS is set to: {MAX_SPEAKERS}")
-dummy_server_lock_member_object = Member(
-    client_id="seamless_user", session_id="dummy", name="Seamless User"
-)
-# Normally this would be an actual transcoder, but it's fine putting True here since currently we only check for the presence of the transcoder
-dummy_server_lock_member_object.transcoder = True
-server_lock: Optional[ServerLock] = (
-    {
-        "name": "Seamless User",
-        "client_id": "seamless_user",
-        "member_object": dummy_server_lock_member_object,
-    }
-    if os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1"
-    else None
-)
+server_lock: Optional[ServerLock] = None

 server_id = str(uuid4())
@@ -519,8 +501,6 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):
         server_lock is not None
         and config_dict.get("lockServerName")
         == ESCAPE_HATCH_SERVER_LOCK_RELEASE_NAME
-        # If we are locking the server completely we don't want someone to be able to unlock it
-        and not os.environ.get("LOCK_SERVER_COMPLETELY", "0") == "1"
     ):
         server_lock = None
         logger.info(
@@ -528,7 +508,7 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):
         )

     # If the server is not locked, set a lock. If it's already locked to this client, update the lock object
-    …
+    elif server_lock is None or server_lock.get("client_id") == client_id:
         # TODO: Add some sort of timeout as a backstop in case someone leaves the browser tab open after locking the server
         server_lock = {
             "name": config_dict.get("lockServerName"),
@@ -559,12 +539,6 @@ async def join_room(sid, client_id, room_id_from_client, config_dict):

     return {"roomsJoined": sio.rooms(sid), "roomID": room_id}

-def allow_speaker(room, client_id):
-    if MAX_SPEAKERS is not None and client_id in room.speakers:
-        room_statuses = {room_id: room.get_room_status_dict() for room_id, room in rooms.items()}
-        speakers = sum(room_status["activeTranscoders"] for room_status in room_statuses.values())
-        return speakers < int(MAX_SPEAKERS)
-    return True

 # TODO: Add code to prevent more than one speaker from connecting/streaming at a time
 @sio.event
@@ -585,12 +559,6 @@ async def configure_stream(sid, config):
         )
         return {"status": "error", "message": "member_or_room_is_none"}

-    if not allow_speaker(room, client_id):
-        logger.error(
-            f"In MAX_SPEAKERS mode we only allow one speaker at a time. Ignoring request to configure stream from client {client_id}."
-        )
-        return {"status": "error", "message": "max_speakers"}
-
     # If there is a server lock WITH an active transcoder session, prevent other users from configuring and starting a stream
     # If the server lock client does NOT have an active transcoder session allow this to proceed, knowing that
     # this stream will be interrupted if the server lock client starts streaming
````
seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml
DELETED
````diff
@@ -1,25 +0,0 @@
-agent_class: seamless_communication.streaming.agents.seamless_s2st.SeamlessS2STDualVocoderVADAgent
-monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
-unity_model_name: seamless_streaming_unity
-sentencepiece_model: spm_256k_nllb100.model
-
-task: s2st
-tgt_lang: "eng"
-min_unit_chunk_size: 50
-decision_threshold: 0.7
-no_early_stop: True
-block_ngrams: True
-vocoder_name: vocoder_v2
-expr_vocoder_name: vocoder_pretssel
-gated_model_dir: .
-expr_vocoder_gain: 3.0
-upstream_idx: 1
-wav2vec_yaml: wav2vec.yaml
-min_starting_wait_w2vbert: 192
-
-config_yaml: cfg_fbank_u2t.yaml
-upstream_idx: 1
-detokenize_only: True
-device: cuda:0
-max_len_a: 0
-max_len_b: 1000
````
seamless_server/models/SeamlessStreaming/{vad_s2st_sc_main.yaml → vad_s2st_sc_24khz_main.yaml}
RENAMED
File without changes
seamless_server/requirements.txt
CHANGED
````diff
@@ -1,6 +1,7 @@
+--pre --extra-index-url https://fair.pkg.atmeta.com/fairseq2/pt2.1.1/cu118
+simuleval==1.1.3
 # seamless_communication
-
-# ./whl/seamless_communication-1.0.0-py3-none-any.whl
+./whl/seamless_communication-1.0.0-py3-none-any.whl
 Flask==2.1.3
 Flask_Sockets==0.2.1
 g2p_en==2.1.0
@@ -13,10 +14,10 @@ protobuf==4.24.2
 psola==0.0.1
 pydub==0.25.1
 silero==0.4.1
+# simuleval==1.1.1
 soundfile==0.11.0
 stable_ts==1.4.0
-
-# simuleval # to be installed by seamless_communication
+torch # specific torch version depends on fairseq2 installation
 Werkzeug==2.0.3
 whisper==1.1.10
 colorlog==6.7.0
````
seamless_server/run_docker.sh
DELETED
````diff
@@ -1,5 +0,0 @@
-# !/bin/bash
-if [ -f models/Seamless/pretssel_melhifigan_wm.pt ] ; then
-    export USE_EXPRESSIVE_MODEL=1;
-fi
-uvicorn app_pubsub:app --host 0.0.0.0 --port 7860
````
seamless_server/src/simuleval_agent_directory.py
CHANGED
````diff
@@ -1,7 +1,6 @@
 # Creates a directory in which to look up available agents

-import …
-from typing import List, Optional
+from typing import List
 from src.simuleval_transcoder import SimulevalTranscoder
 import json
 import logging
@@ -34,10 +33,8 @@ class AgentWithInfo:
         # Supported dynamic params are defined in StreamingTypes.ts
         dynamic_params: List[str] = [],
         description="",
-        has_expressive: Optional[bool] = None,
     ):
         self.agent = agent
-        self.has_expressive = has_expressive
         self.name = name
         self.description = description
         self.modalities = modalities
@@ -78,7 +75,6 @@
 class SimulevalAgentDirectory:
     # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
     seamless_streaming_agent = "SeamlessStreaming"
-    seamless_agent = "Seamless"

     def __init__(self):
         self.agents = []
@@ -100,12 +96,7 @@
                 model_id,
             )
         except Exception as e:
-            from fairseq2.assets.error import AssetError
             logger.warning("Failed to build agent %s: %s" % (model_id, e))
-            if isinstance(e, AssetError):
-                logger.warning(
-                    "Please download gated assets and set `gated_model_dir` in the config"
-                )
             raise e

         return agent
@@ -119,32 +110,20 @@
             for agent_info in agent_infos:
                 self.add_agent(agent_info)
         else:
-            …
-                SimulevalAgentDirectory.seamless_agent,
-                config_name="vad_s2st_sc_24khz_main.yaml",
-            )
-            has_expressive = True
-            else:
-                logger.info("Building non-expressive model...")
-                s2s_agent = self.build_agent_if_available(
-                    SimulevalAgentDirectory.seamless_streaming_agent,
-                    config_name="vad_s2st_sc_main.yaml",
-                )
-                has_expressive = False
+            s2s_m4t_expr_agent = self.build_agent_if_available(
+                SimulevalAgentDirectory.seamless_streaming_agent,
+                config_name="vad_s2st_sc_24khz_main.yaml",
+            )

-            if …
+            if s2s_m4t_expr_agent:
                 self.add_agent(
                     AgentWithInfo(
-                        agent=…
+                        agent=s2s_m4t_expr_agent,
                         name=SimulevalAgentDirectory.seamless_streaming_agent,
                         modalities=["s2t", "s2s"],
                         target_langs=M4T_P0_LANGS,
                         dynamic_params=["expressive"],
                         description="multilingual expressive model that supports S2S and S2T",
-                        has_expressive=has_expressive,
                     )
                 )
@@ -158,7 +137,7 @@
     def get_agent(self, name):
         for agent in self.agents:
             if agent.name == name:
-                return agent
+                return agent.agent
         return None

     def get_agent_or_throw(self, name):
````
seamless_server/src/simuleval_transcoder.py
CHANGED
````diff
@@ -119,8 +119,7 @@ class OutputSegments:

 class SimulevalTranscoder:
     def __init__(self, agent, sample_rate, debug, buffer_limit):
         self.agent = agent
-        self.has_expressive = agent.has_expressive
         self.input_queue = asyncio.Queue()
         self.output_queue = asyncio.Queue()
         self.states = self.agent.build_states()
@@ -186,7 +185,7 @@
         logger.info(*args)

     @classmethod
-    def build_agent(cls, model_path, config_name):
+    def build_agent(cls, model_path, config_name="vad_s2st_main.yaml"):
         logger.info(f"Building simuleval agent: {model_path}, {config_name}")
         agent = build_system_from_dir(
             Path(__file__).resolve().parent.parent / f"models/{model_path}",
@@ -209,10 +208,6 @@
             tgt_lang=dynamic_config.get("targetLanguage"),
             config=dynamic_config,
         )
-        if dynamic_config.get("expressive") is True and self.has_expressive is False:
-            logger.warning(
-                "Passing 'expressive' but the agent does not support expressive output!"
-            )
         # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
         self.input_queue.put_nowait(segment)
````
seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl
CHANGED
````diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
-size …
+oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
+size 201552
````
streaming-react-app/package.json
CHANGED
````diff
@@ -1,11 +1,11 @@
 {
   "name": "streaming-react-app",
   "private": true,
-  "version": "0.0.…
+  "version": "0.0.13",
   "type": "module",
   "scripts": {
     "dev": "vite --host --strictPort",
-    "build": "vite build",
+    "build": "tsc && vite build",
     "preview": "vite preview",
     "clean:node-modules": "rm -rf node_modules/",
     "ts-check": "tsc --noEmit",
@@ -24,6 +24,7 @@
     "amazon-cognito-identity-js": "^6.3.6",
     "audiobuffer-to-wav": "^1.0.0",
     "aws-sdk": "^2.1472.0",
+    "iso-639-1": "^3.1.0",
     "js-cookie": "^3.0.5",
     "lodash": "4.17.21",
     "react": "^18.2.0",
````
streaming-react-app/src/StreamingInterface.tsx
CHANGED
````diff
@@ -57,12 +57,12 @@ import {CURSOR_BLINK_INTERVAL_MS} from './cursorBlinkInterval';
 import {getURLParams} from './URLParams';
 import debug from './debug';
 import DebugSection from './DebugSection';
-import …
-import Grid from '@mui/material/Grid';
+import {Grid} from '@mui/material';
 import {getLanguageFromThreeLetterCode} from './languageLookup';
-import HeadphonesIcon from '@mui/icons-material/Headphones';

-const AUDIO_STREAM_DEFAULTS …
+const AUDIO_STREAM_DEFAULTS: {
+  [key in SupportedInputSource]: BrowserAudioStreamConfig;
+} = {
   userMedia: {
     echoCancellation: false,
     noiseSuppression: true,
@@ -71,10 +71,13 @@ const AUDIO_STREAM_DEFAULTS = {
     echoCancellation: false,
     noiseSuppression: false,
   },
-}
+};

 async function requestUserMediaAudioStream(
-  config: BrowserAudioStreamConfig = …
+  config: BrowserAudioStreamConfig = {
+    echoCancellation: false,
+    noiseSuppression: true,
+  },
 ) {
   const stream = await navigator.mediaDevices.getUserMedia({
     audio: {...config, channelCount: 1},
@@ -87,7 +90,10 @@
 }

 async function requestDisplayMediaAudioStream(
-  config: BrowserAudioStreamConfig = …
+  config: BrowserAudioStreamConfig = {
+    echoCancellation: false,
+    noiseSuppression: false,
+  },
 ) {
   const stream = await navigator.mediaDevices.getDisplayMedia({
     audio: {...config, channelCount: 1},
@@ -152,7 +158,6 @@ export default function StreamingInterface() {
     useState<StreamingStatus>('stopped');

   const isStreamConfiguredRef = useRef<boolean>(false);
-  const [hasMaxSpeakers, setHasMaxSpeakers] = useState<boolean>(false);

   const [outputMode, setOutputMode] = useState<SupportedOutputMode>('s2s&t');
   const [inputSource, setInputSource] =
@@ -166,9 +171,6 @@

   // Dynamic Params:
   const [targetLang, setTargetLang] = useState<string | null>(null);
-  const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
-    null,
-  );

   const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
     debugParam ?? false,
@@ -250,7 +252,6 @@
     setAgent((prevAgent) => {
       if (prevAgent?.name !== newAgent?.name) {
         setTargetLang(newAgent?.targetLangs[0] ?? null);
-        setEnableExpressive(null);
       }
       return newAgent;
     });
@@ -309,7 +310,6 @@
     console.log('[configureStreamAsync] sending config', config);

     socket.emit('configure_stream', config, (statusObject) => {
-      setHasMaxSpeakers(statusObject.message === 'max_speakers')
       if (statusObject.status === 'ok') {
         isStreamConfiguredRef.current = true;
         console.debug(
@@ -427,7 +427,6 @@
     // available before actually configuring and starting the stream
     const fullDynamicConfig: DynamicConfig = {
       targetLanguage: targetLang,
-      expressive: enableExpressive,
     };

     await onSetDynamicConfig(fullDynamicConfig);
@@ -757,23 +756,14 @@
           <div className="header-container-sra">
             <div>
               <Typography variant="body2" sx={{color: '#65676B'}}>
-                Welcome!
-
-                IP address may not work because it's running on different replicas.
-                Use headphones if you are both speaker and listener to prevent feedback.
-                <br/>
-                If max speakers reached, please duplicate the space <a target="_blank" rel="noopener noreferrer" href="https://huggingface.co/spaces/facebook/seamless-streaming?duplicate=true">here</a>.
-                In your duplicated space, join a room as speaker or listener (or both),
-                and share the room code to invite listeners.
-                <br/>
-                Check out the seamless_communication <a target="_blank" rel="noopener noreferrer" href="https://github.com/facebookresearch/seamless_communication/tree/main">README</a> for more information.
+                Welcome! Join a room as speaker or listener (or both), and share the
+                room code to invite listeners.
                 <br/>
                 SeamlessStreaming model is a research model and is not released
-                for production deployment.
-
-
-                in the model config.
-                if you try streaming multiple speakers at the same time.
+                for production deployment. The streaming quality is closely
+                related to proper VAD segmentation. It works best if you pause
+                every couple of sentences, or you may wish adjust the VAD threshold
+                in the model config.
               </Typography>
             </div>
           </div>
@@ -917,28 +907,6 @@
                       spacing={1}
                       alignItems="flex-start"
                       sx={{flexGrow: 1}}>
-                      {currentAgent?.dynamicParams?.includes(
-                        'expressive',
-                      ) && (
-                        <FormControlLabel
-                          control={
-                            <Switch
-                              checked={enableExpressive ?? false}
-                              onChange={(
-                                event: React.ChangeEvent<HTMLInputElement>,
-                              ) => {
-                                const newValue = event.target.checked;
-                                setEnableExpressive(newValue);
-                                onSetDynamicConfig({
-                                  expressive: newValue,
-                                });
-                              }}
-                            />
-                          }
-                          label="Expressive"
-                        />
-                      )}
-
                       {isListener && (
                         <Box
                           sx={{
@@ -955,6 +923,13 @@
               </Grid>
             </Stack>

+            <Typography variant="body2" sx={{color: '#65676B'}}>
+              Note: we don't recommend echo cancellation, as it may distort
+              the input audio (dropping words/sentences) if there is output
+              audio playing. Instead, you should use headphones if you'd like
+              to listen to the output audio while speaking.
+            </Typography>
+
             <Stack
               direction="row"
               spacing={2}
@@ -984,9 +959,8 @@
                 </RadioGroup>
               </FormControl>
             </Box>
-
-
-            <FormControl disabled={streamFixedConfigOptionsDisabled}>
+            <Box sx={{flex: 1}}>
+              <FormControl disabled={streamFixedConfigOptionsDisabled}>
                 <FormLabel>Options</FormLabel>
                 <FormControlLabel
                   control={
@@ -1003,9 +977,9 @@
                     }
                   />
                 }
-                label="Noise Suppression"
+                label="Noise Suppression (Browser)"
               />
-
+              <FormControlLabel
                 control={
                   <Checkbox
                     checked={
@@ -1020,7 +994,7 @@
                     }
                   />
                 }
-                label="Echo Cancellation (…
+                label="Echo Cancellation (Browser)"
               />
               <FormControlLabel
                 control={
@@ -1031,34 +1005,12 @@
                   ) => setServerDebugFlag(event.target.checked)}
                 />
               }
-              label="…
+              label="Server Debug Flag"
             />
           </FormControl>
         </Box>
       </Stack>

-      {isSpeaker &&
-        isListener &&
-        inputSource === 'userMedia' &&
-        !enableEchoCancellation &&
-        gain !== 0 && (
-          <div>
-            <Alert severity="warning" icon={<HeadphonesIcon />}>
-              Headphones required to prevent feedback.
-            </Alert>
-          </div>
-        )}
-
-      {isSpeaker && enableEchoCancellation && (
-        <div>
-          <Alert severity="warning">
-            We don't recommend using echo cancellation as it may
-            distort the input audio. If possible, use headphones and
-            disable echo cancellation instead.
-          </Alert>
-        </div>
-      )}
-
       <Stack direction="row" spacing={2}>
         {streamingStatus === 'stopped' ? (
           <Button
@@ -1120,13 +1072,7 @@
               </Alert>
             </div>
           )}
-
-            <div>
-              <Alert severity="error">
-                {`Maximum number of speakers reached. Please try again at a later time.`}
-              </Alert>
-            </div>
-          )}
+
           {serverState != null &&
             serverState.totalActiveTranscoders >=
               TOTAL_ACTIVE_TRANSCODER_WARNING_THRESHOLD && (
@@ -1141,7 +1087,7 @@
             serverState.serverLock.clientID !== clientID && (
               <div>
                 <Alert severity="warning">
-                  {`The server is currently locked. Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
+                  {`The server is currently locked by "${serverState.serverLock.name}". Priority will be given to that client when they are streaming, and your streaming session may be halted abruptly.`}
                 </Alert>
               </div>
             )}
````
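One notable TypeScript change above is the mapped-type annotation on `AUDIO_STREAM_DEFAULTS`. Below is a minimal standalone sketch of that pattern, assuming `SupportedInputSource` is the two-member union the app uses (its actual definition lives in `StreamingTypes.ts`, which this PR also touches):

```typescript
// Sketch of the mapped-type pattern from the diff above. The union members
// are assumed from the object keys shown in the PR.
type SupportedInputSource = 'userMedia' | 'displayMedia';

type BrowserAudioStreamConfig = {
  echoCancellation: boolean;
  noiseSuppression: boolean;
};

// `[key in SupportedInputSource]` makes the compiler reject a missing or
// misspelled source, unlike an untyped object literal.
const AUDIO_STREAM_DEFAULTS: {
  [key in SupportedInputSource]: BrowserAudioStreamConfig;
} = {
  userMedia: {echoCancellation: false, noiseSuppression: true},
  displayMedia: {echoCancellation: false, noiseSuppression: false},
};

// Lookups are then fully type-safe:
function getDefaults(source: SupportedInputSource): BrowserAudioStreamConfig {
  return AUDIO_STREAM_DEFAULTS[source];
}
```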
streaming-react-app/src/languageLookup.ts
CHANGED
````diff
@@ -1,110 +1,108 @@
-const …
+const LANG3_FULL = {
+  eng: 'english',
+  arb: 'arabic',
+  asm: 'assamese',
+  bel: 'belarusian',
+  bul: 'bulgarian',
+  ben: 'bengali',
+  cat: 'catalan',
+  ces: 'czech',
+  cym: 'welsh',
+  dan: 'danish',
+  deu: 'german',
+  ell: 'greek',
+  spa: 'spanish',
+  est: 'estonian',
+  pes: 'persian',
+  fin: 'finnish',
+  fra: 'french',
+  hin: 'hindi',
+  hun: 'hungarian',
+  ind: 'indonesian',
+  ita: 'italian',
+  jpn: 'japanese',
+  kat: 'georgian',
+  lit: 'lithuanian',
+  lvs: 'latvian',
+  khk: 'mongolian',
+  mar: 'marathi',
+  mlt: 'maltese',
+  nld: 'dutch',
+  pan: 'punjabi',
+  pol: 'polish',
+  por: 'portuguese',
+  ron: 'romanian',
+  rus: 'russian',
+  slk: 'slovak',
+  slv: 'slovenian',
+  swe: 'swedish',
+  swh: 'swahili',
+  tam: 'tamil',
+  tha: 'thai',
+  tur: 'turkish',
+  ukr: 'ukrainian',
+  urd: 'urdu',
+  uzn: 'uzbek',
+  vie: 'vietnamese',
+  cmn: 'chinese',
+  afr: 'afrikaans',
+  isl: 'icelandic',
+  ltz: 'luxembourgish',
+  nob: 'norwegian',
+  glg: 'galician',
+  bos: 'bosnian',
+  hrv: 'croatian',
+  mkd: 'macedonian',
+  srp: 'serbian',
+  hye: 'armenian',
+  azj: 'azerbaijani',
+  kaz: 'kazakh',
+  kor: 'korean',
+  guj: 'gujarati',
+  kan: 'kannada',
+  npi: 'nepali',
+  snd: 'sindhi',
+  tel: 'telugu',
+  jav: 'javanese',
+  zlm: 'malay',
+  mal: 'malayalam',
+  tgl: 'tagalog',
+  mya: 'myanmar',
+  khm: 'khmer',
+  lao: 'lao',
+  heb: 'hebrew',
+  pbt: 'pashto',
+  tgk: 'tajik',
+  amh: 'amharic',
+  lin: 'lingala',
+  som: 'somali',
+  yor: 'yoruba',
+  sna: 'shona',
+  mri: 'maori',
+  hau: 'hausa',
+  oci: 'occitan',
+  bak: 'bashkir',
+  bre: 'breton',
+  yid: 'yiddish',
+  hat: 'haitian creole',
+  mlg: 'malagasy',
+  sin: 'sinhala',
+  sqi: 'albanian',
+  sun: 'sundanese',
+  eus: 'basque',
+  nno: 'nynorsk',
+  tat: 'tatar',
+  bod: 'tibetan',
+  fao: 'faroese',
+  haw: 'hawaiian',
+  lat: 'latin',
+  san: 'sanskrit',
+  tuk: 'turkmen'
 };

-export function getLanguageFromThreeLetterCode(
-  lang3Code: string,
-): string | null {
+export function getLanguageFromThreeLetterCode(code: string): string | null {
   try {
-    const name = …
+    const name = LANG3_FULL[code] ?? null;
     if (name == null) {
       return null;
     }
@@ -113,7 +111,7 @@
       .map((word: string) => word[0].toUpperCase() + word.slice(1));
     return capitalizedWords.join(' ');
   } catch (e) {
-    console.warn(`Unable to get language name for code ${…
+    console.warn(`Unable to get language name for code ${code}: ${e}`);
   }
   return null;
 }
````
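The new lookup replaces the external library call with a local map plus word-by-word capitalization. A hedged usage sketch (the keys come straight from the `LANG3_FULL` map shown above; the expected outputs assume the capitalization step splits on spaces, consistent with the `.map` shown in the diff):

```typescript
import {getLanguageFromThreeLetterCode} from './languageLookup';

// Single-word names are title-cased:
getLanguageFromThreeLetterCode('eng'); // expected: "English"
// Multi-word names capitalize each word:
getLanguageFromThreeLetterCode('hat'); // expected: "Haitian Creole"
// Unknown codes fall through to null rather than throwing:
getLanguageFromThreeLetterCode('xxx'); // expected: null
```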
streaming-react-app/src/react-xr/TextBlocks.tsx
CHANGED
````diff
@@ -1,8 +1,9 @@
-import {useEffect, useRef, useState} from 'react';
+import {JSX, useEffect, useRef, useState} from 'react';
 import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
 import robotoFontTexture from '../assets/RobotoMono-Regular.png';
 import ThreeMeshUIText, {ThreeMeshUITextType} from './ThreeMeshUIText';
-import …
+import {getURLParams} from '../URLParams';
+import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';

 const NUM_LINES = 3;
@@ -21,80 +22,44 @@ const SCROLL_Y_DELTA = 0.001;
 const OFFSET = 0.01;
 const OFFSET_WIDTH = OFFSET * 3;

-
-// The tick interval
-const RENDER_INTERVAL = 300;
-
-const CURSOR_BLINK_INTERVAL_MS = 1000;
-
-type TextBlockProps = {
+type Props = {
   content: string;
   // The actual position or end position when animating
   y: number;
   // The start position when animating
   startY: number;
+  width: number;
+  height: number;
   textOpacity: number;
   backgroundOpacity: number;
-…
-};
-
-type TranscriptState = {
-  textBlocksProps: TextBlockProps[];
-  lastTranslationStringIndex: number;
-  lastTranslationLineStartIndex: number;
-  transcriptLines: string[];
-  lastRenderTime: number;
+  // Use this to keep track of sentence + line position for animation
+  index: string;
+  enableAnimation: boolean;
 };

 function TextBlock({
   content,
   y,
   startY,
+  width,
+  height,
   textOpacity,
   backgroundOpacity,
   index,
-…
-}: …
+  enableAnimation,
+}: Props) {
   const [scrollY, setScrollY] = useState<number>(y);
+
   // We are reusing text blocks so this keeps track of when we changed rows so we can restart animation
-  const lastIndex = useRef<…
+  const lastIndex = useRef<string>(index);
   useEffect(() => {
     if (index != lastIndex.current) {
       lastIndex.current = index;
-…
+      enableAnimation && setScrollY(startY);
     } else if (scrollY < y) {
       setScrollY((prev) => prev + SCROLL_Y_DELTA);
     }
-  }, […
-
-  const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
-  useEffect(() => {
-    if (isBottomLine) {
-      const interval = setInterval(() => {
-        setCursorBlinkOn((prev) => !prev);
-      }, CURSOR_BLINK_INTERVAL_MS);
-
-      return () => clearInterval(interval);
-    } else {
-      setCursorBlinkOn(false);
-    }
-  }, [isBottomLine]);
-
-  const numChars = content.length;
-
-  if (cursorBlinkOn) {
-    content = content + '|';
-  }
-
-  // Accounting for potential cursor for block width (the +1)
-  const width =
-    (numChars + (isBottomLine ? 1.1 : 0) + (numChars < 10 ? 1 : 0)) *
-    CHAR_WIDTH;
-
-  const height = LINE_HEIGHT;
+  }, [enableAnimation, index, scrollY, setScrollY, startY, y]);

   // This is needed to update text content (doesn't work if we just update the content prop)
   const textRef = useRef<ThreeMeshUITextType>();
@@ -146,162 +111,125 @@
   );
 }

-…
+// Background behind the text so it covers any missing spaces
+function TranscriptionPanel() {
+  const panelHeight = LINE_HEIGHT * NUM_LINES + 2 * BLOCK_SPACING + 2 * OFFSET;
+  const xPosition = OFFSET_WIDTH;
+  return (
+    <block
+      args={[
+        {
+          backgroundOpacity: 1,
+          width:
+            MAX_WIDTH * ((CHARS_PER_LINE + 2) / CHARS_PER_LINE) +
+            2 * OFFSET_WIDTH,
+          height: panelHeight,
+          borderRadius: 0,
+        },
+      ]}
+      position={[
+        -OFFSET + xPosition,
+        Y_COORD_START + panelHeight / 2 - 2 * OFFSET,
+        Z_COORD,
+      ]}></block>
+  );
 }

 export default function TextBlocks({
-…
+  sentences,
+  blinkCursor,
 }: {
-…
+  sentences: string[][];
+  blinkCursor: boolean;
 }) {
-  const …
-    lastTranslationLineStartIndex: 0,
-    transcriptLines: [],
-    lastRenderTime: new Date().getTime(),
-  });
-
-  const transcriptState = transcriptStateRef.current;
-  const {textBlocksProps, lastTranslationStringIndex, lastRenderTime} =
-    transcriptState;
-
-  const [charsToRender, setCharsToRender] = useState<number>(0);
+  const showTranscriptionPanel =
+    getURLParams().ARTranscriptionType === 'lines_with_background';
+  const textBlocks: JSX.Element[] = [];

+  const [cursorBlinkOn, setCursorBlinkOn] = useState(false);
   useEffect(() => {
-    const …
-    );
-    setCharsToRender(charsToRender);
-    }, RENDER_INTERVAL);
-
-    return () => clearInterval(interval);
-  }, [lastRenderTime]);
-
-  const currentTime = new Date().getTime();
-  if (charsToRender < 1) {
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  const nextTranslationStringIndex = Math.min(
-    lastTranslationStringIndex + charsToRender,
-    translationText.length,
-  );
-  const newString = translationText.substring(
-    lastTranslationStringIndex,
-    nextTranslationStringIndex,
-  );
-  if (nextTranslationStringIndex === lastTranslationStringIndex) {
-    transcriptState.lastRenderTime = currentTime;
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  // Wait until more characters are accumulated if its just blankspace
-  if (/^\s*$/.test(newString)) {
-    transcriptState.lastRenderTime = currentTime;
-    return textBlocksProps.map((props, idx) => (
-      <TextBlock {...props} key={idx} />
-    ));
-  }
-
-  // Ideally we continue where we left off but this is complicated when we have mid-words. Recalculating for now
-  const runAll = true;
-  const newSentences = runAll
-    ? translationText.substring(0, nextTranslationStringIndex).split('\n')
-    : newString.split('\n');
-  const transcriptLines = runAll ? [''] : transcriptState.transcriptLines;
-  newSentences.forEach((newSentence, sentenceIdx) => {
-    const words = newSentence.split(/\s+/);
-    words.forEach((word) => {
-      const filteredWord = [...word]
-        .filter((c) => {
-          if (supportedCharSet().has(c)) {
-            return true;
-          }
-          console.error(
-            `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
-          );
-          return false;
-        })
-        .join('');
-
-      const lastLineSoFar = transcriptLines[0];
-      const charCount = lastLineSoFar.length + filteredWord.length + 1;
-
-      if (charCount <= CHARS_PER_LINE) {
-        transcriptLines[0] = lastLineSoFar + ' ' + filteredWord;
-      } else {
-        transcriptLines.unshift(filteredWord);
-      }
-    });
-  });
-
-  transcriptState.transcriptLines = transcriptLines;
-  transcriptState.lastTranslationStringIndex = nextTranslationStringIndex;
+    if (blinkCursor) {
+      const interval = setInterval(() => {
+        setCursorBlinkOn((prev) => !prev);
+      }, CURSOR_BLINK_INTERVAL_MS);
+
+      return () => clearInterval(interval);
+    } else {
+      setCursorBlinkOn(false);
+    }
+  }, [blinkCursor]);

+  // Start from bottom and populate most recent sentences by line until we fill max lines.
   let currentY = Y_COORD_START;
-…
-        return;
-      }
-      const y = currentY + LINE_HEIGHT / 2;
-      const isBottomLine = newTextBlocksProps.length === 0;
-…
+  for (let i = sentences.length - 1; i >= 0; i--) {
+    const sentenceLines = sentences[i];
+    for (let j = sentenceLines.length - 1; j >= 0; j--) {
+      if (textBlocks.length == NUM_LINES) {
+        if (showTranscriptionPanel) {
+          textBlocks.push(<TranscriptionPanel key={textBlocks.length} />);
+        }
+        return textBlocks;
+      }
+
+      const isBottomSentence = i === sentences.length - 1;
+      const isBottomLine = isBottomSentence && textBlocks.length === 0;
+      const y = currentY + LINE_HEIGHT / 2;
+      let textBlockLine = sentenceLines[j];
+      const numChars = textBlockLine.length;
+
+      if (cursorBlinkOn && isBottomLine) {
+        textBlockLine = textBlockLine + '|';
+      }
+
+      // Accounting for potential cursor for block width (the +1)
+      const blockWidth =
+        (numChars + (isBottomLine ? 1.1 : 0) + (numChars < 10 ? 1 : 0)) *
+        CHAR_WIDTH;
+      const textOpacity = 1 - 0.1 * textBlocks.length;
+      textBlocks.push(
+        <TextBlock
+          key={textBlocks.length}
+          y={y}
+          startY={currentY}
+          index={`${sentences.length - i},${j}`}
+          textOpacity={textOpacity}
+          backgroundOpacity={0.98}
+          height={LINE_HEIGHT}
+          width={blockWidth}
+          // content={"BLOCK " + textBlocks.length + ": " + content}
+          content={textBlockLine}
+          enableAnimation={!isBottomLine}
+        />,
+      );
+
+      currentY = y + LINE_HEIGHT / 2;
+    }
+    currentY += showTranscriptionPanel ? BLOCK_SPACING / 3 : BLOCK_SPACING;
+  }

-  const numRemainingBlocks = …
+  const numRemainingBlocks = textBlocks.length - NUM_LINES;
   if (numRemainingBlocks > 0) {
-…
+    Array.from({length: numRemainingBlocks}).forEach(() => {
+      // Push in non display blocks because mesh UI crashes if elements are add / removed from screen.
+      textBlocks.push(
+        <TextBlock
+          key={textBlocks.length}
+          y={Y_COORD_START}
+          startY={0}
+          index="0,0"
+          textOpacity={0}
+          backgroundOpacity={0}
+          enableAnimation={false}
+          width={MAX_WIDTH}
+          height={LINE_HEIGHT}
+          content=""
+        />,
+      );
+    });
   }

-    ));
+  if (showTranscriptionPanel) {
+    textBlocks.push(<TranscriptionPanel key={textBlocks.length} />);
+  }
+  return textBlocks;
 }
````
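The rewritten component switches from a character-streaming buffer to a `sentences: string[][]` input (one array of wrapped lines per sentence) filled bottom-up. A minimal sketch, an assumed simplification of the loop shown in the diff, that isolates just the line-selection logic without the rendering:

```typescript
// Walk sentences and their wrapped lines from the most recent backwards,
// stopping once NUM_LINES lines are collected (as the new TextBlocks does).
const NUM_LINES = 3;

function visibleLines(sentences: string[][]): string[] {
  const lines: string[] = [];
  for (let i = sentences.length - 1; i >= 0; i--) {
    for (let j = sentences[i].length - 1; j >= 0; j--) {
      if (lines.length === NUM_LINES) {
        return lines;
      }
      lines.push(sentences[i][j]);
    }
  }
  return lines;
}

// The newest line ends up first, mirroring isBottomLine in the component:
visibleLines([['hello there'], ['line one', 'line two']]);
// -> ['line two', 'line one', 'hello there']
```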
streaming-react-app/src/react-xr/XRConfig.tsx
CHANGED
@@ -25,15 +25,29 @@ import {BLACK, WHITE} from './Colors';
 import robotoFontFamilyJson from '../assets/RobotoMono-Regular-msdf.json?url';
 import robotoFontTexture from '../assets/RobotoMono-Regular.png';
 import {getURLParams} from '../URLParams';
-import TextBlocks from './TextBlocks';
+import TextBlocks, {CHARS_PER_LINE} from './TextBlocks';
 import {BufferedSpeechPlayer} from '../createBufferedSpeechPlayer';
 import {CURSOR_BLINK_INTERVAL_MS} from '../cursorBlinkInterval';
-import supportedCharSet from './supportedCharSet';
 
 // Adds on react JSX for add-on libraries to react-three-fiber
 extend(ThreeMeshUI);
 extend({TextGeometry});
 
+async function fetchSupportedCharSet(): Promise<Set<string>> {
+  try {
+    const response = await fetch(robotoFontFamilyJson);
+    const fontFamily = await response.json();
+
+    return new Set(fontFamily.info.charset);
+  } catch (e) {
+    console.error('Failed to fetch supported XR charset', e);
+    return new Set();
+  }
+}
+
+let supportedCharSet = new Set();
+fetchSupportedCharSet().then((result) => (supportedCharSet = result));
+
 // This component wraps any children so it is positioned relative to the camera, rather than from the origin
 function CameraLinkedObject({children}) {
   const camera = useThree((state) => state.camera);
@@ -62,7 +76,10 @@ function ThreeMeshUIComponents({
         translationSentences={translationSentences}
       />
     ) : (
-      <TranscriptPanelBlocks
+      <TranscriptPanelBlocks
+        animateTextDisplay={animateTextDisplay}
+        translationSentences={translationSentences}
+      />
     )}
     {skipARIntro ? null : (
       <IntroPanel started={started} setStarted={setStarted} />
@@ -136,7 +153,7 @@ function TranscriptPanelSingleBlock({
       (wordChunks, currentWord) => {
         const filteredWord = [...currentWord]
           .filter((c) => {
-            if (supportedCharSet().has(c)) {
+            if (supportedCharSet.has(c)) {
               return true;
             }
             console.error(
@@ -206,14 +223,59 @@ function TranscriptPanelSingleBlock({
 // Splits up the lines into separate blocks to treat each one separately.
 // This allows changing of opacity, animating per line, changing height / width per line etc
 function TranscriptPanelBlocks({
+  animateTextDisplay,
   translationSentences,
 }: {
+  animateTextDisplay: boolean;
   translationSentences: TranslationSentences;
 }) {
+  const [didReceiveTranslationSentences, setDidReceiveTranslationSentences] =
+    // Currently causing issues with displaying dummy text, skip over
+    useState(false);
+
+  // Normally we don't setState in render, but here we need to for computed state, and this if statement assures it won't loop infinitely
+  if (!didReceiveTranslationSentences && translationSentences.length > 0) {
+    setDidReceiveTranslationSentences(true);
+  }
+
+  const initialPrompt = 'Listening...';
+  const transcriptSentences: string[] = didReceiveTranslationSentences
+    ? translationSentences
+    : [initialPrompt];
+
+  // The transcript is an array of sentences. For each sentence we break this down into an array of words per line.
+  // This is needed so we can "scroll" through without changing the order of words in the transcript
+  const sentenceLines = transcriptSentences.map((sentence) => {
+    const words = sentence.split(/\s+/);
+    // Here we break each sentence up with newlines so all words per line fit within the panel
+    return words.reduce(
+      (wordChunks, currentWord) => {
+        const filteredWord = [...currentWord]
+          .filter((c) => {
+            if (supportedCharSet.has(c)) {
+              return true;
+            }
+            console.error(
+              `Unsupported char ${c} - make sure this is supported in the font family msdf file`,
+            );
+            return false;
+          })
+          .join('');
+        const lastLineSoFar = wordChunks[wordChunks.length - 1];
+        const charCount = lastLineSoFar.length + filteredWord.length + 1;
+        if (charCount <= CHARS_PER_LINE) {
+          wordChunks[wordChunks.length - 1] =
+            lastLineSoFar + ' ' + filteredWord;
+        } else {
+          wordChunks.push(filteredWord);
+        }
+        return wordChunks;
+      },
+      [''],
+    );
+  });
   return (
-    <TextBlocks
-      translationText={'Listening...\n' + translationSentences.join('\n')}
-    />
+    <TextBlocks sentences={sentenceLines} blinkCursor={animateTextDisplay} />
   );
 }
@@ -299,8 +361,6 @@ export type XRConfigProps = {
   startStreaming: () => Promise<void>;
   stopStreaming: () => Promise<void>;
   debugParam: boolean | null;
-  onARVisible?: () => void;
-  onARHidden?: () => void;
 };
 
 export default function XRConfig(props: XRConfigProps) {
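A few notes on the XRConfig.tsx changes above. First, the new fetchSupportedCharSet() replaces the static ./supportedCharSet module by reading the charset straight out of the MSDF font JSON. One behavior worth knowing: supportedCharSet starts as an empty Set until the fetch resolves, so any text filtered in that window is dropped with a console error. A small sketch of the per-character check (filterToCharset is a hypothetical helper that mirrors the .filter logic in the diff):

    // Hypothetical helper, not in the diff: mirrors the per-character filter.
    function filterToCharset(word: string, charset: Set<string>): string {
      return [...word].filter((c) => charset.has(c)).join('');
    }

    // Example: against a latin-only charset, unsupported glyphs are dropped.
    const latinOnly = new Set([...'abcdefghijklmnopqrstuvwxyz']);
    console.log(filterToCharset('héllo', latinOnly)); // "hllo"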
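Second, TranscriptPanelBlocks uses React's "adjust state during render" pattern for didReceiveTranslationSentences: calling setState during render is acceptable here because the guard ensures it can fire at most once per mount. The same latch written as a generic hook, for illustration (useLatchedFlag is an invented name, not from the diff):

    import {useState} from 'react';

    // Once `trigger` has been true during any render, stay true forever.
    function useLatchedFlag(trigger: boolean): boolean {
      const [latched, setLatched] = useState(false);
      if (!latched && trigger) {
        setLatched(true); // fires once; React re-renders before committing
      }
      return latched;
    }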
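Third, the reduce inside TranscriptPanelBlocks is a greedy word-wrapper against CHARS_PER_LINE. Extracted as a standalone function for clarity (wrapWords is an invented name; CHARS_PER_LINE really comes from './TextBlocks' and 37 is only an assumed value; unlike the diff's reduce, this version also avoids the leading space on the first line):

    const CHARS_PER_LINE = 37; // assumed value, for illustration only

    function wrapWords(sentence: string): string[] {
      return sentence.split(/\s+/).reduce<string[]>(
        (lines, word) => {
          const last = lines[lines.length - 1];
          // +1 accounts for the space joining the word onto the current line.
          if (last.length + word.length + 1 <= CHARS_PER_LINE) {
            lines[lines.length - 1] = last === '' ? word : last + ' ' + word;
          } else {
            lines.push(word);
          }
          return lines;
        },
        [''],
      );
    }

    console.log(wrapWords('the quick brown fox jumps over the lazy dog'));
    // ["the quick brown fox jumps over the", "lazy dog"]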
streaming-react-app/src/react-xr/XRDialog.tsx
CHANGED
@@ -8,12 +8,27 @@ import {
   Typography,
 } from '@mui/material';
 import CloseIcon from '@mui/icons-material/Close';
-import {XRConfigProps} from './XRConfig';
 import {useEffect, useRef, useState} from 'react';
 import './XRDialog.css';
 import {getRenderer, init, updatetranslationText} from './XRRendering';
 import ARButton from './ARButton';
 import {getURLParams} from '../URLParams';
+import { BufferedSpeechPlayer } from '../createBufferedSpeechPlayer';
+import { TranslationSentences } from '../types/StreamingTypes';
+import { RoomState } from '../types/RoomState';
+
+type XRConfigProps = {
+  animateTextDisplay: boolean;
+  bufferedSpeechPlayer: BufferedSpeechPlayer;
+  translationSentences: TranslationSentences;
+  roomState: RoomState | null;
+  roomID: string | null;
+  startStreaming: () => Promise<void>;
+  stopStreaming: () => Promise<void>;
+  debugParam: boolean | null;
+  onARVisible?: () => void;
+  onARHidden?: () => void;
+};
 
 function XRContent(props: XRConfigProps) {
   const debugParam = getURLParams().debug;
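XRDialog.tsx now declares its own XRConfigProps (keeping the optional onARVisible / onARHidden callbacks that were dropped from XRConfig.tsx) instead of importing the type, which removes XRDialog's module dependency on XRConfig. If the two types had stayed identical, a type-only import would have achieved the same decoupling, since it is erased at compile time:

    // For comparison only, not what the diff does: a type-only import adds
    // no runtime dependency on './XRConfig'.
    import type {XRConfigProps} from './XRConfig';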
streaming-react-app/src/types/StreamingTypes.ts
CHANGED
@@ -55,9 +55,9 @@ export const SUPPORTED_INPUT_SOURCES: Array<{
   value: SupportedInputSource;
   label: string;
 }> = [
-
-
-];
+  { value: 'userMedia', label: 'Microphone' },
+  { value: 'displayMedia', label: 'Browser Tab' },
+];
 
 export type StartStreamEventConfig = {
   event: 'config';
@@ -70,7 +70,6 @@ export type StartStreamEventConfig = {
 };
 
 export interface BrowserAudioStreamConfig {
-  echoCancellation: boolean;
   noiseSuppression: boolean;
   echoCancellation: boolean;
 }
@@ -113,7 +112,6 @@ export type TranslationSentences = Array<string>;
 export type DynamicConfig = {
   // targetLanguage: a 3-letter string representing the desired output language.
   targetLanguage: string;
-  expressive: boolean | null;
 };
 
 export type PartialDynamicConfig = Partial<DynamicConfig>;
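The BrowserAudioStreamConfig change above also fixes a duplicated echoCancellation field in the interface. These two flags map directly onto standard getUserMedia audio constraints; a minimal sketch of a call site (openMicrophone is a hypothetical helper, not part of this diff):

    interface BrowserAudioStreamConfig {
      noiseSuppression: boolean;
      echoCancellation: boolean;
    }

    // Hypothetical call site showing how the config maps onto constraints.
    async function openMicrophone(
      config: BrowserAudioStreamConfig,
    ): Promise<MediaStream> {
      return navigator.mediaDevices.getUserMedia({
        audio: {
          noiseSuppression: config.noiseSuppression,
          echoCancellation: config.echoCancellation,
        },
      });
    }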
streaming-react-app/vite.config.ts
CHANGED
@@ -1,5 +1,10 @@
 import { defineConfig } from 'vite';
 import react from '@vitejs/plugin-react';
+// import {resolve} from 'path';
+
+// const rootDir = resolve(__dirname, 'src');
+// const assetsDir = resolve(rootDir, 'assets');
+// const typesDir = resolve(__dirname, 'types');
 
 // https://vitejs.dev/config/
 export default defineConfig(({ command }) => {
streaming-react-app/yarn.lock
CHANGED
@@ -1853,6 +1853,11 @@ isexe@^2.0.0:
   resolved "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz"
   integrity sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==
 
+iso-639-1@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.npmjs.org/iso-639-1/-/iso-639-1-3.1.0.tgz"
+  integrity sha512-rWcHp9dcNbxa5C8jA/cxFlWNFNwy5Vup0KcFvgA8sPQs9ZeJHj/Eq0Y8Yz2eL8XlWYpxw4iwh9FfTeVxyqdRMw==
+
 isomorphic-unfetch@^3.0.0:
   version "3.1.0"
   resolved "https://registry.npmjs.org/isomorphic-unfetch/-/isomorphic-unfetch-3.1.0.tgz"
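The new iso-639-1 lockfile entry matches the dependency added in streaming-react-app/package.json and presumably backs the languageLookup.ts changes. The library maps two-letter ISO 639-1 codes to language names, e.g.:

    import ISO6391 from 'iso-639-1';

    console.log(ISO6391.getName('es'));       // "Spanish"
    console.log(ISO6391.getNativeName('es')); // "Español"
    console.log(ISO6391.validate('xx'));      // false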