Anna Sun committed on
Commit
1143e8d
·
1 Parent(s): 070b677

add dual non-expr/expressive agent, install sc from github

Browse files
Dockerfile CHANGED
@@ -71,6 +71,11 @@ RUN cd seamless_server && \
71
  COPY --from=frontend /app/dist ./streaming-react-app/dist
72
 
73
  WORKDIR $HOME/app/seamless_server
 
 
 
 
 
74
  USER root
75
  RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
76
  USER user
 
71
  COPY --from=frontend /app/dist ./streaming-react-app/dist
72
 
73
  WORKDIR $HOME/app/seamless_server
74
+ RUN --mount=type=secret,id=HF_TOKEN,mode=0444,required=true \
75
+ huggingface-cli login --token $(cat /run/secrets/HF_TOKEN) && \
76
+ huggingface-cli download meta-private/SeamlessExpressive pretssel_melhifigan_wm-final.pt --local-dir ./models/Seamless/ && \
77
+ ln -s $(readlink -f models/Seamless/pretssel_melhifigan_wm-final.pt) models/Seamless/pretssel_melhifigan_wm.pt
78
+
79
  USER root
80
  RUN ln -s /usr/lib/x86_64-linux-gnu/libsox.so.3 /usr/lib/x86_64-linux-gnu/libsox.so
81
  USER user
seamless_server/models/Seamless/vad_s2st_sc_24khz_main.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ agent_class: seamless_communication.streaming.agents.seamless_s2st.SeamlessS2STDualVocoderVADAgent
2
+ monotonic_decoder_model_name: seamless_streaming_monotonic_decoder
3
+ unity_model_name: seamless_streaming_unity
4
+ sentencepiece_model: spm_256k_nllb100.model
5
+
6
+ task: s2st
7
+ tgt_lang: "eng"
8
+ min_unit_chunk_size: 50
9
+ decision_threshold: 0.7
10
+ no_early_stop: True
11
+ block_ngrams: True
12
+ vocoder_name: vocoder_v2
13
+ expr_vocoder_name: vocoder_pretssel
14
+ gated_model_dir: .
15
+ expr_vocoder_gain: 3.0
16
+ upstream_idx: 1
17
+ wav2vec_yaml: wav2vec.yaml
18
+ min_starting_wait_w2vbert: 192
19
+
20
+ config_yaml: cfg_fbank_u2t.yaml
21
+ upstream_idx: 1
22
+ detokenize_only: True
23
+ device: cuda:0
24
+ max_len_a: 0
25
+ max_len_b: 1000
seamless_server/models/SeamlessStreaming/{vad_s2st_sc_24khz_main.yaml → vad_s2st_sc_main.yaml} RENAMED
File without changes
seamless_server/requirements.txt CHANGED
@@ -1,5 +1,6 @@
1
  # seamless_communication
2
- ./whl/seamless_communication-1.0.0-py3-none-any.whl
 
3
  Flask==2.1.3
4
  Flask_Sockets==0.2.1
5
  g2p_en==2.1.0
 
1
  # seamless_communication
2
+ git+https://github.com/facebookresearch/seamless_communication.git
3
+ # ./whl/seamless_communication-1.0.0-py3-none-any.whl
4
  Flask==2.1.3
5
  Flask_Sockets==0.2.1
6
  g2p_en==2.1.0
seamless_server/src/simuleval_agent_directory.py CHANGED
@@ -1,6 +1,7 @@
1
  # Creates a directory in which to look up available agents
2
 
3
- from typing import List
 
4
  from src.simuleval_transcoder import SimulevalTranscoder
5
  import json
6
  import logging
@@ -33,8 +34,10 @@ class AgentWithInfo:
33
  # Supported dynamic params are defined in StreamingTypes.ts
34
  dynamic_params: List[str] = [],
35
  description="",
 
36
  ):
37
  self.agent = agent
 
38
  self.name = name
39
  self.description = description
40
  self.modalities = modalities
@@ -75,6 +78,7 @@ class AgentWithInfo:
75
  class SimulevalAgentDirectory:
76
  # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
77
  seamless_streaming_agent = "SeamlessStreaming"
 
78
 
79
  def __init__(self):
80
  self.agents = []
@@ -96,7 +100,12 @@ class SimulevalAgentDirectory:
96
  model_id,
97
  )
98
  except Exception as e:
 
99
  logger.warning("Failed to build agent %s: %s" % (model_id, e))
 
 
 
 
100
  raise e
101
 
102
  return agent
@@ -110,20 +119,32 @@ class SimulevalAgentDirectory:
110
  for agent_info in agent_infos:
111
  self.add_agent(agent_info)
112
  else:
113
- s2s_m4t_expr_agent = self.build_agent_if_available(
114
- SimulevalAgentDirectory.seamless_streaming_agent,
115
- config_name="vad_s2st_sc_24khz_main.yaml",
116
- )
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- if s2s_m4t_expr_agent:
119
  self.add_agent(
120
  AgentWithInfo(
121
- agent=s2s_m4t_expr_agent,
122
  name=SimulevalAgentDirectory.seamless_streaming_agent,
123
  modalities=["s2t", "s2s"],
124
  target_langs=M4T_P0_LANGS,
125
  dynamic_params=["expressive"],
126
  description="multilingual expressive model that supports S2S and S2T",
 
127
  )
128
  )
129
 
@@ -137,7 +158,7 @@ class SimulevalAgentDirectory:
137
  def get_agent(self, name):
138
  for agent in self.agents:
139
  if agent.name == name:
140
- return agent.agent
141
  return None
142
 
143
  def get_agent_or_throw(self, name):
 
1
  # Creates a directory in which to look up available agents
2
 
3
+ import os
4
+ from typing import List, Optional
5
  from src.simuleval_transcoder import SimulevalTranscoder
6
  import json
7
  import logging
 
34
  # Supported dynamic params are defined in StreamingTypes.ts
35
  dynamic_params: List[str] = [],
36
  description="",
37
+ has_expressive: Optional[bool] = None,
38
  ):
39
  self.agent = agent
40
+ self.has_expressive = has_expressive
41
  self.name = name
42
  self.description = description
43
  self.modalities = modalities
 
78
  class SimulevalAgentDirectory:
79
  # Available models. These are the directories where the models can be found, and also serve as an ID for the model.
80
  seamless_streaming_agent = "SeamlessStreaming"
81
+ seamless_agent = "Seamless"
82
 
83
  def __init__(self):
84
  self.agents = []
 
100
  model_id,
101
  )
102
  except Exception as e:
103
+ from fairseq2.assets.error import AssetError
104
  logger.warning("Failed to build agent %s: %s" % (model_id, e))
105
+ if isinstance(e, AssetError):
106
+ logger.warning(
107
+ "Please download gated assets and set `gated_model_dir` in the config"
108
+ )
109
  raise e
110
 
111
  return agent
 
119
  for agent_info in agent_infos:
120
  self.add_agent(agent_info)
121
  else:
122
+ s2s_agent = None
123
+ if os.environ.get("USE_EXPRESSIVE_MODEL"):
124
+ logger.info("Building expressive model...")
125
+ s2s_agent = self.build_agent_if_available(
126
+ SimulevalAgentDirectory.seamless_agent,
127
+ config_name="vad_s2st_sc_24khz_main.yaml",
128
+ )
129
+ has_expressive = True
130
+ else:
131
+ logger.info("Building non-expressive model...")
132
+ s2s_agent = self.build_agent_if_available(
133
+ SimulevalAgentDirectory.seamless_streaming_agent,
134
+ config_name="vad_s2st_sc_main.yaml",
135
+ )
136
+ has_expressive = False
137
 
138
+ if s2s_agent:
139
  self.add_agent(
140
  AgentWithInfo(
141
+ agent=s2s_agent,
142
  name=SimulevalAgentDirectory.seamless_streaming_agent,
143
  modalities=["s2t", "s2s"],
144
  target_langs=M4T_P0_LANGS,
145
  dynamic_params=["expressive"],
146
  description="multilingual expressive model that supports S2S and S2T",
147
+ has_expressive=has_expressive,
148
  )
149
  )
150
 
 
158
  def get_agent(self, name):
159
  for agent in self.agents:
160
  if agent.name == name:
161
+ return agent
162
  return None
163
 
164
  def get_agent_or_throw(self, name):
seamless_server/src/simuleval_transcoder.py CHANGED
@@ -119,7 +119,8 @@ class OutputSegments:
119
 
120
  class SimulevalTranscoder:
121
  def __init__(self, agent, sample_rate, debug, buffer_limit):
122
- self.agent = agent
 
123
  self.input_queue = asyncio.Queue()
124
  self.output_queue = asyncio.Queue()
125
  self.states = self.agent.build_states()
@@ -185,7 +186,7 @@ class SimulevalTranscoder:
185
  logger.info(*args)
186
 
187
  @classmethod
188
- def build_agent(cls, model_path, config_name="vad_s2st_main.yaml"):
189
  logger.info(f"Building simuleval agent: {model_path}, {config_name}")
190
  agent = build_system_from_dir(
191
  Path(__file__).resolve().parent.parent / f"models/{model_path}",
@@ -208,6 +209,10 @@ class SimulevalTranscoder:
208
  tgt_lang=dynamic_config.get("targetLanguage"),
209
  config=dynamic_config,
210
  )
 
 
 
 
211
  # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
212
  self.input_queue.put_nowait(segment)
213
 
 
119
 
120
  class SimulevalTranscoder:
121
  def __init__(self, agent, sample_rate, debug, buffer_limit):
122
+ self.agent = agent.agent
123
+ self.has_expressive = agent.has_expressive
124
  self.input_queue = asyncio.Queue()
125
  self.output_queue = asyncio.Queue()
126
  self.states = self.agent.build_states()
 
186
  logger.info(*args)
187
 
188
  @classmethod
189
+ def build_agent(cls, model_path, config_name):
190
  logger.info(f"Building simuleval agent: {model_path}, {config_name}")
191
  agent = build_system_from_dir(
192
  Path(__file__).resolve().parent.parent / f"models/{model_path}",
 
209
  tgt_lang=dynamic_config.get("targetLanguage"),
210
  config=dynamic_config,
211
  )
212
+ if dynamic_config.get("expressive") is True and self.has_expressive is False:
213
+ logger.warning(
214
+ "Passing 'expressive' but the agent does not support expressive output!"
215
+ )
216
  # # segment is array([0, 0, 0, ..., 0, 0, 0], dtype=int16)
217
  self.input_queue.put_nowait(segment)
218
 
seamless_server/whl/seamless_communication-1.0.0-py3-none-any.whl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1df10e0c85ee0ffbc9f2e1bf8896850a52c551383df0332a94d26d9d39770c85
3
- size 201552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5b81add4d9917ac562c2e8a10bd5b3c88804b8bd94c56cef4e9a01ecde4a839
3
+ size 204321
streaming-react-app/src/StreamingInterface.tsx CHANGED
@@ -165,6 +165,9 @@ export default function StreamingInterface() {
165
 
166
  // Dynamic Params:
167
  const [targetLang, setTargetLang] = useState<string | null>(null);
 
 
 
168
 
169
  const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
170
  debugParam ?? false,
@@ -246,6 +249,7 @@ export default function StreamingInterface() {
246
  setAgent((prevAgent) => {
247
  if (prevAgent?.name !== newAgent?.name) {
248
  setTargetLang(newAgent?.targetLangs[0] ?? null);
 
249
  }
250
  return newAgent;
251
  });
@@ -421,6 +425,7 @@ export default function StreamingInterface() {
421
  // available before actually configuring and starting the stream
422
  const fullDynamicConfig: DynamicConfig = {
423
  targetLanguage: targetLang,
 
424
  };
425
 
426
  await onSetDynamicConfig(fullDynamicConfig);
@@ -906,6 +911,28 @@ export default function StreamingInterface() {
906
  spacing={1}
907
  alignItems="flex-start"
908
  sx={{flexGrow: 1}}>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
909
  {isListener && (
910
  <Box
911
  sx={{
 
165
 
166
  // Dynamic Params:
167
  const [targetLang, setTargetLang] = useState<string | null>(null);
168
+ const [enableExpressive, setEnableExpressive] = useState<boolean | null>(
169
+ null,
170
+ );
171
 
172
  const [serverDebugFlag, setServerDebugFlag] = useState<boolean>(
173
  debugParam ?? false,
 
249
  setAgent((prevAgent) => {
250
  if (prevAgent?.name !== newAgent?.name) {
251
  setTargetLang(newAgent?.targetLangs[0] ?? null);
252
+ setEnableExpressive(null);
253
  }
254
  return newAgent;
255
  });
 
425
  // available before actually configuring and starting the stream
426
  const fullDynamicConfig: DynamicConfig = {
427
  targetLanguage: targetLang,
428
+ expressive: enableExpressive,
429
  };
430
 
431
  await onSetDynamicConfig(fullDynamicConfig);
 
911
  spacing={1}
912
  alignItems="flex-start"
913
  sx={{flexGrow: 1}}>
914
+ {currentAgent?.dynamicParams?.includes(
915
+ 'expressive',
916
+ ) && (
917
+ <FormControlLabel
918
+ control={
919
+ <Switch
920
+ checked={enableExpressive ?? false}
921
+ onChange={(
922
+ event: React.ChangeEvent<HTMLInputElement>,
923
+ ) => {
924
+ const newValue = event.target.checked;
925
+ setEnableExpressive(newValue);
926
+ onSetDynamicConfig({
927
+ expressive: newValue,
928
+ });
929
+ }}
930
+ />
931
+ }
932
+ label="Expressive"
933
+ />
934
+ )}
935
+
936
  {isListener && (
937
  <Box
938
  sx={{
streaming-react-app/src/types/StreamingTypes.ts CHANGED
@@ -113,6 +113,7 @@ export type TranslationSentences = Array<string>;
113
  export type DynamicConfig = {
114
  // targetLanguage: a 3-letter string representing the desired output language.
115
  targetLanguage: string;
 
116
  };
117
 
118
  export type PartialDynamicConfig = Partial<DynamicConfig>;
 
113
  export type DynamicConfig = {
114
  // targetLanguage: a 3-letter string representing the desired output language.
115
  targetLanguage: string;
116
+ expressive: boolean | null;
117
  };
118
 
119
  export type PartialDynamicConfig = Partial<DynamicConfig>;