nisheeth committed on
Commit
3fe4d91
1 Parent(s): 2783814

Upload 7 files

Browse files
Files changed (7) hide show
  1. .gitattributes +6 -34
  2. .gitignore +163 -0
  3. README.md +6 -5
  4. app.py +317 -0
  5. packages.txt +2 -0
  6. pre-requirements.txt +0 -0
  7. requirements.txt +116 -0
.gitattributes CHANGED
@@ -1,35 +1,7 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  *.pth filter=lfs diff=lfs merge=lfs -text
3
+ *.pt* filter=lfs diff=lfs merge=lfs -text
4
+ *.ckpt* filter=lfs diff=lfs merge=lfs -text
5
+ *.pl filter=lfs diff=lfs merge=lfs -text
6
+ *.so filter=lfs diff=lfs merge=lfs -text
7
+ *.ini filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ # *.so
8
+
9
+ Temp_Audios/
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+ # *.ini
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: SSMTDemo
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 4.22.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: OcTra
3
+ emoji: 🏆
4
+ colorFrom: blue
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 3.39.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # load the libraries for the application
3
+ # -------------------------------------------
4
+ import os
5
+ import re
6
+ import nltk
7
+ import torch
8
+ import librosa
9
+ import tempfile
10
+ import subprocess
11
+
12
+ import gradio as gr
13
+
14
+ from scipy.io import wavfile
15
+ from nnet import utils, commons
16
+ from transformers import pipeline
17
+ from scipy.io.wavfile import write
18
+ from faster_whisper import WhisperModel
19
+ from nnet.models import SynthesizerTrn as vitsTRN
20
+ from nnet.models_vc import SynthesizerTrn as freeTRN
21
+ from nnet.mel_processing import mel_spectrogram_torch
22
+ from configurations.get_constants import constantConfig
23
+
24
+ from speaker_encoder.voice_encoder import SpeakerEncoder
25
+
26
+ from df_local.enhance import enhance, init_df, load_audio, save_audio
27
+ from configurations.get_hyperparameters import hyperparameterConfig
28
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
29
+
30
+ nltk.download('punkt')
31
+ from nltk.tokenize import sent_tokenize
32
+
33
+ # defining the FreeVC model class
34
+ # ---------------------------------
35
+ class FreeVCModel:
36
+ def __init__(self, config, ptfile, speaker_model, wavLM_model, device='cpu'):
37
+ self.hps = utils.get_hparams_from_file(config)
38
+
39
+ self.net_g = freeTRN(
40
+ self.hps.data.filter_length // 2 + 1,
41
+ self.hps.train.segment_size // self.hps.data.hop_length,
42
+ **self.hps.model
43
+ ).to(hyperparameters.device)
44
+ _ = self.net_g.eval()
45
+ _ = utils.load_checkpoint(ptfile, self.net_g, None, True)
46
+
47
+ self.cmodel = utils.get_cmodel(device, wavLM_model)
48
+
49
+ if self.hps.model.use_spk:
50
+ self.smodel = SpeakerEncoder(speaker_model)
51
+
52
+ def convert(self, src, tgt):
53
+ fs_src, src_audio = src
54
+ fs_tgt, tgt_audio = tgt
55
+
56
+ src = f"{constants.temp_audio_folder}/src.wav"
57
+ tgt = f"{constants.temp_audio_folder}/tgt.wav"
58
+ out = f"{constants.temp_audio_folder}/cnvr.wav"
59
+ with torch.no_grad():
60
+ wavfile.write(tgt, fs_tgt, tgt_audio)
61
+ wav_tgt, _ = librosa.load(tgt, sr=self.hps.data.sampling_rate)
62
+ wav_tgt, _ = librosa.effects.trim(wav_tgt, top_db=20)
63
+ if self.hps.model.use_spk:
64
+ g_tgt = self.smodel.embed_utterance(wav_tgt)
65
+ g_tgt = torch.from_numpy(g_tgt).unsqueeze(0).to(hyperparameters.device.type)
66
+ else:
67
+ wav_tgt = torch.from_numpy(wav_tgt).unsqueeze(0).to(hyperparameters.device.type)
68
+ mel_tgt = mel_spectrogram_torch(
69
+ wav_tgt,
70
+ self.hps.data.filter_length,
71
+ self.hps.data.n_mel_channels,
72
+ self.hps.data.sampling_rate,
73
+ self.hps.data.hop_length,
74
+ self.hps.data.win_length,
75
+ self.hps.data.mel_fmin,
76
+ self.hps.data.mel_fmax,
77
+ )
78
+ wavfile.write(src, fs_src, src_audio)
79
+ wav_src, _ = librosa.load(src, sr=self.hps.data.sampling_rate)
80
+ wav_src = torch.from_numpy(wav_src).unsqueeze(0).to(hyperparameters.device.type)
81
+ c = utils.get_content(self.cmodel, wav_src)
82
+
83
+ if self.hps.model.use_spk:
84
+ audio = self.net_g.infer(c, g=g_tgt)
85
+ else:
86
+ audio = self.net_g.infer(c, mel=mel_tgt)
87
+ audio = audio[0][0].data.cpu().float().numpy()
88
+ write(out, 24000, audio)
89
+
90
+ return out
91
+
92
+ # load the system configurations
93
+ constants = constantConfig()
94
+ hyperparameters = hyperparameterConfig()
95
+
96
+ # load the models
97
+ model, df_state, _ = init_df(hyperparameters.voice_enhacing_model, config_allow_defaults=True) # voice enhancing model
98
+ stt_model = WhisperModel(hyperparameters.stt_model, device=hyperparameters.device.type, compute_type="float32") #speech to text model
99
+
100
+ trans_model = AutoModelForSeq2SeqLM.from_pretrained(constants.model_name_dict[hyperparameters.nllb_model], torch_dtype=torch.bfloat16).to(hyperparameters.device)
101
+ trans_tokenizer = AutoTokenizer.from_pretrained(constants.model_name_dict[hyperparameters.nllb_model])
102
+
103
+ modelConvertSpeech = FreeVCModel(config=hyperparameters.text2speech_config, ptfile=hyperparameters.text2speech_model,
104
+ speaker_model=hyperparameters.text2speech_encoder, wavLM_model=hyperparameters.wavlm_model,
105
+ device=hyperparameters.device.type)
106
+
107
+ # download the language model if it doesn't exist
108
+ # ----------------------------------------------------
109
+ def download(lang, lang_directory):
110
+
111
+ if not os.path.exists(f"{lang_directory}/{lang}"):
112
+ cmd = ";".join([
113
+ f"wget {constants.language_download_web}/{lang}.tar.gz -O {lang_directory}/{lang}.tar.gz",
114
+ f"tar zxvf {lang_directory}/{lang}.tar.gz -C {lang_directory}"
115
+ ])
116
+ subprocess.check_output(cmd, shell=True)
117
+ try:
118
+ os.remove(f"{lang_directory}/{lang}.tar.gz")
119
+ except:
120
+ pass
121
+ return f"{lang_directory}/{lang}"
122
+
123
+ def preprocess_char(text, lang=None):
124
+ """
125
+ Special treatment of characters in certain languages
126
+ """
127
+ if lang == 'ron':
128
+ text = text.replace("ț", "ţ")
129
+ return text
130
+
131
+ def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):
132
+ txt = preprocess_char(txt, lang=lang)
133
+ is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
134
+ if is_uroman:
135
+ txt = text_mapper.uromanize(txt, f'{uroman_dir}/bin/uroman.pl')
136
+
137
+ txt = txt.lower()
138
+ txt = text_mapper.filter_oov(txt)
139
+ return txt
140
+
141
+ def detect_language(text,LID):
142
+ predictions = LID.predict(text)
143
+ detected_lang_code = predictions[0][0].replace("__label__", "")
144
+ return detected_lang_code
145
+
146
+ # text to speech
147
+ class TextMapper(object):
148
+ def __init__(self, vocab_file):
149
+ self.symbols = [x.replace("\n", "") for x in open(vocab_file, encoding="utf-8").readlines()]
150
+ self.SPACE_ID = self.symbols.index(" ")
151
+ self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}
152
+ self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}
153
+
154
+ def text_to_sequence(self, text, cleaner_names):
155
+ '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
156
+ Args:
157
+ text: string to convert to a sequence
158
+ cleaner_names: names of the cleaner functions to run the text through
159
+ Returns:
160
+ List of integers corresponding to the symbols in the text
161
+ '''
162
+ sequence = []
163
+ clean_text = text.strip()
164
+ for symbol in clean_text:
165
+ symbol_id = self._symbol_to_id[symbol]
166
+ sequence += [symbol_id]
167
+ return sequence
168
+
169
+ def uromanize(self, text, uroman_pl):
170
+ with tempfile.NamedTemporaryFile() as tf, \
171
+ tempfile.NamedTemporaryFile() as tf2:
172
+ with open(tf.name, "w") as f:
173
+ f.write("\n".join([text]))
174
+ cmd = f"perl " + uroman_pl
175
+ cmd += f" -l xxx "
176
+ cmd += f" < {tf.name} > {tf2.name}"
177
+ os.system(cmd)
178
+ outtexts = []
179
+ with open(tf2.name) as f:
180
+ for line in f:
181
+ line = re.sub(r"\s+", " ", line).strip()
182
+ outtexts.append(line)
183
+ outtext = outtexts[0]
184
+ return outtext
185
+
186
+ def get_text(self, text, hps):
187
+ text_norm = self.text_to_sequence(text, hps.data.text_cleaners)
188
+ if hps.data.add_blank:
189
+ text_norm = commons.intersperse(text_norm, 0)
190
+ text_norm = torch.LongTensor(text_norm)
191
+ return text_norm
192
+
193
+ def filter_oov(self, text):
194
+ val_chars = self._symbol_to_id
195
+ txt_filt = "".join(list(filter(lambda x: x in val_chars, text)))
196
+ return txt_filt
197
+
198
+ def speech_to_text(audio_file):
199
+ try:
200
+ fs, audio = audio_file
201
+ wavfile.write(constants.input_speech_file, fs, audio)
202
+ audio0, _ = load_audio(constants.input_speech_file, sr=df_state.sr())
203
+
204
+ # Enhance the SNR of the audio
205
+ enhanced = enhance(model, df_state, audio0)
206
+ save_audio(constants.enhanced_speech_file, enhanced, df_state.sr())
207
+
208
+ segments, info = stt_model.transcribe(constants.enhanced_speech_file)
209
+
210
+ speech_text = ''
211
+ for segment in segments:
212
+ speech_text = f'{speech_text}{segment.text}'
213
+ try:
214
+ source_lang_nllb = [k for k, v in constants.flores_codes_to_tts_codes.items() if v[:2] == info.language][0]
215
+ except:
216
+ source_lang_nllb = 'language cant be determined, select manually'
217
+
218
+ # text translation
219
+ return speech_text, gr.Dropdown.update(value=source_lang_nllb)
220
+ except:
221
+ return '', gr.Dropdown.update(value='English')
222
+
223
+ # Text to speech
224
+ def text_to_speech(text, target_lang):
225
+ txt = text
226
+
227
+ # LANG = get_target_tts_lang(target_lang)
228
+ LANG = constants.flores_codes_to_tts_codes[target_lang]
229
+ ckpt_dir = download(LANG, lang_directory=constants.language_directory)
230
+
231
+ vocab_file = f"{ckpt_dir}/{constants.language_vocab_text}"
232
+ config_file = f"{ckpt_dir}/{constants.language_vocab_configuration}"
233
+ hps = utils.get_hparams_from_file(config_file)
234
+ text_mapper = TextMapper(vocab_file)
235
+ net_g = vitsTRN(
236
+ len(text_mapper.symbols),
237
+ hps.data.filter_length // 2 + 1,
238
+ hps.train.segment_size // hps.data.hop_length,
239
+ **hps.model)
240
+ net_g.to(hyperparameters.device)
241
+ _ = net_g.eval()
242
+
243
+ g_pth = f"{ckpt_dir}/{constants.language_vocab_model}"
244
+
245
+ _ = utils.load_checkpoint(g_pth, net_g, None)
246
+
247
+ txt = preprocess_text(txt, text_mapper, hps, lang=LANG, uroman_dir=constants.uroman_directory)
248
+ stn_tst = text_mapper.get_text(txt, hps)
249
+ with torch.no_grad():
250
+ x_tst = stn_tst.unsqueeze(0).to(hyperparameters.device)
251
+ x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(hyperparameters.device)
252
+ hyp = net_g.infer(
253
+ x_tst, x_tst_lengths, noise_scale=.667,
254
+ noise_scale_w=0.8, length_scale=1.0
255
+ )[0][0,0].cpu().float().numpy()
256
+
257
+ return hps.data.sampling_rate, hyp
258
+
259
+ def translation(audio, text, source_lang_nllb, target_code_nllb, output_type, sentence_mode):
260
+ target_code = constants.flores_codes[target_code_nllb]
261
+ translator = pipeline('translation', model=trans_model, tokenizer=trans_tokenizer, src_lang=source_lang_nllb, tgt_lang=target_code, device=hyperparameters.device)
262
+
263
+ # output = translator(text, max_length=400)[0]['translation_text']
264
+ if sentence_mode == "Sentence-wise":
265
+ sentences = sent_tokenize(text)
266
+ translated_sentences = []
267
+ for sentence in sentences:
268
+ translated_sentence = translator(sentence, max_length=400)[0]['translation_text']
269
+ translated_sentences.append(translated_sentence)
270
+ output = ' '.join(translated_sentences)
271
+ else:
272
+ output = translator(text, max_length=1024)[0]['translation_text']
273
+
274
+ # get the text to speech
275
+ fs_out, audio_out = text_to_speech(output, target_code_nllb)
276
+
277
+ if output_type == 'own voice':
278
+ out_file = modelConvertSpeech.convert((fs_out, audio_out), audio)
279
+ return output, out_file
280
+
281
+ wavfile.write(constants.text2speech_wavfile, fs_out, audio_out)
282
+ return output, constants.text2speech_wavfile
283
+
284
+ with gr.Blocks(title = "Octopus Translation App") as octopus_translator:
285
+ with gr.Row():
286
+ audio_file = gr.Audio(source="microphone")
287
+
288
+ with gr.Row():
289
+ input_text = gr.Textbox(label="Input text")
290
+ source_language = gr.Dropdown(list(constants.flores_codes.keys()), value='English', label='Source (Autoselected)', interactive=True)
291
+
292
+ with gr.Row():
293
+ output_text = gr.Textbox(label='Translated text')
294
+ target_language = gr.Dropdown(list(constants.flores_codes.keys()), value='German', label='Target', interactive=True)
295
+
296
+
297
+ with gr.Row():
298
+ output_speech = gr.Audio(label='Translated speech')
299
+ translate_button = gr.Button('Translate')
300
+
301
+
302
+ with gr.Row():
303
+ enhance_audio = gr.Radio(['yes', 'no'], value='yes', label='Enhance input voice', interactive=True)
304
+ input_type = gr.Radio(['Whole text', 'Sentence-wise'],value='Sentence-wise', label="Translation Mode", interactive=True)
305
+ output_audio_type = gr.Radio(['standard speaker', 'voice transfer'], value='voice transfer', label='Enhance output voice', interactive=True)
306
+
307
+ audio_file.change(speech_to_text,
308
+ inputs=[audio_file],
309
+ outputs=[input_text, source_language])
310
+
311
+ translate_button.click(translation,
312
+ inputs=[audio_file, input_text,
313
+ source_language, target_language,
314
+ output_audio_type, input_type],
315
+ outputs=[output_text, output_speech])
316
+
317
+ octopus_translator.launch(share=False)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ffmpeg
2
+ rustc
pre-requirements.txt ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ aiohttp==3.8.5
3
+ aiosignal==1.3.1
4
+ altair==5.0.1
5
+ annotated-types==0.5.0
6
+ anyio==3.7.1
7
+ asttokens==2.2.1
8
+ async-timeout==4.0.3
9
+ attrs==23.1.0
10
+ audioread==3.0.0
11
+ av==10.0.0
12
+ certifi==2022.12.7
13
+ cffi==1.15.1
14
+ charset-normalizer==2.1.1
15
+ click==8.1.6
16
+ colorama==0.4.6
17
+ coloredlogs==15.0.1
18
+ contourpy==1.1.0
19
+ ctranslate2==3.18.0
20
+ cycler==0.11.0
21
+ Cython==3.0.0
22
+ decorator==5.1.1
23
+ DeepFilterLib==0.2.4
24
+ deepfilternet==0.2.4
25
+ executing==1.2.0
26
+ fastapi==0.101.1
27
+ faster-whisper==0.7.1
28
+ ffmpeg-python==0.2.0
29
+ ffmpy==0.3.1
30
+ filelock==3.9.0
31
+ flatbuffers==23.5.26
32
+ fonttools==4.42.0
33
+ frozenlist==1.4.0
34
+ fsspec==2023.6.0
35
+ future==0.18.3
36
+ gradio==3.40.1
37
+ gradio_client==0.4.0
38
+ h11==0.14.0
39
+ httpcore==0.17.3
40
+ httpx==0.24.1
41
+ huggingface-hub==0.16.4
42
+ humanfriendly==10.0
43
+ icecream==2.1.3
44
+ idna==3.4
45
+ importlib-resources==6.0.1
46
+ Jinja2==3.1.2
47
+ joblib==1.3.2
48
+ jsonschema==4.19.0
49
+ jsonschema-specifications==2023.7.1
50
+ kiwisolver==1.4.4
51
+ lazy_loader==0.3
52
+ librosa==0.10.1
53
+ linkify-it-py==2.0.2
54
+ llvmlite==0.40.1
55
+ loguru==0.7.0
56
+ markdown-it-py==2.2.0
57
+ MarkupSafe==2.1.2
58
+ matplotlib==3.7.2
59
+ mdit-py-plugins==0.3.3
60
+ mdurl==0.1.2
61
+ mpmath==1.2.1
62
+ msgpack==1.0.5
63
+ multidict==6.0.4
64
+ networkx==3.0
65
+ nltk==3.8.1
66
+ numba==0.57.1
67
+ numpy==1.24.4
68
+ onnxruntime==1.15.1
69
+ orjson==3.9.5
70
+ packaging==23.1
71
+ pandas==2.0.3
72
+ Pillow==9.3.0
73
+ platformdirs==3.10.0
74
+ pooch==1.7.0
75
+ protobuf==4.24.0
76
+ pycparser==2.21
77
+ pydantic==2.1.1
78
+ pydantic_core==2.4.0
79
+ pydub==0.25.1
80
+ Pygments==2.16.1
81
+ pyparsing==3.0.9
82
+ python-dateutil==2.8.2
83
+ python-multipart==0.0.6
84
+ pytz==2023.3
85
+ PyYAML==6.0.1
86
+ referencing==0.30.2
87
+ regex==2023.8.8
88
+ requests==2.28.1
89
+ rpds-py==0.9.2
90
+ safetensors==0.3.2
91
+ scikit-learn==1.3.0
92
+ scipy==1.11.1
93
+ semantic-version==2.10.0
94
+ six==1.16.0
95
+ sniffio==1.3.0
96
+ soundfile==0.12.1
97
+ soxr==0.3.6
98
+ starlette==0.27.0
99
+ sympy==1.11.1
100
+ threadpoolctl==3.2.0
101
+ tokenizers==0.13.3
102
+ toolz==0.12.0
103
+ --find-links https://download.pytorch.org/whl/torch_stable.html
104
+ torch==2.0.1+cpu
105
+ torchaudio==2.0.2+cpu
106
+ torchvision==0.15.2+cpu
107
+ tqdm==4.66.1
108
+ transformers==4.31.0
109
+ typing_extensions==4.7.1
110
+ tzdata==2023.3
111
+ uc-micro-py==1.0.2
112
+ urllib3==1.26.13
113
+ uvicorn==0.23.2
114
+ webrtcvad==2.0.10
115
+ websockets==11.0.3
116
+ yarl==1.9.2