Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ import torch
 from transformers import pipeline, VitsModel, AutoTokenizer, AutoTokenizer
 from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
 from transformers import WhisperTokenizer, GenerationConfig
-#from transformers import BarkModel, AutoProcessor
 
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -39,17 +38,6 @@ speaker_embeddings2 = np.load('speaker_embeddings.npy')
 speaker_embeddings2 = torch.tensor(speaker_embeddings2)
 print(speaker_embeddings2)
 
-#lang_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")
-
-# ---------------- Speech generator bark--------------------------#
-
-
-#model = BarkModel.from_pretrained("suno/bark-small")
-#processor = BarkProcessor.from_pretrained("suno/bark-small")
-
-# processor = AutoProcessor.from_pretrained("suno/bark-small")
-# model = BarkModel.from_pretrained("suno/bark-small")
-
 
 def language_detector(text):
     resultado = lang_detector(text)
@@ -62,23 +50,6 @@ def translate(audio):
     print(outputs["text"])
     return outputs["text"]
 
-
-# def synthesise(text):
-#     inputs = processor(text=text, voice_preset="v2/es_speaker_8")
-#     speech_output = model.generate(**inputs).cpu()
-#     return speech_output
-
-# def speech_to_speech_translation(audio):
-#     translated_text = translate(audio)
-#     synthesised_speech = synthesise(translated_text)
-
-#     sample_rate = model.generation_config.sample_rate
-
-#     synthesised_speech = synthesised_speech.numpy().squeeze()
-
-#     return sample_rate, synthesised_speech
-
-
 def synthesise(text):
     inputs = processor(text=text, return_tensors="pt")
     output = model.generate_speech(inputs["input_ids"], speaker_embeddings2, vocoder=vocoder)
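For reference, the SpeechT5 path that survives this cleanup looks roughly like the sketch below. The checkpoint names and the processor/model/vocoder setup sit outside these hunks, so microsoft/speecht5_tts and microsoft/speecht5_hifigan are assumptions inferred from the imports; only the speaker_embeddings.npy loading and the synthesise body are taken from the diff itself.

import numpy as np
import torch
from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor

# Assumed checkpoints; app.py's actual setup is not visible in this diff.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# From the diff: a precomputed speaker x-vector (shape (1, 512)) loaded from disk.
speaker_embeddings2 = torch.tensor(np.load("speaker_embeddings.npy"))

def synthesise(text):
    inputs = processor(text=text, return_tensors="pt")
    # generate_speech returns a 16 kHz waveform tensor.
    output = model.generate_speech(inputs["input_ids"], speaker_embeddings2, vocoder=vocoder)
    return output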
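One thing worth flagging: the deleted comment block held the only visible definition of lang_detector, yet language_detector() still calls it, so the pipeline must be instantiated in a part of app.py outside these hunks. Based on the removed comment, it is presumably built as below (a sketch; the return line is hypothetical, since the hunk cuts off after the pipeline call).

from transformers import pipeline

# From the deleted comment: XLM-RoBERTa language-identification pipeline.
lang_detector = pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection")

def language_detector(text):
    resultado = lang_detector(text)
    return resultado[0]["label"]  # hypothetical: the diff truncates the function body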