Everton Aleixo committed on
Commit
efac2a4
1 Parent(s): 6f6baeb
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -4,9 +4,22 @@ import torch
4
  from datasets import load_dataset
5
 
6
  from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor, pipeline
 
 
7
 
8
  from gradio_client import serializing
9
  print('kesy', serializing.COMPONENT_MAPPING.keys())
 
 
 
 
 
 
 
 
 
 
 
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
11
 
12
  # load speech translation checkpoint
@@ -24,6 +37,7 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
24
 
25
  def translate(audio):
26
  outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language":"portuguese"})
 
27
  return outputs["text"]
28
 
29
 
@@ -35,6 +49,7 @@ def synthesise(text):
35
 
36
  def speech_to_speech_translation(audio):
37
  translated_text = translate(audio)
 
38
  synthesised_speech = synthesise(translated_text)
39
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
40
  return 16000, synthesised_speech
 
4
  from datasets import load_dataset
5
 
6
  from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor, pipeline
7
+ from huggingface_hub import HfFolder
8
+ import requests
9
 
10
  from gradio_client import serializing
11
  print('kesy', serializing.COMPONENT_MAPPING.keys())
12
+ print('HF', HfFolder().get_token())
13
+
14
+ def query(text, model_id="tiiuae/falcon-7b-instruct"):
15
+ api_url = f"https://api-inference.huggingface.co/models/{model_id}"
16
+ headers = {"Authorization": f"Bearer {HfFolder().get_token()}"}
17
+ payload = {"inputs": text}
18
+
19
+ print(f"Querying...: {text}")
20
+ response = requests.post(api_url, headers=headers, json=payload)
21
+ return response.json()[0]["generated_text"][len(text) + 1 :]
22
+
23
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
24
 
25
  # load speech translation checkpoint
 
37
 
38
  def translate(audio):
39
  outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language":"portuguese"})
40
+ print('outputs', outputs)
41
  return outputs["text"]
42
 
43
 
 
49
 
50
  def speech_to_speech_translation(audio):
51
  translated_text = translate(audio)
52
+ print('translated', translated_text)
53
  synthesised_speech = synthesise(translated_text)
54
  synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
55
  return 16000, synthesised_speech