Uploading files to GCP and refactor
Files changed:
- .gitignore +3 -1
- app.py +65 -20
- gcp.py +0 -47
- requirements.txt +3 -2
- audio.py → services/audio.py +39 -32
- audio_model.py → services/audio_model.py +3 -4
- chatbot.py → services/chatbot.py +88 -31
- services/gcp.py +69 -0
- utils.py → services/utils.py +142 -39
- video.py → services/video.py +37 -28
.gitignore
CHANGED
@@ -4,4 +4,6 @@ __pycache__/
 .env
 
 assets/
-tts_model/
+tts_model/
+
+output.wav
app.py
CHANGED
@@ -1,8 +1,8 @@
 from dotenv import load_dotenv
 load_dotenv()
 
-import utils
-import chatbot
+import time
+from services import chatbot, utils
 import gradio as gr
 
 
@@ -26,7 +26,7 @@ with gr.Blocks() as app:
     )
 
     with gr.Tab('Images'):
-        base_image = gr.Image(label='Imagen base para los videos', sources=['upload'])
+        base_image = gr.Image(label='Imagen base para los videos', sources=['upload'], type='pil')
 
     with gr.Tab('Greeting and goodbye'):
         _ = gr.Markdown(
@@ -39,7 +39,7 @@ with gr.Blocks() as app:
             info='Seleccione si es saludo, despedida o mensaje de error.', label='Tipo mensaje'
         )
         language_greet = gr.Dropdown(
-            choices=['español'], value=
+            choices=['español'], value='español', interactive=True,
             info='Seleccione el idioma en el que esta el texto.', label='Idioma'
         )
         send_greet_button = gr.Button(value='Añadir')
@@ -85,13 +85,40 @@ with gr.Blocks() as app:
         context_prompt = gr.Text(
            placeholder='Ingrese el prompt usado para encontrar el contexto', label='Standalone prompt'
         )
+        _ = gr.Markdown(
+            "```\n"
+            "Recuerde dejar estos formatos en los prompts: \n"
+            "----------------------- General --------------------------\n"
+            "=========\n"
+            "Contexto:\n"
+            "CONTEXTO\n"
+            "=========\n"
+            "\n"
+            "----------------------- Standalone -----------------------\n"
+            "Chat History:\n"
+            "\n"
+            "HISTORY\n"
+            "Follow-up message: QUESTION\n"
+            "Standalone message:\n"
+            "```", line_breaks=True
+        )
 
     with gr.Tab('Test'):
         start_test_button = gr.Button(value='Iniciar test')
-        with gr.
+        with gr.Column(visible=False) as chat_row:
             chat = gr.Chatbot(label='Chat')
             output_audio = gr.Audio(interactive=False, label='Audio', autoplay=True, visible=False)
-        user_input = gr.Text(label='
+            user_input = gr.Text(label='Escribe tus preguntas')
+
+    with gr.Tab('Prompts by languages'):
+        with gr.Row():
+            prompt_data = gr.Text(placeholder='Ingrese el prompt', info='Ingrese el prompt.', label='Prompt')
+            language_prompt = gr.Dropdown(
+                choices=['español'], value='español', interactive=True,
+                info='Seleccione el idioma en el que esta el texto.', label='Idioma'
+            )
+        send_prompt_button = gr.Button(value='Añadir')
+        prompts_table = gr.DataFrame(headers=['Eliminar', 'Prompts', 'Idioma'], type='array', interactive=False)
 
     with gr.Tab('Submit'):
         _ = gr.Markdown(
@@ -102,6 +129,11 @@ with gr.Blocks() as app:
 
     # ----------------------------------------------- ACTIONS -----------------------------------------------------
 
+    # Update the dataframes based on the languages selected in the first tab
+    languages.change(
+        utils.add_language, languages, [language_greet, language_random, language_prompt]
+    )
+
     # Add info to the tables
     send_greet_button.click(
         utils.add_data_table,
@@ -109,10 +141,19 @@ with gr.Blocks() as app:
         [messages_table, greet, type_greet, language_greet]
     )
    send_random_button.click(
-        utils.add_data_table,
+        utils.add_data_table,
+        [random_table, random_data, language_random],
+        [random_table, random_data, language_random]
     )
     send_question_button.click(
-        utils.add_data_table,
+        utils.add_data_table,
+        [questions_table, question, context],
+        [questions_table, question, context]
+    )
+    send_prompt_button.click(
+        utils.add_data_table,
+        [prompts_table, prompt_data, language_prompt],
+        [prompts_table, prompt_data, language_prompt]
     )
 
     # Remove info from the tables
@@ -128,39 +169,43 @@ with gr.Blocks() as app:
 
     # Create the chatbot: create media (csv files, audio and video) and vectorstore
     create_chatbot_button.click(
-        lambda: gr.update(value='Creating chatbot...', interactive=False),
-        None,
-        create_chatbot_button
+        lambda: gr.update(value='Creating chatbot...', interactive=False), None, create_chatbot_button
     ).then(
         utils.create_chatbot,
-        [client, name, messages_table, random_table, questions_table],
+        [client, name, messages_table, random_table, questions_table, base_image],
         create_chatbot_button
     )
 
-    # Update the dataframes based on the languages selected in the first tab
-    languages.change(
-        utils.add_language, languages, [language_greet, language_random]
-    )
-
     # Initialize chat
     start_test_button.click(
         lambda: gr.update(value='Iniciando chat...'), None, start_test_button
     ).then(
-
+        lambda: time.sleep(1.5), None, None
+    ).then(
+        chatbot.start_chat, client, [chat, output_audio, chat_row]
     ).then(
         lambda: gr.update(value='Reiniciar chat'), None, start_test_button
     )
 
     # Chat with the chatbot
     user_input.submit(
-        chatbot.get_random_data,
+        chatbot.get_random_data, client, output_audio
     ).then(
         chatbot.get_answer,
         [chat, user_input, client, general_prompt, context_prompt],
         [chat, user_input, output_audio], show_progress='hidden'
     )
 
-    # Submit chatbot: save prompts
+    # Submit chatbot: save prompts
+    submit_button.click(
+        lambda: gr.update(value='Subiendo la información', interactive=False), None, submit_button
+    ).then(
+        utils.save_prompts, [client, context_prompt, prompts_table]
+    ).then(
+        utils.generate_json, [client, languages, max_num_questions, name], output_file
+    ).then(
+        lambda: gr.update(value='Información subida!!', interactive=False), None, submit_button
+    )
 
 
 app.launch(debug=True)
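Note on the event wiring above: Gradio runs chained events sequentially, so each .then() step starts only after the previous one resolves; that is what lets app.py swap a button label, run the slow job, then restore the label. A minimal, runnable sketch of the pattern (component and function names here are illustrative, not from this repo):

import time
import gradio as gr

def slow_job():
    time.sleep(2)  # stand-in for utils.create_chatbot / chatbot.start_chat
    return gr.update(value='Done!', interactive=False)

with gr.Blocks() as demo:
    button = gr.Button(value='Run')
    button.click(
        lambda: gr.update(value='Working...', interactive=False), None, button  # immediate feedback
    ).then(
        slow_job, None, button  # runs only after the label update has been applied
    )

demo.launch()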
gcp.py
DELETED
@@ -1,47 +0,0 @@
-import os
-from pathlib import Path
-from datetime import timedelta
-from google.cloud import storage
-from huggingface_hub import hf_hub_download
-from google.cloud.storage import transfer_manager
-
-
-def download_credentials():
-    os.makedirs('assets', exist_ok=True)
-
-    # Download credentials file
-    hf_hub_download(
-        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
-        token=os.environ.get('HUB_TOKEN'), local_dir="assets"
-    )
-
-
-def upload_folder(bucket_name: str, source_directory: str) -> None:
-    # Filter so the list only includes files, not directories themselves.
-    string_paths = [
-        str(path.relative_to(source_directory)) for path in Path(source_directory).rglob("*") if path.is_file()
-    ]
-
-    # Start the upload.
-    bucket = STORAGE_CLIENT.bucket(bucket_name)
-    results = transfer_manager.upload_many_from_filenames(
-        bucket, string_paths, source_directory=source_directory, max_workers=2
-    )
-
-    for name, result in zip(string_paths, results):
-        if isinstance(result, Exception):
-            print(f"Failed to upload {name} due to exception: {result}")
-        else:
-            print(f"Uploaded {name} to {bucket.name}.")
-
-
-def get_link_file(bucket_name: str, client_name: str, type_media: str, media_name: str):
-    bucket = STORAGE_CLIENT.bucket(bucket_name)
-    blobs = bucket.list_blobs(prefix=f'{client_name}/media/{type_media}/{media_name}')
-    blob = next(blobs)
-    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
-    return signed_url
-
-
-download_credentials()
-STORAGE_CLIENT = storage.Client.from_service_account_json(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
requirements.txt
CHANGED
@@ -4,11 +4,12 @@ google-cloud-storage==2.13.0
 requests==2.31.0
 tqdm==4.66.1
 nltk==3.8.1
-
+deepspeed==0.12.3
 torch==2.1.1
 torchaudio==2.1.1
 TTS==0.21.2
 google-cloud-storage==2.13.0
 numpy==1.22.0
 openai==1.10.0
-
+gradio==4.13.0
+pillow==10.2.0
audio.py → services/audio.py
RENAMED
@@ -5,17 +5,13 @@ import torch
 import pickle
 import torchaudio
 import numpy as np
-import gradio as gr
-from typing import Optional
 from TTS.tts.models.xtts import Xtts
 from nltk.tokenize import sent_tokenize
 from TTS.tts.configs.xtts_config import XttsConfig
 
 
 def _load_array(filename):
-    """
-    Opens a file a returns it, used with numpy files
-    """
+    """ Opens a file a returns it, used with numpy files """
     with open(filename, 'rb') as f:
         return pickle.load(f)
 
@@ -23,8 +19,8 @@ def _load_array(filename):
 os.environ['COQUI_TOS_AGREED'] = '1'
 
 # Used to generate audio based on a sample
-
-model_path = os.path.join("tts_model")
+nltk.download('punkt')
+model_path = os.path.join("../tts_model")
 
 config = XttsConfig()
 config.load_json(os.path.join(model_path, "config.json"))
@@ -35,7 +31,7 @@ model.load_checkpoint(
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
-
+    use_deepspeed=True,
 )
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -43,30 +39,37 @@ model.to(device)
 
 # Speaker latent
 path_latents = 'assets/gpt_cond_latent.npy'
-
+gpt_cond_latent = _load_array(path_latents)
 
 # Speaker embedding
 path_embedding = 'assets/speaker_embedding.npy'
-
+speaker_embedding = _load_array(path_embedding)
 
 
 def get_audio(text: str, language: str = 'es', saving_path: str = 'output') -> None:
     """
-    Creates an audio
+    Creates an audio
+    :param text: text to convert to audio
+    :param language: 'es', 'en' or 'pt', language used for the audio file
+    :param saving_path: path to save the audio
+    :return: None
     """
     # Creates an audio with the answer and saves it as output.wav
     _save_audio(text, language, saving_path)
-
-    return None
+    return
 
 
-def _save_audio(answer: str, language: str, path_audio: str) -> None:
+def _save_audio(text: str, language: str, path_audio: str) -> None:
     """
-    Splits the
-    all the audios and saves them into a file
+    Splits the text into sentences, clean and creates an audio for each one, then concatenates
+    all the audios and saves them into a file.
+    :param text: input text
+    :param language: language used in the audio
+    :param path_audio: saving path of the audio
+    :return: None
     """
     # Split the answer into sentences and clean it
-    sentences =
+    sentences = _get_clean_text(text, language)
 
     # Get the voice of each sentence
     audio_segments = []
@@ -78,41 +81,42 @@ def _save_audio(answer: str, language: str, path_audio: str) -> None:
     # Concatenate and save all audio segments
     concatenated_audio = torch.cat(audio_segments, dim=0)
     torchaudio.save(f'{path_audio}.wav', concatenated_audio.unsqueeze(0), 24000)
+    return
 
 
 def _get_voice(sentence: str, language: str) -> np.ndarray:
     """
-
+    Gets a numpy array with a wav of an audio with the given sentence and language
+    :param sentence: input sentence
+    :param language: languages used in the audio
+    :return: numpy array with the audio
     """
-    '''out = model.inference(
+    out = model.inference(
         sentence,
         language=language,
         gpt_cond_latent=gpt_cond_latent,
         speaker_embedding=speaker_embedding,
         temperature=0.1
-    )'''
-    out = model.synthesize(
-        sentence,
-        config,
-        speaker_wav='assets/orlando2_cleaned.wav',
-        language=language
     )
     return out['wav']
 
 
-def _get_clean_answer(answer: str, language: str) -> list[str]:
+def _get_clean_text(text: str, language: str) -> list[str]:
     """
-
+    Splits the text into smaller sentences using nltk and removes links.
+    :param text: input text for the audio
+    :param language: language used for the audio ('es', 'en', 'pt')
+    :return: list of sentences
     """
     # Remove the links in the audio and add another sentence
     if language == 'en':
-        clean_answer = re.sub(r'http[s]?://\S+', 'the following link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'the following link', text)
         max_characters = 250
     elif language == 'es':
-        clean_answer = re.sub(r'http[s]?://\S+', 'el siguiente link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'el siguiente link', text)
        max_characters = 239
     else:
-        clean_answer = re.sub(r'http[s]?://\S+', 'o seguinte link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'o seguinte link', text)
         max_characters = 203
 
     # Change the name from Bella to Bela
@@ -136,9 +140,12 @@ def _get_clean_answer(answer: str, language: str) -> list[str]:
 
 def _split_sentence(sentence: str, max_characters: int) -> list[str]:
     """
-
+    Used when the sentences are still to long. The split point is the nearest comma to the middle
     of the sentence, if there is no comma then a space is used or just the middle. If the
-    remaining sentences are still too long, another iteration is run
+    remaining sentences are still too long, another iteration is run.
+    :param sentence: sentence to be split
+    :param max_characters: max number of characters a sentence can have
+    :return: list of sentences
     """
     # Get index of each comma
     sentences = []
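Note: the heart of services/audio.py is a split-synthesize-concatenate loop: nltk splits the text into sentences, XTTS renders each one with the precomputed speaker latents, and the clips are joined into a single 24 kHz wav. A condensed sketch of that flow; the model and the two conditioning arrays are passed in as parameters here, whereas in the module they are the globals loaded in the diff above:

import torch
import torchaudio
from nltk.tokenize import sent_tokenize

def synthesize(model, gpt_cond_latent, speaker_embedding, text: str, language: str, path: str) -> None:
    # Requires nltk's 'punkt' tokenizer data, downloaded at import time in the module
    segments = []
    for sentence in sent_tokenize(text):  # one short clip per sentence
        out = model.inference(
            sentence, language=language,
            gpt_cond_latent=gpt_cond_latent, speaker_embedding=speaker_embedding,
            temperature=0.1
        )
        segments.append(torch.as_tensor(out['wav']))
    # Concatenate all clips and save them as one mono 24 kHz file
    torchaudio.save(f'{path}.wav', torch.cat(segments, dim=0).unsqueeze(0), 24000)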
audio_model.py → services/audio_model.py
RENAMED
@@ -2,7 +2,6 @@ import os
 import requests
 from tqdm import tqdm
 from google.cloud import storage
-from huggingface_hub import hf_hub_download
 
 
 def _download_starting_files() -> None:
@@ -46,8 +45,8 @@ def download_model():
         'vocab.json': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/vocab.json?download=true',
     }
 
-    if not os.path.exists("tts_model"):
-        os.makedirs("tts_model")
+    if not os.path.exists("../tts_model"):
+        os.makedirs("../tts_model")
 
     # Download files if they don't exist
     print("[COQUI TTS] STARTUP: Checking Model is Downloaded.")
@@ -57,4 +56,4 @@ def download_model():
         _download_file(url, destination)
 
     # Downloads the embeddings from GCP
-
+    _download_starting_files()
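Note: _download_file itself sits outside these hunks; given the requests and tqdm imports, a plausible shape for it is a chunked streaming download with a progress bar. This is an assumption for illustration, not the repo's code:

import requests
from tqdm import tqdm

def download_file(url: str, destination: str) -> None:
    # Stream the file in chunks so large model weights never sit fully in memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total = int(r.headers.get('content-length', 0))
        with open(destination, 'wb') as f, tqdm(total=total, unit='B', unit_scale=True) as bar:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
                bar.update(len(chunk))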
chatbot.py → services/chatbot.py
RENAMED
@@ -1,5 +1,6 @@
 import os
-import audio
+import csv
+from services import audio
 import random
 import pinecone
 import gradio as gr
@@ -7,42 +8,66 @@ from openai import OpenAI
 
 
 OPENAI_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=os.getenv("PINECONE_ENV"))
+INDEX = pinecone.Index(os.getenv("PINECONE_INDEX"))
+
+
+def start_chat(client_name: str) -> tuple[list[list[str | None]], gr.helpers, gr.helpers]:
+    """
+    Initialize chat with greeting text and audio in spanish
+    :param client_name: name of the client
+    :return: (chat history with greeting, audio with updated file and gradio update with visible=True)
+    """
+    # Get greeting text and audio, the first one available in spanish
+    with open(f'assets/{client_name}/greetings/es.csv', mode='r', encoding='utf-8') as infile:
+        reader = csv.reader(infile)
+        greeting = next(reader)[0]
+    audio_name = f'assets/{client_name}/media/audio/greeting_es_0.wav'
+
+    # Initialize chat
+    chat_history = [['', greeting]]
+
+    return chat_history, gr.update(value=f'{audio_name}'), gr.update(visible=True)
+
+
+def get_random_data(client_name: str) -> gr.helpers:
+    """
+    Returns an audio with a random data in spanish
+    :param client_name: name of the client for this chatbot
+    :return: gradio audio updated with a random data from the client
+    """
     random_options = []
     path_audios = f'assets/{client_name}/media/audio'
     for random_audio in os.listdir(path_audios):
         if random_audio.startswith('random') and 'es' in random_audio:
-            random_options.append(random_audio)
+            random_options.append(os.path.join(path_audios, random_audio))
 
+    # Get any of the found random files
     num = random.randint(0, len(random_options) - 1)
     return gr.update(value=random_options[num])
 
 
 def get_answer(
-    chat_history: list[tuple[str, str]], user_input: str, client_name: str, general_prompt: str,context_prompt: str
-):
+    chat_history: list[tuple[str, str]], user_input: str, client_name: str, general_prompt: str, context_prompt: str
+) -> tuple[list[tuple[str, str]], str, gr.helpers]:
+    """
+    Gets the answer from the chatbot and returns it as an audio and text
+    :param chat_history: previous chat history
+    :param user_input: user question
+    :param client_name: name of the client
+    :param general_prompt: prompt used for answering the questions
+    :param context_prompt: prompt used for finding the context in the vectorstore
+    :return:
+    """
     # Format chat history to OpenAI format msg history
     msg_history = [{'role': 'system', 'content': general_prompt}]
-    for i, msg in enumerate(chat_history):
+    for i, (user, bot) in enumerate(chat_history):
         if i == 0:
-
-            if i % 2 == 0:
-                msg_history.append({'role': 'user', 'content': msg})
+            msg_history.append({'role': 'assistant', 'content': bot})
         else:
-            msg_history.append({'role': '
+            msg_history.append({'role': 'user', 'content': user})
+            msg_history.append({'role': 'assistant', 'content': bot})
 
     # Get standalone question
     standalone_question = _get_standalone_question(user_input, msg_history, context_prompt)
@@ -53,7 +78,7 @@ def get_answer(
     # Get answer from chatbot
     response = _get_response(context, msg_history, user_input, general_prompt)
 
-    # Get audio
+    # Get audio:
     audio.get_audio(response, 'es')
 
     # Update chat_history
@@ -63,12 +88,25 @@ def get_answer(
 
 
 def _get_response(context: str, message_history: list[dict], question: str, prompt: str) -> str:
+    """
+    Gets the response from ChatGPT
+    :param context: text obtained from the vectorstore
+    :param message_history: chat history in the format used by OpenAI
+    :param question: user question
+    :param prompt: prompt used to answer the questions
+    :return: response from ChatGPT
+    """
     message_history[0]['content'] = prompt.replace('CONTEXT', context)
     message_history.append({'role': 'user', 'content': question})
-    return _call_api(message_history)
+    return _call_api(message_history, 0.7)
 
 
 def _get_embedding(text: str) -> list[float]:
+    """
+    Gets the embedding of a given text
+    :param text: input text
+    :return: embedding of the text
+    """
     response = OPENAI_CLIENT.embeddings.create(
         input=text,
         model='text-embedding-ada-002'
@@ -76,16 +114,30 @@ def _get_embedding(text: str) -> list[float]:
     return response.data[0].embedding
 
 
-def _call_api(message_history: list[dict]) -> str:
+def _call_api(message_history: list[dict], temperature: float) -> str:
+    """
+    Gets response form OpenAI API
+    :param message_history: chat history in the format used by OpenAI
+    :param temperature: randomness of the output
+    :return: ChatGPT answer
+    """
     response = OPENAI_CLIENT.chat.completions.create(
-        model=
-        temperature=
+        model=OPENAI_MODEL,
+        temperature=temperature,
         messages=message_history
     )
     return response.choices[0].message.content
 
 
 def _get_standalone_question(question: str, message_history: list[dict], prompt_q: str) -> str:
+    """
+    Gets a standalone question/phrase based on the user's question and the previous messages. Used since
+    some questions are too simple like "yes, please"
+    :param question: user question
+    :param message_history: msg history in the format used by OpenAI
+    :param prompt_q: prompt used to get a text that will be used in the vectorstore
+    :return: string with the standalone phrase
+    """
     # Format the message history like: Human: blablablá \nAssistant: blablablá
     history = ''
     for i, msg in enumerate(message_history):
@@ -101,15 +153,20 @@ def _get_standalone_question(question: str, message_history: list[dict], prompt_q: str) -> str:
     content = prompt_q.replace('HISTORY', history).replace('QUESTION', question)
     prompt[0]['content'] = content
 
-    return _call_api(prompt)
+    return _call_api(prompt, 0.01)
 
 
 def _get_context(question: str, client_name: str) -> str:
+    """
+    Gets the 10 nearest vectors to the given question
+    :param question: standalone text
+    :param client_name: name of the client, used as namespace in the vectorstore
+    :return: formatted text with the nearest vectors
+    """
     q_embedding = _get_embedding(question)
 
     # Get most similar vectors
-
-    result = index.query(
+    result = INDEX.query(
         vector=q_embedding,
         top_k=10,
         include_metadata=True,
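Note: get_answer is a standard retrieval-augmented generation loop: rewrite the latest message into a standalone question, embed it, fetch the nearest vectors from Pinecone, and answer with that context injected into the system prompt. A condensed, self-contained sketch of the same flow; the 'text' metadata key and the '<client>-context' namespace are assumptions based on this diff and the utils.py one:

import os
import pinecone
from openai import OpenAI

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))
index = pinecone.Index(os.getenv('PINECONE_INDEX'))

def answer(question: str, client_name: str, prompt: str) -> str:
    # Embed the (already standalone) question and pull the 10 nearest vectors
    embedding = client.embeddings.create(input=question, model='text-embedding-ada-002').data[0].embedding
    result = index.query(vector=embedding, top_k=10, include_metadata=True,
                         namespace=f'{client_name}-context')
    context = '\n'.join(match['metadata']['text'] for match in result['matches'])  # metadata key assumed
    # Inject the retrieved context into the system prompt, then ask the chat model
    messages = [{'role': 'system', 'content': prompt.replace('CONTEXT', context)},
                {'role': 'user', 'content': question}]
    completion = client.chat.completions.create(model=os.getenv('OPENAI_MODEL'), temperature=0.7,
                                                messages=messages)
    return completion.choices[0].message.content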
services/gcp.py
ADDED
@@ -0,0 +1,69 @@
+import os
+from pathlib import Path
+from datetime import timedelta
+from google.cloud import storage
+from huggingface_hub import hf_hub_download
+from google.cloud.storage import transfer_manager
+
+
+def download_credentials() -> None:
+    """
+    Downloads the GCP credentials from Hugging Face Hub
+    :return: None
+    """
+    os.makedirs('../assets', exist_ok=True)
+    hf_hub_download(
+        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
+        token=os.environ.get('HUB_TOKEN'), local_dir="../assets"
+    )
+    return
+
+
+def upload_folder(client_name: str, source_directory: str) -> None:
+    """
+    Uploads the given source directory to the GCP bucket.
+    :param client_name: name of the client, used as name of the main folder
+    :param source_directory: directory with all the sub-folders and files to upload
+    :return: None
+    """
+    # Filter so the list only includes files, not directories themselves.
+    string_paths = [
+        str(path.relative_to(source_directory)).replace("\\", "/") for path in Path(source_directory).rglob("*")
+        if path.is_file()
+    ]
+
+    # Path where the files in the source directory will be saved
+    delimiter = source_directory.find(client_name)
+    destination_directory = source_directory[delimiter:] + "/"
+
+    # Start the upload. Threads was used instead of process because Gradio was rebooted with the second one.
+    bucket = STORAGE_CLIENT.bucket('clients-bella')
+    _ = transfer_manager.upload_many_from_filenames(
+        bucket, filenames=string_paths, source_directory=source_directory, max_workers=1,
+        blob_name_prefix=destination_directory, worker_type=transfer_manager.THREAD
+    )
+    return
+
+
+def get_link_file(client_name: str, type_media: str, media_name: str) -> str:
+    """
+    Gets a public link during 15 minutes to a given file in GCP
+    :param client_name: name of the client (name of the main folder)
+    :param type_media: if it is audio or video
+    :param media_name: name of the desired media file
+    :return: public link to the file
+    """
+
+    if media_name == 'waiting.wav':
+        bucket = STORAGE_CLIENT.bucket('audios_bella')
+        blob = bucket.blob('waiting.wav')
+    else:
+        bucket = STORAGE_CLIENT.bucket('clients-bella')
+        blobs = bucket.list_blobs(prefix=f'{client_name}/media/{type_media}/{media_name}')
+        blob = next(blobs)
+    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
+    return signed_url
+
+
+download_credentials()
+STORAGE_CLIENT = storage.Client.from_service_account_json(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
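Note: in upload_many_from_filenames, blob_name_prefix is prepended to every relative filename, which is how everything under assets/<client> lands under <client>/... in the clients-bella bucket. A small helper that previews the resulting object names by mirroring upload_folder's path logic:

from pathlib import Path

def preview_blob_names(client_name: str, source_directory: str) -> list[str]:
    # Same relative-path and prefix computation as upload_folder above
    relative = [str(p.relative_to(source_directory)).replace('\\', '/')
                for p in Path(source_directory).rglob('*') if p.is_file()]
    prefix = source_directory[source_directory.find(client_name):] + '/'
    return [prefix + name for name in relative]

# e.g. with assets/acme/media/audio/greeting_es_0.wav on disk:
# preview_blob_names('acme', 'assets/acme') -> ['acme/media/audio/greeting_es_0.wav']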
utils.py → services/utils.py
RENAMED
@@ -1,28 +1,37 @@
+import os
 import csv
 import uuid
+import json
+import logging
 import pinecone
+import gradio as gr
+from PIL import Image
 from typing import Union
 from openai import Client
 from pinecone import Index
 
-from gcp import *
-
-if not os.path.exists('tts_model'):  # Get TTS model
+from services import audio_model, gcp
+
+if not os.path.exists('../tts_model'):  # Get TTS model
     audio_model.download_model()
-
-from audio import *
+from services.audio import *
 from video import *
 
 
 pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))
 INDEX = Index(os.getenv('PINECONE_INDEX'))
 OPENAI_CLIENT = Client()
+TRANSLATE_LANGUAGES = {'español': 'es', 'ingles': 'en', 'portugués': 'pt'}
+TRANSLATE_GREET = {'Saludo': 'greeting', 'Despedida': 'goodbye', 'Error': 'error'}
 
 
-def add_data_table(table: list[list[str]], *data: str):
+def add_data_table(table: list[list[str]], *data: str) -> tuple[list[list[str]], list[str]]:
     """
     Adds the data to the table. Some data consist of two columns others only one.
-    So depending on that, the new row and returned value will be different
+    So depending on that, the new row and returned value will be different.
+    :param table: table to add the data to
+    :param data: new row to be added to the table
+    :return: updated table and list of strings for cleaning the input
     """
     if len(data) == 3:  # It is the greet tab
         new_value = '', *data[1:]
@@ -42,9 +51,12 @@ def add_data_table(table: list[list[str]], *data: str):
     return table, *new_value
 
 
-def remove_data_table(table: list[list[str]], evt: gr.SelectData):
+def remove_data_table(table: list[list[str]], evt: gr.SelectData) -> list[list[str]]:
     """
-    Deletes a row on the table if the selected column is the first one
+    Deletes a row on the table if the selected column is the first one.
+    :param table: clicked table
+    :param evt: the event (has info of the position of the click)
+    :return: updated table
     """
     # The clicked column is not the first one (the one with the X), do not do anything
     if evt.index[1] != 0:
@@ -60,72 +72,114 @@ def remove_data_table(table: list[list[str]], evt: gr.SelectData):
     return table
 
 
-def add_language(languages: list[str]) -> Union[gr.Error, tuple[gr.helpers, gr.helpers]]:
+def add_language(languages: list[str]) -> Union[gr.Error, tuple[gr.helpers, gr.helpers, gr.helpers]]:
+    """
+    Updated the dropdown with the selected languages
+    :param languages: list of selected languages
+    :return: three updated dropdowns if at least 1 language was selected, otherwise an error
+    """
     if len(languages) == 0:
         raise gr.Error('Debe seleccionar al menos 1 idioma')
 
     return (
+        gr.update(choices=[i for i in languages], value=languages[0], interactive=True),
         gr.update(choices=[i for i in languages], value=languages[0], interactive=True),
         gr.update(choices=[i for i in languages], value=languages[0], interactive=True)
     )
 
 
 def create_chatbot(
-        client: str, name: str, messages_table: list[str
-
-
-
-
+        client: str, name: str, messages_table: list[list[str]], random_table: list[list[str]],
+        questions_table: list[list[str]], image: Image
+) -> gr.helpers:
+    """
+    Creation of the chatbot. It creates all the audios, videos csv files for the given tables
+    (greetings, goodbyes, errors and random) and uploads them to GCP, and it creates the
+    vectorstore with the given questions and answers.
+    :param client: name of the client (Nosotras, Visit Orlando, etc.)
+    :param name: name of the chatbot (Bella, Roomie, etc.)
+    :param messages_table: table with the greetings, goodbyes and errors messages
+    :param random_table: table with the random data about the client
+    :param questions_table: table with the questions and answers for each question
+    :param image: image used as base for the videos
+    :return: updates the value of a button (know lets know the user if the process is done or there was an error)
+    """
     # Set up general info
     client_name = client.lower().replace(' ', '-')
-
+    _ = name.lower()  # TODO: use it
 
     # Group messages by their type (greeting, goodbye or error) and language
     messages = dict()
     for message in messages_table:
-
-
+        msg = message[1]
+        type_msg = TRANSLATE_GREET[message[2]]
+        language_msg = TRANSLATE_LANGUAGES[message[-1]]
         os.makedirs(f'assets/{client_name}/{type_msg}s', exist_ok=True)
         if type_msg not in messages:
-            messages[type_msg] = {language_msg: [
+            messages[type_msg] = {language_msg: [msg]}
         else:
             if language_msg not in messages[type_msg]:
-                messages[type_msg][language_msg] = [
+                messages[type_msg][language_msg] = [msg]
             else:
-                messages[type_msg][language_msg].append(
+                messages[type_msg][language_msg].append(msg)
 
     # Create CSV files (greeting, goodbye and error)
     for type_msg in messages:
         for language in messages[type_msg]:
-            with open(f'assets/{client_name}/{type_msg}/{language}.csv', mode='w', encoding='utf-8')
-
-            writer.
+            with (open(f'assets/{client_name}/{type_msg}s/{language}.csv', mode='w', encoding='utf-8', newline='')
+                  as outfile):
+                writer = csv.writer(outfile)
+                for msg in messages[type_msg][language]:
+                    writer.writerow([msg])
 
     # Create the audios (greeting, goodbye and error)
-
+    path_audios = f'assets/{client_name}/media/audio'
+    os.makedirs(path_audios, exist_ok=True)
     for type_msg in messages:
         for language in messages[type_msg]:
             for i, msg in enumerate(messages[type_msg][language]):
-                full_path = f'
-
+                full_path = f'{path_audios}/{type_msg}_{language}_{i}'
+                get_audio(msg, language, full_path)
+
+    # Group random audios by their language
+    random = dict()
+    for _, msg, language in random_table:
+        short_language = TRANSLATE_LANGUAGES[language]
+        if short_language not in random:
+            random[short_language] = [msg]
+        else:
+            random[short_language].append(msg)
 
     # Create the random audios
-    for
-
-
+    for language in random:
+        for i, msg in enumerate(random[language]):
+            full_path = f'{path_audios}/random_{language}_{i}'
+            get_audio(msg, language, full_path)
+
+    # Save image
+    os.makedirs(f'assets/{client_name}/media/image', exist_ok=True)
+    image.save(f'assets/{client_name}/media/image/base.png')
 
     # Upload files and audios to bucket in GCP
-    upload_folder(
+    gcp.upload_folder(client_name, f'assets/{client_name}')
 
-    # Create videos
-
-
+    # Create videos for the generated audios and the waiting video (it is muted)
+    path_videos = f'assets/{client_name}/media/video'
+    os.makedirs(path_videos, exist_ok=True)
+    list_audios = os.listdir(path_audios) + ['waiting.wav']
+    for audio_file in list_audios:
         name_file = audio_file.split('.')[0]
-        link_audio = get_link_file(
-
+        link_audio = gcp.get_link_file(client_name, 'audio', audio_file)
+        link_image = gcp.get_link_file(client_name, 'image', 'base.png')
+        try:
+            get_video(link_audio, link_image, f'{path_videos}/{name_file}')
+        except Exception as e:
+            gr.Error(f'Problema con la creación del video, hable con el administrador. Error: {e}')
+            logging.error(e)
+            return gr.update(value='ERROR!', interactive=False)
 
     # Upload videos to GCP
-    upload_folder(
+    gcp.upload_folder(client_name, path_videos)
 
     # Set up vectorstore
     vectors = []
@@ -139,12 +193,61 @@ def create_chatbot(
     INDEX.upsert(vectors=vectors, namespace=f'{client_name}-context')
 
     # Change text in the button
-    return gr.
+    return gr.update(value='Chatbot created!!!', interactive=False)
+
+
+def save_prompts(client_name: str, context_prompt: str, prompts_table: list[list[str]]) -> None:
+    """
+    Saves all the prompts (standalone and one for each language) and uploads them to Google Cloud Storage
+    :param client_name: name of the client
+    :param context_prompt: standalone prompt used to search into the vectorstore
+    :param prompts_table: table with the prompt of each language
+    :return: None
+    """
+    path_prompts = f'assets/{client_name}/prompts'
+    os.makedirs(path_prompts, exist_ok=True)
+
+    # Save standalone prompt. It is the same for all languages
+    with open(f'{path_prompts}/prompt_standalone_q.txt', mode='w', encoding='utf-8') as outfile:
+        outfile.write(context_prompt)
+
+    # Save the prompt of each language
+    for _, prompt, language in prompts_table:
+        language_prompt = TRANSLATE_LANGUAGES[language]
+        with open(f'{path_prompts}/prompt_{language_prompt}.txt', mode='w', encoding='utf-8') as outfile:
+            outfile.write(prompt)
+
+    gcp.upload_folder(client_name, path_prompts)
+    return
+
+
+def generate_json(client_name: str, languages: list[str], max_num_questions: int, chatbot_name: str) -> gr.helpers:
+    """
+    Creates a json file with the environment variables used in the API
+    :param client_name:
+    :param languages:
+    :param max_num_questions:
+    :param chatbot_name:
+    :return: gradio file with the value as the path of the json file
+    """
+    json_object = json.dumps(
+        {
+            'CLIENT_NAME': client_name, 'MODEL_OPENAI': os.getenv('OPENAI_MODEL'), 'LANGUAGES': languages,
+            'MAX_NUM_QUESTIONS': max_num_questions, 'NUM_VECTORS_CONTEXT': 10, 'THRESHOLD_RECYCLE': 0.97,
+            'OPENAI_API_KEY': 'Check OpenAI for this', 'CHATBOT_NAME': chatbot_name
+        },
+        indent=4
+    )
+    path_json = f"assets/{client_name}/chatbot_variables.json"
+    with open(path_json, mode='w', encoding='utf-8') as outfile:
+        outfile.write(json_object)
+
+    return gr.update(value=path_json, label='Output file', interactive=True)
 
 
 def _get_embedding(sentence: str) -> list[float]:
     """
-
+    Gets the embedding of a word/sentence/paragraph
     :param sentence: input of the model
     :return: list of floats representing the embedding
     """
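Note: the grouping step in create_chatbot can be exercised on its own; dict.setdefault is an equivalent, shorter form of the nested if/else used in the diff. Table rows here are illustrative:

TRANSLATE_LANGUAGES = {'español': 'es', 'ingles': 'en', 'portugués': 'pt'}
TRANSLATE_GREET = {'Saludo': 'greeting', 'Despedida': 'goodbye', 'Error': 'error'}

# Rows mimic messages_table: [delete-marker, message, type, language]
rows = [['', '¡Hola!', 'Saludo', 'español'], ['', 'Adiós', 'Despedida', 'español']]

messages: dict[str, dict[str, list[str]]] = {}
for _, msg, type_msg, language in rows:
    group = messages.setdefault(TRANSLATE_GREET[type_msg], {})
    group.setdefault(TRANSLATE_LANGUAGES[language], []).append(msg)

print(messages)  # {'greeting': {'es': ['¡Hola!']}, 'goodbye': {'es': ['Adiós']}}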
video.py → services/video.py
RENAMED
@@ -1,39 +1,47 @@
 import os
 import time
 import requests
-import logging
-from clint.textui import progress
 
 
-def get_video(link_audio: str, path_video: str) ->
+def get_video(link_audio: str, image_url: str, path_video: str,) -> None:
     """
-
-
+    Creates a video with d-id and saves it.
+    :param link_audio: url of the audio in the bucket used for the video
+    :param path_video: path for saving the video file
+    :param image_url: url with the base image used for the video
+    :return: None
+    :raises Exception: if there was a problem with D-ID
     """
-
-
-
-
-        return False
+    try:
+        id_video = _create_talk(link_audio, image_url)
+    except Exception as e:
+        raise e
 
     link_video = _get_url_talk(id_video)
 
     # Saves the video into a file to later upload it to the cloud
     name = f'{path_video}.mp4'
-
-
-
-
-
-
-
-
-
-
-def _create_talk(link_audio: str) -> tuple[bool, str]:
+    try:
+        with requests.get(link_video) as r:
+            r.raise_for_status()  # Raises an exception for HTTP errors
+            if r.status_code == 200:
+                with open(name, 'wb') as outfile:
+                    outfile.write(r.content)
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Network-related error while downloading the video: {e}")
+    except ValueError as e:
+        raise Exception(e)
+    except Exception as e:
+        raise Exception(f"An unexpected error occurred: {e}")
+
+
+def _create_talk(link_audio: str, image_url: str) -> str:
     """
-    Creates and returns the id of the talk made with d-id.
-
+    Creates and returns the id of the talk made with d-id.
+    :param link_audio: url of the audio in the bucket used for the video
+    :param image_url: url with the base image used for the video
+    :return: id of the talk
+    :raises Exception: if there was a problem while generating the talk
     """
     url = "https://api.d-id.com/talks"
 
@@ -52,7 +60,7 @@ def _create_talk(link_audio: str) -> tuple[bool, str]:
             "pad_audio": "0.0",
             "stitch": True
         },
-        "source_url":
+        "source_url": image_url
     }
     headers = {
         "accept": "application/json",
@@ -65,17 +73,18 @@ def _create_talk(link_audio: str) -> tuple[bool, str]:
 
     try:
         talk_id = r['id']
-        return True, talk_id
+        return talk_id
 
     # Probably there are no more available credits
     except KeyError:
-
-        return False, 'None'
+        raise Exception(f"D-ID response is missing 'id' key. Returned error: {r}")
 
 
 def _get_url_talk(id_video: str) -> str:
     """
-
+    Gets the url of the finished talk
+    :param id_video: id of the previously created talk
+    :return: url of the video
    """
     url = f"https://api.d-id.com/talks/{id_video}"
 
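Note: _get_url_talk (hence the time import) presumably polls the D-ID talks endpoint until the render is done. A hedged sketch of that create-then-poll loop; the header values, env var name and polling interval are assumptions, only the /talks endpoints themselves come from the diff:

import os
import time
import requests

API = 'https://api.d-id.com/talks'
HEADERS = {
    'accept': 'application/json',
    'content-type': 'application/json',
    'authorization': f"Basic {os.getenv('D_ID_KEY')}",  # env var name assumed
}

def wait_for_video(talk_id: str, delay: float = 2.0) -> str:
    # Poll the talk until D-ID reports it finished, then return the video url
    while True:
        talk = requests.get(f'{API}/{talk_id}', headers=HEADERS).json()
        if talk.get('status') == 'done':
            return talk['result_url']  # field name per D-ID's talks API
        time.sleep(delay)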