vmoras committed
Commit 4b9cf05
Parent(s): d1701ad

Add more functions and improve front

Files changed (9):
  1. .gitignore +4 -1
  2. app.py +74 -36
  3. audio.py +19 -44
  4. audio_model.py +21 -0
  5. chatbot.py +123 -0
  6. gcp.py +47 -0
  7. requirements.txt +3 -1
  8. utils.py +105 -23
  9. video.py +94 -0
.gitignore CHANGED
@@ -1,4 +1,7 @@
 .idea/
 .venv/
 __pycache__/
-.env
+.env
+
+assets/
+tts_model/
app.py CHANGED
@@ -1,14 +1,18 @@
 from dotenv import load_dotenv
 load_dotenv()
 
-from utils import *
+import utils
+import chatbot
 import gradio as gr
 
 
 with gr.Blocks() as app:
+    # ----------------------------------------------- FRONT -------------------------------------------------------
     with gr.Tab('General info'):
-        client = gr.Textbox(label='Nombre del cliente', placeholder='Inserte el nombre del cliente')
-        language = gr.Checkboxgroup(
+        client = gr.Textbox(
+            label='Nombre del cliente', placeholder='Inserte el nombre del cliente, por ejemplo Visit Orlando'
+        )
+        languages = gr.Checkboxgroup(
             choices=['español', 'ingles', 'portugués'], value='español', label='Idiomas', interactive=True,
             info='Seleccione todos los idiomas que el chatbot va a hablar (al menos debe tener 1 idioma)'
         )
@@ -16,7 +20,7 @@ with gr.Blocks() as app:
             choices=['Bella'], value='Bella', label='Nombre del chatbot',
             info='Seleccione el nombre del chatbot, si no se encuentra en la lista, contacte al administrador'
         )
-        num_questions = gr.Number(
+        max_num_questions = gr.Number(
             value=5, minimum=2, maximum=10, label='Número preguntas', interactive=True,
             info='Máximo numero de preguntas que puede hacer el usuario.'
         )
@@ -32,11 +36,15 @@ with gr.Blocks() as app:
         greet = gr.Textbox(label='Mensaje', info='Ingrese el mensaje a decir por el chatbot.')
         type_greet = gr.Dropdown(
             choices=['Saludo', 'Despedida', 'Error'], value='Saludo', interactive=True,
-            info='Seleccione si es saludo, despedida o mensaje de error.'
+            info='Seleccione si es saludo, despedida o mensaje de error.', label='Tipo mensaje'
+        )
+        language_greet = gr.Dropdown(
+            choices=['español'], value='español', interactive=True,
+            info='Seleccione el idioma en el que esta el texto.', label='Idioma'
         )
         send_greet_button = gr.Button(value='Añadir')
         messages_table = gr.DataFrame(
-            headers=['Eliminar', 'Tipo mensaje', 'Mensaje'], type='array', interactive=False
+            headers=['Eliminar', 'Mensaje', 'Tipo mensaje', 'Idioma'], type='array', interactive=False
         )
 
     with gr.Tab('Random data'):
@@ -44,9 +52,16 @@ with gr.Blocks() as app:
             'Si quiere que Bella diga algunos datos random mientras busca la información, ingrese dichos párrafos aca.'
         )
         with gr.Row():
-            random_data = gr.Text(placeholder='Ingrese el dato random', label='Dato random')
+            random_data = gr.Text(
+                placeholder='Ingrese el dato random', info='Ingrese el mensaje a decir por el chatbot.',
+                label='Dato random'
+            )
+            language_random = gr.Dropdown(
+                choices=['español'], value='español', interactive=True,
+                info='Seleccione el idioma en el que esta el texto.', label='Idioma'
+            )
             send_random_button = gr.Button(value='Añadir')
-        random_table = gr.DataFrame(headers=['Eliminar', 'Dato random'], type='array', interactive=False)
+        random_table = gr.DataFrame(headers=['Eliminar', 'Dato random', 'Idioma'], type='array', interactive=False)
 
     with gr.Tab('Questions - Context'):
         with gr.Row():
@@ -57,31 +72,26 @@ with gr.Blocks() as app:
             headers=['Eliminar', 'Pregunta', 'Contexto'], type='array', interactive=False
         )
 
-    with gr.Tab('General prompt'):
-        general_prompt = gr.Text(placeholder='Ingrese el prompt general del bot', label='Prompt')
-
-    with gr.Tab('Context prompt'):
-        context_prompt = gr.Text(placeholder='Ingrese el prompt usado para encontrar el contexto', label='Prompt')
-
     with gr.Tab('Create chatbot'):
         _ = gr.Markdown(
             "Asegúrese que toda la información este correcta antes de enviarla."
         )
         create_chatbot_button = gr.Button(value='Crear chatbot')
 
+    with gr.Tab('Prompts'):
+        general_prompt = gr.Text(
+            placeholder='Ingrese el prompt general del bot', label='General prompt'
+        )
+        context_prompt = gr.Text(
+            placeholder='Ingrese el prompt usado para encontrar el contexto', label='Standalone prompt'
+        )
+
     with gr.Tab('Test'):
-        with gr.Row():
-            with gr.Column():
-                with gr.Row():
-                    video = gr.Video(interactive=False, label='Video', autoplay=True)
-                with gr.Row():
-                    output_audio = gr.Audio(interactive=False, label='Audio', autoplay=True)
-
-            with gr.Column():
-                with gr.Row():
-                    chat = gr.Chatbot(label='Chat')
-                with gr.Row():
-                    text = gr.Text(label='Write your question')
+        start_test_button = gr.Button(value='Iniciar test')
+        with gr.Row(visible=False) as chat_row:
+            chat = gr.Chatbot(label='Chat')
+            output_audio = gr.Audio(interactive=False, label='Audio', autoplay=True, visible=False)
+            user_input = gr.Text(label='Write your question')
 
     with gr.Tab('Submit'):
         _ = gr.Markdown(
@@ -94,35 +104,63 @@ with gr.Blocks() as app:
 
     # Add info to the tables
     send_greet_button.click(
-        add_data_table, [messages_table, type_greet, greet], [messages_table, greet]
+        utils.add_data_table,
+        [messages_table, greet, type_greet, language_greet],
+        [messages_table, greet, type_greet, language_greet]
     )
     send_random_button.click(
-        add_data_table, [random_table, random_data], [random_table, random_data]
+        utils.add_data_table, [random_table, random_data, language_random], [random_table, random_data, language_random]
     )
     send_question_button.click(
-        add_data_table, [questions_table, question, context], [questions_table, question, context]
+        utils.add_data_table, [questions_table, question, context], [questions_table, question, context]
    )
 
     # Remove info from the tables
     messages_table.select(
-        remove_data_table, messages_table, messages_table
+        utils.remove_data_table, messages_table, messages_table
     )
     random_table.select(
-        remove_data_table, random_table, random_table
+        utils.remove_data_table, random_table, random_table
     )
     questions_table.select(
-        remove_data_table, questions_table, questions_table
+        utils.remove_data_table, questions_table, questions_table
     )
 
-    # Create the chatbot: create media and vectorstore
+    # Create the chatbot: create media (csv files, audio and video) and vectorstore
     create_chatbot_button.click(
-        lambda: gr.Button(value='Creating chatbot...', interactive=False),
+        lambda: gr.update(value='Creating chatbot...', interactive=False),
         None,
         create_chatbot_button
     ).then(
-        create_chatbot,
-        [client, language, name, base_image, messages_table, random_table, questions_table],
+        utils.create_chatbot,
+        [client, name, messages_table, random_table, questions_table],
        create_chatbot_button
     )
 
+    # Update the dataframes based on the languages selected in the first tab
+    languages.change(
+        utils.add_language, languages, [language_greet, language_random]
+    )
+
+    # Initialize chat
+    start_test_button.click(
+        lambda: gr.update(value='Iniciando chat...'), None, start_test_button
+    ).then(
+        chatbot.start_chat, [chat, general_prompt], [chat, output_audio, chat_row]
+    ).then(
+        lambda: gr.update(value='Reiniciar chat'), None, start_test_button
+    )
+
+    # Chat with the chatbot
+    user_input.submit(
+        chatbot.get_random_data, None, output_audio
+    ).then(
+        chatbot.get_answer,
+        [chat, user_input, client, general_prompt, context_prompt],
+        [chat, user_input, output_audio], show_progress='hidden'
+    )
+
+    # Submit chatbot: save prompts and no more ?
+
+
 app.launch(debug=True)
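
Note: the button wiring above relies on chaining .click(...).then(...) so the label swap renders before the long-running callback starts, and on gr.update returning a property patch for an existing component. A minimal self-contained sketch of that pattern (plain Gradio; slow_task is a hypothetical stand-in for utils.create_chatbot):

import time
import gradio as gr

def slow_task():
    time.sleep(2)  # stands in for the real create_chatbot work
    return gr.update(value='Chatbot created!!!', interactive=True)

with gr.Blocks() as demo:
    btn = gr.Button('Crear chatbot')
    btn.click(
        lambda: gr.update(value='Creating chatbot...', interactive=False), None, btn
    ).then(
        slow_task, None, btn  # runs only after the first event has finished
    )

demo.launch()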
audio.py CHANGED
@@ -6,37 +6,12 @@ import pickle
 import torchaudio
 import numpy as np
 import gradio as gr
-from google.cloud import storage
+from typing import Optional
 from TTS.tts.models.xtts import Xtts
 from nltk.tokenize import sent_tokenize
-from huggingface_hub import hf_hub_download
 from TTS.tts.configs.xtts_config import XttsConfig
 
 
-def _download_starting_files() -> None:
-    """
-    Downloads the embeddings from a bucket
-    """
-    os.makedirs('assets', exist_ok=True)
-
-    # Download credentials file
-    hf_hub_download(
-        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
-        token=os.environ.get('HUB_TOKEN'), local_dir="assets"
-    )
-
-    # Initialise a client
-    credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
-    storage_client = storage.Client.from_service_account_json(credentials)
-    bucket = storage_client.get_bucket('embeddings-bella')
-
-    # Get both embeddings
-    blob = bucket.blob("gpt_cond_latent.npy")
-    blob.download_to_filename('assets/gpt_cond_latent.npy')
-    blob = bucket.blob("speaker_embedding.npy")
-    blob.download_to_filename('assets/speaker_embedding.npy')
-
-
 def _load_array(filename):
     """
     Opens a file a returns it, used with numpy files
@@ -45,12 +20,10 @@ def _load_array(filename):
         return pickle.load(f)
 
 
-# Get embeddings
-_download_starting_files()
 os.environ['COQUI_TOS_AGREED'] = '1'
 
 # Used to generate audio based on a sample
-nltk.download('punkt')
+# nltk.download('punkt')
 model_path = os.path.join("tts_model")
 
 config = XttsConfig()
@@ -62,7 +35,7 @@ model.load_checkpoint(
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
-    use_deepspeed=True,
+    # use_deepspeed=True,
 )
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -70,31 +43,27 @@ model.to(device)
 
 # Speaker latent
 path_latents = 'assets/gpt_cond_latent.npy'
-gpt_cond_latent = _load_array(path_latents)
+# gpt_cond_latent = _load_array(path_latents)
 
 # Speaker embedding
 path_embedding = 'assets/speaker_embedding.npy'
-speaker_embedding = _load_array(path_embedding)
+# speaker_embedding = _load_array(path_embedding)
 
 
-def get_audio(text: str, language: str = 'es') -> gr.Audio:
+def get_audio(text: str, language: str = 'es', saving_path: str = 'output') -> None:
     """
-    Returns a link from a bucket in GCP that contains the generated audio given a text and language and the
-    name of such audio
-    :param text: used to generate the audio
-    :param language: 'es', 'en' or 'pt'
-    :return link_audio and name_audio
+    Creates an audio with the given text and language, the name of the audio file is output.wav
     """
     # Creates an audio with the answer and saves it as output.wav
-    _save_audio(text, language)
+    _save_audio(text, language, saving_path)
 
-    return gr.Audio(value='output.wav', interactive=False, visible=True)
+    return None
 
 
-def _save_audio(answer: str, language: str) -> None:
+def _save_audio(answer: str, language: str, path_audio: str) -> None:
     """
     Splits the answer into sentences, clean and creates an audio for each one, then concatenates
-    all the audios and saves them into a file (output.wav)
+    all the audios and saves them into a file
     """
     # Split the answer into sentences and clean it
     sentences = _get_clean_answer(answer, language)
@@ -108,19 +77,25 @@ def _save_audio(answer: str, language: str) -> None:
 
     # Concatenate and save all audio segments
     concatenated_audio = torch.cat(audio_segments, dim=0)
-    torchaudio.save('output.wav', concatenated_audio.unsqueeze(0), 24000)
+    torchaudio.save(f'{path_audio}.wav', concatenated_audio.unsqueeze(0), 24000)
 
 
 def _get_voice(sentence: str, language: str) -> np.ndarray:
     """
     Returns a numpy array with a wav of an audio with the given sentence and language
     """
-    out = model.inference(
+    '''out = model.inference(
         sentence,
        language=language,
         gpt_cond_latent=gpt_cond_latent,
         speaker_embedding=speaker_embedding,
         temperature=0.1
-    )
+    )'''
+    out = model.synthesize(
+        sentence,
+        config,
+        speaker_wav='assets/orlando2_cleaned.wav',
+        language=language
+    )
     return out['wav']
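Note: this commit switches _get_voice() from model.inference(), which consumes the precomputed speaker latents, to model.synthesize(), which re-conditions on a reference wav (assets/orlando2_cleaned.wav) on every call. A sketch of restoring the precomputed-latent path, assuming the pickled arrays in assets/ match the loaded XTTS checkpoint:

gpt_cond_latent = _load_array(path_latents)        # 'assets/gpt_cond_latent.npy'
speaker_embedding = _load_array(path_embedding)    # 'assets/speaker_embedding.npy'

out = model.inference(
    sentence,
    language=language,
    gpt_cond_latent=gpt_cond_latent,
    speaker_embedding=speaker_embedding,
    temperature=0.1,
)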
audio_model.py CHANGED
@@ -1,6 +1,24 @@
 import os
 import requests
 from tqdm import tqdm
+from google.cloud import storage
+from huggingface_hub import hf_hub_download
+
+
+def _download_starting_files() -> None:
+    """
+    Downloads the embeddings from a bucket
+    """
+    # Initialise a client
+    credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
+    storage_client = storage.Client.from_service_account_json(credentials)
+    bucket = storage_client.get_bucket('embeddings-bella')
+
+    # Get both embeddings
+    blob = bucket.blob("gpt_cond_latent.npy")
+    blob.download_to_filename('assets/gpt_cond_latent.npy')
+    blob = bucket.blob("speaker_embedding.npy")
+    blob.download_to_filename('assets/speaker_embedding.npy')
 
 
 def _download_file(url, destination):
@@ -37,3 +55,6 @@ def download_model():
         destination = f'tts_model/{filename}'
         print(f"[COQUI TTS] STARTUP: Downloading {filename}...")
         _download_file(url, destination)
+
+    # Downloads the embeddings from GCP
+    # _download_starting_files()
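
Note: _download_starting_files() moved here from audio.py, but its old body also created assets/ and fetched credentials.json; both of those steps now live in gcp.download_credentials(), which runs when gcp is imported. If the commented-out call above is re-enabled, the expected ordering is roughly (a hedged sketch, not part of the commit):

import gcp          # importing gcp already runs download_credentials(), creating assets/
import audio_model

audio_model.download_model()   # would also fetch the embeddings once _download_starting_files() is uncommented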
chatbot.py ADDED
@@ -0,0 +1,123 @@
+import os
+import audio
+import random
+import pinecone
+import gradio as gr
+from openai import OpenAI
+
+
+OPENAI_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+pinecone.init(api_key=os.getenv("PINECONE_API_TOKEN"), environment=os.getenv("PINECONE_ENVIRONMENT"))
+
+
+def start_chat(chat_history: list[list[str | None]]):
+    # Get greeting text and audio
+    greeting = ''
+    audio_name = ''
+
+    chat_history.append(['', greeting])
+
+    return chat_history, gr.update(value=f'{audio_name}.wav'), gr.update(visible=False)
+
+
+def get_random_data(client_name: str):
+    random_options = []
+    path_audios = f'assets/{client_name}/media/audio'
+    for random_audio in os.listdir(path_audios):
+        if random_audio.startswith('random') and 'es' in random_audio:
+            random_options.append(random_audio)
+
+    num = random.randint(0, len(random_options) - 1)
+    return gr.update(value=random_options[num])
+
+
+def get_answer(
+        chat_history: list[tuple[str, str]], user_input: str, client_name: str, general_prompt: str, context_prompt: str
+):
+    # Format chat history to OpenAI format msg history
+    msg_history = [{'role': 'system', 'content': general_prompt}]
+    for i, msg in enumerate(chat_history):
+        if i == 0:
+            continue  # Omit the prompt
+        if i % 2 == 0:
+            msg_history.append({'role': 'user', 'content': msg})
+        else:
+            msg_history.append({'role': 'assistant', 'content': msg})
+
+    # Get standalone question
+    standalone_question = _get_standalone_question(user_input, msg_history, context_prompt)
+
+    # Get context
+    context = _get_context(standalone_question, client_name)
+
+    # Get answer from chatbot
+    response = _get_response(context, msg_history, user_input, general_prompt)
+
+    # Get audio
+    audio.get_audio(response, 'es')
+
+    # Update chat_history
+    chat_history.append((user_input, response))
+
+    return chat_history, "", gr.update(value='output.wav')
+
+
+def _get_response(context: str, message_history: list[dict], question: str, prompt: str) -> str:
+    message_history[0]['content'] = prompt.replace('CONTEXT', context)
+    message_history.append({'role': 'user', 'content': question})
+    return _call_api(message_history)
+
+
+def _get_embedding(text: str) -> list[float]:
+    response = OPENAI_CLIENT.embeddings.create(
+        input=text,
+        model='text-embedding-ada-002'
+    )
+    return response.data[0].embedding
+
+
+def _call_api(message_history: list[dict]) -> str:
+    response = OPENAI_CLIENT.chat.completions.create(
+        model='gpt-4-turbo-preview',
+        temperature=0.7,
+        messages=message_history
+    )
+    return response.choices[0].message.content
+
+
+def _get_standalone_question(question: str, message_history: list[dict], prompt_q: str) -> str:
+    # Format the message history like: Human: blablablá \nAssistant: blablablá
+    history = ''
+    for i, msg in enumerate(message_history):
+        if i == 0:
+            continue  # Omit the prompt
+        if i % 2 == 0:
+            history += f'Human: {msg["content"]}\n'
+        else:
+            history += f'Assistant: {msg["content"]}\n'
+
+    # Add history and question to the prompt and call chatgpt
+    prompt = [{'role': 'system', 'content': ''}]
+    content = prompt_q.replace('HISTORY', history).replace('QUESTION', question)
+    prompt[0]['content'] = content
+
+    return _call_api(prompt)
+
+
+def _get_context(question: str, client_name: str) -> str:
+    q_embedding = _get_embedding(question)
+
+    # Get most similar vectors
+    index = pinecone.Index(client_name)
+    result = index.query(
+        vector=q_embedding,
+        top_k=10,
+        include_metadata=True,
+        namespace=f'{client_name}-context'
+    )['matches']
+
+    # Crete a string based on the text of each vector
+    context = ''
+    for r in result:
+        context += r['metadata']['Text'] + '\n'
+    return context
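
Note: gr.Chatbot history is a list of [user, assistant] pairs, while the loop in get_answer() iterates those pairs as if they were alternating single messages, so each 'content' would receive a list rather than a string. A hedged flattening sketch that keeps the OpenAI roles aligned:

msg_history = [{'role': 'system', 'content': general_prompt}]
for user_msg, bot_msg in chat_history:
    if user_msg:
        msg_history.append({'role': 'user', 'content': user_msg})
    if bot_msg:
        msg_history.append({'role': 'assistant', 'content': bot_msg})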
gcp.py ADDED
@@ -0,0 +1,47 @@
+import os
+from pathlib import Path
+from datetime import timedelta
+from google.cloud import storage
+from huggingface_hub import hf_hub_download
+from google.cloud.storage import transfer_manager
+
+
+def download_credentials():
+    os.makedirs('assets', exist_ok=True)
+
+    # Download credentials file
+    hf_hub_download(
+        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
+        token=os.environ.get('HUB_TOKEN'), local_dir="assets"
+    )
+
+
+def upload_folder(bucket_name: str, source_directory: str) -> None:
+    # Filter so the list only includes files, not directories themselves.
+    string_paths = [
+        str(path.relative_to(source_directory)) for path in Path(source_directory).rglob("*") if path.is_file()
+    ]
+
+    # Start the upload.
+    bucket = STORAGE_CLIENT.bucket(bucket_name)
+    results = transfer_manager.upload_many_from_filenames(
+        bucket, string_paths, source_directory=source_directory, max_workers=2
+    )
+
+    for name, result in zip(string_paths, results):
+        if isinstance(result, Exception):
+            print(f"Failed to upload {name} due to exception: {result}")
+        else:
+            print(f"Uploaded {name} to {bucket.name}.")
+
+
+def get_link_file(bucket_name: str, client_name: str, type_media: str, media_name: str):
+    bucket = STORAGE_CLIENT.bucket(bucket_name)
+    blobs = bucket.list_blobs(prefix=f'{client_name}/media/{type_media}/{media_name}')
+    blob = next(blobs)
+    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
+    return signed_url
+
+
+download_credentials()
+STORAGE_CLIENT = storage.Client.from_service_account_json(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
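
Note: generate_signed_url() needs a private key for signing, which is why STORAGE_CLIENT is built from the downloaded credentials.json rather than ambient credentials. A usage sketch with hypothetical names (client 'visit-orlando', file 'greeting_es_0.wav'):

url = get_link_file(
    bucket_name='clients-bella', client_name='visit-orlando',
    type_media='audio', media_name='greeting_es_0.wav'
)
print(url)  # 15-minute link, e.g. passed to D-ID as audio_url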
requirements.txt CHANGED
@@ -9,4 +9,6 @@ torch==2.1.1
 torchaudio==2.1.1
 TTS==0.21.2
 google-cloud-storage==2.13.0
-numpy==1.22.0
+numpy==1.22.0
+openai==1.10.0
+clint==0.5.1
utils.py CHANGED
@@ -1,36 +1,44 @@
-import os
-import gradio as gr
+import csv
+import uuid
+import pinecone
+from typing import Union
+from openai import Client
+from pinecone import Index
+
+from gcp import *
 import audio_model
 if not os.path.exists('tts_model'):  # Get TTS model
     audio_model.download_model()
-import audio
+    pass
+from audio import *
+from video import *
 
 
-def add_data_table(table: list[list[str]], first: str, last: str = None):
+pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))
+INDEX = Index(os.getenv('PINECONE_INDEX'))
+OPENAI_CLIENT = Client()
+
+
+def add_data_table(table: list[list[str]], *data: str):
     """
     Adds the data to the table. Some data consist of two columns others only one.
     So depending on that, the new row and returned value will be different-
     """
-    if last is None:
-        new_row = ['', first]
-        new_value = ''
-    elif first == 'Saludo' or first == 'Despedida' or first == 'Error':
-        new_row = ['❌', first, last]
-        new_value = '', first
+    if len(data) == 3:  # It is the greet tab
+        new_value = '', *data[1:]
+    elif data[-1] in ['español', 'ingles', 'portugués']:
+        new_value = '', data[-1]
     else:
-        new_row = ['❌', first, last]
         new_value = '', ''
 
     # The table is empty, do not append it but replace the first row
     if all(column == '' for column in table[0]):
-        table[0] = new_row
+        table[0] = ['❌', *data]
 
     # Add the new data
     else:
-        table.append(new_row)
+        table.append(['❌', *data])
 
-    if last is None:
-        return table, new_value
     return table, *new_value
 
 
@@ -52,22 +60,96 @@ def remove_data_table(table: list[list[str]], evt: gr.SelectData):
     return table
 
 
+def add_language(languages: list[str]) -> Union[gr.Error, tuple[gr.helpers, gr.helpers]]:
+    if len(languages) == 0:
+        raise gr.Error('Debe seleccionar al menos 1 idioma')
+
+    return (
+        gr.update(choices=[i for i in languages], value=languages[0], interactive=True),
+        gr.update(choices=[i for i in languages], value=languages[0], interactive=True)
+    )
+
+
 def create_chatbot(
-        client: str, language: list[str], chatbot: str, messages_table, random_table, questions_table,
+        client: str, name: str, messages_table: list[str, ], random_table, questions_table,
 ):
+    translate_language = {'español': 'es', 'ingles': 'en', 'portugués': 'pt'}
+    translate_greet = {'Saludo': 'greeting', 'Despedida': 'goodbye', 'Error': 'error'}
+
     # Set up general info
     client_name = client.lower().replace(' ', '-')
-    chatbot_name = chatbot.lower()
+    chatbot_name = name.lower()
 
-    # Create prerecorded media (greeting, goodbye, error, random and waiting)
+    # Group messages by their type (greeting, goodbye or error) and language
+    messages = dict()
     for message in messages_table:
-        pass
-
-    # get_audio()
+        type_msg = translate_greet[message[1]]
+        language_msg = translate_language[message[-1]]
+        os.makedirs(f'assets/{client_name}/{type_msg}s', exist_ok=True)
+        if type_msg not in messages:
+            messages[type_msg] = {language_msg: [message[2]]}
+        else:
+            if language_msg not in messages[type_msg]:
+                messages[type_msg][language_msg] = [message[2]]
+            else:
+                messages[type_msg][language_msg].append(message[2])
+
+    # Create CSV files (greeting, goodbye and error)
+    for type_msg in messages:
+        for language in messages[type_msg]:
+            with open(f'assets/{client_name}/{type_msg}/{language}.csv', mode='w', encoding='utf-8') as outfile:
+                writer = csv.writer(outfile, delimiter=',')
+                writer.writerows(messages[type_msg][language])
+
+    # Create the audios (greeting, goodbye and error)
+    os.makedirs(f'assets/{client_name}/media/audio', exist_ok=True)
+    for type_msg in messages:
+        for language in messages[type_msg]:
+            for i, msg in enumerate(messages[type_msg][language]):
+                full_path = f'assets/{client_name}/media/audio/{type_msg}_{language}_{i}.wav'
+                # get_audio(msg, language, full_path)
+
+    # Create the random audios
+    for i, (_, msg, language) in enumerate(random_table):
+        full_path = f'assets/{client_name}/media/audio/random_{language}_{i}.wav'
+        # get_audio(msg, language, full_path)
+
+    # Upload files and audios to bucket in GCP
+    upload_folder('clients-bella', f'assets/{client_name}')
+
+    # Create videos
+    os.makedirs(f'assets/{client_name}/media/video', exist_ok=True)
+    for audio_file in os.listdir(f'assets/{client_name}/media/audio'):
+        name_file = audio_file.split('.')[0]
+        link_audio = get_link_file('clients-bella', client_name, 'audio', audio_file)
+        get_video(link_audio, f'assets/{client_name}/media/audio/{name_file}.mp4')
+
+    # Upload videos to GCP
+    upload_folder('clients-bella', f'assets/{client_name}/media/video')
 
     # Set up vectorstore
+    vectors = []
+    for _, question, context in questions_table:
+        vector = {
+            "id": str(uuid.uuid4()),
+            "values": _get_embedding(question),
+            "metadata": {'Text': context},
+        }
+        vectors.append(vector)
+    INDEX.upsert(vectors=vectors, namespace=f'{client_name}-context')
 
-    # Upload data to bucket in CP (videos, audio, prompts and csv files)
-
     # Change text in the button
     return gr.Button(value='Chatbot created!!!', interactive=True)
+
+
+def _get_embedding(sentence: str) -> list[float]:
+    """
+    Returns the embedding of a sentence
+    :param sentence: input of the model
+    :return: list of floats representing the embedding
+    """
+    response = OPENAI_CLIENT.embeddings.create(
+        input=sentence,
+        model='text-embedding-ada-002'
+    )
+    return response.data[0].embedding
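
Note: two spots in create_chatbot() look fragile. The directories are created as f'assets/{client_name}/{type_msg}s' (plural) while the CSVs are opened under f'assets/{client_name}/{type_msg}/' (singular), and csv.writer.writerows() expects an iterable of rows, so a list of plain strings is written one character per column. A hedged one-row-per-message variant of the write:

with open(f'assets/{client_name}/{type_msg}s/{language}.csv', mode='w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile, delimiter=',')
    writer.writerows([[msg] for msg in messages[type_msg][language]])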
video.py ADDED
@@ -0,0 +1,94 @@
+import os
+import time
+import requests
+import logging
+from clint.textui import progress
+
+
+def get_video(link_audio: str, path_video: str) -> bool:
+    """
+    Saves a video created with d-id into a file (video.mp4). It returns True if there was not a
+    problem during the process, False otherwise
+    """
+    status, id_video = _create_talk(link_audio)
+
+    # There was a problem with D-ID
+    if not status:
+        return False
+
+    link_video = _get_url_talk(id_video)
+
+    # Saves the video into a file to later upload it to the cloud
+    name = f'{path_video}.mp4'
+    r = requests.get(link_video + name, stream=True)
+    with open(name, 'wb') as f:
+        total_length = int(r.headers.get('content-length'))
+        for chunk in progress.bar(r.iter_content(chunk_size=1024), expected_size=(total_length / 1024) + 1):
+            if chunk:
+                f.write(chunk)
+                f.flush()
+    return True
+
+
+def _create_talk(link_audio: str) -> tuple[bool, str]:
+    """
+    Creates and returns the id of the talk made with d-id. It receives the status of the call and
+    the link of an audio that is in a bucket and contains the answer of the bot
+    """
+    url = "https://api.d-id.com/talks"
+
+    payload = {
+        "script": {
+            "type": "audio",
+            "provider": {
+                "type": "microsoft",
+                "voice_id": "en-US-JennyNeural"
+            },
+            "ssml": "false",
+            "audio_url": link_audio
+        },
+        "config": {
+            "fluent": "false",
+            "pad_audio": "0.0",
+            "stitch": True
+        },
+        "source_url": os.getenv('D_ID_IMAGE')
+    }
+    headers = {
+        "accept": "application/json",
+        "content-type": "application/json",
+        "authorization": f"Basic {os.getenv('D_ID_KEY')}"
+    }
+
+    response = requests.post(url, json=payload, headers=headers)
+    r = response.json()
+
+    try:
+        talk_id = r['id']
+        return True, talk_id
+
+    # Probably there are no more available credits
+    except KeyError:
+        logging.error(f"D-ID response is missing 'id' key. Returned error: {r}")
+        return False, 'None'
+
+
+def _get_url_talk(id_video: str) -> str:
+    """
+    Returns the url of the video given the id of a talk
+    """
+    url = f"https://api.d-id.com/talks/{id_video}"
+
+    while True:
+        headers = {
+            "accept": "application/json",
+            "authorization": f"Basic {os.getenv('D_ID_KEY')}"
+        }
+        response = requests.get(url, headers=headers)
+        r = response.json()
+
+        if r['status'] == 'done':
+            break
+        time.sleep(1)  # Sleep until the video is ready
+
+    return r['result_url']
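
Note: _get_url_talk() already returns the full result_url, so requests.get(link_video + name) appends the local filename onto the remote URL, which looks unintended. A hedged sketch of the likely intended download:

r = requests.get(link_video, stream=True)
with open(f'{path_video}.mp4', 'wb') as f:
    for chunk in r.iter_content(chunk_size=1024):
        if chunk:
            f.write(chunk)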