Uploading files to GCP and refactor
Files changed:
- .gitignore +3 -1
- app.py +65 -20
- gcp.py +0 -47
- requirements.txt +3 -2
- audio.py → services/audio.py +39 -32
- audio_model.py → services/audio_model.py +3 -4
- chatbot.py → services/chatbot.py +88 -31
- services/gcp.py +69 -0
- utils.py → services/utils.py +142 -39
- video.py → services/video.py +37 -28
.gitignore
CHANGED
@@ -4,4 +4,6 @@ __pycache__/
 .env
 
 assets/
-tts_model/
+tts_model/
+
+output.wav
app.py
CHANGED
@@ -1,8 +1,8 @@
 from dotenv import load_dotenv
 load_dotenv()
 
-import utils
-import chatbot
+import time
+from services import chatbot, utils
 import gradio as gr
 
 
@@ -26,7 +26,7 @@ with gr.Blocks() as app:
     )
 
     with gr.Tab('Images'):
-        base_image = gr.Image(label='Imagen base para los videos', sources=['upload'])
+        base_image = gr.Image(label='Imagen base para los videos', sources=['upload'], type='pil')
 
     with gr.Tab('Greeting and goodbye'):
         _ = gr.Markdown(
@@ -39,7 +39,7 @@ with gr.Blocks() as app:
             info='Seleccione si es saludo, despedida o mensaje de error.', label='Tipo mensaje'
         )
         language_greet = gr.Dropdown(
-            choices=['español'], value=
+            choices=['español'], value='español', interactive=True,
             info='Seleccione el idioma en el que esta el texto.', label='Idioma'
         )
         send_greet_button = gr.Button(value='Añadir')
@@ -85,13 +85,40 @@ with gr.Blocks() as app:
         context_prompt = gr.Text(
            placeholder='Ingrese el prompt usado para encontrar el contexto', label='Standalone prompt'
         )
+        _ = gr.Markdown(
+            "```\n"
+            "Recuerde dejar estos formatos en los prompts: \n"
+            "----------------------- General --------------------------\n"
+            "=========\n"
+            "Contexto:\n"
+            "CONTEXTO\n"
+            "=========\n"
+            "\n"
+            "----------------------- Standalone -----------------------\n"
+            "Chat History:\n"
+            "\n"
+            "HISTORY\n"
+            "Follow-up message: QUESTION\n"
+            "Standalone message:\n"
+            "```", line_breaks=True
+        )
 
     with gr.Tab('Test'):
         start_test_button = gr.Button(value='Iniciar test')
-        with gr.
+        with gr.Column(visible=False) as chat_row:
             chat = gr.Chatbot(label='Chat')
             output_audio = gr.Audio(interactive=False, label='Audio', autoplay=True, visible=False)
-        user_input = gr.Text(label='
+            user_input = gr.Text(label='Escribe tus preguntas')
+
+    with gr.Tab('Prompts by languages'):
+        with gr.Row():
+            prompt_data = gr.Text(placeholder='Ingrese el prompt', info='Ingrese el prompt.', label='Prompt')
+            language_prompt = gr.Dropdown(
+                choices=['español'], value='español', interactive=True,
+                info='Seleccione el idioma en el que esta el texto.', label='Idioma'
+            )
+        send_prompt_button = gr.Button(value='Añadir')
+        prompts_table = gr.DataFrame(headers=['Eliminar', 'Prompts', 'Idioma'], type='array', interactive=False)
 
     with gr.Tab('Submit'):
         _ = gr.Markdown(
@@ -102,6 +129,11 @@ with gr.Blocks() as app:
 
     # ----------------------------------------------- ACTIONS -----------------------------------------------------
 
+    # Update the dataframes based on the languages selected in the first tab
+    languages.change(
+        utils.add_language, languages, [language_greet, language_random, language_prompt]
+    )
+
     # Add info to the tables
     send_greet_button.click(
         utils.add_data_table,
@@ -109,10 +141,19 @@ with gr.Blocks() as app:
         [messages_table, greet, type_greet, language_greet]
     )
    send_random_button.click(
-        utils.add_data_table,
+        utils.add_data_table,
+        [random_table, random_data, language_random],
+        [random_table, random_data, language_random]
     )
     send_question_button.click(
-        utils.add_data_table,
+        utils.add_data_table,
+        [questions_table, question, context],
+        [questions_table, question, context]
+    )
+    send_prompt_button.click(
+        utils.add_data_table,
+        [prompts_table, prompt_data, language_prompt],
+        [prompts_table, prompt_data, language_prompt]
     )
 
     # Remove info from the tables
@@ -128,39 +169,43 @@ with gr.Blocks() as app:
 
     # Create the chatbot: create media (csv files, audio and video) and vectorstore
     create_chatbot_button.click(
-        lambda: gr.update(value='Creating chatbot...', interactive=False),
-        None,
-        create_chatbot_button
+        lambda: gr.update(value='Creating chatbot...', interactive=False), None, create_chatbot_button
     ).then(
         utils.create_chatbot,
-        [client, name, messages_table, random_table, questions_table],
+        [client, name, messages_table, random_table, questions_table, base_image],
         create_chatbot_button
     )
 
-    # Update the dataframes based on the languages selected in the first tab
-    languages.change(
-        utils.add_language, languages, [language_greet, language_random]
-    )
-
     # Initialize chat
     start_test_button.click(
         lambda: gr.update(value='Iniciando chat...'), None, start_test_button
     ).then(
-
+        lambda: time.sleep(1.5), None, None
+    ).then(
+        chatbot.start_chat, client, [chat, output_audio, chat_row]
     ).then(
         lambda: gr.update(value='Reiniciar chat'), None, start_test_button
     )
 
     # Chat with the chatbot
     user_input.submit(
-        chatbot.get_random_data,
+        chatbot.get_random_data, client, output_audio
     ).then(
         chatbot.get_answer,
         [chat, user_input, client, general_prompt, context_prompt],
         [chat, user_input, output_audio], show_progress='hidden'
     )
 
-    # Submit chatbot: save prompts
+    # Submit chatbot: save prompts
+    submit_button.click(
+        lambda: gr.update(value='Subiendo la información', interactive=False), None, submit_button
+    ).then(
+        utils.save_prompts, [client, context_prompt, prompts_table]
+    ).then(
+        utils.generate_json, [client, languages, max_num_questions, name], output_file
+    ).then(
+        lambda: gr.update(value='Información subida!!', interactive=False), None, submit_button
+    )
 
 
 app.launch(debug=True)
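Note on the event wiring above: Gradio runs chained events sequentially, so each .then() step starts only after the previous one resolves; that is what lets app.py swap a button label, run the slow job, then restore the label. A minimal, runnable sketch of the pattern (component and function names here are illustrative, not from this repo):

import time
import gradio as gr

def slow_job():
    time.sleep(2)  # stand-in for utils.create_chatbot / chatbot.start_chat
    return gr.update(value='Done!', interactive=False)

with gr.Blocks() as demo:
    button = gr.Button(value='Run')
    button.click(
        lambda: gr.update(value='Working...', interactive=False), None, button  # immediate feedback
    ).then(
        slow_job, None, button  # runs only after the label update has been applied
    )

demo.launch()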
gcp.py
DELETED
@@ -1,47 +0,0 @@
-import os
-from pathlib import Path
-from datetime import timedelta
-from google.cloud import storage
-from huggingface_hub import hf_hub_download
-from google.cloud.storage import transfer_manager
-
-
-def download_credentials():
-    os.makedirs('assets', exist_ok=True)
-
-    # Download credentials file
-    hf_hub_download(
-        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
-        token=os.environ.get('HUB_TOKEN'), local_dir="assets"
-    )
-
-
-def upload_folder(bucket_name: str, source_directory: str) -> None:
-    # Filter so the list only includes files, not directories themselves.
-    string_paths = [
-        str(path.relative_to(source_directory)) for path in Path(source_directory).rglob("*") if path.is_file()
-    ]
-
-    # Start the upload.
-    bucket = STORAGE_CLIENT.bucket(bucket_name)
-    results = transfer_manager.upload_many_from_filenames(
-        bucket, string_paths, source_directory=source_directory, max_workers=2
-    )
-
-    for name, result in zip(string_paths, results):
-        if isinstance(result, Exception):
-            print(f"Failed to upload {name} due to exception: {result}")
-        else:
-            print(f"Uploaded {name} to {bucket.name}.")
-
-
-def get_link_file(bucket_name: str, client_name: str, type_media: str, media_name: str):
-    bucket = STORAGE_CLIENT.bucket(bucket_name)
-    blobs = bucket.list_blobs(prefix=f'{client_name}/media/{type_media}/{media_name}')
-    blob = next(blobs)
-    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
-    return signed_url
-
-
-download_credentials()
-STORAGE_CLIENT = storage.Client.from_service_account_json(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
requirements.txt
CHANGED
@@ -4,11 +4,12 @@ google-cloud-storage==2.13.0
 requests==2.31.0
 tqdm==4.66.1
 nltk==3.8.1
-
+deepspeed==0.12.3
 torch==2.1.1
 torchaudio==2.1.1
 TTS==0.21.2
 google-cloud-storage==2.13.0
 numpy==1.22.0
 openai==1.10.0
-
+gradio==4.13.0
+pillow==10.2.0
audio.py → services/audio.py
RENAMED
@@ -5,17 +5,13 @@ import torch
 import pickle
 import torchaudio
 import numpy as np
-import gradio as gr
-from typing import Optional
 from TTS.tts.models.xtts import Xtts
 from nltk.tokenize import sent_tokenize
 from TTS.tts.configs.xtts_config import XttsConfig
 
 
 def _load_array(filename):
-    """
-    Opens a file a returns it, used with numpy files
-    """
+    """ Opens a file a returns it, used with numpy files """
     with open(filename, 'rb') as f:
         return pickle.load(f)
 
@@ -23,8 +19,8 @@ def _load_array(filename):
 os.environ['COQUI_TOS_AGREED'] = '1'
 
 # Used to generate audio based on a sample
-
-model_path = os.path.join("tts_model")
+nltk.download('punkt')
+model_path = os.path.join("../tts_model")
 
 config = XttsConfig()
 config.load_json(os.path.join(model_path, "config.json"))
@@ -35,7 +31,7 @@ model.load_checkpoint(
     checkpoint_path=os.path.join(model_path, "model.pth"),
     vocab_path=os.path.join(model_path, "vocab.json"),
     eval=True,
-
+    use_deepspeed=True,
 )
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
@@ -43,30 +39,37 @@ model.to(device)
 
 # Speaker latent
 path_latents = 'assets/gpt_cond_latent.npy'
-
+gpt_cond_latent = _load_array(path_latents)
 
 # Speaker embedding
 path_embedding = 'assets/speaker_embedding.npy'
-
+speaker_embedding = _load_array(path_embedding)
 
 
 def get_audio(text: str, language: str = 'es', saving_path: str = 'output') -> None:
     """
-    Creates an audio
+    Creates an audio
+    :param text: text to convert to audio
+    :param language: 'es', 'en' or 'pt', language used for the audio file
+    :param saving_path: path to save the audio
+    :return: None
     """
     # Creates an audio with the answer and saves it as output.wav
     _save_audio(text, language, saving_path)
-
-    return None
+    return
 
 
-def _save_audio(answer: str, language: str, path_audio: str) -> None:
+def _save_audio(text: str, language: str, path_audio: str) -> None:
     """
-    Splits the
-    all the audios and saves them into a file
+    Splits the text into sentences, clean and creates an audio for each one, then concatenates
+    all the audios and saves them into a file.
+    :param text: input text
+    :param language: language used in the audio
+    :param path_audio: saving path of the audio
+    :return: None
     """
     # Split the answer into sentences and clean it
-    sentences =
+    sentences = _get_clean_text(text, language)
 
     # Get the voice of each sentence
     audio_segments = []
@@ -78,41 +81,42 @@ def _save_audio(answer: str, language: str, path_audio: str) -> None:
     # Concatenate and save all audio segments
     concatenated_audio = torch.cat(audio_segments, dim=0)
     torchaudio.save(f'{path_audio}.wav', concatenated_audio.unsqueeze(0), 24000)
+    return
 
 
 def _get_voice(sentence: str, language: str) -> np.ndarray:
     """
-
+    Gets a numpy array with a wav of an audio with the given sentence and language
+    :param sentence: input sentence
+    :param language: languages used in the audio
+    :return: numpy array with the audio
     """
-    '''out = model.inference(
+    out = model.inference(
         sentence,
         language=language,
         gpt_cond_latent=gpt_cond_latent,
         speaker_embedding=speaker_embedding,
         temperature=0.1
-    )'''
-    out = model.synthesize(
-        sentence,
-        config,
-        speaker_wav='assets/orlando2_cleaned.wav',
-        language=language
     )
     return out['wav']
 
 
-def _get_clean_answer(answer: str, language: str) -> list[str]:
+def _get_clean_text(text: str, language: str) -> list[str]:
     """
-
+    Splits the text into smaller sentences using nltk and removes links.
+    :param text: input text for the audio
+    :param language: language used for the audio ('es', 'en', 'pt')
+    :return: list of sentences
     """
     # Remove the links in the audio and add another sentence
     if language == 'en':
-        clean_answer = re.sub(r'http[s]?://\S+', 'the following link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'the following link', text)
         max_characters = 250
     elif language == 'es':
-        clean_answer = re.sub(r'http[s]?://\S+', 'el siguiente link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'el siguiente link', text)
        max_characters = 239
     else:
-        clean_answer = re.sub(r'http[s]?://\S+', 'o seguinte link', answer)
+        clean_answer = re.sub(r'http[s]?://\S+', 'o seguinte link', text)
         max_characters = 203
 
     # Change the name from Bella to Bela
@@ -136,9 +140,12 @@ def _get_clean_answer(answer: str, language: str) -> list[str]:
 
 def _split_sentence(sentence: str, max_characters: int) -> list[str]:
     """
-
+    Used when the sentences are still to long. The split point is the nearest comma to the middle
     of the sentence, if there is no comma then a space is used or just the middle. If the
-    remaining sentences are still too long, another iteration is run
+    remaining sentences are still too long, another iteration is run.
+    :param sentence: sentence to be split
+    :param max_characters: max number of characters a sentence can have
+    :return: list of sentences
     """
     # Get index of each comma
     sentences = []
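Note: the heart of services/audio.py is a split-synthesize-concatenate loop: nltk splits the text into sentences, XTTS renders each one with the precomputed speaker latents, and the clips are joined into a single 24 kHz wav. A condensed sketch of that flow; the model and the two conditioning arrays are passed in as parameters here, whereas in the module they are the globals loaded in the diff above:

import torch
import torchaudio
from nltk.tokenize import sent_tokenize

def synthesize(model, gpt_cond_latent, speaker_embedding, text: str, language: str, path: str) -> None:
    # Requires nltk's 'punkt' tokenizer data, downloaded at import time in the module
    segments = []
    for sentence in sent_tokenize(text):  # one short clip per sentence
        out = model.inference(
            sentence, language=language,
            gpt_cond_latent=gpt_cond_latent, speaker_embedding=speaker_embedding,
            temperature=0.1
        )
        segments.append(torch.as_tensor(out['wav']))
    # Concatenate all clips and save them as one mono 24 kHz file
    torchaudio.save(f'{path}.wav', torch.cat(segments, dim=0).unsqueeze(0), 24000)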
audio_model.py → services/audio_model.py
RENAMED
@@ -2,7 +2,6 @@ import os
 import requests
 from tqdm import tqdm
 from google.cloud import storage
-from huggingface_hub import hf_hub_download
 
 
 def _download_starting_files() -> None:
@@ -46,8 +45,8 @@ def download_model():
         'vocab.json': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/vocab.json?download=true',
     }
 
-    if not os.path.exists("tts_model"):
-        os.makedirs("tts_model")
+    if not os.path.exists("../tts_model"):
+        os.makedirs("../tts_model")
 
     # Download files if they don't exist
     print("[COQUI TTS] STARTUP: Checking Model is Downloaded.")
@@ -57,4 +56,4 @@ def download_model():
         _download_file(url, destination)
 
     # Downloads the embeddings from GCP
-
+    _download_starting_files()
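Note: _download_file itself sits outside these hunks; given the requests and tqdm imports, a plausible shape for it is a chunked streaming download with a progress bar. This is an assumption for illustration, not the repo's code:

import requests
from tqdm import tqdm

def download_file(url: str, destination: str) -> None:
    # Stream the file in chunks so large model weights never sit fully in memory
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total = int(r.headers.get('content-length', 0))
        with open(destination, 'wb') as f, tqdm(total=total, unit='B', unit_scale=True) as bar:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
                bar.update(len(chunk))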
chatbot.py → services/chatbot.py
RENAMED
@@ -1,5 +1,6 @@
 import os
-import audio
+import csv
+from services import audio
 import random
 import pinecone
 import gradio as gr
@@ -7,42 +8,66 @@ from openai import OpenAI
 
 
 OPENAI_CLIENT = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+OPENAI_MODEL = os.getenv("OPENAI_MODEL")
+pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment=os.getenv("PINECONE_ENV"))
+INDEX = pinecone.Index(os.getenv("PINECONE_INDEX"))
+
+
+def start_chat(client_name: str) -> tuple[list[list[str | None]], gr.helpers, gr.helpers]:
+    """
+    Initialize chat with greeting text and audio in spanish
+    :param client_name: name of the client
+    :return: (chat history with greeting, audio with updated file and gradio update with visible=True)
+    """
+    # Get greeting text and audio, the first one available in spanish
+    with open(f'assets/{client_name}/greetings/es.csv', mode='r', encoding='utf-8') as infile:
+        reader = csv.reader(infile)
+        greeting = next(reader)[0]
+    audio_name = f'assets/{client_name}/media/audio/greeting_es_0.wav'
+
+    # Initialize chat
+    chat_history = [['', greeting]]
+
+    return chat_history, gr.update(value=f'{audio_name}'), gr.update(visible=True)
+
+
+def get_random_data(client_name: str) -> gr.helpers:
+    """
+    Returns an audio with a random data in spanish
+    :param client_name: name of the client for this chatbot
+    :return: gradio audio updated with a random data from the client
+    """
     random_options = []
     path_audios = f'assets/{client_name}/media/audio'
     for random_audio in os.listdir(path_audios):
         if random_audio.startswith('random') and 'es' in random_audio:
-            random_options.append(random_audio)
+            random_options.append(os.path.join(path_audios, random_audio))
 
+    # Get any of the found random files
     num = random.randint(0, len(random_options) - 1)
     return gr.update(value=random_options[num])
 
 
 def get_answer(
-    chat_history: list[tuple[str, str]], user_input: str, client_name: str, general_prompt: str,context_prompt: str
-):
+    chat_history: list[tuple[str, str]], user_input: str, client_name: str, general_prompt: str, context_prompt: str
+) -> tuple[list[tuple[str, str]], str, gr.helpers]:
+    """
+    Gets the answer from the chatbot and returns it as an audio and text
+    :param chat_history: previous chat history
+    :param user_input: user question
+    :param client_name: name of the client
+    :param general_prompt: prompt used for answering the questions
+    :param context_prompt: prompt used for finding the context in the vectorstore
+    :return:
+    """
     # Format chat history to OpenAI format msg history
     msg_history = [{'role': 'system', 'content': general_prompt}]
-    for i, msg in enumerate(chat_history):
+    for i, (user, bot) in enumerate(chat_history):
         if i == 0:
-
-            if i % 2 == 0:
-                msg_history.append({'role': 'user', 'content': msg})
+            msg_history.append({'role': 'assistant', 'content': bot})
         else:
-            msg_history.append({'role': '
+            msg_history.append({'role': 'user', 'content': user})
+            msg_history.append({'role': 'assistant', 'content': bot})
 
     # Get standalone question
     standalone_question = _get_standalone_question(user_input, msg_history, context_prompt)
@@ -53,7 +78,7 @@ def get_answer(
     # Get answer from chatbot
     response = _get_response(context, msg_history, user_input, general_prompt)
 
-    # Get audio
+    # Get audio:
     audio.get_audio(response, 'es')
 
     # Update chat_history
@@ -63,12 +88,25 @@ def get_answer(
 
 
 def _get_response(context: str, message_history: list[dict], question: str, prompt: str) -> str:
+    """
+    Gets the response from ChatGPT
+    :param context: text obtained from the vectorstore
+    :param message_history: chat history in the format used by OpenAI
+    :param question: user question
+    :param prompt: prompt used to answer the questions
+    :return: response from ChatGPT
+    """
     message_history[0]['content'] = prompt.replace('CONTEXT', context)
     message_history.append({'role': 'user', 'content': question})
-    return _call_api(message_history)
+    return _call_api(message_history, 0.7)
 
 
 def _get_embedding(text: str) -> list[float]:
+    """
+    Gets the embedding of a given text
+    :param text: input text
+    :return: embedding of the text
+    """
     response = OPENAI_CLIENT.embeddings.create(
         input=text,
         model='text-embedding-ada-002'
@@ -76,16 +114,30 @@ def _get_embedding(text: str) -> list[float]:
     return response.data[0].embedding
 
 
-def _call_api(message_history: list[dict]) -> str:
+def _call_api(message_history: list[dict], temperature: float) -> str:
+    """
+    Gets response form OpenAI API
+    :param message_history: chat history in the format used by OpenAI
+    :param temperature: randomness of the output
+    :return: ChatGPT answer
+    """
     response = OPENAI_CLIENT.chat.completions.create(
-        model=
-        temperature=
+        model=OPENAI_MODEL,
+        temperature=temperature,
         messages=message_history
     )
     return response.choices[0].message.content
 
 
 def _get_standalone_question(question: str, message_history: list[dict], prompt_q: str) -> str:
+    """
+    Gets a standalone question/phrase based on the user's question and the previous messages. Used since
+    some questions are too simple like "yes, please"
+    :param question: user question
+    :param message_history: msg history in the format used by OpenAI
+    :param prompt_q: prompt used to get a text that will be used in the vectorstore
+    :return: string with the standalone phrase
+    """
     # Format the message history like: Human: blablablá \nAssistant: blablablá
     history = ''
     for i, msg in enumerate(message_history):
@@ -101,15 +153,20 @@ def _get_standalone_question(question: str, message_history: list[dict], prompt_q: str) -> str:
     content = prompt_q.replace('HISTORY', history).replace('QUESTION', question)
     prompt[0]['content'] = content
 
-    return _call_api(prompt)
+    return _call_api(prompt, 0.01)
 
 
 def _get_context(question: str, client_name: str) -> str:
+    """
+    Gets the 10 nearest vectors to the given question
+    :param question: standalone text
+    :param client_name: name of the client, used as namespace in the vectorstore
+    :return: formatted text with the nearest vectors
+    """
     q_embedding = _get_embedding(question)
 
     # Get most similar vectors
-
-    result = index.query(
+    result = INDEX.query(
         vector=q_embedding,
         top_k=10,
         include_metadata=True,
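Note: get_answer is a standard retrieval-augmented generation loop: rewrite the latest message into a standalone question, embed it, fetch the nearest vectors from Pinecone, and answer with that context injected into the system prompt. A condensed, self-contained sketch of the same flow; the 'text' metadata key and the '<client>-context' namespace are assumptions based on this diff and the utils.py one:

import os
import pinecone
from openai import OpenAI

client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))
index = pinecone.Index(os.getenv('PINECONE_INDEX'))

def answer(question: str, client_name: str, prompt: str) -> str:
    # Embed the (already standalone) question and pull the 10 nearest vectors
    embedding = client.embeddings.create(input=question, model='text-embedding-ada-002').data[0].embedding
    result = index.query(vector=embedding, top_k=10, include_metadata=True,
                         namespace=f'{client_name}-context')
    context = '\n'.join(match['metadata']['text'] for match in result['matches'])  # metadata key assumed
    # Inject the retrieved context into the system prompt, then ask the chat model
    messages = [{'role': 'system', 'content': prompt.replace('CONTEXT', context)},
                {'role': 'user', 'content': question}]
    completion = client.chat.completions.create(model=os.getenv('OPENAI_MODEL'), temperature=0.7,
                                                messages=messages)
    return completion.choices[0].message.content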
services/gcp.py
ADDED
@@ -0,0 +1,69 @@
+import os
+from pathlib import Path
+from datetime import timedelta
+from google.cloud import storage
+from huggingface_hub import hf_hub_download
+from google.cloud.storage import transfer_manager
+
+
+def download_credentials() -> None:
+    """
+    Downloads the GCP credentials from Hugging Face Hub
+    :return: None
+    """
+    os.makedirs('../assets', exist_ok=True)
+    hf_hub_download(
+        repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
+        token=os.environ.get('HUB_TOKEN'), local_dir="../assets"
+    )
+    return
+
+
+def upload_folder(client_name: str, source_directory: str) -> None:
+    """
+    Uploads the given source directory to the GCP bucket.
+    :param client_name: name of the client, used as name of the main folder
+    :param source_directory: directory with all the sub-folders and files to upload
+    :return: None
+    """
+    # Filter so the list only includes files, not directories themselves.
+    string_paths = [
+        str(path.relative_to(source_directory)).replace("\\", "/") for path in Path(source_directory).rglob("*")
+        if path.is_file()
+    ]
+
+    # Path where the files in the source directory will be saved
+    delimiter = source_directory.find(client_name)
+    destination_directory = source_directory[delimiter:] + "/"
+
+    # Start the upload. Threads was used instead of process because Gradio was rebooted with the second one.
+    bucket = STORAGE_CLIENT.bucket('clients-bella')
+    _ = transfer_manager.upload_many_from_filenames(
+        bucket, filenames=string_paths, source_directory=source_directory, max_workers=1,
+        blob_name_prefix=destination_directory, worker_type=transfer_manager.THREAD
+    )
+    return
+
+
+def get_link_file(client_name: str, type_media: str, media_name: str) -> str:
+    """
+    Gets a public link during 15 minutes to a given file in GCP
+    :param client_name: name of the client (name of the main folder)
+    :param type_media: if it is audio or video
+    :param media_name: name of the desired media file
+    :return: public link to the file
+    """
+
+    if media_name == 'waiting.wav':
+        bucket = STORAGE_CLIENT.bucket('audios_bella')
+        blob = bucket.blob('waiting.wav')
+    else:
+        bucket = STORAGE_CLIENT.bucket('clients-bella')
+        blobs = bucket.list_blobs(prefix=f'{client_name}/media/{type_media}/{media_name}')
+        blob = next(blobs)
+    signed_url = blob.generate_signed_url(expiration=timedelta(minutes=15))
+    return signed_url
+
+
+download_credentials()
+STORAGE_CLIENT = storage.Client.from_service_account_json(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'))
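Note: in upload_many_from_filenames, blob_name_prefix is prepended to every relative filename, which is how everything under assets/<client> lands under <client>/... in the clients-bella bucket. A small helper that previews the resulting object names by mirroring upload_folder's path logic:

from pathlib import Path

def preview_blob_names(client_name: str, source_directory: str) -> list[str]:
    # Same relative-path and prefix computation as upload_folder above
    relative = [str(p.relative_to(source_directory)).replace('\\', '/')
                for p in Path(source_directory).rglob('*') if p.is_file()]
    prefix = source_directory[source_directory.find(client_name):] + '/'
    return [prefix + name for name in relative]

# e.g. with assets/acme/media/audio/greeting_es_0.wav on disk:
# preview_blob_names('acme', 'assets/acme') -> ['acme/media/audio/greeting_es_0.wav']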
utils.py → services/utils.py
RENAMED
@@ -1,28 +1,37 @@
+import os
 import csv
 import uuid
+import json
+import logging
 import pinecone
+import gradio as gr
+from PIL import Image
 from typing import Union
 from openai import Client
 from pinecone import Index
 
-from gcp import *
-
-if not os.path.exists('tts_model'):  # Get TTS model
+from services import audio_model, gcp
+
+if not os.path.exists('../tts_model'):  # Get TTS model
     audio_model.download_model()
-
-from audio import *
+from services.audio import *
 from video import *
 
 
 pinecone.init(api_key=os.getenv('PINECONE_API_KEY'), environment=os.getenv('PINECONE_ENV'))
 INDEX = Index(os.getenv('PINECONE_INDEX'))
 OPENAI_CLIENT = Client()
+TRANSLATE_LANGUAGES = {'español': 'es', 'ingles': 'en', 'portugués': 'pt'}
+TRANSLATE_GREET = {'Saludo': 'greeting', 'Despedida': 'goodbye', 'Error': 'error'}
 
 
-def add_data_table(table: list[list[str]], *data: str):
+def add_data_table(table: list[list[str]], *data: str) -> tuple[list[list[str]], list[str]]:
     """
     Adds the data to the table. Some data consist of two columns others only one.
-    So depending on that, the new row and returned value will be different
+    So depending on that, the new row and returned value will be different.
+    :param table: table to add the data to
+    :param data: new row to be added to the table
+    :return: updated table and list of strings for cleaning the input
     """
     if len(data) == 3:  # It is the greet tab
         new_value = '', *data[1:]
@@ -42,9 +51,12 @@ def add_data_table(table: list[list[str]], *data: str):
     return table, *new_value
 
 
-def remove_data_table(table: list[list[str]], evt: gr.SelectData):
+def remove_data_table(table: list[list[str]], evt: gr.SelectData) -> list[list[str]]:
     """
-    Deletes a row on the table if the selected column is the first one
+    Deletes a row on the table if the selected column is the first one.
+    :param table: clicked table
+    :param evt: the event (has info of the position of the click)
+    :return: updated table
     """
     # The clicked column is not the first one (the one with the X), do not do anything
     if evt.index[1] != 0:
@@ -60,72 +72,114 @@ def remove_data_table(table: list[list[str]], evt: gr.SelectData):
     return table
 
 
-def add_language(languages: list[str]) -> Union[gr.Error, tuple[gr.helpers, gr.helpers]]:
+def add_language(languages: list[str]) -> Union[gr.Error, tuple[gr.helpers, gr.helpers, gr.helpers]]:
+    """
+    Updated the dropdown with the selected languages
+    :param languages: list of selected languages
+    :return: three updated dropdowns if at least 1 language was selected, otherwise an error
+    """
     if len(languages) == 0:
         raise gr.Error('Debe seleccionar al menos 1 idioma')
 
     return (
+        gr.update(choices=[i for i in languages], value=languages[0], interactive=True),
         gr.update(choices=[i for i in languages], value=languages[0], interactive=True),
         gr.update(choices=[i for i in languages], value=languages[0], interactive=True)
     )
 
 
 def create_chatbot(
-        client: str, name: str, messages_table: list[str
-
-
-
-
+        client: str, name: str, messages_table: list[list[str]], random_table: list[list[str]],
+        questions_table: list[list[str]], image: Image
+) -> gr.helpers:
+    """
+    Creation of the chatbot. It creates all the audios, videos csv files for the given tables
+    (greetings, goodbyes, errors and random) and uploads them to GCP, and it creates the
+    vectorstore with the given questions and answers.
+    :param client: name of the client (Nosotras, Visit Orlando, etc.)
+    :param name: name of the chatbot (Bella, Roomie, etc.)
+    :param messages_table: table with the greetings, goodbyes and errors messages
+    :param random_table: table with the random data about the client
+    :param questions_table: table with the questions and answers for each question
+    :param image: image used as base for the videos
+    :return: updates the value of a button (know lets know the user if the process is done or there was an error)
+    """
     # Set up general info
     client_name = client.lower().replace(' ', '-')
-
+    _ = name.lower()  # TODO: use it
 
     # Group messages by their type (greeting, goodbye or error) and language
     messages = dict()
     for message in messages_table:
-
-
+        msg = message[1]
+        type_msg = TRANSLATE_GREET[message[2]]
+        language_msg = TRANSLATE_LANGUAGES[message[-1]]
         os.makedirs(f'assets/{client_name}/{type_msg}s', exist_ok=True)
         if type_msg not in messages:
-            messages[type_msg] = {language_msg: [
+            messages[type_msg] = {language_msg: [msg]}
         else:
             if language_msg not in messages[type_msg]:
-                messages[type_msg][language_msg] = [
+                messages[type_msg][language_msg] = [msg]
             else:
-                messages[type_msg][language_msg].append(
+                messages[type_msg][language_msg].append(msg)
 
     # Create CSV files (greeting, goodbye and error)
     for type_msg in messages:
         for language in messages[type_msg]:
-            with open(f'assets/{client_name}/{type_msg}/{language}.csv', mode='w', encoding='utf-8')
-
-            writer.
+            with (open(f'assets/{client_name}/{type_msg}s/{language}.csv', mode='w', encoding='utf-8', newline='')
+                  as outfile):
+                writer = csv.writer(outfile)
+                for msg in messages[type_msg][language]:
+                    writer.writerow([msg])
 
     # Create the audios (greeting, goodbye and error)
-
+    path_audios = f'assets/{client_name}/media/audio'
+    os.makedirs(path_audios, exist_ok=True)
     for type_msg in messages:
         for language in messages[type_msg]:
             for i, msg in enumerate(messages[type_msg][language]):
-                full_path = f'
-
+                full_path = f'{path_audios}/{type_msg}_{language}_{i}'
+                get_audio(msg, language, full_path)
+
+    # Group random audios by their language
+    random = dict()
+    for _, msg, language in random_table:
+        short_language = TRANSLATE_LANGUAGES[language]
+        if short_language not in random:
+            random[short_language] = [msg]
+        else:
+            random[short_language].append(msg)
 
     # Create the random audios
-    for
-
-
+    for language in random:
+        for i, msg in enumerate(random[language]):
+            full_path = f'{path_audios}/random_{language}_{i}'
+            get_audio(msg, language, full_path)
+
+    # Save image
+    os.makedirs(f'assets/{client_name}/media/image', exist_ok=True)
+    image.save(f'assets/{client_name}/media/image/base.png')
 
     # Upload files and audios to bucket in GCP
-    upload_folder(
+    gcp.upload_folder(client_name, f'assets/{client_name}')
 
-    # Create videos
-
-
+    # Create videos for the generated audios and the waiting video (it is muted)
+    path_videos = f'assets/{client_name}/media/video'
+    os.makedirs(path_videos, exist_ok=True)
+    list_audios = os.listdir(path_audios) + ['waiting.wav']
+    for audio_file in list_audios:
         name_file = audio_file.split('.')[0]
-        link_audio = get_link_file(
-
+        link_audio = gcp.get_link_file(client_name, 'audio', audio_file)
+        link_image = gcp.get_link_file(client_name, 'image', 'base.png')
+        try:
+            get_video(link_audio, link_image, f'{path_videos}/{name_file}')
+        except Exception as e:
+            gr.Error(f'Problema con la creación del video, hable con el administrador. Error: {e}')
+            logging.error(e)
+            return gr.update(value='ERROR!', interactive=False)
 
     # Upload videos to GCP
-    upload_folder(
+    gcp.upload_folder(client_name, path_videos)
 
     # Set up vectorstore
     vectors = []
@@ -139,12 +193,61 @@ def create_chatbot(
     INDEX.upsert(vectors=vectors, namespace=f'{client_name}-context')
 
     # Change text in the button
-    return gr.
+    return gr.update(value='Chatbot created!!!', interactive=False)
+
+
+def save_prompts(client_name: str, context_prompt: str, prompts_table: list[list[str]]) -> None:
+    """
+    Saves all the prompts (standalone and one for each language) and uploads them to Google Cloud Storage
+    :param client_name: name of the client
+    :param context_prompt: standalone prompt used to search into the vectorstore
+    :param prompts_table: table with the prompt of each language
+    :return: None
+    """
+    path_prompts = f'assets/{client_name}/prompts'
+    os.makedirs(path_prompts, exist_ok=True)
+
+    # Save standalone prompt. It is the same for all languages
+    with open(f'{path_prompts}/prompt_standalone_q.txt', mode='w', encoding='utf-8') as outfile:
+        outfile.write(context_prompt)
+
+    # Save the prompt of each language
+    for _, prompt, language in prompts_table:
+        language_prompt = TRANSLATE_LANGUAGES[language]
+        with open(f'{path_prompts}/prompt_{language_prompt}.txt', mode='w', encoding='utf-8') as outfile:
+            outfile.write(prompt)
+
+    gcp.upload_folder(client_name, path_prompts)
+    return
+
+
+def generate_json(client_name: str, languages: list[str], max_num_questions: int, chatbot_name: str) -> gr.helpers:
+    """
+    Creates a json file with the environment variables used in the API
+    :param client_name:
+    :param languages:
+    :param max_num_questions:
+    :param chatbot_name:
+    :return: gradio file with the value as the path of the json file
+    """
+    json_object = json.dumps(
+        {
+            'CLIENT_NAME': client_name, 'MODEL_OPENAI': os.getenv('OPENAI_MODEL'), 'LANGUAGES': languages,
+            'MAX_NUM_QUESTIONS': max_num_questions, 'NUM_VECTORS_CONTEXT': 10, 'THRESHOLD_RECYCLE': 0.97,
+            'OPENAI_API_KEY': 'Check OpenAI for this', 'CHATBOT_NAME': chatbot_name
+        },
+        indent=4
+    )
+    path_json = f"assets/{client_name}/chatbot_variables.json"
+    with open(path_json, mode='w', encoding='utf-8') as outfile:
+        outfile.write(json_object)
+
+    return gr.update(value=path_json, label='Output file', interactive=True)
 
 
 def _get_embedding(sentence: str) -> list[float]:
     """
-
+    Gets the embedding of a word/sentence/paragraph
     :param sentence: input of the model
     :return: list of floats representing the embedding
     """
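Note: the grouping step in create_chatbot can be exercised on its own; dict.setdefault is an equivalent, shorter form of the nested if/else used in the diff. Table rows here are illustrative:

TRANSLATE_LANGUAGES = {'español': 'es', 'ingles': 'en', 'portugués': 'pt'}
TRANSLATE_GREET = {'Saludo': 'greeting', 'Despedida': 'goodbye', 'Error': 'error'}

# Rows mimic messages_table: [delete-marker, message, type, language]
rows = [['', '¡Hola!', 'Saludo', 'español'], ['', 'Adiós', 'Despedida', 'español']]

messages: dict[str, dict[str, list[str]]] = {}
for _, msg, type_msg, language in rows:
    group = messages.setdefault(TRANSLATE_GREET[type_msg], {})
    group.setdefault(TRANSLATE_LANGUAGES[language], []).append(msg)

print(messages)  # {'greeting': {'es': ['¡Hola!']}, 'goodbye': {'es': ['Adiós']}}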
video.py → services/video.py
RENAMED
@@ -1,39 +1,47 @@
 import os
 import time
 import requests
-import logging
-from clint.textui import progress
 
 
-def get_video(link_audio: str, path_video: str) ->
+def get_video(link_audio: str, image_url: str, path_video: str,) -> None:
     """
-
-
+    Creates a video with d-id and saves it.
+    :param link_audio: url of the audio in the bucket used for the video
+    :param path_video: path for saving the video file
+    :param image_url: url with the base image used for the video
+    :return: None
+    :raises Exception: if there was a problem with D-ID
     """
-
-
-
-
-        return False
+    try:
+        id_video = _create_talk(link_audio, image_url)
+    except Exception as e:
+        raise e
 
     link_video = _get_url_talk(id_video)
 
     # Saves the video into a file to later upload it to the cloud
     name = f'{path_video}.mp4'
-
-
-
-
-
-
-
-
-
-
-def _create_talk(link_audio: str) -> tuple[bool, str]:
+    try:
+        with requests.get(link_video) as r:
+            r.raise_for_status()  # Raises an exception for HTTP errors
+            if r.status_code == 200:
+                with open(name, 'wb') as outfile:
+                    outfile.write(r.content)
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Network-related error while downloading the video: {e}")
+    except ValueError as e:
+        raise Exception(e)
+    except Exception as e:
+        raise Exception(f"An unexpected error occurred: {e}")
+
+
+def _create_talk(link_audio: str, image_url: str) -> str:
     """
-    Creates and returns the id of the talk made with d-id.
-
+    Creates and returns the id of the talk made with d-id.
+    :param link_audio: url of the audio in the bucket used for the video
+    :param image_url: url with the base image used for the video
+    :return: id of the talk
+    :raises Exception: if there was a problem while generating the talk
     """
     url = "https://api.d-id.com/talks"
 
@@ -52,7 +60,7 @@ def _create_talk(link_audio: str) -> tuple[bool, str]:
             "pad_audio": "0.0",
             "stitch": True
         },
-        "source_url":
+        "source_url": image_url
     }
     headers = {
         "accept": "application/json",
@@ -65,17 +73,18 @@ def _create_talk(link_audio: str) -> tuple[bool, str]:
 
     try:
         talk_id = r['id']
-        return True, talk_id
+        return talk_id
 
     # Probably there are no more available credits
     except KeyError:
-
-        return False, 'None'
+        raise Exception(f"D-ID response is missing 'id' key. Returned error: {r}")
 
 
 def _get_url_talk(id_video: str) -> str:
     """
-
+    Gets the url of the finished talk
+    :param id_video: id of the previously created talk
+    :return: url of the video
    """
     url = f"https://api.d-id.com/talks/{id_video}"
 
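Note: _get_url_talk (hence the time import) presumably polls the D-ID talks endpoint until the render is done. A hedged sketch of that create-then-poll loop; the header values, env var name and polling interval are assumptions, only the /talks endpoints themselves come from the diff:

import os
import time
import requests

API = 'https://api.d-id.com/talks'
HEADERS = {
    'accept': 'application/json',
    'content-type': 'application/json',
    'authorization': f"Basic {os.getenv('D_ID_KEY')}",  # env var name assumed
}

def wait_for_video(talk_id: str, delay: float = 2.0) -> str:
    # Poll the talk until D-ID reports it finished, then return the video url
    while True:
        talk = requests.get(f'{API}/{talk_id}', headers=HEADERS).json()
        if talk.get('status') == 'done':
            return talk['result_url']  # field name per D-ID's talks API
        time.sleep(delay)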