vmoras committed on
Commit
d1701ad
·
1 Parent(s): 37890b1

Improve front and add some util functions

Files changed (6)
  1. .gitignore +4 -0
  2. app.py +128 -0
  3. audio.py +201 -0
  4. audio_model.py +39 -0
  5. requirements.txt +11 -0
  6. utils.py +73 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
+ .idea/
+ .venv/
+ __pycache__/
+ .env
app.py ADDED
@@ -0,0 +1,128 @@
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ from utils import add_data_table, remove_data_table, create_chatbot
+ import gradio as gr
+
+
+ with gr.Blocks() as app:
+     with gr.Tab('General info'):
+         client = gr.Textbox(label='Nombre del cliente', placeholder='Inserte el nombre del cliente')
+         language = gr.CheckboxGroup(
+             choices=['español', 'inglés', 'portugués'], value='español', label='Idiomas', interactive=True,
+             info='Seleccione todos los idiomas que el chatbot va a hablar (al menos debe tener 1 idioma)'
+         )
+         name = gr.Dropdown(
+             choices=['Bella'], value='Bella', label='Nombre del chatbot',
+             info='Seleccione el nombre del chatbot, si no se encuentra en la lista, contacte al administrador'
+         )
+         num_questions = gr.Number(
+             value=5, minimum=2, maximum=10, label='Número de preguntas', interactive=True,
+             info='Máximo número de preguntas que puede hacer el usuario.'
+         )
+
+     with gr.Tab('Images'):
+         base_image = gr.Image(label='Imagen base para los videos', sources=['upload'])
+
+     with gr.Tab('Greeting and goodbye'):
+         _ = gr.Markdown(
+             'Ingrese los saludos, despedidas y mensajes de error que deba usar el chatbot.'
+         )
+         with gr.Row():
+             greet = gr.Textbox(label='Mensaje', info='Ingrese el mensaje a decir por el chatbot.')
+             type_greet = gr.Dropdown(
+                 choices=['Saludo', 'Despedida', 'Error'], value='Saludo', interactive=True,
+                 info='Seleccione si es saludo, despedida o mensaje de error.'
+             )
+             send_greet_button = gr.Button(value='Añadir')
+         messages_table = gr.DataFrame(
+             headers=['Eliminar', 'Tipo mensaje', 'Mensaje'], type='array', interactive=False
+         )
+
+     with gr.Tab('Random data'):
+         _ = gr.Markdown(
+             'Si quiere que Bella diga algunos datos random mientras busca la información, ingrese dichos párrafos acá.'
+         )
+         with gr.Row():
+             random_data = gr.Text(placeholder='Ingrese el dato random', label='Dato random')
+             send_random_button = gr.Button(value='Añadir')
+         random_table = gr.DataFrame(headers=['Eliminar', 'Dato random'], type='array', interactive=False)
+
+     with gr.Tab('Questions - Context'):
+         with gr.Row():
+             question = gr.Text(placeholder='Ingrese su pregunta', label='Pregunta')
+             context = gr.Text(placeholder='Ingrese el párrafo u oración que contesta dicha pregunta', label='Contexto')
+             send_question_button = gr.Button(value='Añadir')
+         questions_table = gr.DataFrame(
+             headers=['Eliminar', 'Pregunta', 'Contexto'], type='array', interactive=False
+         )
+
+     with gr.Tab('General prompt'):
+         general_prompt = gr.Text(placeholder='Ingrese el prompt general del bot', label='Prompt')
+
+     with gr.Tab('Context prompt'):
+         context_prompt = gr.Text(placeholder='Ingrese el prompt usado para encontrar el contexto', label='Prompt')
+
+     with gr.Tab('Create chatbot'):
+         _ = gr.Markdown(
+             'Asegúrese de que toda la información esté correcta antes de enviarla.'
+         )
+         create_chatbot_button = gr.Button(value='Crear chatbot')
+
+     with gr.Tab('Test'):
+         with gr.Row():
+             with gr.Column():
+                 with gr.Row():
+                     video = gr.Video(interactive=False, label='Video', autoplay=True)
+                 with gr.Row():
+                     output_audio = gr.Audio(interactive=False, label='Audio', autoplay=True)
+
+             with gr.Column():
+                 with gr.Row():
+                     chat = gr.Chatbot(label='Chat')
+                 with gr.Row():
+                     text = gr.Text(label='Write your question')
+
+     with gr.Tab('Submit'):
+         _ = gr.Markdown(
+             'Asegúrese de que hizo las suficientes pruebas para aprobar el chatbot.'
+         )
+         submit_button = gr.Button(value='ENVIAR!')
+         output_file = gr.File(interactive=False, label='Output file')
+
+     # ----------------------------------------------- ACTIONS -----------------------------------------------------
+
+     # Add info to the tables (the greet handler returns three values, so it needs three outputs)
+     send_greet_button.click(
+         add_data_table, [messages_table, type_greet, greet], [messages_table, greet, type_greet]
+     )
+     send_random_button.click(
+         add_data_table, [random_table, random_data], [random_table, random_data]
+     )
+     send_question_button.click(
+         add_data_table, [questions_table, question, context], [questions_table, question, context]
+     )
+
+     # Remove info from the tables
+     messages_table.select(
+         remove_data_table, messages_table, messages_table
+     )
+     random_table.select(
+         remove_data_table, random_table, random_table
+     )
+     questions_table.select(
+         remove_data_table, questions_table, questions_table
+     )
+
+     # Create the chatbot: create media and vectorstore
+     create_chatbot_button.click(
+         lambda: gr.Button(value='Creating chatbot...', interactive=False),
+         None,
+         create_chatbot_button
+     ).then(
+         create_chatbot,
+         [client, language, name, base_image, messages_table, random_table, questions_table],
+         create_chatbot_button
+     )
+
+ app.launch(debug=True)
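
The two chained events on create_chatbot_button are the standard Gradio pattern for long-running work: the first .click() returns an updated, disabled button so the user gets immediate feedback, and .then() runs the slow job and hands the button back. A minimal, self-contained sketch of the pattern (component names are illustrative, not from this app):

```python
import time
import gradio as gr


def long_task():
    time.sleep(5)  # stand-in for the real work (media + vectorstore creation)
    return gr.Button(value='Done!', interactive=True)


with gr.Blocks() as demo:
    button = gr.Button(value='Run')
    # First event: disable the button immediately so it cannot be clicked twice
    button.click(
        lambda: gr.Button(value='Running...', interactive=False), None, button
    ).then(
        long_task, None, button  # second event: do the work, then restore the button
    )

demo.launch()
```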
audio.py ADDED
@@ -0,0 +1,201 @@
+ import re
+ import os
+ import nltk
+ import torch
+ import pickle
+ import torchaudio
+ import numpy as np
+ import gradio as gr
+ from google.cloud import storage
+ from TTS.tts.models.xtts import Xtts
+ from nltk.tokenize import sent_tokenize
+ from huggingface_hub import hf_hub_download
+ from TTS.tts.configs.xtts_config import XttsConfig
+
+
+ def _download_starting_files() -> None:
+     """
+     Downloads the embeddings from a bucket
+     """
+     os.makedirs('assets', exist_ok=True)
+
+     # Download credentials file
+     hf_hub_download(
+         repo_id=os.environ.get('DATA'), repo_type='dataset', filename="credentials.json",
+         token=os.environ.get('HUB_TOKEN'), local_dir="assets"
+     )
+
+     # Initialise a client
+     credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
+     storage_client = storage.Client.from_service_account_json(credentials)
+     bucket = storage_client.get_bucket('embeddings-bella')
+
+     # Get both embeddings
+     blob = bucket.blob("gpt_cond_latent.npy")
+     blob.download_to_filename('assets/gpt_cond_latent.npy')
+     blob = bucket.blob("speaker_embedding.npy")
+     blob.download_to_filename('assets/speaker_embedding.npy')
+
+
+ def _load_array(filename):
+     """
+     Opens a pickled file and returns its contents; used for the numpy embedding files
+     """
+     with open(filename, 'rb') as f:
+         return pickle.load(f)
+
+
+ # Get embeddings
+ _download_starting_files()
+ os.environ['COQUI_TOS_AGREED'] = '1'
+
+ # Used to generate audio based on a sample
+ nltk.download('punkt')
+ model_path = os.path.join("tts_model")
+
+ config = XttsConfig()
+ config.load_json(os.path.join(model_path, "config.json"))
+
+ model = Xtts.init_from_config(config)
+ model.load_checkpoint(
+     config,
+     checkpoint_path=os.path.join(model_path, "model.pth"),
+     vocab_path=os.path.join(model_path, "vocab.json"),
+     eval=True,
+     use_deepspeed=True,
+ )
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ model.to(device)
+
+ # Speaker latent
+ path_latents = 'assets/gpt_cond_latent.npy'
+ gpt_cond_latent = _load_array(path_latents)
+
+ # Speaker embedding
+ path_embedding = 'assets/speaker_embedding.npy'
+ speaker_embedding = _load_array(path_embedding)
+
+
+ def get_audio(text: str, language: str = 'es') -> gr.Audio:
+     """
+     Generates an audio file (output.wav) from the given text and language and
+     returns it wrapped in a gr.Audio component
+     :param text: used to generate the audio
+     :param language: 'es', 'en' or 'pt'
+     :return: gr.Audio with the generated audio
+     """
+     # Creates an audio with the answer and saves it as output.wav
+     _save_audio(text, language)
+
+     return gr.Audio(value='output.wav', interactive=False, visible=True)
+
+
+ def _save_audio(answer: str, language: str) -> None:
+     """
+     Splits the answer into sentences, cleans them and creates an audio for each one, then concatenates
+     all the audios and saves them into a file (output.wav)
+     """
+     # Split the answer into sentences and clean it
+     sentences = _get_clean_answer(answer, language)
+
+     # Get the voice of each sentence
+     audio_segments = []
+     for sentence in sentences:
+         audio_stream = _get_voice(sentence, language)
+         audio_stream = torch.tensor(audio_stream)
+         audio_segments.append(audio_stream)
+
+     # Concatenate and save all audio segments
+     concatenated_audio = torch.cat(audio_segments, dim=0)
+     torchaudio.save('output.wav', concatenated_audio.unsqueeze(0), 24000)
+
+
+ def _get_voice(sentence: str, language: str) -> np.ndarray:
+     """
+     Returns a numpy array with a wav of an audio with the given sentence and language
+     """
+     out = model.inference(
+         sentence,
+         language=language,
+         gpt_cond_latent=gpt_cond_latent,
+         speaker_embedding=speaker_embedding,
+         temperature=0.1
+     )
+     return out['wav']
+
+
+ def _get_clean_answer(answer: str, language: str) -> list[str]:
+     """
+     Returns a list of sentences of the answer. It also removes links
+     """
+     # Replace the links with a spoken phrase in the target language
+     if language == 'en':
+         clean_answer = re.sub(r'http[s]?://\S+', 'the following link', answer)
+         max_characters = 250
+     elif language == 'es':
+         clean_answer = re.sub(r'http[s]?://\S+', 'el siguiente link', answer)
+         max_characters = 239
+     else:
+         clean_answer = re.sub(r'http[s]?://\S+', 'o seguinte link', answer)
+         max_characters = 203
+
+     # Change the name from Bella to Bela
+     clean_answer = clean_answer.replace('Bella', 'Bela')
+
+     # Remove Florida and zipcode
+     clean_answer = re.sub(r', FL \d+', "", clean_answer)
+
+     # Split the answer into sentences with nltk and make sure they are shorter than the maximum possible
+     # characters
+     split_sentences = sent_tokenize(clean_answer)
+     sentences = []
+     for sentence in split_sentences:
+         if len(sentence) > max_characters:
+             sentences.extend(_split_sentence(sentence, max_characters))
+         else:
+             sentences.append(sentence)
+
+     return sentences
+
+
+ def _split_sentence(sentence: str, max_characters: int) -> list[str]:
+     """
+     Returns the sentence split into shorter parts. The split point is the comma nearest
+     to the middle of the sentence; if there is no comma, a space is used, or simply the
+     middle. If the resulting parts are still too long, they are split again recursively
+     """
+     # Get index of each comma
+     sentences = []
+     commas = [i for i, c in enumerate(sentence) if c == ',']
+
+     # No commas, search for spaces
+     if len(commas) == 0:
+         commas = [i for i, c in enumerate(sentence) if c == ' ']
+
+     # No commas or spaces, split it in the middle
+     if len(commas) == 0:
+         sentences.append(sentence[:len(sentence) // 2])
+         sentences.append(sentence[len(sentence) // 2:])
+         return sentences
+
+     # Nearest index to the middle
+     split_point = min(commas, key=lambda x: abs(x - (len(sentence) // 2)))
+
+     if sentence[split_point] == ',':
+         left = sentence[:split_point]
+         right = sentence[split_point + 2:]
+     else:
+         left = sentence[:split_point]
+         right = sentence[split_point + 1:]
+
+     if len(left) > max_characters:
+         sentences.extend(_split_sentence(left, max_characters))
+     else:
+         sentences.append(left)
+     if len(right) > max_characters:
+         sentences.extend(_split_sentence(right, max_characters))
+     else:
+         sentences.append(right)
+
+     return sentences
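
_split_sentence recurses until every chunk fits under max_characters, preferring the comma closest to the middle, then spaces, then a hard midpoint cut. A quick check of that behaviour (run inside this module, since it needs _split_sentence in scope; the 40-character limit is illustrative, the real limits are 250/239/203):

```python
sentence = ('XTTS keeps prosody stable on short inputs, so long sentences are '
            'split, ideally at a comma near the middle')

for part in _split_sentence(sentence, 40):
    print(len(part), repr(part))
# Every printed length should be <= 40; joined back together, the parts read as
# the original sentence minus the commas/spaces consumed at the split points
```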
audio_model.py ADDED
@@ -0,0 +1,39 @@
+ import os
+ import requests
+ from tqdm import tqdm
+
+
+ def _download_file(url, destination):
+     response = requests.get(url, stream=True)
+     total_size_in_bytes = int(response.headers.get('content-length', 0))
+     block_size = 1024
+
+     progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+
+     with open(destination, 'wb') as file:
+         for data in response.iter_content(block_size):
+             progress_bar.update(len(data))
+             file.write(data)
+
+     progress_bar.close()
+
+
+ def download_model():
+     # Define files and their corresponding URLs
+     files_to_download = {
+         'LICENSE.txt': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/LICENSE.txt?download=true',
+         'README.md': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/README.md?download=true',
+         'config.json': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/config.json?download=true',
+         'model.pth': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/model.pth?download=true',
+         'vocab.json': 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2/vocab.json?download=true',
+     }
+
+     if not os.path.exists("tts_model"):
+         os.makedirs("tts_model")
+
+     # Download the model files (callers are expected to guard against re-downloading)
+     print("[COQUI TTS] STARTUP: Checking Model is Downloaded.")
+     for filename, url in files_to_download.items():
+         destination = f'tts_model/{filename}'
+         print(f"[COQUI TTS] STARTUP: Downloading {filename}...")
+         _download_file(url, destination)
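
Note that download_model() re-downloads every file on each call; the only guard is the os.path.exists('tts_model') check in utils.py, so a run interrupted mid-download leaves a directory that is never repaired. A per-file guard is a small optional hardening (a sketch under that assumption, not part of the commit):

```python
import os
import requests
from tqdm import tqdm

BASE_URL = 'https://huggingface.co/coqui/XTTS-v2/resolve/v2.0.2'
FILENAMES = ['LICENSE.txt', 'README.md', 'config.json', 'model.pth', 'vocab.json']


def download_model_if_missing():
    """Variant of download_model() that skips files already present on disk."""
    os.makedirs('tts_model', exist_ok=True)
    for filename in FILENAMES:
        destination = f'tts_model/{filename}'
        # NOTE: a partially written file is also skipped; delete tts_model/
        # after a failed run to force a clean re-download
        if os.path.exists(destination):
            continue
        response = requests.get(f'{BASE_URL}/{filename}?download=true', stream=True)
        total = int(response.headers.get('content-length', 0))
        with open(destination, 'wb') as file, tqdm(total=total, unit='iB', unit_scale=True) as bar:
            for chunk in response.iter_content(1024):
                bar.update(len(chunk))
                file.write(chunk)
```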
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ pinecone-client==2.2.4
+ python-dotenv==1.0.0
+ google-cloud-storage==2.13.0
+ requests==2.31.0
+ tqdm==4.66.1
+ nltk==3.8.1
+ # deepspeed==0.12.3
+ torch==2.1.1
+ torchaudio==2.1.1
+ TTS==0.21.2
+ numpy==1.22.0
utils.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import gradio as gr
+ import audio_model
+ if not os.path.exists('tts_model'):  # Get TTS model
+     audio_model.download_model()
+ import audio
+
+
+ def add_data_table(table: list[list[str]], first: str, last: str = None):
+     """
+     Adds the data to the table. Some entries consist of two columns, others of only one,
+     so the new row and the returned values differ accordingly.
+     """
+     if last is None:
+         new_row = ['❌', first]
+         new_value = ''
+     elif first == 'Saludo' or first == 'Despedida' or first == 'Error':
+         new_row = ['❌', first, last]
+         new_value = '', first
+     else:
+         new_row = ['❌', first, last]
+         new_value = '', ''
+
+     # The table is empty, do not append the row but replace the first one
+     if all(column == '' for column in table[0]):
+         table[0] = new_row
+
+     # Add the new data
+     else:
+         table.append(new_row)
+
+     if last is None:
+         return table, new_value
+     return table, *new_value
+
+
+ def remove_data_table(table: list[list[str]], evt: gr.SelectData):
+     """
+     Deletes a row of the table if the selected column is the first one
+     """
+     # The clicked column is not the first one (the one with the X), do nothing
+     if evt.index[1] != 0:
+         return table
+
+     # The table only has one row, do not delete it, just restore the default one
+     if len(table) == 1:
+         table[0] = ['' for _ in range(len(table[0]))]
+
+     # Delete the row
+     else:
+         del table[evt.index[0]]
+     return table
+
+
+ def create_chatbot(
+         client: str, language: list[str], chatbot: str, messages_table, random_table, questions_table,
+ ):
+     # Set up general info
+     client_name = client.lower().replace(' ', '-')
+     chatbot_name = chatbot.lower()
+
+     # Create prerecorded media (greeting, goodbye, error, random and waiting)
+     for message in messages_table:
+         pass
+
+     # get_audio()
+
+     # Set up vectorstore
+
+     # Upload data to bucket in GCP (videos, audio, prompts and csv files)
+
+     # Change text in the button
+     return gr.Button(value='Chatbot created!!!', interactive=True)
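
create_chatbot is still a stub: the loop over messages_table does nothing yet, and the vectorstore and upload steps are only comments. For the first step, one plausible completion generates a prerecorded audio per greeting/goodbye/error row via audio.get_audio; the helper name and file layout below are assumptions for illustration, not part of this commit:

```python
import os

import audio


def _create_prerecorded_audios(client_name: str, messages_table: list[list[str]]) -> list[str]:
    # Hypothetical helper: rows look like ['❌', 'Saludo', 'Hola, soy Bella...']
    # as built by add_data_table; audio.get_audio() writes the speech to output.wav
    paths = []
    os.makedirs(f'media/{client_name}', exist_ok=True)
    for i, (_, message_type, message) in enumerate(messages_table):
        audio.get_audio(message)  # defaults to Spanish ('es')
        destination = f'media/{client_name}/{message_type.lower()}_{i}.wav'
        os.replace('output.wav', destination)  # move the generated file into place
        paths.append(destination)
    return paths
```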