# NOTE(review): the following header was Hugging Face file-viewer residue
# (author "mrolando", commit "766e636" "fixed typo", raw/history-blame links,
# file size 5.21 kB) accidentally pasted into the source; kept as a comment
# so the file remains valid Python.
from pytube import YouTube
from transformers import pipeline
import openai
from dotenv import load_dotenv
import os
import gradio as gr
import base64
# Embed the Ceibal logo as base64 so it can be inlined in the UI's HTML header.
with open("Iso_Logotipo_Ceibal.png", "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode()

# Load environment variables from the local .env file, then configure OpenAI.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]
def download_audio_from_youtube(video_url, output_path):
    """Download the audio-only stream of a YouTube video.

    Args:
        video_url: Full URL of the YouTube video.
        output_path: Directory where the audio file is saved.

    Returns:
        The default filename of the downloaded audio stream (relative to
        ``output_path``).

    Raises:
        ValueError: If the video exposes no audio-only stream.
        Exception: Propagates any pytube download/parsing error.
    """
    youtube = YouTube(video_url)
    # .first() returns None when no audio-only stream exists; the original
    # would then crash with an opaque AttributeError on .download().
    audio_stream = youtube.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError("No audio-only stream available for this video.")
    print("Downloading audio")
    audio_stream.download(output_path)
    # Original message was truncated ("Audio downloaded in"); include the path.
    print("Audio downloaded in", output_path)
    return audio_stream.default_filename
    # Removed the no-op `except Exception as e: raise e` wrapper — re-raising
    # unchanged adds nothing; errors now propagate naturally.
# Shared Whisper ASR pipeline, configured to transcribe Spanish audio.
whisper_asr = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    generate_kwargs={"task": "transcribe", "language": "spanish"},
)
def transcribe_audio_with_whisper(audio_file_path):
    """Transcribe an audio file with the shared Whisper pipeline.

    Args:
        audio_file_path: Path to the audio file to transcribe.

    Returns:
        The pipeline output (a dict containing a ``"text"`` key).

    Raises:
        Exception: Propagates any pipeline failure so callers can handle it.
    """
    # Bug fix: the original `except Exception as e: return e` RETURNED the
    # exception object, so the caller's try/except never fired and the truthy
    # exception object later crashed on `transcription["text"]`. Let errors
    # propagate instead.
    return whisper_asr(audio_file_path)
def add_new_message(message, chat_history):
    """Build the OpenAI chat message list for one request.

    Args:
        message: The new user message to append last.
        chat_history: Iterable of (user, assistant) turn pairs.

    Returns:
        A list of role/content dicts: the system prompt, every prior turn
        expanded into user/assistant entries, then the new user message.
    """
    messages = [
        {
            "role": "system",
            "content": "Sos Albert Einstein y tendr谩s que responder preguntas que te har谩n ni帽os de escuela, las respuestas tienen que ser c贸mo si hablaras con albert Einstein y con la informaci贸n de su vida. Las respuestas tienen que estar orientadas a ni帽os entre 9 y 10 a帽os.",
        }
    ]
    for user_msg, bot_msg in chat_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    return messages
def respond(message, choice_age):
    """Ask ChatGPT for a list of (at most 5) questions about a transcript.

    Args:
        message: Transcript text of the video.
        choice_age: Age-range label used to tune question difficulty.

    Returns:
        The assistant's reply text.
    """
    chat = [
        {
            "role": "system",
            # Bug fix: the original literal contained "{choice_age}" but was
            # not an f-string, so the selected age range was never actually
            # sent to the model.
            "content": f"Hola, tu funci贸n ser谩 devolver una lista de preguntas (m谩ximo 5) acerca de la transcipci贸n de un video, son videos educativos. Las preguntas deben estar formuladas c贸mo para ni帽os{choice_age}.",
        },
        {
            "role": "user",
            "content": "Lo siguiente es el transcripto del video: " + message,
        },
    ]
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat,
        temperature=0.5,
        max_tokens=1000,
    )
    return response.choices[0].message.content
def do_todo(url, choice_age, progress=gr.Progress()):
    """End-to-end pipeline: download audio, transcribe it, generate questions.

    Args:
        url: YouTube video URL entered in the UI.
        choice_age: Age range selected in the UI.
        progress: Gradio progress reporter (injected by Gradio).

    Returns:
        Tuple ``(answer, transcription)`` — the generated questions and the
        raw Whisper output dict.

    Raises:
        gr.Error: On download or transcription failure, so the UI shows a
            meaningful message instead of silently returning None.
    """
    output_path = "./"
    progress(0, "Descargando video...")
    # Step 1: Download audio from the YouTube video.
    try:
        audio_file = download_audio_from_youtube(url, output_path)
    except Exception as e:  # was a bare `except:`; narrow and log the cause
        print(e)
        raise gr.Error(
            "Fallo en la descarga del video! Puede ser porque el link est茅 mal o est谩 restringido el acceso."
        )
    progress(0.25, "Video descargado, generando transcripci贸n...")
    if not audio_file:
        # Original only printed and fell through, returning None and
        # producing an opaque Gradio failure; surface it explicitly.
        print("Audio download failed.")
        raise gr.Error(
            "Fallo en la descarga del video! Puede ser porque el link est茅 mal o est谩 restringido el acceso."
        )
    # Step 2: Transcribe the downloaded audio using Whisper ASR.
    try:
        transcription = transcribe_audio_with_whisper(audio_file)
    except Exception as e:
        print(e)
        raise gr.Error("Fallo en la transcripic贸n!")
    if not transcription:
        print("Transcription failed.")
        raise gr.Error("Fallo en la transcripic贸n!")
    print("Transcription: ", transcription)
    progress(0.75, "Preguntando a ChatGPT...")
    # Step 3: Ask ChatGPT for age-appropriate questions about the transcript.
    answer = respond(transcription["text"], choice_age)
    return answer, transcription
# gr.Interface(fn=do_todo, inputs="text", outputs="text").launch()
# Gradio UI: age selector + YouTube link on the left; generated questions
# and the raw transcript (collapsible) on the right.
with gr.Blocks() as demo:
    # Header with title and the base64-embedded Ceibal logo.
    gr.Markdown(
        """
<center>
<h1>
Uso de AI para la generaci贸n de preguntas acerca de la transcripci贸n de un video.
</h1>
<img src='data:image/jpg;base64,{}' width=200px>
</center>
""".format(encoded_image)
    )
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                "Primero debes ingresar el rango de edades para formular las preguntas:"
            )
            choice_age = gr.Radio(
                [
                    ("<7", "menores de 7 a帽os"),
                    ("7-12", "entre 7 y 12 a帽os"),
                    ("12-15", "entre 12 y 15 a帽os"),
                    (">15", "mayores de 15"),
                ],
                label="Rango de edad:",
            )
            gr.Markdown("Segundo debes ingresar el link del video de Youtube:")
            prompt = gr.Textbox(label="Link")
            btn = gr.Button("Generar")
        with gr.Column():
            output = gr.Textbox(label="Resultado")
            with gr.Accordion("Ver transcripci贸n:", open=False):
                transcription = gr.Textbox("", lines=7)
    # Wire the button to the end-to-end pipeline.
    btn.click(
        fn=do_todo,
        inputs=[prompt, choice_age],
        outputs=[output, transcription],
    )

demo.queue()
demo.launch()