import yt_dlp import os import streamlit as st import transformers from transformers import pipeline from transformers import AutoTokenizer import nltk from PIL import Image import torch icon = Image.open("Traçado laranja #f1863d.png") st.set_page_config( page_title = "Turing Videos", page_icon = icon, layout = "wide", initial_sidebar_state = "auto", ) #@st.cache_resource def download_audio(link): with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': '%(id)s.mp3'}) as video: info_dict = video.extract_info(link, download = True) id = info_dict['id'] video.download(link) return id #Load Whisper pipeline via HuggingFace @st.cache_resource def load_whisper(): return pipeline("automatic-speech-recognition", model="openai/whisper-tiny", chunk_length_s=30, ) #Load Extractive Summarizer pipeline via HuggingFace @st.cache_resource def load_extractive(): return pipeline("summarization", model = "NotXia/longformer-bio-ext-summ", tokenizer = AutoTokenizer.from_pretrained("NotXia/longformer-bio-ext-summ"), trust_remote_code = True, ) #Load QA pipeline via HuggingFace @st.cache_resource def load_qa(): return pipeline("question-answering", model="rsvp-ai/bertserini-bert-base-squad" ) #Download punkt function from nltk @st.cache_data def load_nltk(): nltk.download("punkt") #Make the ASR task @st.cache_data def audio_speech_recognition(_model_pipeline, video_id): return _model_pipeline(video_id + ".mp3", batch_size=64)["text"].strip() #Make the Summarization task @st.cache_data def text_summarization(_model_pipeline, full_text, ratio): sentences = nltk.sent_tokenize(full_text) extractive_sentences = _model_pipeline({"sentences": sentences}, strategy="ratio", strategy_args=ratio) extractive_text = " ".join(extractive_sentences[0]) return extractive_text.strip() #Make the QA task @st.cache_data def answer_questions(_model_pipeline, full_text, questionings): answers = [] for question in questionings: result = _model_pipeline(question=question, context=full_text) answers.append(result["answer"]) return answers def main(): header = st.container() model = st.container() model_1, model_2 = st.columns(2) with st.sidebar: st.title(":red[Turing]Videos") with st.form("data_collection"): language = st.selectbox('Qual a linguagem do seu modelo?', ('Inglês (en)', 'Português (pt)', 'Outra') ) link = st.text_area(label="Coloque o link do seu vídeo do YouTube:", height=25, placeholder="Digite seu link...") compression_rate = st.slider(label="Selecione a taxa de compressão:", min_value=0.05, max_value=0.35, value=0.25, step=0.05 ) questions = st.text_area(label="Coloque suas perguntas separadas por vírgula!", height=50, placeholder="Digite suas perguntas..." ).split(",") submitted = st.form_submit_button("Submit") if submitted: st.success('Dados coletados!', icon="✅") else: st.error('Dados ainda não coletados!', icon="🚨") with header: st.title(":red[Turing]Videos") st.subheader("Este projeto utiliza técnicas de inteligência artificial para simplificar e acelerar a compreensão de conteúdo audiovisual.", divider = "red" ) with model: if submitted: with st.spinner("Carregando modelos..."): if language == "Inglês (en)": id = download_audio(link) load_nltk() whisper = load_whisper() extractive = load_extractive() qa_model = load_qa() elif language == "Português (pt)": st.header("Modelo ainda não implementado.") else: st.header("Erro na seleção de linguagem.") with st.spinner("Transcrevendo texto..."): transcript_text = audio_speech_recognition(whisper, id) with model_1: st.header("Texto Sumarizado:") with st.spinner("Carregando sumarização..."): summary = text_summarization(extractive, transcript_text, compression_rate) st.subheader(summary) with model_2: st.header("Resposta das perguntas:") with st.spinner("Carregando respostas..."): answers = answer_questions(qa_model, transcript_text, questions) for i in range(len(answers)): st.subheader(questions[i]) st.subheader(answers[i]) st.write("\n\n") main()