retrAIced / pages /Summarization.py
JavierGon12's picture
Insert all files
d8e07ba
raw
history blame
4.14 kB
from transformers import BartForConditionalGeneration, BartTokenizer
import streamlit as st
import torch
from transformers import AutoProcessor, WhisperForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torchaudio
from transformers import pipeline
# Streamlit page: upload an audio file, transcribe it with a speech-recognition
# pipeline, then summarize the transcript with a seq2seq model.

# Sample rate (Hz) every upload is resampled to before transcription.
TARGET_SAMPLE_RATE = 16000


def transcribe_audio(audio_file, model_name="facebook/wav2vec2-base-960h"):
    """Transcribe an audio file with a Hugging Face speech-recognition pipeline.

    Args:
        audio_file: Anything ``torchaudio.load`` accepts (here, the object
            returned by ``st.file_uploader``).
        model_name: Hub id of the speech-recognition checkpoint to use.

    Returns:
        The raw pipeline output; callers read the transcript from its
        ``"text"`` key.
    """
    waveform, sample_rate = torchaudio.load(audio_file)

    # Down-mix multi-channel audio to a single channel by averaging.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Resample only when the upload is not already at the target rate.
    if sample_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(sample_rate, TARGET_SAMPLE_RATE)
        waveform = resampler(waveform)

    asr_pipeline = pipeline("automatic-speech-recognition", model=model_name)
    # The pipeline is fed the first (only) channel as a 1-D numpy array.
    return asr_pipeline(waveform.numpy()[0])


def summarize_text(text, model, tokenizer, max_length=100):
    """Summarize ``text`` with a seq2seq ``model`` and its ``tokenizer``.

    Args:
        text: Text to summarize.
        model: Object exposing ``generate(input_ids, ...)``.
        tokenizer: Object exposing ``encode(...)`` and ``decode(...)``.
        max_length: Maximum length passed to ``model.generate``.

    Returns:
        The decoded summary, or ``""`` when ``text`` is empty or whitespace.
    """
    # Nothing was transcribed: do not ask the model to summarize an empty input.
    if not text or not text.strip():
        return ""

    # truncation=True keeps long transcripts within the tokenizer's maximum
    # input length instead of overflowing the model.
    input_ids = tokenizer.encode(text, return_tensors="pt", truncation=True)
    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _load_english_summarizer():
    """Return the ``(tokenizer, model)`` pair used for English summaries."""
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")
    return tokenizer, model


def _load_spanish_summarizer():
    """Return the ``(tokenizer, model)`` pair used for Spanish summaries."""
    # NOTE(review): "facebook/mbart-large-50" looks like a base checkpoint
    # rather than a summarization fine-tune - confirm it is the intended model.
    tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-50", src_lang="es_XX")
    model = AutoModelForSeq2SeqLM.from_pretrained("facebook/mbart-large-50")
    return tokenizer, model


# Everything that differs between languages: the speech-recognition
# checkpoint, how to load the summarizer, and the strings shown or logged.
LANGUAGE_CONFIG = {
    "English": {
        "asr_model": "facebook/wav2vec2-base-960h",
        "load_summarizer": _load_english_summarizer,
        "transcript_label": "Transcription",
        "summary_header": "Here is your summary!",
    },
    "Spanish": {
        "asr_model": "Sandiago21/whisper-large-v2-spanish",
        "load_summarizer": _load_spanish_summarizer,
        "transcript_label": "Aqui tienes tu transcripción:",
        "summary_header": "Aqui tienes tu resumen!",
    },
}


def main():
    """Render the page: upload widget, language selector, transcript summary."""
    # Load your own audio file
    audio = st.file_uploader(label="Upload your audio file here", type=["wav", "mp3"])
    option_language = st.selectbox(
        "Select the language of your audio",
        ("English", "Spanish", "German", "French", "Chinese"),
    )

    if audio is None:
        st.write("Please upload the audio in the box above")
        return

    config = LANGUAGE_CONFIG.get(option_language)
    if config is None:
        # The selector offers languages that have no pipeline configured;
        # say so instead of silently rendering nothing.
        st.write(f"Summarization is not available for {option_language} yet.")
        return

    transcription = transcribe_audio(audio, config["asr_model"])
    # Logged to the server console, not shown on the page.
    print(config["transcript_label"], transcription)

    tokenizer_summary, model_summary = config["load_summarizer"]()
    summary = summarize_text(transcription["text"], model_summary, tokenizer_summary)
    st.write(config["summary_header"])
    st.write(summary)


# Streamlit executes page scripts under the module name "__main__", so the
# page still renders on every run while the helpers stay importable.
if __name__ == "__main__":
    main()