import streamlit as st
import tempfile
import os  # used for the temporary-file cleanup at the end of the script
import soundfile as sf
from transformers import pipeline

# Load models (device=-1 keeps every pipeline on the CPU)
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny.en", device=-1)
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=-1)
question_generator = pipeline("text2text-generation", model="google/t5-efficient-tiny", device=-1)

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])

if uploaded_file is not None:
    # Save the uploaded file to a temporary file
    with tempfile.NamedTemporaryFile(delete=False) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        # Load the audio with SoundFile (also validates that the upload is decodable audio)
        audio_data, sample_rate = sf.read(temp_audio_path)

        # Transcribe the audio; chunk long recordings into 30-second windows so a full lecture is covered
        lecture_text = transcriber(temp_audio_path, chunk_length_s=30)["text"]

        # Estimate the transcript length in tokens (roughly 0.75 words per token) and scale
        # the summary length to it; truncation=True below clips inputs that exceed
        # DistilBART's 1024-token encoder limit.
        num_words = len(lecture_text.split())
        approx_tokens = int(num_words / 0.75)
        summary_max_length = min(max(approx_tokens // 4, 30), 256)
        summary_min_length = max(summary_max_length // 4, 10)

        # Summarization (max_length / min_length bound the generated summary, not the input)
        summary = summarizer(
            lecture_text,
            max_length=summary_max_length,
            min_length=summary_min_length,
            truncation=True
        )

        # Clean up the summary text: drop an unfinished trailing sentence
        summary_text = summary[0]["summary_text"]
        if not summary_text.endswith((".", "!", "?")):
            last_period_index = summary_text.rfind(".")
            if last_period_index != -1:
                summary_text = summary_text[:last_period_index + 1]

        # Question generation
        context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
        questions = question_generator(context, max_new_tokens=50)

        # Output
        st.write("\nSummary:\n", summary_text)
        for question in questions:
            st.write(question["generated_text"])  # Output the generated questions
    except Exception as e:
        st.error(f"Error during processing: {str(e)}")
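    finally:
        # Assumed addition (not in the original script): NamedTemporaryFile(delete=False)
        # leaves the file on disk, so remove it once processing is done.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)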