"""Streamlit app: transcribe an uploaded lecture, summarize it, and quiz on it.

Flow: the user uploads a WAV/MP3 file, the audio is transcribed with a
Whisper pipeline, the transcript is summarized with DistilBART, and a small
T5 model is prompted with the summary to produce practice questions.
"""

import os
import tempfile

import soundfile as sf
import streamlit as st
from transformers import pipeline

# Punctuation that counts as the end of a sentence when tidying the summary.
SENTENCE_ENDINGS = (".", "!", "?")


@st.cache_resource
def load_models():
    """Build the three CPU pipelines once and reuse them across reruns.

    Streamlit re-executes the script on every interaction; caching the
    pipelines avoids re-instantiating the models each time.

    Returns:
        A ``(transcriber, summarizer, question_generator)`` tuple.
    """
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny.en",
        device=-1,
    )
    summarizer = pipeline(
        "summarization",
        model="sshleifer/distilbart-cnn-12-6",
        device=-1,
    )
    question_generator = pipeline(
        "text2text-generation",
        model="google/t5-efficient-tiny",
        device=-1,
    )
    return transcriber, summarizer, question_generator


def _min_summary_length(num_words):
    """Return the minimum summary length passed to the summarizer.

    Heuristic: cap the word count at 1024, scale it by 0.75, then take 10%
    of that (each step truncated to an int). Always in the range 0..76.
    """
    capped_words = min(num_words, 1024)
    return int(int(capped_words * 0.75) * 0.1)


def _trim_to_last_sentence(text):
    """Drop a trailing unfinished sentence from *text*.

    If *text* already ends with sentence punctuation, or contains none at
    all, it is returned unchanged.
    """
    if text.endswith(SENTENCE_ENDINGS):
        return text
    last_end = max(text.rfind(mark) for mark in SENTENCE_ENDINGS)
    if last_end == -1:
        return text
    return text[: last_end + 1]


def main():
    """Render the upload widget and run the transcribe/summarize/quiz flow."""
    transcriber, summarizer, question_generator = load_models()

    uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])
    if uploaded_file is None:
        return

    # The pipelines take a file path, so persist the upload to disk first.
    # Keep the original extension so decoders can use it as a format hint.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
        temp_audio_file.write(uploaded_file.getbuffer())
        temp_audio_path = temp_audio_file.name

    try:
        # Cheap sanity check that the file is readable audio; only the header
        # is inspected, the samples themselves are not needed here.
        sf.info(temp_audio_path)

        lecture_text = transcriber(temp_audio_path)["text"]
        if not lecture_text.strip():
            st.warning("No speech could be transcribed from the uploaded audio.")
            return

        num_words = len(lecture_text.split())
        summary = summarizer(
            lecture_text,
            max_length=1024,  # upper bound on the generated summary length
            min_length=_min_summary_length(num_words),
            truncation=True,  # over-long transcripts are cut by the tokenizer
        )
        summary_text = _trim_to_last_sentence(summary[0]["summary_text"])

        context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
        questions = question_generator(context, max_new_tokens=50)

        st.write("\nSummary:\n", summary_text)
        for question in questions:
            st.write(question["generated_text"])
    except Exception as e:
        # Top-level boundary: surface any pipeline failure in the UI.
        st.error(f"Error during processing: {e}")
    finally:
        # The temp file was created with delete=False, so remove it ourselves.
        try:
            os.unlink(temp_audio_path)
        except OSError:
            pass


if __name__ == "__main__":
    main()