# Cryptic
# test
# eb91ddc
import streamlit as st
import tempfile
import soundfile as sf
from transformers import pipeline
# Load models
@st.cache_resource
def _load_pipeline(task: str, model_name: str):
    """Build the Hugging Face pipeline for *task* once and reuse it.

    Streamlit re-executes this script from the top on every user
    interaction. Without caching, each rerun would construct all three
    models again; ``st.cache_resource`` keeps one shared instance per
    ``(task, model_name)`` pair for the lifetime of the server process.

    Args:
        task: Pipeline task identifier, e.g. ``"summarization"``.
        model_name: Model checkpoint id to load for that task.

    Returns:
        The constructed ``transformers`` pipeline object.
    """
    # device=-1 is kept from the original configuration (CPU inference).
    return pipeline(task, model=model_name, device=-1)


transcriber = _load_pipeline("automatic-speech-recognition", "openai/whisper-tiny.en")
summarizer = _load_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
question_generator = _load_pipeline("text2text-generation", "google/t5-efficient-tiny")
# --- Tunables for the processing flow below ---------------------------------
# Maximum number of tokens the summarization model accepts as input.
SUMMARIZER_MAX_INPUT_TOKENS = 1024
# Rule of thumb for English text: one token is roughly 0.75 words, so
# tokens ~= words / 0.75.
WORDS_PER_TOKEN = 0.75
# The summary must be at least this fraction of the (estimated) input length.
MIN_SUMMARY_FRACTION = 0.1
# Lower bound on the summary token budget so very short transcripts still
# leave room for a complete sentence.
MIN_SUMMARY_BUDGET = 32
# Whisper works on 30-second windows; chunking lets the pipeline transcribe
# recordings longer than a single window instead of only the beginning.
TRANSCRIBE_CHUNK_SECONDS = 30
SENTENCE_ENDINGS = (".", "!", "?")

# Upload audio file
uploaded_file = st.file_uploader("Upload Audio", type=["wav", "mp3"])

if uploaded_file is not None:
    try:
        # Transcribing audio
        # The ASR pipeline accepts the raw bytes of an audio file, so the
        # upload is passed directly. This avoids writing a temporary file
        # that would otherwise have to be cleaned up after every run.
        lecture_text = transcriber(
            uploaded_file.getvalue(),
            chunk_length_s=TRANSCRIBE_CHUNK_SECONDS,
        )["text"]

        if not lecture_text.strip():
            # Nothing to summarize (silence, noise, or an undecodable take).
            st.warning("No speech could be transcribed from the uploaded audio.")
        else:
            # Preprocessing data
            # Estimate how many tokens of the transcript the summarizer will
            # actually see (anything beyond the model limit is truncated by
            # `truncation=True` below).
            num_words = len(lecture_text.split())
            approx_input_tokens = min(
                int(num_words / WORDS_PER_TOKEN),
                SUMMARIZER_MAX_INPUT_TOKENS,
            )
            min_summary_tokens = int(approx_input_tokens * MIN_SUMMARY_FRACTION)
            # `max_length` limits the *generated* summary, not the input: a
            # summary should never be allowed to run longer than the text it
            # condenses. The floor keeps the budget above `min_length`.
            max_summary_tokens = max(approx_input_tokens, MIN_SUMMARY_BUDGET)

            # Summarization
            summary = summarizer(
                lecture_text,
                max_length=max_summary_tokens,
                min_length=min_summary_tokens,
                truncation=True,  # clip over-long transcripts to the model's input limit
            )
            summary_text = summary[0]["summary_text"]

            # Clean up the summary text
            # Generation can stop mid-sentence when it hits the token budget;
            # drop the dangling fragment after the last complete sentence.
            if not summary_text.endswith(SENTENCE_ENDINGS):
                last_sentence_end = max(
                    summary_text.rfind(mark) for mark in SENTENCE_ENDINGS
                )
                if last_sentence_end != -1:
                    summary_text = summary_text[:last_sentence_end + 1]

            # Questions Generation
            context = f"Based on the following lecture summary: {summary_text}, generate some relevant practice questions."
            questions = question_generator(context, max_new_tokens=50)

            # Output
            st.write("\nSummary:\n", summary_text)
            for question in questions:
                st.write(question["generated_text"])  # Output the generated questions
    except Exception as e:
        # Top-level UI boundary: surface any model/decoding failure to the
        # user instead of crashing the Streamlit script.
        st.error(f"Error during processing: {str(e)}")