Imageye committed on
Commit
9f2532f
1 Parent(s): 998048f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -145
app.py CHANGED
@@ -1,132 +1,97 @@
 
1
  import streamlit as st
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
  import re
4
  import tempfile
5
  import os
 
 
6
  import warnings
7
- from groq import Groq
8
 
9
- # Set up Groq client
10
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
11
 
12
- # Supported file types for Groq API
13
- SUPPORTED_FILE_TYPES = ["mp3", "mp4", "mpeg", "mpga", "m4a", "wav", "webm"]
14
-
15
- # Function to transcribe audio using Groq Whisper API
16
  def transcribe_audio(file_path):
17
- file_extension = os.path.splitext(file_path)[1][1:]
18
- if file_extension not in SUPPORTED_FILE_TYPES:
19
- return f"Error: Unsupported file type '{file_extension}'. Please upload a valid file."
20
-
21
- try:
22
- with open(file_path, "rb") as file:
23
- transcription = client.audio.transcriptions.create(
24
- file=(file_path, file.read()),
25
- model="whisper-large-v3",
26
- )
27
- return transcription.text
28
- except Exception as e:
29
- return f"Error during transcription: {e}"
30
 
31
- # Function to get transcript from YouTube
32
  def get_transcript(url):
33
  try:
34
  video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
35
- if not video_id_match:
 
 
36
  return "Error: Invalid YouTube URL"
37
- video_id = video_id_match.group(1)
38
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
39
  transcript_text = ' '.join([entry['text'] for entry in transcript])
40
  return transcript_text
41
  except Exception as e:
42
  return str(e)
43
 
44
- # Function to summarize text using Groq API
45
  def summarize_text(text):
46
- try:
47
- response = client.chat.completions.create(
48
- messages=[
49
- {
50
- "role": "user",
51
- "content": f"Summarize the following text:\n\n{text}"
52
- }
53
- ],
54
- model="llama3-8b-8192",
55
- )
56
- summary = response.choices[0].message.content.strip()
57
- return summary
58
- except Exception as e:
59
- return f"Error summarizing text: {e}"
60
 
61
- # Function to generate quiz questions using Groq API
62
  def generate_quiz_questions(text):
63
- try:
64
- response = client.chat.completions.create(
65
- messages=[
66
- {
67
- "role": "user",
68
- "content": f"Generate quiz questions for the following text:\n\n{text}"
69
- }
70
- ],
71
- model="llama3-8b-8192",
72
- )
73
- quiz_questions = response.choices[0].message.content.strip()
74
- return quiz_questions
75
- except Exception as e:
76
- return f"Error generating quiz questions: {e}"
77
 
78
- # Function to parse quiz questions from generated text
79
  def parse_quiz_questions(quiz_text):
80
  questions = []
81
  question_blocks = quiz_text.split("\n\n")
82
- current_question = None
83
- current_choices = []
84
- correct_answer = None
85
-
86
  for block in question_blocks:
87
  lines = block.strip().split("\n")
88
- if lines:
89
- if re.match(r'^\d+\.', lines[0]): # This line is a question number
90
- if current_question and current_choices and correct_answer:
91
- questions.append({
92
- "question": current_question,
93
- "choices": current_choices,
94
- "correct_answer": correct_answer
95
- })
96
- current_question = lines[0]
97
- current_choices = lines[1:5]
98
- correct_answer = lines[-1].split(": ")[-1].strip() if len(lines) > 5 else None
99
- else: # This line is an answer
100
- correct_answer = lines[-1].split(": ")[-1].strip()
101
-
102
- # Add the last question if it exists
103
- if current_question and current_choices and correct_answer:
104
- questions.append({
105
- "question": current_question,
106
- "choices": current_choices,
107
- "correct_answer": correct_answer
108
- })
109
-
110
  return questions
111
 
112
- # Function to generate explanation for quiz answers using Groq API
113
  def generate_explanation(question, correct_answer, user_answer):
114
- try:
115
- response = client.chat.completions.create(
116
- messages=[
117
- {
118
- "role": "user",
119
- "content": f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
120
- }
121
- ],
122
- model="llama3-8b-8192",
123
- )
124
- explanation = response.choices[0].message.content.strip()
125
- return explanation
126
- except Exception as e:
127
- return f"Error generating explanation: {e}"
128
 
129
- # Function to check answers and provide feedback
130
  def check_answers(questions, user_answers):
131
  feedback = []
132
  correct_count = 0
@@ -152,15 +117,14 @@ def check_answers(questions, user_answers):
152
  })
153
  return feedback
154
 
155
- # Function to handle uploaded files
156
  def handle_uploaded_file(uploaded_file):
157
- file_path = tempfile.mktemp(suffix=os.path.splitext(uploaded_file.name)[1])
158
- with open(file_path, "wb") as f:
159
- f.write(uploaded_file.read())
160
- return file_path
161
 
162
- # Streamlit app layout and functionality
163
  st.title("YouTube Transcript Quiz Generator")
 
164
  st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")
165
 
166
  option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
@@ -178,52 +142,52 @@ if option == "YouTube URL":
178
  quiz_text = generate_quiz_questions(transcript_text)
179
  questions = parse_quiz_questions(quiz_text)
180
 
181
- if not questions:
182
- st.error("No valid quiz questions could be generated.")
183
- else:
184
- st.session_state.summary = summary
185
- st.session_state.questions = questions
186
- st.session_state.user_answers = {}
187
- st.session_state.generated_quiz = True
188
- else:
189
- st.error(transcript_text)
190
-
191
- if option == "Upload audio/video file":
192
- uploaded_file = st.file_uploader("Choose an audio or video file", type=SUPPORTED_FILE_TYPES)
 
 
 
 
 
 
 
193
  if uploaded_file:
194
- if st.button("Generate Quiz"):
195
- tmp_file_path = handle_uploaded_file(uploaded_file)
196
- transcript_text = transcribe_audio(tmp_file_path)
197
- os.remove(tmp_file_path)
198
- if "Error" not in transcript_text:
199
- summary = summarize_text(transcript_text)
200
- quiz_text = generate_quiz_questions(transcript_text)
201
- questions = parse_quiz_questions(quiz_text)
202
-
203
- if not questions:
204
- st.error("No valid quiz questions could be generated.")
205
- else:
206
- st.session_state.summary = summary
207
- st.session_state.questions = questions
208
- st.session_state.user_answers = {}
209
- st.session_state.generated_quiz = True
210
- else:
211
- st.error(transcript_text)
 
 
 
 
 
 
212
 
213
  if st.session_state.generated_quiz:
214
- st.write("## Summary")
215
- st.write(st.session_state.summary)
216
-
217
- st.write("## Quiz Questions")
218
- for i, question in enumerate(st.session_state.questions):
219
- st.write(f"### Question {i+1}")
220
- st.write(question['question'])
221
- st.session_state.user_answers[f"question_{i+1}"] = st.radio(
222
- label="",
223
- options=question['choices'],
224
- key=f"question_{i+1}"
225
- )
226
-
227
  if st.button("Submit Answers"):
228
  if "questions" in st.session_state and st.session_state.questions:
229
  with st.spinner('Processing your answers...'):
 
1
+ import openai
2
  import streamlit as st
3
  from youtube_transcript_api import YouTubeTranscriptApi
4
  import re
5
  import tempfile
6
  import os
7
+ from pydub import AudioSegment
8
+ import logging
9
  import warnings
 
10
 
11
+ def convert_to_supported_format(file_path):
12
+ audio = AudioSegment.from_file(file_path)
13
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp:
14
+ audio.export(temp.name, format="wav")
15
+ return temp.name
16
 
 
 
 
 
17
  def transcribe_audio(file_path):
18
+ logging.info(f"Transcribing audio file: {file_path}")
19
+ file_path = convert_to_supported_format(file_path)
20
+ logging.info(f"Converted file path: {file_path}")
21
+ with warnings.catch_warnings():
22
+ warnings.simplefilter("ignore")
23
+ with open(file_path, "rb") as audio_file:
24
+ transcript = openai.Audio.transcribe("whisper-1", audio_file)
25
+ os.remove(file_path) # Clean up temporary file
26
+ return transcript["text"]
 
 
 
 
27
 
 
28
  def get_transcript(url):
29
  try:
30
  video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
31
+ if video_id_match:
32
+ video_id = video_id_match.group(1)
33
+ else:
34
  return "Error: Invalid YouTube URL"
35
+
36
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
37
  transcript_text = ' '.join([entry['text'] for entry in transcript])
38
  return transcript_text
39
  except Exception as e:
40
  return str(e)
41
 
 
42
  def summarize_text(text):
43
+ response = openai.ChatCompletion.create(
44
+ model="gpt-3.5-turbo",
45
+ messages=[
46
+ {"role": "system", "content": "You are a helpful assistant."},
47
+ {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
48
+ ],
49
+ max_tokens=150
50
+ )
51
+ summary = response['choices'][0]['message']['content'].strip()
52
+ return summary
 
 
 
 
53
 
 
54
  def generate_quiz_questions(text):
55
+ response = openai.ChatCompletion.create(
56
+ model="gpt-3.5-turbo",
57
+ messages=[
58
+ {"role": "system", "content": "You are a helpful assistant that generates quiz questions. Your task is to generate ten quiz questions and four multiple choice answers for each question from the given text. Make sure to mark the correct answer with an asterisk (*) at the beginning of the answer line. Use the following format for each question:\n\n1. Question\n a) Answer 1\n b) Answer 2\n c) Answer 3\n d) Answer 4\n\n2. Question\n a) Answer 1\n b) Answer 2\n c) Answer 3\n d) Answer 4\n\n..."},
59
+ {"role": "user", "content": f"Generate quiz questions from the following text:\n\n{text}"}
60
+ ],
61
+ max_tokens=300
62
+ )
63
+ quiz_questions = response['choices'][0]['message']['content'].strip()
64
+ return quiz_questions
 
 
 
 
65
 
 
66
  def parse_quiz_questions(quiz_text):
67
  questions = []
68
  question_blocks = quiz_text.split("\n\n")
 
 
 
 
69
  for block in question_blocks:
70
  lines = block.strip().split("\n")
71
+ if len(lines) >= 5:
72
+ question = lines[0].split(". ")[1]
73
+ choices = [line.split(") ")[1].strip() for line in lines[1:5]]
74
+ correct_answer_lines = [line for line in lines[1:5] if "*" in line]
75
+ if correct_answer_lines:
76
+ correct_answer = correct_answer_lines[0].split(") ")[1].replace("*", "").strip()
77
+ else:
78
+ correct_answer = "No correct answer provided"
79
+ questions.append({"question": question, "choices": choices, "correct_answer": correct_answer})
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return questions
81
 
 
82
  def generate_explanation(question, correct_answer, user_answer):
83
+ prompt = f"Explain why the correct answer to the following question is '{correct_answer}' and not '{user_answer}':\n\n{question}"
84
+ response = openai.ChatCompletion.create(
85
+ model="gpt-3.5-turbo",
86
+ messages=[
87
+ {"role": "system", "content": "You are a helpful assistant."},
88
+ {"role": "user", "content": prompt}
89
+ ],
90
+ max_tokens=150
91
+ )
92
+ explanation = response['choices'][0]['message']['content'].strip()
93
+ return explanation
 
 
 
94
 
 
95
  def check_answers(questions, user_answers):
96
  feedback = []
97
  correct_count = 0
 
117
  })
118
  return feedback
119
 
 
120
  def handle_uploaded_file(uploaded_file):
121
+ with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
122
+ tmp_file.write(uploaded_file.read())
123
+ tmp_file_path = tmp_file.name
124
+ return tmp_file_path
125
 
 
126
  st.title("YouTube Transcript Quiz Generator")
127
+
128
  st.markdown("**Instructions:** Paste a YouTube link or upload a media file to generate a quiz.")
129
 
130
  option = st.selectbox("Choose input type", ("YouTube URL", "Upload audio/video file"))
 
142
  quiz_text = generate_quiz_questions(transcript_text)
143
  questions = parse_quiz_questions(quiz_text)
144
 
145
+ st.write("## Summary")
146
+ st.write(summary)
147
+
148
+ st.write("## Quiz Questions")
149
+ st.session_state.questions = questions
150
+ st.session_state.user_answers = {}
151
+ st.session_state.generated_quiz = True
152
+
153
+ for i, question in enumerate(questions):
154
+ st.write(f"### Question {i+1}")
155
+ st.write(question['question'])
156
+ st.session_state.user_answers[f"question_{i+1}"] = st.radio(
157
+ label="",
158
+ options=question['choices'],
159
+ key=f"question_{i+1}"
160
+ )
161
+
162
+ elif option == "Upload audio/video file":
163
+ uploaded_file = st.file_uploader("Choose an audio or video file", type=["mp3", "wav", "mp4", "mov"])
164
  if uploaded_file:
165
+ tmp_file_path = handle_uploaded_file(uploaded_file)
166
+ transcript_text = transcribe_audio(tmp_file_path)
167
+ os.remove(tmp_file_path)
168
+ if "Error" not in transcript_text:
169
+ summary = summarize_text(transcript_text)
170
+ quiz_text = generate_quiz_questions(transcript_text)
171
+ questions = parse_quiz_questions(quiz_text)
172
+
173
+ st.write("## Summary")
174
+ st.write(summary)
175
+
176
+ st.write("## Quiz Questions")
177
+ st.session_state.questions = questions
178
+ st.session_state.user_answers = {}
179
+ st.session_state.generated_quiz = True
180
+
181
+ for i, question in enumerate(questions):
182
+ st.write(f"### Question {i+1}")
183
+ st.write(question['question'])
184
+ st.session_state.user_answers[f"question_{i+1}"] = st.radio(
185
+ label="",
186
+ options=question['choices'],
187
+ key=f"question_{i+1}"
188
+ )
189
 
190
  if st.session_state.generated_quiz:
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  if st.button("Submit Answers"):
192
  if "questions" in st.session_state and st.session_state.questions:
193
  with st.spinner('Processing your answers...'):