muhh-b committed on
Commit
74885f3
·
1 Parent(s): c832707

first commit

Browse files
Files changed (6) hide show
  1. app.py +66 -0
  2. audio.wav +0 -0
  3. credentials.json +1 -0
  4. quiz_generation.py +227 -0
  5. requirements.txt +0 -0
  6. transcription.py +53 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from audiorecorder import audiorecorder
4
+ from apiclient import discovery
5
+ from httplib2 import Http
6
+ from oauth2client import client, file, tools
7
+
8
+
9
+ import warnings
10
+
11
+ from transcription import transcribe
12
+ from quiz_generation import generate_quiz_url, explain_quiz_answers
13
+
14
# OAuth scope requested for the Google Forms API.
SCOPES = "https://www.googleapis.com/auth/forms.body"


def main():
    """Run the Streamlit quiz-generator page.

    Authorises against the Google Forms API (reusing the token cached in
    ``credentials.json`` when it is still valid), lets the user record an
    audio clip, stores it as ``audio.wav`` and, when the "Generate Quiz"
    button is pressed, creates a Google Form quiz and shows its link
    together with the generated explanations.

    The OAuth client-secrets file is taken from the
    ``GOOGLE_CLIENT_SECRETS_FILE`` environment variable when it is set;
    otherwise the path used by the original revision is kept as the default.
    """
    warnings.filterwarnings("ignore")

    # Initialise the Google Forms API service.
    store = file.Storage("credentials.json")
    creds = store.get()
    if not creds or creds.invalid:
        client_secrets_path = os.environ.get(
            "GOOGLE_CLIENT_SECRETS_FILE",
            r"C:\Users\Admin\Downloads\client_secret_535279977482-ttq1qb18v1crma5bkf70015qk9e9r2vv.apps.googleusercontent.com.json",
        )
        flow = client.flow_from_clientsecrets(client_secrets_path, SCOPES)
        creds = tools.run_flow(flow, store)
    form_service = discovery.build("forms", "v1", http=creds.authorize(Http()))

    st.title("Quiz Generator")
    st.markdown("Record an audio clip and generate a quiz based on the transcribed text.")
    audio = audiorecorder("Click to record", "Stop recording")

    if len(audio) > 0:
        audio_bytes = audio.tobytes()

        # Play the recording back in the frontend.
        st.audio(audio_bytes, format="audio/wav")

        # Persist the recording; the context manager guarantees the file is
        # flushed and closed before anything tries to read it back.
        with open("audio.wav", "wb") as wav_file:
            wav_file.write(audio_bytes)

    # Quiz generation section.
    # NOTE(review): the source this was recovered from had lost its
    # indentation; this section is assumed to sit at function level rather
    # than inside the `if len(audio) > 0` branch -- confirm.
    st.header("Quiz Generation")

    if st.button("Generate Quiz"):
        with st.spinner("Transcribing audio to generate the quiz..."):
            # NOTE: transcription is disabled in this revision and a fixed
            # prompt is used instead. Re-enable with:
            #     transcribed_text = transcribe("audio.wav")
            transcribed_text = " can you please generate a quiz of 4 questions about ML, each of them with 4 answers and indicate the correct answer"

            quiz_url, explanations = generate_quiz_url(transcribed_text, form_service)

        st.success("Quiz generated successfully!")
        st.text("Quiz Link: " + quiz_url)
        st.text("Transcribed Text:\n" + transcribed_text)

        # Display the explanations, one sub-section per question.
        st.header("Quiz Explanations")
        for number, explanation in enumerate(explanations, start=1):
            st.subheader(f"Question {number}")
            st.text(explanation)


if __name__ == '__main__':
    main()
audio.wav ADDED
Binary file (20.3 kB). View file
 
credentials.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"access_token": "ya29.a0AWY7Cknnaz0R8i2DlngKKKx4C_IZKzUVFPdZqk-e7diB_cGu1FcQDncMZArWOrjejGLUHakobPGddDruqRMC5Eu5ZKopv4BsKFPJi9mDLEwJBh8a7cYuIjMTZIQMkHQMtDm1Oz9T-QWjf26tLo_3iKKMOX7Gds8aCgYKARYSARESFQG1tDrpuJfmVxN7kf1ZQkwiDIKA5g0166", "client_id": "535279977482-ttq1qb18v1crma5bkf70015qk9e9r2vv.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//03cifxY_-1uh0CgYIARAAGAMSNwF-L9IrA86QsxrDPYOR3JWrekwFt42ZYG5RCssKeYOv0YWqEwEr75FCT6S5hEloEG2wKomo91c", "token_expiry": "2023-05-27T18:43:18Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AWY7Cknnaz0R8i2DlngKKKx4C_IZKzUVFPdZqk-e7diB_cGu1FcQDncMZArWOrjejGLUHakobPGddDruqRMC5Eu5ZKopv4BsKFPJi9mDLEwJBh8a7cYuIjMTZIQMkHQMtDm1Oz9T-QWjf26tLo_3iKKMOX7Gds8aCgYKARYSARESFQG1tDrpuJfmVxN7kf1ZQkwiDIKA5g0166", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/forms.body", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/forms.body"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
quiz_generation.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ from apiclient import discovery
4
+ from oauth2client import client, file, tools
5
+ import bardapi
6
+ from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
7
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
8
+
9
# OAuth scope and discovery document for the Google Forms API.
SCOPES = "https://www.googleapis.com/auth/forms.body"
DISCOVERY_DOC = "https://forms.googleapis.com/$discovery/rest?version=v1"

# Request body used to create the initial form (title only).
NEW_FORM = {"info": {"title": "Quiz"}}

# T5 checkpoint used by explain_quiz_answers; loaded once at import time.
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
21
+
22
def generate_quiz_questions(prompt):
    """Ask Bard to write a quiz for *prompt* and return the raw answer text.

    A formatting instruction is appended to the prompt so that questions,
    choices and correct answers are each introduced by a distinct marker
    that the caller can parse.

    The Bard token is read from the ``_BARD_API_KEY`` environment variable.
    It must be provided by the deployment environment and never committed
    to source control.

    Args:
        prompt: Free-text description of the quiz to generate.

    Returns:
        The ``"content"`` field of the Bard response.

    Raises:
        RuntimeError: If ``_BARD_API_KEY`` is not set.
    """
    if not os.environ.get("_BARD_API_KEY"):
        raise RuntimeError(
            "The _BARD_API_KEY environment variable must be set to a valid Bard token."
        )

    prompt_suffix = ". Each generated question has to begin with '🔹', each choice has to begin with '🔸', and each correct answer has to begin with '✔️'."

    # Send the request and keep only the textual content of the answer.
    response = bardapi.core.Bard().get_answer(prompt + prompt_suffix)
    return response["content"]
35
+
36
# NOTE: a disabled earlier copy of generate_quiz_url (kept as a bare string
# literal, identical to the live function except that it returned only the
# form URL) was removed here. The active implementation is defined later in
# this module.
112
+
113
def explain_quiz_answers(questions_list):
    """Produce one explanation string per quiz question.

    For every choice of every question, a "why this choice?" query is fed to
    the module-level T5 ``model`` (through ``tokenizer``) and the decoded
    output is appended to that question's explanation text. The choice equal
    to the question's ``"correct_answer"`` is tagged with " (Correct Answer)".

    Args:
        questions_list: Iterable of dicts with the keys ``"question"``,
            ``"choices"`` and ``"correct_answer"``.

    Returns:
        A list of explanation strings, in the same order as the questions.
    """
    results = []

    for entry in questions_list:
        stem = entry["question"]
        options = entry["choices"]
        expected = entry["correct_answer"]

        pieces = [f"Question: {stem}\n"]

        for option in options:
            # Phrase the choice as a question about the question text.
            prompt = f"What is the reason for choosing '{option}' in {stem}?"

            # Encode the (query, context) pair for the model.
            encoded = tokenizer.encode_plus(
                prompt,
                stem,
                return_tensors="pt",
                truncation=True,
                padding="max_length",
                max_length=512,
            )

            # Generate and decode the explanation text.
            generated = model.generate(
                input_ids=encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_length=256,
            )
            decoded = tokenizer.decode(generated[0], skip_special_tokens=True)

            # NOTE(review): source indentation was lost; the trailing newline
            # is assumed to be added once per choice -- confirm.
            tag = " (Correct Answer)" if option == expected else ""
            pieces.append(f"\nChoice: {option}\nExplanation: {decoded}{tag}\n")

        results.append("".join(pieces))

    return results
148
+
149
+
150
def _marked_lines(pattern, text):
    """Return the cleaned captures of *pattern* in *text*.

    Markdown bold markers (``**``) and surrounding whitespace are removed so
    that choices and correct answers can be compared verbatim.
    """
    return [match.replace("**", "").strip() for match in re.findall(pattern, text)]


def _choice_question_request(index, question):
    """Build the Forms API ``createItem`` request for one radio question."""
    return {
        "createItem": {
            "item": {
                "title": question["question"],
                "questionItem": {
                    "question": {
                        "required": True,
                        "choiceQuestion": {
                            "type": "RADIO",
                            "options": [
                                {"value": choice} for choice in question["choices"]
                            ],
                            "shuffle": True,
                        },
                    }
                },
            },
            "location": {"index": index},
        }
    }


def generate_quiz_url(prompt_text, form_service):
    """Create a Google Form quiz from *prompt_text* and explain its answers.

    The quiz text is obtained from ``generate_quiz_questions`` and parsed by
    marker: '🔹' introduces a question, '🔸' a choice and '✔️' a correct
    answer. Choices are assigned to questions in consecutive groups of four.

    Compared with the earlier revision:
      * a marked line no longer needs a trailing newline to be recognised,
        so the last line of the response is not silently dropped;
      * ``**`` is stripped from choices as well as from questions and
        answers, so a choice can compare equal to the correct answer;
      * the form is no longer fetched again after the update, because the
        fetched data was never used.

    Args:
        prompt_text: Description of the quiz to generate.
        form_service: Google Forms API service object exposing ``forms()``.

    Returns:
        A ``(form_url, explanations)`` tuple: the ``responderUri`` of the
        created form and the list returned by ``explain_quiz_answers``.
    """
    # Generate the quiz text from the prompt.
    text = generate_quiz_questions(prompt_text)

    # `.` never matches a newline, so `(.*)` captures up to the end of the
    # line whether or not the line is newline-terminated.
    questions = _marked_lines(r"🔹 (.*)", text)
    choices = _marked_lines(r"🔸 (.*)", text)
    answers = _marked_lines(r"✔️ (.*)", text)

    # Each question owns the next four choices; a missing answer becomes "".
    questions_list = [
        {
            "question": question,
            "choices": choices[i * 4:(i + 1) * 4],
            "correct_answer": answers[i] if i < len(answers) else "",
        }
        for i, question in enumerate(questions)
    ]

    # Create the initial form.
    result = form_service.forms().create(body=NEW_FORM).execute()

    # Add all questions to the form in a single batch update.
    question_requests = [
        _choice_question_request(index, question)
        for index, question in enumerate(questions_list)
    ]
    form_service.forms().batchUpdate(
        formId=result["formId"], body={"requests": question_requests}
    ).execute()

    # The responder link comes from the response to the create call.
    form_url = result["responderUri"]

    # Get the explanations for the quiz.
    explanations = explain_quiz_answers(questions_list)

    return form_url, explanations
227
+
requirements.txt ADDED
File without changes
transcription.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch
3
+ import whisper
4
+
5
+
6
+
7
# Spell-checker checkpoint, loaded once at import time.
tokenizer = AutoTokenizer.from_pretrained("Bhuvana/t5-base-spellchecker")
model = AutoModelForSeq2SeqLM.from_pretrained("Bhuvana/t5-base-spellchecker")


def correct(inputs):
    """Run *inputs* through the spell-checker model and return its output.

    Generation uses nucleus sampling (``do_sample=True``, ``top_p=0.99``)
    with ``max_length=50``, and a single sequence is returned.

    Args:
        inputs: The text to correct.

    Returns:
        The decoded model output with special tokens removed.
    """
    encoded = tokenizer.encode(inputs, return_tensors='pt')
    generated = model.generate(
        encoded,
        max_length=50,
        do_sample=True,
        top_p=0.99,
        num_return_sequences=1,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
23
+
24
# Whisper speech-recognition model, loaded once at import time.
whisper_model = whisper.load_model("base")


def transcribe(audio_file):
    """Transcribe *audio_file* with Whisper and spell-correct the result.

    The audio is padded or trimmed to Whisper's 30-second window before
    decoding, so only that window of a longer recording is transcribed.

    Compared with the earlier revision:
      * the spectrogram is moved to the Whisper model's device (it was
        previously moved to the spell-checker model's device);
      * the separate language-detection call is dropped because its result
        was never used, and decoding detects the language when none is given;
      * the audio array is passed to ``log_mel_spectrogram`` directly, which
        accepts NumPy input.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The transcription after it has been passed through ``correct``.
    """
    # Load the audio and pad/trim it to fit 30 seconds.
    audio = whisper.pad_or_trim(whisper.load_audio(audio_file))

    # Make the log-Mel spectrogram on the same device as the Whisper model.
    mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)

    # Decode the audio; fp16 is disabled as in the original options.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(whisper_model, mel, options)
    result_text = result.text

    print('result_text:' + result_text)

    return correct(result_text)
47
+
48
+
49
+
50
+
51
+
52
+
53
+