# Workflow of QuizTube 🧠

## 1. Extract the YouTube ID from a given link

In [1]:
from pytube import extract

urls = [
 'http://youtu.be/SA2iWivDJiE',
 'http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu',
 'http://www.youtube.com/embed/SA2iWivDJiE',
 'http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US',
 'https://www.youtube.com/watch?v=rTHlyTphWP0&index=6&list=PLjeDyYvG6-40qawYNR4juzvSOg-ezZ2a6',
 'https://www.youtube.com/watch?time_continue=9&v=n0g-Y0oo5Qs&feature=emb_logo'
]

for url in urls:
 video_id = extract.video_id(url)
 print(video_id)

SA2iWivDJiE
_oPAwA_Udwc
SA2iWivDJiE
SA2iWivDJiE
rTHlyTphWP0
n0g-Y0oo5Qs


## 2. Extract & transform video captions

In [2]:
from youtube_transcript_api import YouTubeTranscriptApi
video_id = "bcYwiwsDfGE"
transcript = YouTubeTranscriptApi.get_transcript(video_id)
transcript

[{'text': 'Okay, some of you might have heard already,\nbut you can now use Python inside Excel.',
 'start': 0.13,
 'duration': 4.74},
 {'text': "In this video, I will show some basic examples\nof how to use it, and I'll guide you through",
 'start': 4.87,
 'duration': 5.2},
 {'text': 'a real-world example to give you an idea of\nthe possibilities.',
 'start': 10.07,
 'duration': 4.199},
 {'text': "In that example, I'll show you some very useful\npandas features and some more advanced charts.",
 'start': 14.269,
 'duration': 5.301},
 {'text': "Towards the end, I'll also share my personal\nopinion about this new feature and point out",
 'start': 19.57,
 'duration': 4.58},
 {'text': 'some limitations.', 'start': 24.15, 'duration': 1.26},
 {'text': 'But, before we start, a quick shout-out to\nLuke.',
 'start': 25.41,
 'duration': 3.1},
 {'text': 'Luke was kind enough to connect me with people\nfrom Anaconda so that I could test this feature.',
 'start': 28.51,
 'duration': 4.93},
 {'text'

In [3]:
transcript = " ".join([item["text"] for item in transcript])
transcript

'Okay, some of you might have heard already,\nbut you can now use Python inside Excel. In this video, I will show some basic examples\nof how to use it, and I\'ll guide you through a real-world example to give you an idea of\nthe possibilities. In that example, I\'ll show you some very useful\npandas features and some more advanced charts. Towards the end, I\'ll also share my personal\nopinion about this new feature and point out some limitations. But, before we start, a quick shout-out to\nLuke. Luke was kind enough to connect me with people\nfrom Anaconda so that I could test this feature. So, thank you very much, Luke and the team\nfrom Anaconda. And with that said, let\'s dive in. Ok, let\'s start with a basic example. Under the formulas ribbon, there\'s a new Python\nsection. When I click on the Python logo, you\'ll notice\nthat the appearance of the formula bar changes. This is where you can now write your Python\ncode. For the first step, I\'ll create a dataframe\nwith the comma

## 3. Feed video captions into LLM (OpenAI)

In [4]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain

#########################################
OPENAI_API_KEY = "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"
#########################################


template = f"""
You are a helpful assistant programmed to generate questions based on any text provided. For every chunk of text you receive, you're tasked with designing 5 distinct questions. Each of these questions will be accompanied by 3 possible answers: one correct answer and two incorrect ones. 

For clarity and ease of processing, structure your response in a way that emulates a Python list of lists. 

Your output should be shaped as follows:

1. An outer list that contains 5 inner lists.
2. Each inner list represents a set of question and answers, and contains exactly 4 strings in this order:
- The generated question.
- The correct answer.
- The first incorrect answer.
- The second incorrect answer.

Your output should mirror this structure:
[
 ["Generated Question 1", "Correct Answer 1", "Incorrect Answer 1.1", "Incorrect Answer 1.2"],
 ["Generated Question 2", "Correct Answer 2", "Incorrect Answer 2.1", "Incorrect Answer 2.2"],
 ...
]

It is crucial that you adhere to this format as it's optimized for further Python processing.

"""

system_message_prompt = SystemMessagePromptTemplate.from_template(template)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
 [system_message_prompt, human_message_prompt]
)
chain = LLMChain(
 llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY),
 prompt=chat_prompt,
)
quiz_data = chain.run(transcript)
print(type(quiz_data))
print(quiz_data)


[
 ["What is the purpose of the video?", "To demonstrate how to use Python inside Excel", "To explain the limitations of using Python in Excel", "To showcase different data visualization libraries"],
 ["What is the advantage of using pandas in Python?", "It makes certain calculations with dates and times easier", "It allows for the creation of fancy charts", "It speeds up the calculation process in Excel"],
 ["How can you display a statistical summary of a dataframe in Excel?", "By selecting 'Excel value' from the drop-down list", "By clicking on the preview icon", "By right-clicking on the dataframe object"],
 ["What is the purpose of the 'Calc_Method' cell?", "To specify the aggregation method for the top 5 selling products", "To determine the color scheme for the charts", "To calculate the month-over-month growth rate"],
 ["What are some limitations of using Python in Excel?", "Animated charts and external file connections are not supported", "It only works on Windows and with Exce

## 4. Transform output

In [5]:
import ast

quiz_data_clean = ast.literal_eval(quiz_data)
print(type(quiz_data_clean))
print(quiz_data_clean)


[['What is the purpose of the video?', 'To demonstrate how to use Python inside Excel', 'To explain the limitations of using Python in Excel', 'To showcase different data visualization libraries'], ['What is the advantage of using pandas in Python?', 'It makes certain calculations with dates and times easier', 'It allows for the creation of fancy charts', 'It speeds up the calculation process in Excel'], ['How can you display a statistical summary of a dataframe in Excel?', "By selecting 'Excel value' from the drop-down list", 'By clicking on the preview icon', 'By right-clicking on the dataframe object'], ["What is the purpose of the 'Calc_Method' cell?", 'To specify the aggregation method for the top 5 selling products', 'To determine the color scheme for the charts', 'To calculate the month-over-month growth rate'], ['What are some limitations of using Python in Excel?', 'Animated charts and external file connections are not supported', 'It only works on Windows and with Excel Desk