tushar310 committed on
Commit
d17095f
·
1 Parent(s): 12c0f88

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. alphaGPT-2k.png +0 -0
  3. banner.png +3 -0
  4. logo.png +0 -0
  5. main.py +170 -0
  6. packages.txt +1 -0
  7. requirements.txt +11 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ banner.png filter=lfs diff=lfs merge=lfs -text
alphaGPT-2k.png ADDED
banner.png ADDED

Git LFS Details

  • SHA256: c34e3aebd1b5cfb30810b766edab076c16973fcbd0ab8e72537fbe62ad18cd47
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
logo.png ADDED
main.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __import__('pysqlite3')
2
+ import sys
3
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
4
+
5
+ import streamlit as st
6
+ from PIL import Image
7
+ import whisper
8
+ import torch
9
+ import os
10
+ from streamlit_lottie import st_lottie
11
+ from pytube import YouTube
12
+ from langchain.text_splitter import CharacterTextSplitter
13
+ from langchain.document_loaders import DataFrameLoader
14
+ from langchain.vectorstores import Chroma
15
+ from langchain.chains import RetrievalQAWithSourcesChain
16
+ from langchain.embeddings.openai import OpenAIEmbeddings
17
+ from langchain.llms import OpenAI
18
+ import pandas as pd
19
+ import requests
20
+
21
+ st.set_page_config(layout="centered", page_title="Youtube QnA")
22
+
23
+ #header of the application
24
+ # image = Image.open('logo.png')
25
+
26
+ hide_streamlit_style = """
27
+ <style>
28
+ #MainMenu {visibility: hidden;}
29
+ footer {visibility: hidden;}
30
+ </style>
31
+
32
+ """
33
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
34
+
35
+
36
def load_lottieurl(url: str):
    """Fetch a Lottie animation definition from ``url``.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, times
        out, answers with a status other than 200, or the body is not valid
        JSON.
    """
    try:
        # Without a timeout a stalled host would block the page render forever.
        r = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # The JSON decode errors raised by ``Response.json`` derive from ValueError.
        return None
41
+
42
# Lottie animations available for the header; only url_lottie2 is fetched.
url_lottie1 = "https://lottie.host/d860aaf2-a646-42f2-8a51-3efe3be59bf2/tpZB5YYkuT.json"
url_lottie2 = "https://lottie.host/93dcafc4-8531-4406-891c-89c28e4f76e1/lWpokVrjB9.json"
lottie_hello1 = load_lottieurl(url_lottie2)
place1 = st.empty()


logo1 = "aai_white.png"
logo2 = "alphaGPT-2k.png"
logo3 = "banner.png"
with place1.container():
    # App title
    st.header("Youtube Question Answering Bot")
    anima1, anima2 = st.columns([1, 1])
    with anima1:
        st.image("logo.png", width=300, use_column_width=True)
    with anima2:
        # load_lottieurl returns None when the download fails; skip the
        # animation in that case instead of handing None to st_lottie.
        if lottie_hello1 is not None:
            st_lottie(
                lottie_hello1,
                speed=1,
                reverse=False,
                loop=True,
                quality="high",  # medium ; high
                height=250,
                width=250,
                key=None,
            )
69
+
70
def extract_and_save_audio(video_URL, destination, final_filename):
    """Download a YouTube video's audio stream and store it as ``<final_filename>.mp3``.

    The downloaded file is only renamed, not transcoded, so its contents keep
    whatever container format the selected stream was delivered in.

    Args:
        video_URL: URL of the YouTube video.
        destination: Directory the stream is first downloaded into.
        final_filename: Target path without extension; ``'.mp3'`` is appended.

    Returns:
        The path of the renamed audio file.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    video = YouTube(video_URL)
    audio = video.streams.filter(only_audio=True).first()
    if audio is None:
        raise ValueError(f"No audio-only stream available for {video_URL}")
    output = audio.download(output_path=destination)
    new_file = final_filename + '.mp3'
    # os.replace overwrites a file left behind by an earlier run on every
    # platform, whereas os.rename raises on Windows when the target exists.
    os.replace(output, new_file)
    return new_file
77
+
78
def chunk_clips(transcription, clip_size):
    """Group consecutive transcript segments into larger text clips.

    Args:
        transcription: DataFrame with one row per segment and the columns
            ``'text'``, ``'start'`` and ``'end'``. ``'start'``/``'end'`` are
            divided by 60 and labelled "min", so they are presumably seconds.
        clip_size: Number of segments merged into one clip; must be positive.

    Returns:
        A two-element list ``[texts, sources]``. ``texts[i]`` is the
        space-joined text of clip ``i`` and ``sources[i]`` is its time range
        formatted as ``"<start> - <end> min"`` (values rounded to 2 decimals).

    Raises:
        ValueError: If ``clip_size`` is smaller than 1.
    """
    if clip_size < 1:
        raise ValueError("clip_size must be a positive integer")

    texts = []
    sources = []
    for i in range(0, len(transcription), clip_size):
        clip_df = transcription.iloc[i:i + clip_size, :]
        # The range runs from the first segment's start to the last one's end.
        start_min = round(clip_df.iloc[0]['start'] / 60, 2)
        end_min = round(clip_df.iloc[-1]['end'] / 60, 2)
        texts.append(" ".join(clip_df['text'].to_list()))
        sources.append(f"{start_min} - {end_min} min")

    return [texts, sources]
91
+
92
# The OpenAI key is required by both the embeddings and the LLM below, so the
# script run is halted until the user provides one.
openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
if not openai_api_key:
    st.info("Please add your OpenAI API key to continue.")
    st.stop()


state = st.session_state
site = st.text_input("Enter your URL here")
if st.button("Build Model"):
    # st.text_input yields an empty string when nothing was typed, so test for
    # emptiness rather than for None.
    if not site or not site.strip():
        st.info("Enter URL to Build QnA Bot")
    else:
        try:
            my_bar = st.progress(0, text="Fetching the video. Please wait.")
            # Set the device
            device = "cuda" if torch.cuda.is_available() else "cpu"

            # Load the model
            whisper_model = whisper.load_model("base", device=device)

            # Video to audio
            # NOTE(review): the fixed file name in the working directory is
            # shared by every session of the app — confirm this is acceptable.
            video_URL = site
            destination = "."
            final_filename = "AlphaGPT"
            extract_and_save_audio(video_URL, destination, final_filename)

            # run the whisper model
            audio_file = "AlphaGPT.mp3"
            my_bar.progress(50, text="Transcribing the video.")
            result = whisper_model.transcribe(audio_file, fp16=False, language='English')

            transcription = pd.DataFrame(result['segments'])

            # Merge 50 segments per clip; each clip keeps its time range as source.
            chunks = chunk_clips(transcription, 50)
            documents = chunks[0]
            sources = chunks[1]

            my_bar.progress(75, text="Building QnA model.")
            embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
            # Vector store with metadata: the "source" of each clip is its time range.
            vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])
            # deciding model
            model_name = "gpt-3.5-turbo"

            retriever = vStore.as_retriever()
            retriever.search_kwargs = {'k': 2}
            llm = OpenAI(model_name=model_name, openai_api_key=openai_api_key)
            model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

            my_bar.progress(100, text="Model is ready.")
            # Persist the chain across reruns; the question section checks 'crawling'.
            st.session_state['crawling'] = True
            st.session_state['model'] = model
            st.session_state['site'] = site

        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error('Oops, crawling resulted in an error :( Please try again with a different URL.')
152
+
153
# Question section: shown once a model has been stored in the session state.
if site and ("crawling" in state):
    st.header("Ask your data")
    model = st.session_state['model']
    # Show the video the stored model was built from, not the current input.
    site = st.session_state['site']
    st.video(site, format="video/mp4", start_time=0)
    user_q = st.text_input("Enter your questions here")
    if st.button("Get Response"):
        if not user_q or not user_q.strip():
            # Avoid sending an empty question to the chain.
            st.info("Enter a question to get a response.")
        else:
            try:
                with st.spinner("Model is working on it..."):
                    result = model({"question": user_q}, return_only_outputs=True)
                    st.subheader('Your response:')
                    st.write(result["answer"])
                    st.subheader('Sources:')
                    st.write(result["sources"])
            except Exception as e:
                st.error(f"An error occurred: {e}")
                st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pytube
3
+ openai
4
+ chromadb
5
+ tiktoken
6
+ langchain
7
+ typing-inspect==0.8.0
8
+ typing_extensions==4.5.0
9
+ pandas
10
+ openai-whisper
11
+ pysqlite3-binary