Upload 6 files
- .gitattributes +1 -0
- alphaGPT-2k.png +0 -0
- banner.png +3 -0
- logo.png +0 -0
- main.py +170 -0
- packages.txt +1 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+banner.png filter=lfs diff=lfs merge=lfs -text
alphaGPT-2k.png
ADDED
banner.png
ADDED
Git LFS Details
logo.png
ADDED
main.py
ADDED
@@ -0,0 +1,170 @@
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import streamlit as st
from PIL import Image
import whisper
import torch
import os
from streamlit_lottie import st_lottie
from pytube import YouTube
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import DataFrameLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
import pandas as pd
import requests

st.set_page_config(layout="centered", page_title="Youtube QnA")

# header of the application
# image = Image.open('logo.png')

hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)


def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()

url_lottie1 = "https://lottie.host/d860aaf2-a646-42f2-8a51-3efe3be59bf2/tpZB5YYkuT.json"
url_lottie2 = "https://lottie.host/93dcafc4-8531-4406-891c-89c28e4f76e1/lWpokVrjB9.json"
lottie_hello1 = load_lottieurl(url_lottie2)
place1 = st.empty()

logo1 = "aai_white.png"
logo2 = "alphaGPT-2k.png"
logo3 = "banner.png"
with place1.container():
    # App title
    st.header("Youtube Question Answering Bot")
    anima1, anima2 = st.columns([1, 1])
    with anima1:
        # st.image("aai_black.png", width = 350, use_column_width=True)
        st.image("logo.png", width=300, use_column_width=True)
    with anima2:
        st_lottie(
            lottie_hello1,
            speed=1,
            reverse=False,
            loop=True,
            quality="high",  # medium ; high
            height=250,
            width=250,
            key=None,
        )

def extract_and_save_audio(video_URL, destination, final_filename):
    video = YouTube(video_URL)  # get video
    audio = video.streams.filter(only_audio=True).first()  # separate audio
    output = audio.download(output_path=destination)  # download and save for transcription
    _, ext = os.path.splitext(output)
    new_file = final_filename + '.mp3'
    os.rename(output, new_file)

def chunk_clips(transcription, clip_size):
    # Group Whisper segments into clips of clip_size segments and label each
    # clip with its "start - end min" time range for use as a source.
    texts = []
    sources = []
    for i in range(0, len(transcription), clip_size):
        clip_df = transcription.iloc[i:i + clip_size, :]
        text = " ".join(clip_df['text'].to_list())
        source = str(round(clip_df.iloc[0]['start'] / 60, 2)) + " - " + str(round(clip_df.iloc[-1]['end'] / 60, 2)) + " min"
        print(text)
        print(source)
        texts.append(text)
        sources.append(source)

    return [texts, sources]

openai_api_key = st.sidebar.text_input("OpenAI API Key", type="password")
if not openai_api_key:
    st.info("Please add your OpenAI API key to continue.")
    st.stop()


# #App title
# st.header("Youtube Question Answering Bot")
state = st.session_state
site = st.text_input("Enter your URL here")
if st.button("Build Model"):
    if not site:
        st.info("Enter URL to Build QnA Bot")
    else:
        try:
            my_bar = st.progress(0, text="Fetching the video. Please wait.")
            # Set the device
            device = "cuda" if torch.cuda.is_available() else "cpu"

            # Load the model
            whisper_model = whisper.load_model("base", device=device)

            # Video to audio
            video_URL = site
            destination = "."
            final_filename = "AlphaGPT"
            extract_and_save_audio(video_URL, destination, final_filename)

            # run the whisper model
            audio_file = "AlphaGPT.mp3"
            my_bar.progress(50, text="Transcribing the video.")
            result = whisper_model.transcribe(audio_file, fp16=False, language='English')

            transcription = pd.DataFrame(result['segments'])

            chunks = chunk_clips(transcription, 50)
            documents = chunks[0]
            sources = chunks[1]

            my_bar.progress(75, text="Building QnA model.")
            embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
            # vector store with metadata; each chunk keeps its "start - end min" range as the source
            vStore = Chroma.from_texts(documents, embeddings, metadatas=[{"source": s} for s in sources])
            # choose the chat model
            model_name = "gpt-3.5-turbo"

            retriever = vStore.as_retriever()
            retriever.search_kwargs = {'k': 2}
            llm = OpenAI(model_name=model_name, openai_api_key=openai_api_key)
            model = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

            my_bar.progress(100, text="Model is ready.")
            st.session_state['crawling'] = True
            st.session_state['model'] = model
            st.session_state['site'] = site

        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error('Oops, crawling resulted in an error :( Please try again with a different URL.')

if site and ("crawling" in state):
    st.header("Ask your data")
    model = st.session_state['model']
    site = st.session_state['site']
    st.video(site, format="video/mp4", start_time=0)
    user_q = st.text_input("Enter your questions here")
    if st.button("Get Response"):
        try:
            with st.spinner("Model is working on it..."):
                # st.write(model)
                result = model({"question": user_q}, return_only_outputs=True)
                st.subheader('Your response:')
                st.write(result["answer"])
                st.subheader('Sources:')
                st.write(result["sources"])
        except Exception as e:
            st.error(f"An error occurred: {e}")
            st.error('Oops, the GPT response resulted in an error :( Please try again with a different question.')
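Note on main.py: the Streamlit widgets only wrap a transcript-to-QnA pipeline (Whisper chunks -> Chroma vector store -> RetrievalQAWithSourcesChain). Below is a minimal sketch of that same pipeline outside the UI, assuming the legacy langchain API pinned in requirements.txt; the texts, sources, and API key are placeholders, not values from this commit.

# Sketch only: the retrieval pipeline main.py builds, without Streamlit.
# texts/sources are hypothetical stand-ins for chunk_clips() output.
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import OpenAI

texts = ["transcript of minutes 0-5 ...", "transcript of minutes 5-10 ..."]
sources = ["0.0 - 5.0 min", "5.0 - 10.0 min"]
api_key = "sk-..."  # placeholder key

embeddings = OpenAIEmbeddings(openai_api_key=api_key)
vstore = Chroma.from_texts(texts, embeddings, metadatas=[{"source": s} for s in sources])
retriever = vstore.as_retriever(search_kwargs={"k": 2})
llm = OpenAI(model_name="gpt-3.5-turbo", openai_api_key=api_key)
chain = RetrievalQAWithSourcesChain.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

result = chain({"question": "What is the video about?"}, return_only_outputs=True)
print(result["answer"], result["sources"])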
packages.txt
ADDED
@@ -0,0 +1 @@
ffmpeg
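packages.txt lists system (apt) packages installed on the Space; ffmpeg is included because openai-whisper shells out to the ffmpeg binary to decode the downloaded audio. A small sketch (not part of the commit) to confirm it is available at runtime:

# Sketch: whisper_model.transcribe() fails if the ffmpeg binary is not on PATH.
import shutil
if shutil.which("ffmpeg") is None:
    raise RuntimeError("ffmpeg not found - add it to packages.txt")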
requirements.txt
ADDED
@@ -0,0 +1,11 @@
streamlit
pytube
openai
chromadb
tiktoken
langchain
typing-inspect==0.8.0
typing_extensions==4.5.0
pandas
openai-whisper
pysqlite3-binary
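pysqlite3-binary pairs with the first three lines of main.py: chromadb expects a newer sqlite3 than the Space's system Python provides (it requires roughly sqlite3 >= 3.35), so the stdlib module is swapped out before chromadb is imported. The shim in isolation, as it appears in main.py:

# Replace the stdlib sqlite3 with pysqlite3-binary before importing chromadb,
# which needs a newer sqlite3 than many base images ship.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')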