a committed on
Commit
5b412f0
·
1 Parent(s): 3a6c9ff

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
# CSS injected into the page so that the elements matched by #MainMenu and
# the <footer> tag are not shown.
hidden_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html is required, otherwise the <style> tag would be escaped
# and shown as text instead of being applied.
st.markdown(hidden_style, unsafe_allow_html=True)
9
+
10
+ def basic_version():
11
+ import argparse
12
+ import os
13
+ import shutil
14
+ import time
15
+ import torch
16
+ import textwrap
17
+ from urllib.parse import urlparse, parse_qs
18
+ from dotenv import load_dotenv
19
+ from langdetect import detect
20
+ from deep_translator import GoogleTranslator
21
+ from transformers import pipeline
22
+ import streamlit as st
23
+ from langchain import HuggingFaceHub
24
+ from langchain.chains import RetrievalQA
25
+ from langchain.chat_models import ChatOpenAI
26
+ from langchain.document_loaders import YoutubeLoader
27
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
28
+ from langchain.embeddings import OpenAIEmbeddings
29
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
30
+ from langchain.llms import OpenAI
31
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
32
+ from langchain.vectorstores import Chroma
33
+ load_dotenv()
34
+
35
def text_writer(input_text: str, speed: float):
    """Display *input_text* in a Streamlit placeholder with a typing effect.

    The text is revealed one character at a time by re-rendering a single
    ``st.empty()`` placeholder as Markdown.

    Args:
        input_text: The text to display.
        speed: Characters revealed per second. A zero or negative value
            disables the animation and shows the whole text immediately
            (previously this raised ``ZeroDivisionError`` or made
            ``time.sleep`` reject a negative delay).
    """
    container = st.empty()

    if speed <= 0:
        # No valid per-character delay can be derived; skip the animation.
        if input_text:
            container.markdown(input_text)
        return

    # The delay is constant, so compute it once instead of on every character.
    delay = 1 / speed
    displayed_text = ""

    for char in input_text:
        displayed_text += char
        container.markdown(displayed_text)
        time.sleep(delay)
43
+
44
def wrap_text_keep_newlines(input_text, width=110):
    """Wrap each line of *input_text* to *width* columns, keeping line breaks.

    The text is split on ``'\\n'``, every piece is wrapped on its own with
    ``textwrap.fill``, and the pieces are joined back with ``'\\n'``, so the
    newlines already present in the input survive the wrapping.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width)
        for segment in input_text.split('\n')
    )
49
+
50
def process_response(original_response):
    """Show the chain's answer on the page with the typing effect.

    Args:
        original_response: Mapping returned by the QA chain; the text under
            its ``"result"`` key is what gets displayed.
    """
    answer = original_response["result"]
    text_writer(answer, speed=40)
52
+
53
def get_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognised forms:

    * ``https://www.youtube.com/watch?v=<id>`` (any URL carrying a ``v``
      query parameter),
    * ``https://youtu.be/<id>`` short links,
    * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        youtube_url: The URL typed by the user. Surrounding whitespace is
            ignored.

    Returns:
        The video ID as a string, or ``None`` when the input is empty, is
        not a string, cannot be parsed, or contains no recognisable ID.
    """
    if not youtube_url or not isinstance(youtube_url, str):
        return None

    try:
        parsed_url = urlparse(youtube_url.strip())
        query_params = parse_qs(parsed_url.query)
        host = (parsed_url.hostname or "").lower()
    except ValueError as e:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
        print(f"Error extracting video ID: {e}")
        return None

    # Standard watch URL: the ID is the "v" query parameter.
    video_id = query_params.get('v', [None])[0]
    if video_id:
        return video_id

    path_parts = [part for part in parsed_url.path.split('/') if part]

    # Short links put the ID directly in the path: youtu.be/<id>
    if host == "youtu.be":
        return path_parts[0] if path_parts else None

    # Embed / shorts / live / legacy links: /<kind>/<id>
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live", "v"):
        return path_parts[1]

    return None
63
+
64
def start_basic_version():
    """Render the "Chat with Youtube" page and answer a question about a video.

    Draws the title, the URL and question inputs and the accuracy warning.
    When the submit button is pressed, the video is loaded with
    ``YoutubeLoader``, split into chunks, indexed in a Chroma store using
    ``BAAI/bge-base-en`` embeddings, and queried through a ``RetrievalQA``
    chain backed by ``tiiuae/falcon-7b-instruct`` on the Hugging Face Hub.
    The answer is displayed through ``process_response``.

    Problems the user can act on (missing API token, URL without a video
    ID, empty question, video that cannot be loaded) are reported on the
    page with ``st.error`` instead of surfacing as a traceback.
    """
    # .get() so that a missing variable can be reported on the page below
    # rather than raising KeyError before anything is rendered.
    HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")
    model_name = "BAAI/bge-base-en"
    encode_kwargs = {'normalize_embeddings': True}

    st.title('Chat with Youtube 🎬🤖')
    st.markdown(""" Using AI to interact with Youtube! """)

    if not HUGGINGFACE_API_TOKEN:
        st.error("The HUGGINGFACE_API_TOKEN environment variable is not set.")
        return

    video_url = st.text_input("Insert The video URL", placeholder="Format should be like: https://www.youtube.com/watch?v=pSLeYvld8Mk")
    query = st.text_input("Ask any question about the video",help="Suggested queries: Summarize the key points of this video - What is this video about - Ask about a specific thing in the video ")
    st.warning("⚠️ Please Keep in mind that the accuracy of the response relies on the :red[Video's quality] and the :red[prompt's Quality]. Occasionally, the response may not be entirely accurate. Consider using the response as a reference rather than a definitive answer.")

    if not st.button("Submit Question", type="primary"):
        return

    # Validate the inputs before doing any expensive work.
    video_id = get_video_id(video_url)
    if not video_id:
        st.error("Could not find a video ID in that URL. Please use a link like https://www.youtube.com/watch?v=pSLeYvld8Mk")
        return
    if not query or not query.strip():
        st.error("Please enter a question about the video.")
        return

    with st.spinner('Processing the Video...'):
        try:
            documents = YoutubeLoader(video_id).load()
        except Exception as e:
            # UI boundary: whatever the loader raised, show it to the user
            # instead of crashing the page.
            st.error(f"Could not load the video: {e}")
            return
        if not documents:
            # Nothing to index — presumably no transcript is available for
            # this video (TODO confirm against the loader's behaviour).
            st.error("No transcript could be loaded for this video.")
            return

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        documents = text_splitter.split_documents(documents)

        # NOTE(review): the store below is created without a persist
        # directory, so this cleanup looks like a leftover from a persisted
        # setup — confirm before removing.
        if os.path.exists('./data'):
            shutil.rmtree('./data')

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        embedding = HuggingFaceBgeEmbeddings(
            model_name=model_name,
            model_kwargs={'device': device},
            encode_kwargs=encode_kwargs,
        )
        vector_db = Chroma.from_documents(documents, embedding=embedding)

        repo_id = "tiiuae/falcon-7b-instruct"
        qa_chain = RetrievalQA.from_chain_type(
            llm=HuggingFaceHub(
                huggingfacehub_api_token=HUGGINGFACE_API_TOKEN,
                repo_id=repo_id,
                model_kwargs={"temperature": 0.2, "max_new_tokens": 1000},
            ),
            retriever=vector_db.as_retriever(),
            return_source_documents=False,
            verbose=False,
        )

    with st.spinner('Generating Answer...'):
        llm_response = qa_chain(query)

    process_response(llm_response)
103
+ start_basic_version()
104
+
105
+ basic_version()
106
+
107
# Sidebar content: heading, author credits, and a link to the repository.
# The last two entries contain raw HTML, so they need unsafe_allow_html.
sidebar = st.sidebar
sidebar.markdown("## Chat with Youtube using AI 🎬🤖")
sidebar.markdown(
    """Built by <a href="https://github.com/Ahmet-Dedeler"> Ahmet </a> & <a href="https://github.com/arhaamwanii"> Arhaam </a> for MLH All in Open Source Hackathon.""",
    unsafe_allow_html=True,
)
sidebar.markdown(
    '<a href="https://github.com/Ahmet-Dedeler/Chat-With-Youtube_All-In-Hackathon"> Check out the project on GitHub <img src="https://cdn.jsdelivr.net/gh/devicons/devicon/icons/github/github-original.svg" alt="GitHub" width="30" height="30"></a>',
    unsafe_allow_html=True,
)