import os

import streamlit as st
from huggingface_hub import hf_hub_download
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQA
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import YoutubeLoader
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
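
# Pipeline: load a YouTube transcript, split it into chunks, embed the chunks
# into a local Chroma store, and answer questions with a retrieval QA chain
# backed by a locally run GGUF model.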

template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, or if the data might be outdated, say so or acknowledge the potential time lapse; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""
def set_custom_prompt():
    # Wrap the template so the chain can fill in the retrieved context and the question.
    return PromptTemplate(template=template, input_variables=["context", "question"])

def load_data(url):
    # Fetch the transcript (and basic video metadata) for the given YouTube URL.
    loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    return loader.load()

def split_data(data):
    # Chunk the transcript so each piece fits comfortably in the model's context window.
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    return splitter.split_documents(data)

def init_llm():
    # Download the GGUF weights once and run them locally through llama.cpp.
    model_name = "google/gemma-2b-it"
    model_file = "gemma-2b-it.gguf"
    hf_token = st.secrets["HF_TOKEN"]
    model_path = hf_hub_download(model_name, filename=model_file, token=hf_token)
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path=model_path,
        max_tokens=2000,
        n_gpu_layers=-1,  # offload all layers to the GPU if one is available
        callback_manager=callback_manager,
        verbose=True,
    )
    # Alternative local backend:
    # from langchain_community.llms import Ollama
    # llm = Ollama(model="mistral:latest")
    return llm
  
def init_db(splits):
    # Embed the chunks and persist them in a local Chroma collection.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    return Chroma.from_documents(splits, embeddings, persist_directory="./chroma_db5")

def init_chain(llm, db_chroma, prompt):
    # Stuff the top-4 most similar chunks into the prompt; return the source
    # documents so the chat handler below can attribute the answer.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db_chroma.as_retriever(search_kwargs={"k": 4}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

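# Cache the assembled chain so the heavy model and vector store load once per
# process instead of on every chat message (assumes a Streamlit version that
# provides st.cache_resource).
@st.cache_resource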
def chat_bot():
    # Reopen the persisted Chroma store and assemble the full QA chain.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
    )
    db_chroma = Chroma(persist_directory="./chroma_db5", embedding_function=embeddings)
    llm = init_llm()
    qa_prompt = set_custom_prompt()
    return init_chain(llm, db_chroma, qa_prompt)
  
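# Page layout and sidebar controls.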
st.set_page_config(page_title="InsightBOT : Your YouTube Companion", page_icon="πŸ€–")
st.title("InsightBOT πŸ€–")

st.sidebar.subheader("YouTube URL πŸ”—")

url = st.sidebar.text_input("Enter YouTube video URL:")

if st.sidebar.button("Analyze Video"):
    st.video(url, format="video/mp4")

    with st.spinner("Extracting insights... πŸ§ πŸ’‘"):
        data = load_data(url)
        splits = split_data(data)
        init_db(splits)  # builds and persists the Chroma index for later questions
   
if st.sidebar.button("Summarise Video"):
    with st.spinner("Writing video synopsis... πŸ–ŠοΈ"):
        data = load_data(url)
        splits = split_data(data)
        llm = init_llm()
        # Map-reduce: summarise each chunk, then combine the partial summaries.
        sum_chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
        summary = sum_chain.run(splits)
        st.write(summary)

st.markdown("Summarise and Engage with Your Video Content! πŸ’‘")

st.sidebar.markdown("---")
st.sidebar.caption("Created By: Sidhant Manale")
               
if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": f"Hey! How can I help you?"}]

for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

st.sidebar.markdown("#")
st.sidebar.markdown("#")
st.sidebar.subheader("Clear Chat")
if st.sidebar.button("Reset"):
    st.session_state.messages = []

if user_prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": user_prompt})
    st.chat_message("user").write(user_prompt)
    with st.spinner("Thinking..."):
        qa = chat_bot()
        response = qa({"query": user_prompt})
        bot_answer = response["result"]

        if response.get("source_documents"):
            # For YoutubeLoader documents, metadata["source"] is the video ID.
            metadata = response["source_documents"][0].metadata
            file = metadata["source"].split("\\")[-1]
            source = os.path.splitext(file)[0]
            assistant_answer = f"{bot_answer} \n\n Source : {source} Video"
        else:
            assistant_answer = f"{bot_answer} \n\n Source : Gemma Model"

        st.session_state.messages.append({"role": "assistant", "content": assistant_answer})
        st.chat_message("assistant").write(assistant_answer)
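
# To launch locally (assuming this file is saved as app.py):
#   streamlit run app.py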