# Importing all the necessary libraries

from IPython.display import YouTubeVideo  # notebook helper for embedding videos (not used below)

from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.chains.summarize import load_summarize_chain
from langchain.llms import HuggingFacePipeline
from langchain import PromptTemplate
import locale
import gradio as gr

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

import torch

import langchain
print(langchain.__version__)
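
# Note: the import paths above (langchain.document_loaders, langchain.llms, ...)
# assume a pre-0.1 LangChain release; from 0.1 onwards these integrations moved
# into the separate langchain_community package.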

# Loading a sample video's transcript

loader = YoutubeLoader.from_youtube_url("https://www.youtube.com/watch?v=tAuRQs_d9F8&t=52s")
transcript = loader.load()
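
# The loader fetches the video's captions via youtube-transcript-api. YoutubeLoader
# also accepts optional kwargs (e.g. language="en", or add_video_info=True, which
# needs pytube) if other caption tracks or video metadata are wanted.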

# Recursively splitting the transcript into overlapping chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=50)
texts = text_splitter.split_documents(transcript)
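
# Sanity check: chunk_size and chunk_overlap are measured in characters, so each
# chunk holds at most ~2000 characters, with 50 characters repeated across
# neighbouring chunks to preserve context at the boundaries.
print(f"Split transcript into {len(texts)} chunks")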

# Loading the model

model_repo = 'tiiuae/falcon-rw-1b'

tokenizer = AutoTokenizer.from_pretrained(model_repo)

model = AutoModelForCausalLM.from_pretrained(model_repo,
                                             load_in_8bit=True,
                                             device_map='auto',
                                             torch_dtype=torch.float16,
                                             low_cpu_mem_usage=True,
                                             trust_remote_code=True
                                            )
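
# Note: load_in_8bit=True relies on the bitsandbytes package and a CUDA GPU;
# without them, drop the flag and let torch_dtype=torch.float16 alone roughly
# halve memory versus full precision.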
max_len = 2048  # total prompt + generation length in tokens; try 1024 if memory is tight
task = "text-generation"
temperature = 0  # greedy decoding: always pick the most likely next token

# Building the pipeline

pipe = pipeline(
    task=task,
    model=model,
    tokenizer=tokenizer,
    max_length=max_len,
    temperature=temperature,
    top_p=0.95,
    repetition_penalty=1.15,  # discourage the model from repeating itself
    pad_token_id=11           # explicit pad token id (the tokenizer defines no pad token by default)
)

llm = HuggingFacePipeline(pipeline=pipe)  # temperature is already fixed in the pipeline above

# Initializing the LLM chain

template = """
              Write a concise summary of the following text delimited by triple backquotes.
              Return your response in bullet points that cover the key points of the text.
              ```{text}```
              BULLET POINT SUMMARY:
           """

prompt = PromptTemplate(template=template, input_variables=["text"])

llm_chain = LLMChain(prompt=prompt, llm=llm)
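
# Standalone usage (a sketch): the chain can summarize any single chunk directly,
# e.g. llm_chain.run(texts[0].page_content). For transcripts longer than the
# model's context window, load_summarize_chain (imported above) with
# chain_type="map_reduce" is the usual alternative.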

locale.getpreferredencoding = lambda: "UTF-8"  # common workaround for locale/encoding errors on Colab

# Importing and initializing the question-answering pipeline

model_checkpoint = "IProject-10/bert-base-uncased-finetuned-squad2"
question_answerer = pipeline("question-answering", model=model_checkpoint)
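
# The extractive QA pipeline returns a dict like
# {'score': ..., 'start': ..., 'end': ..., 'answer': '...'}: the answer is a
# literal span copied out of the context, not newly generated text.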

# Use the raw transcript text as the default Q&A context
context = transcript[0].page_content

# Get the transcript text (the Q&A context) for a given YouTube URL

def get_context(input_text):
    loader = YoutubeLoader.from_youtube_url(input_text)
    transcript = loader.load()
    return transcript[0].page_content

# Building the bot: load the transcript and store it as the shared chat context

def build_the_bot(input_text):
    global context
    context = get_context(input_text)
    return 'Bot built successfully!'

# Building the bot summarizer: fetch the transcript and run it through the chain

def build_the_bot_summarizer(input_text):
    text = get_context(input_text)
    return llm_chain.run(text)

# The chat handler served by Gradio: answer from the stored context and stream
# the reply back one character at a time

def chat(chat_history, user_input):
    output = question_answerer(question=user_input, context=context)
    bot_response = output["answer"]
    response = ""
    for letter in bot_response:
        response += letter
        yield chat_history + [(user_input, response)]
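
# Because chat() is a generator, Gradio re-renders the Chatbot on every yield,
# producing the incremental "typing" effect; this needs the request queue that
# demo.queue() enables below.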

# Serving the entire Gradio app

with gr.Blocks() as demo:
    gr.Markdown('# YouTube Q&A and Summarizer Bot')
    with gr.Tab("Input URL of the video you want to load -"):
        text_input = gr.Textbox()
        text_output = gr.Textbox()
        text_button1 = gr.Button("Build the Bot!!!")
        text_button1.click(build_the_bot, inputs=text_input, outputs=text_output)
        text_button2 = gr.Button("Summarize...")
        text_button2.click(build_the_bot_summarizer, inputs=text_input, outputs=text_output)
    with gr.Tab("Knowledge Base -"):
        chatbot = gr.Chatbot()
        message = gr.Textbox("What is this YouTube video about?")
        message.submit(chat, inputs=[chatbot, message], outputs=chatbot)

demo.queue().launch()