ighoshsubho committed on
Commit
acb0418
•
1 Parent(s): cea395c

Space environment setup done

Browse files
Files changed (2) hide show
  1. app.py +138 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # importing all the necessary files
2
+
3
+ from IPython.display import YouTubeVideo
4
+
5
+ from langchain.document_loaders import YoutubeLoader
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from langchain.chains import LLMChain
8
+ from langchain.chains.summarize import load_summarize_chain
9
+ from langchain.llms import HuggingFacePipeline
10
+ from langchain import PromptTemplate
11
+ import locale
12
+ import gradio as gr
13
+
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
15
+
16
+ import torch
17
+
18
+ import langchain
19
+ print(langchain.__version__)
20
+
21
# Load the transcript of a sample video at import time.
SAMPLE_VIDEO_URL = "https://www.youtube.com/watch?v=tAuRQs_d9F8&t=52s"
loader = YoutubeLoader.from_youtube_url(SAMPLE_VIDEO_URL)
transcript = loader.load()

# Recursively split the transcript into overlapping chunks kept in `texts`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=50,
)
texts = text_splitter.split_documents(transcript)
30
+
31
# Load the causal language model and its tokenizer from the same repo.
model_repo = 'tiiuae/falcon-rw-1b'
tokenizer = AutoTokenizer.from_pretrained(model_repo)

model_load_options = dict(
    load_in_8bit=True,
    device_map='auto',
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(model_repo, **model_load_options)

# Generation settings consumed by the pipeline below.
max_len = 2048  # 1024
task = "text-generation"
T = 0

# Build the text-generation pipeline.
generation_options = dict(
    max_length=max_len,
    temperature=T,
    top_p=0.95,
    repetition_penalty=1.15,
    pad_token_id=11,
)
pipe = pipeline(
    task=task,
    model=model,
    tokenizer=tokenizer,
    **generation_options,
)

# Wrap the pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature': 0})
62
+
63
# Initializing the LLM chain

# Prompt asking for a bullet-point summary of the text substituted for {text}.
template = """
Write a concise summary of the following text delimited by triple backquotes.
Return your response in bullet points which covers the key points of the text.
```{text}```
BULLET POINT SUMMARY:
"""

prompt = PromptTemplate(input_variables=["text"], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)


def _utf8_preferred_encoding():
    """Always report UTF-8 as the preferred encoding."""
    return "UTF-8"


# Override the locale lookup so every caller sees UTF-8.
locale.getpreferredencoding = _utf8_preferred_encoding
77
+
78
# Import and initialize the question-answering pipeline.
model_checkpoint = "IProject-10/bert-base-uncased-finetuned-squad2"
question_answerer = pipeline("question-answering", model=model_checkpoint)

# Default QA context: the text of the sample transcript loaded above.
# Read `page_content` directly. Formatting the whole Document and slicing
# off the first 14 characters also kept whatever follows the text in the
# Document's string form (presumably its metadata) and raised IndexError
# when the loader returned no documents.
text1 = transcript[0].page_content if transcript else ""

context = text1
86
+
87
# Get the context of the video

def get_context(input_text):
    """Return the transcript text of the YouTube video at ``input_text``.

    Args:
        input_text: URL of the YouTube video whose transcript is loaded.

    Returns:
        The ``page_content`` of the first document produced by the loader,
        or an empty string when the loader returns no documents.
    """
    documents = YoutubeLoader.from_youtube_url(str(input_text)).load()
    if not documents:
        return ""
    # Use the document text itself rather than slicing the Document's string
    # form, which left non-transcript text at the end of the context.
    return documents[0].page_content
96
+
97
# Building the bot function

def build_the_bot(text1):
    """Report that the bot has been built for the given context.

    Args:
        text1: Context text for the bot. It is accepted to keep the
            callback signature but is not stored here: assigning it to a
            local name had no effect outside this function.

    Returns:
        The fixed status message shown to the user.
    """
    return 'Bot Build Successfull!!!'
102
+
103
# Building the bot summarizer function

def build_the_bot_summarizer(text1):
    """Run the module-level ``llm_chain`` on ``text1`` and return its output.

    Args:
        text1: Text passed to the chain as its single input.

    Returns:
        Whatever ``llm_chain.run`` returns for that input.
    """
    summary = llm_chain.run(text1)
    return summary
108
+
109
# The chat space for gradio is served here

def chat(chat_history, user_input, context):
    """Answer ``user_input`` from ``context`` and stream the reply.

    Args:
        chat_history: Existing list of (user, bot) message pairs; ``None``
            is treated as an empty history.
        user_input: The question typed by the user.
        context: Text the question-answering pipeline extracts the answer
            from.

    Yields:
        The history extended with ``(user_input, partial_reply)``, where the
        partial reply grows by one character per step.
    """
    history = list(chat_history or [])
    output = question_answerer(question=user_input, context=context)
    bot_response = output["answer"]

    if not bot_response:
        # Yield once so the user's turn still appears when there is no answer.
        yield history + [(user_input, "")]
        return

    response = ""
    for letter in bot_response:
        response += letter
        yield history + [(user_input, response)]
120
+
121
# Serving the entire gradio app

with gr.Blocks() as demo:
    gr.Markdown('# YouTube Q&A and Summarizer Bot')

    # Per-session transcript used by the chat tab. It starts as the sample
    # video's context and is replaced when the user builds the bot.
    context_state = gr.State(context)

    def _build_from_url(url):
        """Load the transcript for ``url``; return the status and the context."""
        video_context = get_context(url)
        return build_the_bot(video_context), video_context

    def _summarize_from_url(url):
        """Load the transcript for ``url`` and return its summary."""
        return build_the_bot_summarizer(get_context(url))

    with gr.Tab("Input URL of video you wanna load -"):
        text_input = gr.Textbox()
        text_output = gr.Textbox()
        # Event listeners take components as inputs; the URL is only known
        # when the button is clicked, so the transcript is fetched inside the
        # callbacks rather than while the UI is being constructed.
        text_button1 = gr.Button("Build the Bot!!!")
        text_button1.click(
            _build_from_url,
            text_input,
            [text_output, context_state],
        )
        text_button2 = gr.Button("Summarize...")
        text_button2.click(_summarize_from_url, text_input, text_output)
    with gr.Tab("Knowledge Base -"):
        # inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
        chatbot = gr.Chatbot()
        message = gr.Textbox("What is this Youtube Video about?")
        # `chat` takes (chat_history, user_input, context): supply all three.
        message.submit(chat, [chatbot, message, context_state], chatbot)

demo.queue().launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ youtube_transcript_api
3
+ einops
4
+ accelerate
5
+ bitsandbytes
6
+ xformers
7
+ gradio
8
+ transformers