unpairedelectron07 commited on
Commit
9ca9bc8
1 Parent(s): 2f17c93

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.llms import CTransformers
2
+ import gradio as gr
3
+ import os
4
+ from langchain.prompts import PromptTemplate
5
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
6
+ from langchain_community.vectorstores import Chroma
7
+ from fpdf import FPDF
8
+ from youtube_transcript_api import YouTubeTranscriptApi
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain_community.document_loaders import PyPDFLoader
12
+ import random
13
+
14
def text_to_pdf(url, filename="output.pdf"):
    """Fetch a YouTube video's transcript and write it to a PDF file.

    Args:
        url: YouTube watch URL containing a ``v=<video id>`` query parameter.
        filename: Path of the PDF to create (defaults to "output.pdf").

    Raises:
        IndexError: if the URL contains no ``=`` separating a video id.
    """
    # Take the value after the first '=' and strip any trailing query
    # parameters (e.g. "&t=42s") so extra URL params don't corrupt the id.
    video_id = url.split('=')[1].split('&')[0]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    # Join all transcript fragments into one space-separated body of text.
    res = "".join(" " + entry["text"] for entry in transcript)
    pdf = FPDF()
    pdf.add_page()
    # DejaVu gives Unicode coverage; the .ttf file must sit next to this script.
    pdf.add_font('DejaVu', '', 'DejaVuSansCondensed.ttf', uni=True)
    pdf.set_font("DejaVu", '', size=12)
    pdf.multi_cell(0, 10, res)
    pdf.output(filename)
    # Bug fix: the original printed the literal text "(unknown) generated!".
    print(f"{filename} generated!")
27
+
28
# Local GGUF model file loaded through ctransformers for CPU inference.
local_llm = "llama-2-13b-chat.Q4_K_M.gguf"

# Generation settings for the llama-family backend.
config = {
    'max_new_tokens': 512,
    'context_length': 700,
    'repetition_penalty': 1.5,
    'temperature': 0.4,
    'top_k': 50,
    'top_p': 0.9,
    'stream': True,
    # os.cpu_count() may return None (and int(None/2) would raise);
    # guard so we always pass a positive thread count.
    'threads': max(1, (os.cpu_count() or 2) // 2),
}

llm_init = CTransformers(
    model=local_llm,
    model_type="llama",
    lib="avx2",  # requires a CPU with AVX2 support
    **config
)

# Prompt for the RetrievalQA "stuff" chain; LangChain fills in
# {context} and {question} at query time.
prompt_template = """Use the following pieces of information to answer the user's question.
Make sure to adhere by the user's request.

Context: {context}
Question: {question}

Relevant Answer:
"""

# BGE embeddings on CPU; normalization is disabled here while the Chroma
# collection created in process_transcript() uses cosine distance.
model_name = "BAAI/bge-large-en"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
65
+
66
def process_transcript(pdf_path="output.pdf"):
    """Load a transcript PDF, chunk it, and index it in a Chroma store.

    Args:
        pdf_path: PDF produced by ``text_to_pdf`` (defaults to "output.pdf",
            matching the default filename used there).

    Returns:
        The populated Chroma vector store.
    """
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    # ~1000-char chunks with 100-char overlap keep retrieved context coherent.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    # Random suffix gives each request its own persist directory so
    # repeated runs don't mix documents in one collection.
    num = random.randint(0, 10000)
    vectorstore = Chroma.from_documents(
        texts,
        embeddings,  # module-level BGE embeddings
        collection_metadata={"hnsw:space": "cosine"},
        persist_directory=f"stores/tcp_cosine_{num}",
    )
    print("Vector Store created!")
    return vectorstore
76
+
77
def get_response(url):
    """Summarise the YouTube video at ``url`` via retrieval-augmented QA."""
    # Build the transcript PDF, then index it for retrieval.
    text_to_pdf(url)
    vector_store = process_transcript()

    summary_prompt = PromptTemplate(
        template=prompt_template,
        input_variables=['context', 'question'],
    )
    doc_retriever = vector_store.as_retriever(search_kwargs={"k": 1})

    query = "Summarise the given context in third person perspective in format of bullet points. Make sure to cover the entire content and only provide the crucial important gist in your response. Be as descriptive as you want, but keep the content relevant."

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm_init,
        chain_type="stuff",
        retriever=doc_retriever,
        return_source_documents=False,
        chain_type_kwargs={"prompt": summary_prompt},
        verbose=True,
    )
    answer = qa_chain(query)
    return answer['result']
94
+
95
# Single-line textbox for the YouTube URL.  Named url_input rather than
# "input" so the Python builtin input() is not shadowed at module level.
url_input = gr.Text(
    label="Prompt",
    show_label=False,
    max_lines=1,
    placeholder="Enter your URL",
    container=False
)

iface = gr.Interface(
    fn=get_response,
    inputs=url_input,
    outputs="text",
    title="YouTube Video Summarizer",
    description="Enter the URL to the YouTube video that you want to summarize: ",
    # Gradio expects the string "never" here; a bare False is not a
    # documented value for allow_flagging.
    allow_flagging="never"
)

if __name__ == "__main__":
    # Guard the launch so importing this module doesn't start the server.
    iface.launch()