perlinson committed on
Commit
1adbdd7
·
1 Parent(s): 26fbdbe

first commit

Browse files
Files changed (3) hide show
  1. LICENSE +21 -0
  2. app.py +80 -0
  3. requirements.txt +5 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 AI Anytime
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from urllib.parse import urlparse
3
+ from langchain.chat_models import ChatOpenAI
4
+ from dotenv import load_dotenv
5
+ import os
6
+ import openai
7
+ from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain, BaseCombineDocumentsChain
8
+ from langchain.tools.base import BaseTool
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from pydantic import Field
11
+ import os, asyncio, trafilatura
12
+ from langchain.docstore.document import Document
13
+ import requests
14
+
15
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read the key once. os.getenv returns None when the variable is missing, and
# assigning None into os.environ raises an opaque TypeError, so fail early
# with an actionable message instead.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file."
    )
openai.api_key = _openai_api_key
19
+
20
@st.cache_resource
def get_url_name(url):
    """Return the network-location part (``netloc``) of ``url``.

    The URL is parsed with ``urllib.parse.urlparse``; the function is wrapped
    in Streamlit's ``st.cache_resource`` decorator.
    """
    return urlparse(url).netloc
24
+
25
def _get_text_splitter():
    """Create the splitter used as the default for ``WebpageQATool.text_splitter``.

    Chunks are measured with ``len`` and configured as 500 units long with an
    overlap of 20 between neighbouring chunks.
    """
    splitter_options = {
        "chunk_size": 500,
        "chunk_overlap": 20,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options)
31
+
32
class WebpageQATool(BaseTool):
    """Tool that downloads a web page and answers a question about its content.

    The page text is split into chunks, the chunks are sent to ``qa_chain`` in
    windows of ``window_size`` documents, and the per-window answers are then
    combined into a single document that is passed through ``qa_chain`` once
    more to produce the final answer.
    """

    name: str = "query_webpage"
    description: str = "Browse a webpage and retrieve the information and answers relevant to the question. Please use bullet points to list the answers"
    text_splitter: RecursiveCharacterTextSplitter = Field(default_factory=_get_text_splitter)
    qa_chain: BaseCombineDocumentsChain
    # Number of chunks handed to the chain per call (was a hard-coded 4).
    window_size: int = 4
    # Seconds to wait for the web server before giving up on the download.
    request_timeout: float = 30.0

    def _run(self, url: str, question: str) -> str:
        """Fetch ``url`` and answer ``question`` from the page content.

        Args:
            url: Address of the page to download.
            question: Question to answer from the page.

        Returns:
            The output of the final ``qa_chain`` call, returned unchanged.
            NOTE(review): the annotation says ``str`` but the chain is called
            with ``return_only_outputs=True`` and its result is returned
            as-is -- presumably a mapping; confirm against the chain used.

        Raises:
            requests.RequestException: If the download fails, times out, or
                the server answers with an HTTP error status.
        """
        # Without a timeout an unresponsive server would hang the app forever.
        response = requests.get(url, timeout=self.request_timeout)
        # Do not spend LLM calls summarising a 4xx/5xx error page.
        response.raise_for_status()

        # Prefer the extracted readable text over raw HTML markup; extract()
        # yields None when it finds nothing, so fall back to the raw body.
        page_content = trafilatura.extract(response.text) or response.text

        docs = [Document(page_content=page_content, metadata={"source": url})]
        web_docs = self.text_splitter.split_documents(docs)

        results = []
        for i in range(0, len(web_docs), self.window_size):
            input_docs = web_docs[i:i + self.window_size]
            window_result = self.qa_chain(
                {"input_documents": input_docs, "question": question},
                return_only_outputs=True,
            )
            # ``i`` is the index of the first chunk in the window.
            results.append(f"Response from window {i} - {window_result}")

        # Second pass: let the chain condense the per-window answers.
        results_docs = [Document(page_content="\n".join(results), metadata={"source": url})]
        return self.qa_chain(
            {"input_documents": results_docs, "question": question},
            return_only_outputs=True,
        )

    async def _arun(self, url: str, question: str) -> str:
        """Asynchronous execution is not supported by this tool."""
        raise NotImplementedError
55
+
56
def run_llm(url, query):
    """Answer ``query`` about the page at ``url`` using a ``WebpageQATool``.

    A ``ChatOpenAI`` model (temperature 0.5) is wrapped in a
    question-answering-with-sources chain, which backs the tool; the tool's
    result is returned unchanged.
    """
    chat_model = ChatOpenAI(temperature=0.5)
    sources_chain = load_qa_with_sources_chain(chat_model)
    webpage_tool = WebpageQATool(qa_chain=sources_chain)
    # The URL and the query are forwarded positionally to the tool.
    return webpage_tool._run(url, query)
61
+
62
# ---- Page header -----------------------------------------------------------
st.markdown("<h1 style='text-align: center; color: green;'>Info Retrieval from Website 🦜 </h1>", unsafe_allow_html=True)
st.markdown("<h3 style='text-align: center; color: green;'>Developed by <a href='https://github.com/AIAnytime'>AI Anytime with ❀️ </a></h3>", unsafe_allow_html=True)
st.markdown("<h2 style='text-align: center; color:red;'>Enter the Website URL 👇</h2>", unsafe_allow_html=True)

# Surrounding whitespace from copy/paste would otherwise end up in the request.
input_url = st.text_input("Enter the URL").strip()

if input_url:
    # Echo the host part back so the user can confirm the target site.
    url_name = get_url_name(input_url)
    st.info("Your URL is: 👇")
    st.write(url_name)

    st.markdown("<h4 style='text-align: center; color:green;'>Enter Your Query 👇</h4>", unsafe_allow_html=True)
    your_query = st.text_area("Query the Website")
    if st.button("Get Answers"):
        if your_query.strip():
            st.info("Your query is: " + your_query)
            try:
                # Fetching and the LLM calls can take a while; show progress.
                with st.spinner("Fetching the page and querying the model..."):
                    final_answer = run_llm(input_url, your_query)
            except requests.RequestException as exc:
                # Covers bad/missing URL schemes, DNS failures, timeouts, ...
                st.error(f"Could not fetch the URL: {exc}")
            else:
                st.write(final_answer)
        else:
            # Previously an empty query was silently ignored.
            st.warning("Please enter a query first.")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
langchain
openai
trafilatura
streamlit
python-dotenv
# Imported directly by app.py, so listed explicitly instead of relying on
# them arriving as transitive dependencies.
requests
pydantic