Rams901 committed · Commit 33a7c5b
Parent(s):
Duplicate from Rams901/rent-qa

Files changed:
- .gitattributes +36 -0
- README.md +13 -0
- app.py +278 -0
- rent_data/index.faiss +3 -0
- rent_data/index.pkl +3 -0
- requirements.txt +10 -0
.gitattributes
ADDED
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+availability.json filter=lfs diff=lfs merge=lfs -text
+rent_data/index.faiss filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,13 @@
+---
+title: Rent QA
+emoji: null
+colorFrom: blue
+colorTo: gray
+sdk: gradio
+sdk_version: 3.27.0
+app_file: app.py
+pinned: false
+duplicated_from: Rams901/rent-qa
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,278 @@
+import gradio as gr
+import numpy as np
+import re
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+from typing import List
+from langchain.document_loaders import UnstructuredPDFLoader, YoutubeLoader
+from langchain.indexes import VectorstoreIndexCreator
+from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+from langchain.chains import LLMChain
+from langchain import PromptTemplate
+from langchain.vectorstores import FAISS
+from langchain.schema import SystemMessage, HumanMessage, AIMessage
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.chat_models import ChatOpenAI
+
+CHARACTER_CUT_OFF = 20000
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+embeddings = HuggingFaceEmbeddings()
+db = FAISS.load_local('rent_data', embeddings)
+
+llm = ChatOpenAI(
+    temperature=0,
+    model='gpt-3.5-turbo'
+)
+
+def remove_tags(soup: BeautifulSoup) -> str:
+    # Drop style and script tags, then return the remaining visible text
+    for data in soup(["style", "script"]):
+        data.decompose()
+    return " ".join(soup.stripped_strings)
+
+def read_webpage(url: str) -> str:
+    print(f"Getting the response from url: {url}")
+    response = requests.get(url)
+    html_content = response.content
+
+    # Parse the HTML content using BeautifulSoup
+    soup = BeautifulSoup(html_content, "html.parser")
+
+    # Get all the text content from the relevant HTML tags
+    text_content = remove_tags(soup)
+    print(text_content)
+    return text_content
+
+def grab_transcript(url):
+    loader = YoutubeLoader.from_youtube_url(url)
+    transcript = loader.load()
+    return transcript[0].page_content
+
+def process_webpages(urls: List[str]):
+    # A set to keep track of visited pages
+    visited_pages = set()
+    content = []
+
+    for url in urls:
+        aggregated_text = ""
+        try:
+            if 'youtube' not in url:
+                visited_pages.add(url)
+                aggregated_text += f"\nGetting the content of {url}:\n"
+                try:
+                    aggregated_text += read_webpage(url)
+                except Exception:
+                    aggregated_text += "No content found"
+            else:
+                # YouTube URL: pull the transcript instead of scraping HTML
+                aggregated_text += f"\nGetting the transcription of {url}:\n"
+                aggregated_text += grab_transcript(url)
+        except Exception as e:
+            print(e)
+
+        content.append(aggregated_text)
+
+    return content
+
+def extract_urls(text):
+    url_regex = r"(https?://\S+)"
+    return re.findall(url_regex, text)
+
+def add_text(history, text):
+    print(history)
+    history = history + [(text, None)]
+    return history, ""
+
+def create_db_from_urls(urls: List[str]) -> FAISS:
+    content = process_webpages(urls)
+    # 1K chunk size
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+    print(len(content), len(urls))
+    docs = text_splitter.create_documents(content)
+
+    global db
+    if type(db) != str:
+        # Carry over the documents already in the index before rebuilding it
+        docs += list(db.docstore._dict.values())
+    print(docs)
+
+    db = FAISS.from_documents(docs, embeddings)
+    return db
+
+def add_file(history, files):
+    history = []
+    files = files[0]  # batch=True wraps each input in a list
+    docs = []
+    for file in files:
+        loader = UnstructuredPDFLoader(file.name)
+        text = loader.load()
+        docs += text_splitter.split_documents(text)
+
+    global db
+    if type(db) != str:
+        # Carry over the documents already in the index before rebuilding it
+        docs += list(db.docstore._dict.values())
+    print(docs)
+    history = history + [(f"{len(files)} PDF(s) Uploaded", None)]
+
+    db = FAISS.from_documents(docs, embeddings)
+    print(f"History in add file: {history}")
+    print(type(history), type(history[0]))
+    return ([history,],)
+
+def qa_retrieve(chatlog):
+    print(f"Chatlog qa: {chatlog}")
+    query = chatlog[-1][0]
+
+    global db
+    print(db)
+
+    # Retrieve the four most similar chunks and stuff them into the prompt
+    docs = db.similarity_search(query, k=4)
+    docs_page_content = " ".join([d.page_content for d in docs])
+
+    prompt = PromptTemplate(
+        input_variables=["question", "docs"],
+        template="""
+        As a consultant, your role is to assist the user in analyzing different cases of landlord and tenant behaviour.
+        You will help the user with questions related to any of the information provided by the documents. You will give as much help as possible,
+        even if the information sought does not exist. Your priority is to find the information the user asked for; if it does not exist, try
+        to answer from your own reasoning.
+
+        Answer the following question: {question}
+        Use the following documents: {docs}
+
+        If you feel like you don't have enough information to answer the question, say "I don't know".
+        """,
+    )
+
+    chain = LLMChain(llm=llm, prompt=prompt)
+    response = chain.run(question=query, docs=docs_page_content)
+
+    chatlog[-1][1] = response
+    return chatlog
+
+def flush():
+    global db
+    db = ""
+    return None
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot([], elem_id="chatbot").style(height=750)
+
+    with gr.Row():
+        with gr.Column(scale=0.65):
+            txt = gr.components.Textbox(
+                placeholder="Ask me anything", show_label=False
+            )
+        with gr.Column(scale=0.15, min_width=0):
+            btn = gr.UploadButton("📁", file_types=["text"], file_count='multiple')
+        with gr.Column(scale=0.15, min_width=0):
+            send_btn = gr.Button("📨")
+
+    with gr.Row():
+        with gr.Column():
+            clear = gr.Button("Clear")
+    pdf_content = gr.Textbox("", visible=False)
+
+    txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
+        qa_retrieve, [chatbot], chatbot
+    ).then(lambda: None, outputs=[pdf_content])
+    btn.upload(add_file, [chatbot, btn], [chatbot], batch=True).then(
+        qa_retrieve, [chatbot], chatbot
+    )
+
+    send_btn.click(add_text, [chatbot, txt], [chatbot, txt]).then(
+        qa_retrieve, [chatbot], chatbot
+    ).then(lambda: None, outputs=[pdf_content])
+
+    clear.click(flush, None, outputs=chatbot, queue=False)
+
+demo.queue(concurrency_count=4)
+demo.launch()
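The rent_data index that ships with this commit can also be queried outside the Gradio UI. Below is a minimal sketch, not part of the commit, assuming the same LangChain-era API that app.py itself uses (FAISS.load_local, similarity_search), a working directory containing rent_data/, and a hypothetical sample question:

    # Minimal sketch: query the shipped index directly, outside the UI.
    # Assumption: same pinned LangChain API as app.py above.
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS

    embeddings = HuggingFaceEmbeddings()
    db = FAISS.load_local("rent_data", embeddings)

    # The same retrieval call app.py performs before prompting the LLM
    query = "Can a landlord raise the rent mid-lease?"  # hypothetical question
    for doc in db.similarity_search(query, k=4):
        print(doc.page_content[:200])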
rent_data/index.faiss
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2944eec8a548a3d4c97a9f26d2a87054ede0b063f9139f2f788c37888f32b02
+size 171764781
rent_data/index.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76630ed8bbc38dd10e113c088bca6a0531cd321469a3bcec99f427920468a35d
+size 60863161
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+
+unstructured
+pdf2image
+langchain
+gradio
+openai
+sentence_transformers
+youtube-transcript-api
+faiss-gpu
+beautifulsoup4