Bimal Bhattarai committed · Commit 5dc421a · 0 Parent(s)

first commit
Browse files
- .gitattributes +3 -0
- .gitignore +12 -0
- __pycache__/constants.cpython-311.pyc +0 -0
- __pycache__/constants.cpython-39.pyc +0 -0
- app_palm.py +94 -0
- constants.py +14 -0
- db_google/chroma.sqlite3 +3 -0
- db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/data_level0.bin +3 -0
- db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/header.bin +3 -0
- db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/index_metadata.pickle +3 -0
- db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/length.bin +3 -0
- db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/link_lists.bin +3 -0
- requirements.txt +17 -0
.gitattributes ADDED
@@ -0,0 +1,3 @@
+*.sqlite3 filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,12 @@
+db
+db_bgai
+db2
+models
+source_documents
+app_v2.py
+app_v3.py
+app.py
+ingest.py
+ingest_v3.py
+.env
+README.md
__pycache__/constants.cpython-311.pyc ADDED
Binary file (669 Bytes)

__pycache__/constants.cpython-39.pyc ADDED
Binary file (471 Bytes)
app_palm.py ADDED
@@ -0,0 +1,94 @@
+from langchain import PromptTemplate, LLMChain
+from langchain.llms import CTransformers, HuggingFacePipeline, GooglePalm
+import os
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.chains import RetrievalQA
+from langchain.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings
+from io import BytesIO
+from langchain.document_loaders import PyPDFLoader
+import gradio as gr
+import chromadb
+from dotenv import load_dotenv
+from constants import CHROMA_SETTINGS
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM, AutoModel
+import gc
+
+from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
+from langchain.chat_models import ChatGooglePalm
+import google.generativeai as genai
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+
+# Free any cached GPU memory before loading models.
+gc.collect()
+torch.cuda.empty_cache()
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+#model = AutoModelForCausalLM.from_pretrained(local_llm, device_map=device)
+llm = ChatGooglePalm()
+#llm = HuggingFacePipeline.from_model_id(model_id=local_llm, task='text-generation', device=0, pipeline_kwargs={"max_new_tokens": 1000})
+
+# Load .env before reading any configuration from the environment.
+if not load_dotenv():
+    print("Could not load .env file or it is empty. Please check if it exists and is readable.")
+    exit(1)
+
+embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
+persist_directory = os.environ.get('PERSIST_DIRECTORY')
+target_source_chunks = int(os.environ.get('TARGET_SOURCE_CHUNKS', 4))
+google_api_key = os.environ.get('GOOGLE_API_KEY')
+
+print("Loading embeddings model...")
+embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+#embeddings = pipeline("feature-extraction", model="WhereIsAI/UAE-Large-V1")
+
+# Chroma client backed by the persisted vector store.
+chroma_client = chromadb.PersistentClient(settings=CHROMA_SETTINGS, path=persist_directory)
+db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS, client=chroma_client)
+
+prompt_template = """Use the following pieces of information to answer the user's question.
+If you don't know the answer, just say that you don't know; don't try to make up an answer.
+
+Context: {context}
+Question: {question}
+
+Only return the helpful answer below and nothing else.
+Helpful answer:
+"""
+
+prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
+retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
+# activate/deactivate the streaming StdOut callback for LLMs
+
+chain_type_kwargs = {"prompt": prompt}
+
+input_gradio = gr.Text(
+    label="Prompt",
+    show_label=False,
+    max_lines=2,
+    placeholder="Enter your question here",
+    container=False,
+)
+
+
+def get_response(input_gradio):
+    query = input_gradio
+    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=False, chain_type_kwargs=chain_type_kwargs, verbose=True)
+    response = qa(query)
+    return response['result']
+
+
+iface = gr.Interface(
+    fn=get_response,
+    inputs=input_gradio,
+    outputs="text",
+    title="Tsetlin Machine Chatbot",
+    description="A chatbot that uses the LLM to answer anything regarding TM",
+    allow_flagging='never',
+)
+
+# Interactive questions and answers
+iface.launch()
+
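Note that app_palm.py reads all of its configuration from a .env file at startup; the file itself is gitignored above, so it is not part of this commit, and app_palm.py also imports langchain_google_genai, which is not pinned in requirements.txt below. A minimal sketch of a startup check using the variable names from the code (the commented values are illustrative assumptions, not from the commit):

    # Hypothetical .env contents (assumed values):
    #   PERSIST_DIRECTORY=db_google
    #   EMBEDDINGS_MODEL_NAME=models/embedding-001
    #   TARGET_SOURCE_CHUNKS=4
    #   GOOGLE_API_KEY=<your-api-key>
    import os
    from dotenv import load_dotenv

    load_dotenv()
    # Fail fast if a required variable is missing, mirroring the check in app_palm.py.
    for var in ('PERSIST_DIRECTORY', 'GOOGLE_API_KEY'):
        if not os.environ.get(var):
            raise SystemExit(f'Missing required environment variable: {var}')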
constants.py ADDED
@@ -0,0 +1,14 @@
+import os
+from dotenv import load_dotenv
+from chromadb.config import Settings
+
+load_dotenv()
+
+PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY')
+if PERSIST_DIRECTORY is None:
+    raise Exception("Please set PERSIST_DIRECTORY to the Chroma db directory path")
+
+CHROMA_SETTINGS = Settings(
+    persist_directory=PERSIST_DIRECTORY,
+    anonymized_telemetry=False
+)
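Since the commit ships the populated db_google store (the LFS pointers below), CHROMA_SETTINGS can also be used to inspect that store directly with the chromadb client, bypassing the LangChain wrapper in app_palm.py. A minimal sketch, assuming PERSIST_DIRECTORY points at db_google:

    # Open the persisted Chroma store and list its collections.
    import chromadb
    from constants import CHROMA_SETTINGS, PERSIST_DIRECTORY

    client = chromadb.PersistentClient(path=PERSIST_DIRECTORY, settings=CHROMA_SETTINGS)
    for collection in client.list_collections():
        print(collection.name, collection.count())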
db_google/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8eba354226ba88c591f1c149f3e965403e48dba1f085f71c7713bae1bd895f1f
+size 39329792

db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b2cb81ecbebf24556e4af8b17c5f6606e0b4a4b428c64814f74c798c38ec50e
+size 16060000

db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/header.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cd215c9f6b14de8a55607f23f90af168c5493da60c02482ee8f7087809be863
+size 100

db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2dc24cfc543dfc34ed403220c5ffa07e3d2caf5143d6eb2cb731d1dcd46d6336
+size 288034

db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/length.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ad4eb880e8012f662c48d0a8492c6006672085d2e3345ab37d4c05076c9a747
+size 20000

db_google/e072cde8-61bc-4eaa-9876-6068c95e6f74/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b22486b1e0558438145eb5c5c3c063e3ec5f38af73a3a04639d53d13ea0b50a
+size 44752
requirements.txt ADDED
@@ -0,0 +1,17 @@
+langchain==0.0.274
+gradio==3.50.2
+gpt4all==1.0.8
+chromadb==0.4.7
+urllib3==2.0.4
+PyMuPDF==1.23.1
+python-dotenv==1.0.0
+unstructured==0.10.8
+extract-msg==0.45.0
+tabulate==0.9.0
+pandoc==2.3
+pypandoc==1.11
+tqdm==4.66.1
+sentence_transformers==2.2.2
+pypdf
+google-generativeai
+protobuf==3.20.*