hamxahbhattii committed • Commit 6330947 • Parent(s): 04be7ab

added Jine
Browse files
- .gitattributes +0 -35
- .gitignore +4 -0
- Dockerfile +19 -0
- Langchain_bot.ipynb +0 -0
- Logs/chatbot.log +0 -0
- README.md +2 -10
- Requirements Documents/Requirement specification Questionier.docx +0 -0
- Vector Store/chroma.sqlite3 +0 -0
- app.py +40 -0
- chainlit_interface.py +0 -0
- environment.yml +210 -0
- jine.py +130 -0
- jine_v1.py +182 -0
- requirements +6 -0
- requirements.txt +218 -0
- streamlit_interface.py +73 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+__pycache__
+Data/
+Front-end/
+.env
Dockerfile ADDED
@@ -0,0 +1,19 @@
+FROM python:3.10
+
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+COPY --chown=user . $HOME/app
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
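This is the standard Hugging Face Spaces Docker setup (non-root user, app served on port 7860). The same image can be built and tried locally with something like the following sketch; the image name is arbitrary, and --env-file supplies the variables the app otherwise reads from the git-ignored .env:

docker build -t jine .
docker run -p 7860:7860 --env-file .env jine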
Langchain_bot.ipynb ADDED
The diff for this file is too large to render. See raw diff.
Logs/chatbot.log ADDED
The diff for this file is too large to render. See raw diff.
README.md CHANGED
@@ -1,10 +1,2 @@
-
-
-emoji: 👀
-colorFrom: red
-colorTo: purple
-sdk: docker
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# JIN-e
+This is a bot based on ChatGPT that uses LangChain to answer questions related to policies.
Requirements Documents/Requirement specification Questionier.docx ADDED
Binary file (498 kB).
Vector Store/chroma.sqlite3 ADDED
Binary file (127 kB).
app.py ADDED
@@ -0,0 +1,40 @@
+from fastapi import FastAPI, HTTPException, Query
+
+# Import the Jine class and other necessary modules
+from jine import Jine
+from pydantic import BaseModel
+
+# Load your environment variables
+from dotenv import load_dotenv
+import os
+
+
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")
+VECTOR_STORE_DIRECTORY = os.getenv("VECTOR_STORE_DIRCTORY")
+VECTOR_STORE_CHECK = os.getenv("VECTOR_STORE_CHECK")
+DEBUG = os.getenv("DEBUG")
+
+# Initialize Jine
+jine = Jine(OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG)
+jine.load_model()
+
+# Create a FastAPI app
+app = FastAPI()
+
+# Define a request model
+class ChatRequest(BaseModel):
+    user_question: str
+
+# Define a response model
+class ChatResponse(BaseModel):
+    user_question: str
+    chatbot_response: str
+
+# Define the chatbot endpoint
+@app.post("/chatbot/")
+def chat_with_bot(request: ChatRequest):
+    user_question = request.user_question
+    chatbot_response = jine.chat(user_question)
+    return ChatResponse(user_question=user_question, chatbot_response=chatbot_response)
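For reference, a minimal client for this endpoint could look like the sketch below. It assumes the API is reachable at http://localhost:7860 (the port in the Dockerfile's CMD); the question text is only an illustration.

import requests

# POST a question to the /chatbot/ endpoint defined in app.py.
resp = requests.post(
    "http://localhost:7860/chatbot/",
    json={"user_question": "What is the leave policy?"},
)
# The response mirrors the ChatResponse model: user_question + chatbot_response.
print(resp.json()["chatbot_response"])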
chainlit_interface.py ADDED
File without changes
environment.yml ADDED
@@ -0,0 +1,210 @@
+name: jine
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  - asttokens=2.4.0=pyhd8ed1ab_0
+  - backcall=0.2.0=pyh9f0ad1d_0
+  - backports=1.0=pyhd8ed1ab_3
+  - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0
+  - bzip2=1.0.8=he774522_0
+  - ca-certificates=2023.7.22=h56e8100_0
+  - colorama=0.4.6=pyhd8ed1ab_0
+  - comm=0.1.4=pyhd8ed1ab_0
+  - debugpy=1.6.7=py310hd77b12b_0
+  - decorator=5.1.1=pyhd8ed1ab_0
+  - exceptiongroup=1.1.3=pyhd8ed1ab_0
+  - executing=1.2.0=pyhd8ed1ab_0
+  - importlib-metadata=6.8.0=pyha770c72_0
+  - importlib_metadata=6.8.0=hd8ed1ab_0
+  - ipykernel=6.25.2=pyh60829e3_0
+  - ipython=8.16.1=pyh5737063_0
+  - jedi=0.19.1=pyhd8ed1ab_0
+  - jupyter_client=8.4.0=pyhd8ed1ab_0
+  - jupyter_core=5.4.0=py310h5588dad_0
+  - libffi=3.4.4=hd77b12b_0
+  - libsodium=1.0.18=h8d14728_1
+  - matplotlib-inline=0.1.6=pyhd8ed1ab_0
+  - nest-asyncio=1.5.8=pyhd8ed1ab_0
+  - openssl=1.1.1l=h8ffe710_0
+  - packaging=23.2=pyhd8ed1ab_0
+  - parso=0.8.3=pyhd8ed1ab_0
+  - pickleshare=0.7.5=py_1003
+  - pip=23.3=py310haa95532_0
+  - platformdirs=3.11.0=pyhd8ed1ab_0
+  - prompt-toolkit=3.0.39=pyha770c72_0
+  - prompt_toolkit=3.0.39=hd8ed1ab_0
+  - psutil=5.9.0=py310h2bbff1b_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pygments=2.16.1=pyhd8ed1ab_0
+  - python=3.10.0=h96c0403_3
+  - python-dateutil=2.8.2=pyhd8ed1ab_0
+  - python_abi=3.10=2_cp310
+  - pyzmq=23.2.1=py310h73ada01_0
+  - setuptools=68.0.0=py310haa95532_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sqlite=3.41.2=h2bbff1b_0
+  - stack_data=0.6.2=pyhd8ed1ab_0
+  - tk=8.6.12=h2bbff1b_0
+  - tornado=6.2=py310he2412df_0
+  - traitlets=5.11.2=pyhd8ed1ab_0
+  - typing-extensions=4.8.0=hd8ed1ab_0
+  - typing_extensions=4.8.0=pyha770c72_0
+  - vc=14.2=h21ff451_1
+  - vs2015_runtime=14.27.29016=h5e58377_2
+  - wcwidth=0.2.8=pyhd8ed1ab_0
+  - wheel=0.41.2=py310haa95532_0
+  - xz=5.4.2=h8cc25b3_0
+  - zeromq=4.3.4=h0e60522_1
+  - zipp=3.17.0=pyhd8ed1ab_0
+  - zlib=1.2.13=h8cc25b3_0
+  - pip:
+    - aiofiles==23.2.1
+    - aiohttp==3.8.6
+    - aiosignal==1.3.1
+    - annotated-types==0.6.0
+    - antlr4-python3-runtime==4.9.3
+    - anyio==3.7.1
+    - async-timeout==4.0.3
+    - asyncer==0.0.2
+    - attrs==23.1.0
+    - backoff==2.2.1
+    - beautifulsoup4==4.12.2
+    - bidict==0.22.1
+    - certifi==2023.7.22
+    - cffi==1.16.0
+    - chainlit==0.7.301
+    - chardet==5.2.0
+    - charset-normalizer==3.3.0
+    - click==8.1.7
+    - contourpy==1.1.1
+    - cryptography==41.0.4
+    - cycler==0.12.1
+    - dataclasses-json==0.5.14
+    - deprecated==1.2.14
+    - effdet==0.4.1
+    - emoji==2.8.0
+    - fastapi==0.99.1
+    - fastapi-socketio==0.0.10
+    - filelock==3.12.4
+    - filetype==1.2.0
+    - flatbuffers==23.5.26
+    - fonttools==4.43.1
+    - frozenlist==1.4.0
+    - fsspec==2023.10.0
+    - googleapis-common-protos==1.61.0
+    - greenlet==3.0.0
+    - grpcio==1.59.0
+    - h11==0.14.0
+    - httpcore==0.18.0
+    - httptools==0.6.1
+    - httpx==0.25.0
+    - huggingface-hub==0.17.3
+    - humanfriendly==10.0
+    - idna==3.4
+    - importlib-resources==6.1.0
+    - iopath==0.1.10
+    - jinja2==3.1.2
+    - joblib==1.3.2
+    - jsonpatch==1.33
+    - jsonpointer==2.4
+    - kiwisolver==1.4.5
+    - langchain==0.0.320
+    - langdetect==1.0.9
+    - langsmith==0.0.49
+    - layoutparser==0.3.4
+    - lazify==0.4.0
+    - lxml==4.9.3
+    - markupsafe==2.1.3
+    - marshmallow==3.20.1
+    - matplotlib==3.8.0
+    - monotonic==1.6
+    - mpmath==1.3.0
+    - multidict==6.0.4
+    - mypy-extensions==1.0.0
+    - networkx==3.2
+    - nltk==3.8.1
+    - nodeenv==1.8.0
+    - numpy==1.26.1
+    - omegaconf==2.3.0
+    - onnx==1.14.1
+    - openai==0.28.1
+    - opencv-python==4.8.1.78
+    - opentelemetry-api==1.20.0
+    - opentelemetry-exporter-otlp==1.20.0
+    - opentelemetry-exporter-otlp-proto-common==1.20.0
+    - opentelemetry-exporter-otlp-proto-grpc==1.20.0
+    - opentelemetry-exporter-otlp-proto-http==1.20.0
+    - opentelemetry-instrumentation==0.41b0
+    - opentelemetry-proto==1.20.0
+    - opentelemetry-sdk==1.20.0
+    - opentelemetry-semantic-conventions==0.41b0
+    - overrides==7.4.0
+    - pdf2image==1.16.3
+    - pdfminer-six==20221105
+    - pdfplumber==0.10.2
+    - pillow==10.1.0
+    - portalocker==2.8.2
+    - prisma==0.10.0
+    - protobuf==4.24.4
+    - pulsar-client==3.3.0
+    - pycocotools==2.0.7
+    - pycparser==2.21
+    - pydantic==1.10.13
+    - pydantic-core==2.10.1
+    - pyjwt==2.8.0
+    - pymupdf==1.23.5
+    - pymupdfb==1.23.5
+    - pyparsing==3.1.1
+    - pypdfium2==4.22.0
+    - pypika==0.48.9
+    - pyreadline3==3.4.1
+    - pytesseract==0.3.10
+    - python-docx==1.0.1
+    - python-dotenv==1.0.0
+    - python-engineio==4.8.0
+    - python-graphql-client==0.4.3
+    - python-iso639==2023.6.15
+    - python-magic==0.4.27
+    - python-multipart==0.0.6
+    - python-socketio==5.10.0
+    - pytz==2023.3.post1
+    - pywin32==306
+    - pyyaml==6.0.1
+    - rank-bm25==0.2.2
+    - rapidfuzz==3.4.0
+    - regex==2023.10.3
+    - requests==2.31.0
+    - safetensors==0.4.0
+    - scipy==1.11.3
+    - simple-websocket==1.0.0
+    - sniffio==1.3.0
+    - soupsieve==2.5
+    - sqlalchemy==2.0.22
+    - sympy==1.12
+    - syncer==2.0.3
+    - tabulate==0.9.0
+    - tenacity==8.2.3
+    - tiktoken==0.5.1
+    - timm==0.9.8
+    - tokenizers==0.14.1
+    - tomli==2.0.1
+    - tomlkit==0.12.1
+    - torch==2.1.0
+    - torchvision==0.16.0
+    - tqdm==4.66.1
+    - transformers==4.34.1
+    - typing-inspect==0.9.0
+    - tzdata==2023.3
+    - unstructured==0.10.25
+    - unstructured-inference==0.7.9
+    - unstructured-pytesseract==0.3.12
+    - uptrace==1.20.2
+    - urllib3==2.0.7
+    - uvicorn==0.23.2
+    - watchfiles==0.20.0
+    - websockets==11.0.3
+    - wrapt==1.15.0
+    - wsproto==1.2.0
+    - yarl==1.9.2
+prefix: D:\anaconda3\envs\jine
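This is a conda-exported environment pinned to Python 3.10, with the application packages (langchain 0.0.320, chainlit, fastapi) under pip:. To recreate it locally, standard conda usage applies; the prefix: line refers to the original author's machine and can be dropped:

conda env create -f environment.yml
conda activate jine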
jine.py ADDED
@@ -0,0 +1,130 @@
+import logging
+import os
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import DirectoryLoader
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.chat_models import ChatOpenAI
+from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from dotenv import load_dotenv
+
+from langchain.retrievers import BM25Retriever, EnsembleRetriever
+from langchain.llms import OpenAI
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
+
+## Setting up log configuration
+logging.basicConfig(
+    filename='Logs/chatbot.log',  # Name of the log file
+    level=logging.INFO,  # Logging level (use logging.DEBUG for more detailed logs)
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
+class Jine:
+
+    def __init__(self, OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG, USE_HYDE=False):
+        self.OPENAI_API_KEY = OPENAI_API_KEY
+        self.DATA_DIRECTORY = DATA_DIRECTORY
+        self.VECTOR_STORE_DIRECTORY = VECTOR_STORE_DIRECTORY
+        self.VECTOR_STORE_CHECK = VECTOR_STORE_CHECK
+        # self.DEBUG = DEBUG
+        self.vectorstore = None
+        self.bot = None
+
+    def create_vectorstore(self):
+
+        if self.VECTOR_STORE_CHECK:
+            print("Loading Vectorstore")
+            self.load_vectorstore()
+        else:
+            print("Creating Vectorstore")
+            docs = DirectoryLoader(self.DATA_DIRECTORY).load()
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
+            all_splits = text_splitter.split_documents(docs)
+            self.vectorstore = Chroma.from_documents(documents=docs, embedding=OpenAIEmbeddings(),
+                                                     persist_directory=self.VECTOR_STORE_DIRECTORY)
+
+
+    def load_vectorstore(self):
+        self.vectorstore = Chroma(persist_directory=self.VECTOR_STORE_DIRECTORY, embedding_function=OpenAIEmbeddings())
+
+
+    def log(self, user_question, chatbot_reply):
+        # Log the user's question
+        logging.info(f"User: {user_question}")
+        # Log the chatbot's reply
+        logging.info(f"JIN-e: {chatbot_reply}")
+
+    def load_model(self):
+        self.create_vectorstore()
+        self.create_ensemble_retriever()
+
+    def chat(self, user_question):
+        result = self.bot({"query": user_question})
+        response = result["result"]
+        self.log(user_question, response)
+        return response
+
+    ### Adding ensemble retriever
+    def create_ensemble_retriever(self):
+        template = """
+        You are an expert policy advisor. Below are documents extracted from different policies. Your job
+        is to answer the question below based on this text.
+        Here are a few instructions to follow when answering a question.
+        - When you cannot find a relevant answer in the text below, just say "I don't know this; please contact your HRBP for more details."
+        - These are policy documents. When answering, do not reply that "This information is at Annex A/B"; provide a complete response to the request.
+        - Try to answer the questions in bullet format if possible.
+        - Use three sentences maximum and answer in a very concise manner.
+
+        {context}
+        Question: {question}
+        Helpful Answer:
+        """
+
+        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
+        print("====================="*10)
+        print("Loading Documents for Ensemble Retriever")
+        print("====================="*10)
+
+        docs = DirectoryLoader(self.DATA_DIRECTORY).load()
+        # text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
+        # all_splits = text_splitter.split_documents(docs)
+
+        bm25_retriever = BM25Retriever.from_documents(docs)
+        # Getting only two relevant documents
+        bm25_retriever.k = 2
+        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever,
+                                                           self.vectorstore.as_retriever(search_kwargs={"k": 2})],
+                                               weights=[0.5, 0.5])
+
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+
+        self.bot = RetrievalQA.from_chain_type(
+            llm,
+            retriever=ensemble_retriever,
+            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
+
+
+
+if __name__ == "__main__":
+    # Set your configuration here
+    load_dotenv()
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+    DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")
+    VECTOR_STORE_DIRECTORY = os.getenv("VECTOR_STORE_DIRCTORY")
+    VECTOR_STORE_CHECK = os.getenv("VECTOR_STORE_CHECK")
+
+    DEBUG = os.getenv("DEBUG")
+    USE_HYDE = os.getenv("USE_HYDE")
+    # Initialize Jine and start chatting
+    jine = Jine(OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG)
+    # print(jine.VECTOR_STORE_CHECK)
+    jine.load_model()
+    while True:
+        user_question = input("You: ")
+        if user_question.lower() in ["exit", "quit"]:
+            break
+        response = jine.chat(user_question)
+        print("JIN-e:", response)
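Every entry point loads its settings from a .env file, which .gitignore keeps out of the repo. A plausible sketch with placeholder values (the directory names follow the folders added in this commit); note that the code reads the vector-store path under the misspelled key VECTOR_STORE_DIRCTORY, so the .env must use that exact spelling:

OPENAI_API_KEY=sk-...
DATA_DIRECTORY=Data/
VECTOR_STORE_DIRCTORY=Vector Store/
VECTOR_STORE_CHECK=True
DEBUG=False
USE_HYDE=False

Also worth noting: os.getenv returns strings, so any non-empty VECTOR_STORE_CHECK value, including "False", is truthy in the if self.VECTOR_STORE_CHECK branch; leave the variable unset or empty to force vector-store creation.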
jine_v1.py ADDED
@@ -0,0 +1,182 @@
+import logging
+import os
+from langchain.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import DirectoryLoader
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.chat_models import ChatOpenAI
+from langchain.retrievers.multi_query import MultiQueryRetriever
+from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from dotenv import load_dotenv
+
+from langchain.retrievers import BM25Retriever, EnsembleRetriever
+from langchain.llms import OpenAI
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.chains import LLMChain, HypotheticalDocumentEmbedder
+
+## Setting up log configuration
+logging.basicConfig(
+    filename='Logs/chatbot.log',  # Name of the log file
+    level=logging.INFO,  # Logging level (use logging.DEBUG for more detailed logs)
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+
+
+class Jine:
+
+    def __init__(self, OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG, USE_HYDE=False):
+        self.OPENAI_API_KEY = OPENAI_API_KEY
+        self.DATA_DIRECTORY = DATA_DIRECTORY
+        self.VECTOR_STORE_DIRECTORY = VECTOR_STORE_DIRECTORY
+        self.VECTOR_STORE_CHECK = VECTOR_STORE_CHECK
+        self.DEBUG = DEBUG
+        self.vectorstore = None
+        self.bot = None
+        self.USE_HYDE = USE_HYDE
+        # creating this variable for the BM25 retriever
+        # self.docs = None
+
+    def create_vectorstore(self):
+
+        if self.VECTOR_STORE_CHECK:
+            print("Loading Vectorstore")
+            self.load_vectorstore()
+            print('im running')
+        else:
+            print("Creating Vectorstore")
+            docs = DirectoryLoader(self.DATA_DIRECTORY).load()
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
+            all_splits = text_splitter.split_documents(docs)
+            if self.USE_HYDE:
+                base_embeddings = OpenAIEmbeddings()
+                llm = OpenAI()
+                embeddings_hyde = HypotheticalDocumentEmbedder.from_llm(llm, base_embeddings, "web_search")
+                self.vectorstore = Chroma.from_documents(documents=all_splits, embedding=embeddings_hyde,
+                                                         persist_directory=self.VECTOR_STORE_DIRECTORY)
+
+            else:
+                self.vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings(),
+                                                         persist_directory=self.VECTOR_STORE_DIRECTORY)
+
+
+    def multi_query_retriever(self):
+        retriever_from_llm = MultiQueryRetriever.from_llm(retriever=self.vectorstore.as_retriever(),
+                                                          llm=ChatOpenAI(temperature=0))
+        template = """Use the following pieces of context to answer the question at the end.
+        If you don't know the answer, just say "I am unable to answer your query; for more information contact your HRBP", and don't try to make up an answer.
+        Use three sentences maximum and keep the answer as concise as possible.
+        {context}
+        Question: {question}
+        Helpful Answer:"""
+        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
+
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+
+        self.bot = RetrievalQA.from_chain_type(
+            llm,
+            retriever=retriever_from_llm,
+            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
+        )
+
+
+    def single_query_retriever(self):
+        template = """Use the following pieces of context to answer the question at the end.
+        If you don't know the answer, just say "I am unable to answer your query; for more information contact your HRBP", and don't try to make up an answer.
+        Use three sentences maximum and keep the answer as concise as possible.
+        {context}
+        Question: {question}
+        Helpful Answer:"""
+        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
+
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+
+        self.bot = RetrievalQA.from_chain_type(
+            llm,
+            retriever=self.vectorstore.as_retriever(),
+            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
+
+    def load_vectorstore(self):
+        if self.USE_HYDE:
+            print("Using HYDE embeddings vectorstore")
+            base_embeddings = OpenAIEmbeddings()
+            llm = OpenAI()
+            embeddings_hyde = HypotheticalDocumentEmbedder.from_llm(llm, base_embeddings, "web_search")
+            self.vectorstore = Chroma(persist_directory=self.VECTOR_STORE_DIRECTORY, embedding_function=embeddings_hyde)
+        else:
+            print("Using Simple embeddings vectorstore")
+            self.vectorstore = Chroma(persist_directory=self.VECTOR_STORE_DIRECTORY, embedding_function=OpenAIEmbeddings())
+
+    def log(self, user_question, chatbot_reply):
+        # Log the user's question
+        logging.info(f"User: {user_question}")
+        # Log the chatbot's reply
+        logging.info(f"JIN-e: {chatbot_reply}")
+
+    def load_model(self):
+        self.create_vectorstore()
+        # self.multi_query_retriever()
+        # self.single_query_retriever()
+        self.create_ensemble_retriever()
+
+    def chat(self, user_question):
+        result = self.bot({"query": user_question})
+        response = result["result"]
+        self.log(user_question, response)
+        return response
+
+    ### Adding ensemble retriever
+    def create_ensemble_retriever(self):
+        template = """Use the following pieces of context to answer the question at the end.
+        If you don't know the answer, just say "I am unable to answer your query; for more information contact your HRBP", and don't try to make up an answer.
+        Use three sentences maximum and keep the answer as concise as possible.
+        {context}
+        Question: {question}
+        Helpful Answer:"""
+
+        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)
+        print("====================="*10)
+        print("Loading Documents for Ensemble Retriever")
+        print("====================="*10)
+
+        docs = DirectoryLoader(self.DATA_DIRECTORY).load()
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=10)
+        all_splits = text_splitter.split_documents(docs)
+
+        bm25_retriever = BM25Retriever.from_documents(all_splits)
+        # Getting only two relevant documents
+        bm25_retriever.k = 2
+
+        ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever,
+                                                           self.vectorstore.as_retriever(search_kwargs={"k": 2})],
+                                               weights=[0.5, 0.5])
+
+        llm = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0)
+
+        self.bot = RetrievalQA.from_chain_type(
+            llm,
+            retriever=ensemble_retriever,
+            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT})
+
+
+
+if __name__ == "__main__":
+    # Set your configuration here
+    load_dotenv()
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+    DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")
+    VECTOR_STORE_DIRECTORY = os.getenv("VECTOR_STORE_DIRCTORY")
+    VECTOR_STORE_CHECK = os.getenv("VECTOR_STORE_CHECK")
+
+    DEBUG = os.getenv("DEBUG")
+    USE_HYDE = os.getenv("USE_HYDE")
+    # Initialize Jine and start chatting
+    jine = Jine(OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG)
+    # print(jine.VECTOR_STORE_CHECK)
+    jine.load_model()
+    while True:
+        user_question = input("You: ")
+        if user_question.lower() in ["exit", "quit"]:
+            break
+        response = jine.chat(user_question)
+        print("JIN-e:", response)
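jine_v1.py wires in HyDE (hypothetical document embeddings) through HypotheticalDocumentEmbedder with the built-in "web_search" prompt: the LLM first drafts a hypothetical answer document for a query, and the embedding of that draft, rather than of the raw question, is used for similarity search. A minimal standalone sketch of the same idea, against the langchain 0.0.x API pinned in environment.yml:

from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import HypotheticalDocumentEmbedder

# "web_search" selects one of the prompt templates bundled with the chain.
hyde = HypotheticalDocumentEmbedder.from_llm(OpenAI(), OpenAIEmbeddings(), "web_search")

# embed_query() generates a hypothetical document and embeds that, not the raw question.
vector = hyde.embed_query("What is the notice period for resignation?")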
requirements ADDED
@@ -0,0 +1,6 @@
+langchain
+chromadb
+"unstructured[all-docs]"
+openai
+fastapi
+uvicorn
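A caveat on the quoted entry above: the quotes around "unstructured[all-docs]" are shell syntax (they stop the [all-docs] extra from being glob-expanded on a command line), not requirements-file syntax, so pip install -r requirements would likely reject that line as written; inside the file it should read plain unstructured[all-docs].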
requirements.txt ADDED
@@ -0,0 +1,218 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: win-64
+aiohttp==3.8.4
+aiosignal==1.3.1
+altair==4.2.2
+anyio==3.6.2
+argilla==1.5.0
+asttokens==2.0.5
+async-timeout==4.0.2
+attrs==22.2.0
+backcall==0.2.0
+backoff==2.2.1
+beautifulsoup4==4.12.0
+bertopic==0.13.0
+blinker==1.6.2
+blis==0.7.9
+bs4==0.0.1
+cachetools==5.3.0
+catalogue==2.0.8
+certifi==2022.12.7
+cffi==1.15.1
+charset-normalizer==2.1.1
+chromadb==0.3.11
+click==8.1.3
+clickhouse-connect==0.5.16
+colorama==0.4.6
+commonmark==0.9.1
+confection==0.0.3
+cryptography==40.0.0
+cymem==2.0.7
+cython==0.29.32
+dataclasses-json==0.5.7
+debugpy==1.5.1
+decorator==5.1.1
+deprecated==1.2.13
+docx2txt==0.8
+duckdb==0.7.1
+entrypoints==0.4
+et-xmlfile==1.1.0
+executing==0.8.3
+faker==17.6.0
+fastapi==0.95.0
+filelock==3.9.0
+flask==2.3.3
+flask-sqlalchemy==3.0.5
+flatbuffers==23.5.26
+frozenlist==1.3.3
+fst-pso==1.8.1
+funcy==1.17
+future==0.18.2
+fuzzytm==2.0.5
+gensim==4.3.0
+gitdb==4.0.10
+gitpython==3.1.31
+google-search-results==2.4.2
+greenlet==2.0.1
+h11==0.14.0
+hdbscan==0.8.29
+hnswlib==0.7.0
+httpcore==0.16.3
+httptools==0.5.0
+httpx==0.23.3
+huggingface-hub==0.11.1
+humanfriendly==10.0
+idna==3.4
+importlib-metadata==6.1.0
+importlib-resources==6.0.1
+ipykernel==6.15.2
+ipython==8.7.0
+itsdangerous==2.1.2
+jedi==0.18.1
+jinja2==3.1.2
+joblib==1.2.0
+jsonschema==4.17.3
+jupyter_client==7.4.8
+jupyter_core==5.1.1
+langchain==0.0.284
+langcodes==3.3.0
+langsmith==0.0.33
+llama-index==0.5.5
+llvmlite==0.39.1
+lxml==4.9.2
+lz4==4.3.2
+markdown==3.4.3
+markdown-it-py==2.2.0
+markupsafe==2.1.1
+marshmallow==3.19.0
+marshmallow-enum==1.5.1
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+miniful==0.0.6
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+murmurhash==1.0.9
+mypy-extensions==1.0.0
+nest-asyncio==1.5.6
+nltk==3.8.1
+numba==0.56.4
+numexpr==2.8.4
+numpy==1.23.5
+openai==0.27.2
+opencv-python==4.7.0.72
+openpyxl==3.1.1
+overrides==7.4.0
+packaging==22.0
+pandas==1.5.2
+parso==0.8.3
+pathy==0.10.1
+pdfminer-six==20221105
+pickleshare==0.7.5
+pillow==9.4.0
+pip==22.3.1
+platformdirs==2.5.2
+plotly==5.11.0
+preshed==3.0.8
+prompt-toolkit==3.0.36
+protobuf==3.20.3
+psutil==5.9.0
+pulsar-client==3.3.0
+pure_eval==0.2.2
+pyarrow==11.0.0
+pycparser==2.21
+pydantic==1.10.4
+pydeck==0.8.0
+pyfume==0.2.25
+pygments==2.14.0
+pyldavis==3.3.1
+pympler==1.0.1
+pynndescent==0.5.8
+pyodbc==4.0.35
+pypandoc==1.11
+pypdf2==3.0.1
+pypika==0.48.9
+pypyodbc==1.3.6
+pyreadline3==3.4.1
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+python-docx==0.8.11
+python-dotenv==1.0.0
+python-magic==0.4.27
+python-pptx==0.6.21
+pytz==2022.7
+pytz-deprecation-shim==0.1.0.post0
+pywin32==305
+pyyaml==6.0
+pyzmq==23.2.0
+regex==2022.10.31
+requests==2.28.1
+rfc3986==1.5.0
+rich==13.0.1
+scikit-learn==1.2.0
+scipy==1.10.0
+semver==2.13.0
+sentence-transformers==2.2.2
+sentencepiece==0.1.97
+setuptools==65.5.0
+simpful==2.9.0
+six==1.16.0
+sklearn==0.0.post1
+smart-open==6.3.0
+smmap==5.0.0
+sniffio==1.3.0
+soupsieve==2.4
+spacy==3.4.4
+spacy-legacy==3.0.11
+spacy-loggers==1.0.4
+sqlalchemy==2.0.20
+sqlite==3.40.0
+srsly==2.4.5
+stack_data==0.2.0
+starlette==0.26.1
+streamlit==1.20.0
+streamlit-chat==0.0.2.2
+sympy==1.12
+tenacity==8.2.2
+thinc==8.1.6
+threadpoolctl==3.1.0
+tiktoken==0.3.2
+tk==8.6.12
+tokenizers==0.13.2
+toml==0.10.2
+toolz==0.12.0
+torch==1.13.1
+torchvision==0.14.1
+tornado==6.2
+tqdm==4.66.1
+traitlets==5.7.1
+transformers==4.25.1
+typer==0.7.0
+typing-extensions==4.7.1
+typing-inspect==0.8.0
+tzdata==2023.3
+tzlocal==4.3
+umap-learn==0.5.3
+unstructured==0.5.7
+urllib3==1.26.13
+uvicorn==0.21.1
+validators==0.20.0
+vc==14.2
+vs2015_runtime==14.27.29016
+wasabi==0.10.1
+watchdog==3.0.0
+watchfiles==0.18.1
+wcwidth==0.2.5
+websockets==10.4
+werkzeug==2.3.7
+wheel==0.37.1
+wincertstore==0.2
+wrapt==1.14.1
+xlsxwriter==3.0.9
+xz==5.2.8
+yarl==1.8.2
+zeromq==4.3.4
+zipp==3.15.0
+zlib==1.2.13
+zstandard==0.20.0
streamlit_interface.py ADDED
@@ -0,0 +1,73 @@
+import streamlit as st
+from streamlit_chat import message
+import os
+
+##### Importing JIN-e
+from jine import Jine
+from dotenv import load_dotenv
+import os
+
+
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+DATA_DIRECTORY = os.getenv("DATA_DIRECTORY")
+VECTOR_STORE_DIRECTORY = os.getenv("VECTOR_STORE_DIRCTORY")
+VECTOR_STORE_CHECK = os.getenv("VECTOR_STORE_CHECK")
+DEBUG = os.getenv("DEBUG")
+USE_HYDE = os.getenv("USE_HYDE")
+
+# Initialize Jine
+
+
+@st.cache_resource()
+def load_model():
+    jine = Jine(OPENAI_API_KEY, VECTOR_STORE_DIRECTORY, VECTOR_STORE_CHECK, DATA_DIRECTORY, DEBUG, USE_HYDE)
+    jine.load_model()
+    return jine
+
+jine = load_model()
+
+import streamlit as st
+from streamlit_chat import message
+
+# st.set_page_config(
+#     page_title="JIN-e",
+#     page_icon=":robot:"
+# )
+# #
+
+st.header("JIN-e")
+st.markdown("Powered by People Analytics")
+
+if 'generated' not in st.session_state:
+    st.session_state['generated'] = []
+
+if 'past' not in st.session_state:
+    st.session_state['past'] = []
+
+# def query(payload):
+#     response = requests.post(API_URL, headers=headers, json=payload)
+#     return response.json()
+
+def get_text():
+    input_text = st.text_input("You: ", "Hello, how are you?", key="input")
+    return input_text
+
+
+user_input = get_text()
+
+if user_input:
+
+    response = jine.chat(user_input)
+
+    st.session_state.past.append(user_input)
+    st.session_state.generated.append(response)
+
+if st.session_state['generated']:
+
+    for i in range(len(st.session_state['generated'])-1, -1, -1):
+        message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')
+        message(st.session_state["generated"][i], key=str(i))
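The Streamlit front end is launched with the standard runner from the repo root (it reads the same .env as the other entry points):

streamlit run streamlit_interface.py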