Joshua Sundance Bailey committed
Commit 693876a (0 parents)

initial commit

.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,38 @@
+ ---
+ name: Bug report
+ about: Create a report to help us improve
+ title: ''
+ labels: bug
+ assignees: ''
+
+ ---
+
+ **Describe the bug**
+ A clear and concise description of what the bug is.
+
+ **To Reproduce**
+ Steps to reproduce the behavior:
+ 1. Go to '...'
+ 2. Click on '....'
+ 3. Scroll down to '....'
+ 4. See error
+
+ **Expected behavior**
+ A clear and concise description of what you expected to happen.
+
+ **Screenshots**
+ If applicable, add screenshots to help explain your problem.
+
+ **Desktop (please complete the following information):**
+ - OS: [e.g. iOS]
+ - Browser: [e.g. chrome, safari]
+ - Version: [e.g. 22]
+
+ **Smartphone (please complete the following information):**
+ - Device: [e.g. iPhone6]
+ - OS: [e.g. iOS8.1]
+ - Browser: [e.g. stock browser, safari]
+ - Version: [e.g. 22]
+
+ **Additional context**
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,17 @@
+ ---
+ name: Feature request
+ about: Suggest an idea for this project
+ title: ''
+ labels: enhancement
+ assignees: ''
+
+ ---
+
+ **Describe the solution you'd like**
+ A clear and concise description of what you want to happen.
+
+ **Describe alternatives you've considered**
+ A clear and concise description of any alternative solutions or features you've considered.
+
+ **Additional context**
+ Add any other context or screenshots about the feature request here.
.github/dependabot.yml ADDED
@@ -0,0 +1,11 @@
+ # To get started with Dependabot version updates, you'll need to specify which
+ # package ecosystems to update and where the package manifests are located.
+ # Please see the documentation for all configuration options:
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+ version: 2
+ updates:
+   - package-ecosystem: "pip" # See documentation for possible values
+     directory: "/" # Location of package manifests
+     schedule:
+       interval: "weekly"
.github/pull_request_template.md ADDED
@@ -0,0 +1,12 @@
+ Thank you for contributing!
+ Before submitting this PR, please make sure:
+
+ - [ ] Your code builds clean without any errors or warnings
+ - [ ] Your code doesn't break anything we can't fix
+ - [ ] You have added appropriate tests
+
+ Please check one or more of the following to describe the nature of this PR:
+ - [ ] New feature
+ - [ ] Bug fix
+ - [ ] Documentation
+ - [ ] Other
.github/workflows/check-file-size-limit.yml ADDED
@@ -0,0 +1,14 @@
+ name: 10 MB file size limit
+ on:
+   pull_request:
+     branches: [main]
+
+ jobs:
+   check-file-sizes:
+     runs-on: ubuntu-latest
+     steps:
+       - name: Check large files
+         uses: ActionsDesk/lfs-warning@v2.0
+         with:
+           filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
+           token: ${{ secrets.WORKFLOW_GIT_ACCESS_TOKEN }}
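The `filesizelimit` above is simply 10 MiB written out in bytes, matching Hugging Face's per-file limit for non-LFS pushes to Spaces. A quick sanity check (illustrative only, not part of this commit):

```python
# 10 MiB expressed in bytes, matching the workflow's filesizelimit value
assert 10 * 1024 * 1024 == 10485760
```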
.github/workflows/hf-space.yml ADDED
@@ -0,0 +1,19 @@
+ name: Push to HuggingFace Space
+
+ on:
+   workflow_dispatch:
+
+ jobs:
+   push-to-huggingface:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v2
+         with:
+           fetch-depth: 0
+           token: ${{ secrets.WORKFLOW_GIT_ACCESS_TOKEN }}
+
+       - name: Push to HuggingFace Space
+         env:
+           HF_TOKEN: ${{ secrets.HF_TOKEN }}
+         run: |
+           git push https://joshuasundance:$HF_TOKEN@huggingface.co/spaces/joshuasundance/langchain-streamlit-demo main
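The workflow above mirrors the repository to the Space with a plain `git push` over HTTPS. For reference, roughly the same sync could be done with the `huggingface_hub` client already pinned in requirements.txt; this is a sketch rather than part of the commit, and it assumes `HF_TOKEN` is available in the environment:

```python
# Sketch: push the working tree to the same Space via the Hub API
# (alternative to the git-push step above; folder path and workflow are assumptions).
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_folder(
    folder_path=".",  # repository root
    repo_id="joshuasundance/langchain-streamlit-demo",  # target Space from the workflow
    repo_type="space",
)
```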
.gitignore ADDED
@@ -0,0 +1,94 @@
+ hf_cache/
+ govgis-nov2023/
+ *$py.class
+ *.chainlit
+ *.chroma
+ *.cover
+ *.egg
+ *.egg-info/
+ *.env
+ *.langchain.db
+ *.log
+ *.manifest
+ *.mo
+ *.pot
+ *.py,cover
+ *.py[cod]
+ *.sage.py
+ *.so
+ *.spec
+ .DS_STORE
+ .Python
+ .cache
+ .coverage
+ .coverage.*
+ .dmypy.json
+ .eggs/
+ .env
+ .hypothesis/
+ .idea
+ .installed.cfg
+ .ipynb_checkpoints
+ .mypy_cache/
+ .nox/
+ .pyre/
+ .pytest_cache/
+ .python-version
+ .ropeproject
+ .ruff_cache/
+ .scrapy
+ .spyderproject
+ .spyproject
+ .tox/
+ .venv
+ .vscode
+ .webassets-cache
+ /site
+ ENV/
+ MANIFEST
+ __pycache__
+ __pycache__/
+ __pypackages__/
+ build/
+ celerybeat-schedule
+ celerybeat.pid
+ coverage.xml
+ credentials.json
+ data/
+ db.sqlite3
+ db.sqlite3-journal
+ develop-eggs/
+ dist/
+ dmypy.json
+ docs/_build/
+ downloads/
+ eggs/
+ env.bak/
+ env/
+ fly.toml
+ htmlcov/
+ instance/
+ ipython_config.py
+ junk/
+ lib/
+ lib64/
+ local_settings.py
+ models/*.bin
+ nosetests.xml
+ lab/scratch/
+ lab/
+ parts/
+ pip-delete-this-directory.txt
+ pip-log.txt
+ pip-wheel-metadata/
+ profile_default/
+ sdist/
+ share/python-wheels/
+ storage
+ target/
+ token.json
+ var/
+ venv
+ venv.bak/
+ venv/
+ wheels/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,65 @@
+ # Don't know what this file is? See https://pre-commit.com/
+ # pip install pre-commit
+ # pre-commit install
+ # pre-commit autoupdate
+ # Apply to all files without committing:
+ # pre-commit run --all-files
+ # I recommend running this until you pass all checks, and then commit.
+ # Fix what you need to and then let the pre-commit hooks resolve their conflicts.
+ # You may need to git add -u between runs.
+ exclude: "AI_CHANGELOG.md"
+ repos:
+   - repo: https://github.com/charliermarsh/ruff-pre-commit
+     rev: "v0.1.6"
+     hooks:
+       - id: ruff
+         args: [--fix, --exit-non-zero-on-fix, --ignore, E501]
+   - repo: https://github.com/koalaman/shellcheck-precommit
+     rev: v0.9.0
+     hooks:
+       - id: shellcheck
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.5.0
+     hooks:
+       - id: check-ast
+       - id: check-builtin-literals
+       - id: check-merge-conflict
+       - id: check-symlinks
+       - id: check-toml
+       - id: check-xml
+       - id: debug-statements
+       - id: check-case-conflict
+       - id: check-docstring-first
+       - id: check-executables-have-shebangs
+       - id: check-json
+       # - id: check-yaml
+       - id: debug-statements
+       - id: fix-byte-order-marker
+       - id: detect-private-key
+       - id: end-of-file-fixer
+       - id: trailing-whitespace
+       - id: mixed-line-ending
+       - id: requirements-txt-fixer
+   - repo: https://github.com/pre-commit/mirrors-mypy
+     rev: v1.7.1
+     hooks:
+       - id: mypy
+         additional_dependencies:
+           - types-PyYAML
+   - repo: https://github.com/asottile/add-trailing-comma
+     rev: v3.1.0
+     hooks:
+       - id: add-trailing-comma
+   #- repo: https://github.com/dannysepler/rm_unneeded_f_str
+   #  rev: v0.2.0
+   #  hooks:
+   #    - id: rm-unneeded-f-str
+   - repo: https://github.com/psf/black
+     rev: 23.11.0
+     hooks:
+       - id: black
+   - repo: https://github.com/PyCQA/bandit
+     rev: 1.7.5
+     hooks:
+       - id: bandit
+         args: ["-x", "tests/*.py"]
LICENSE ADDED
@@ -0,0 +1,9 @@
+ MIT License
+
+ Copyright (c) 2023 Joshua Sundance Bailey
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
README.md ADDED
@@ -0,0 +1,23 @@
+ ---
+ title: Govgis Nov2023-slim-faiss
+ emoji: 🐨
+ colorFrom: red
+ colorTo: gray
+ sdk: streamlit
+ sdk_version: 1.28.2
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ # govgis_nov2023-slim-faiss
+
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![python](https://img.shields.io/badge/Python-3.11-3776AB.svg?style=flat&logo=python&logoColor=white)](https://www.python.org)
+
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v1.json)](https://github.com/charliermarsh/ruff)
+ [![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+
+ [![security: bandit](https://img.shields.io/badge/security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)
app.py ADDED
@@ -0,0 +1,245 @@
+ import os
+ from operator import itemgetter
+ from typing import Optional
+
+ import streamlit as st
+ import yaml
+ from huggingface_hub import hf_hub_download
+ from langchain.chat_models import ChatAnthropic
+ from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.prompts import ChatPromptTemplate, PromptTemplate
+ from langchain.schema.document import Document
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain.vectorstores import FAISS
+
+ DEFAULT_TEMPERATURE = 0.5
+ DEFAULT_MAX_TOKENS = 512
+ DEFAULT_SEARCH_RESULT_LIMIT = 3
+ default_hf_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface")
+ HF_HOME = os.environ.get("HF_HOME", default_hf_home)
+
+ if "chain" not in st.session_state:
+     st.session_state.chain = None
+
+ with st.sidebar:
+     st.session_state.search_result_limit = st.slider(
+         "Search Result Limit",
+         min_value=1,
+         max_value=10,
+         value=DEFAULT_SEARCH_RESULT_LIMIT,
+         step=1,
+     )
+
+     st.session_state.anthropic_api_key = st.text_input(
+         "Anthropic API Key",
+         type="password",
+     )
+
+     st.session_state.temperature = st.slider(
+         "Temperature",
+         min_value=0.0,
+         max_value=1.0,
+         value=DEFAULT_TEMPERATURE,
+         step=0.05,
+     )
+
+     st.session_state.max_tokens = st.slider(
+         "Max Tokens",
+         min_value=512,
+         max_value=12800,
+         value=DEFAULT_MAX_TOKENS,
+         step=256,
+     )
+
+     st.session_state.use_instant_for_rephrase = st.checkbox(
+         "Use `claude-instant-v1` to generate search query",
+         value=True,
+     )
+
+
+ @st.cache_resource
+ def get_embedding_model(device: str = "cpu", **kwargs) -> HuggingFaceBgeEmbeddings:
+     model_name = "BAAI/bge-large-en-v1.5"
+     model_kwargs = {"device": device}
+     encode_kwargs = {"normalize_embeddings": True}
+     return HuggingFaceBgeEmbeddings(
+         model_name=model_name,
+         model_kwargs=model_kwargs,
+         encode_kwargs=encode_kwargs,
+         cache_folder=HF_HOME,
+         **kwargs,
+     )
+
+
+ @st.cache_data
+ def download_data_from_hub(**kwargs) -> str:
+     repo_id = "joshuasundance/govgis_nov2023-slim-spatial"
+     filename = "govgis_nov2023-slim-nospatial.faiss.bytes"
+     repo_type = "dataset"
+     return hf_hub_download(
+         repo_id=repo_id,
+         filename=filename,
+         repo_type=repo_type,
+         cache_dir=HF_HOME,
+         **kwargs,
+     )
+
+
+ @st.cache_resource
+ def get_faiss(
+     serialized_bytes_path: Optional[str] = None,
+     embeddings: Optional[HuggingFaceBgeEmbeddings] = None,
+ ) -> FAISS:
+     serialized_bytes_path = serialized_bytes_path or download_data_from_hub()
+     with open(serialized_bytes_path, "rb") as infile:
+         return FAISS.deserialize_from_bytes(
+             embeddings=embeddings or get_embedding_model(),
+             serialized=infile.read(),
+         )
+
+
+ def _combine_documents(
+     docs: list[Document],
+     document_separator: str = "\n\n",
+ ) -> str:
+     return document_separator.join(f"```yaml\n{doc.page_content}\n```" for doc in docs)
+
+
+ rephrase_template = """Given the User Input, return an English natural language Search Query that will return the most relevant documents.
+ Remember, you are working with a semantic search engine. It is not based solely on keywords or Google-Fu.
+ Be creative with your search query.
+ Your entire response will be fed directly into the search engine. Omit any text that is not part of the search query.
+
+ User Input: {question}"""
+ REPHRASE_QUESTION_PROMPT = PromptTemplate.from_template(rephrase_template)
+
+
+ answer_template = """The following search results were found for the given user query.
+ Provide a description of the relevant search results, providing relevant URLs and details.
+ Describing the search results in the context of the query is more important than answering the query.
+ Do not answer without referring to the search results; the search results are the most important part of the answer.
+ Base your response on the search results.
+ Always provide a URL when referencing a specific service, dataset, or API.
+ If multiple search results are relevant to the user's query, describe each result separately.
+ Describe what sets each result apart from the others.
+ Be detailed and specific, so the user can find the information they need.
+ Format your response as markdown as appropriate.
+ ----------------
+ Search Results:
+ {context}
+ ----------------
+ Question: {question}"""
+ ANSWER_PROMPT = ChatPromptTemplate.from_template(answer_template)
+
+
+ def get_chain(rephrase_llm, answer_llm, retriever):
+     """
+     Return a chain that rephrases, retrieves, and responds.
+
+     Output keys:
+     - search_query: str
+     - docs: list[Document]
+     - answer: str
+     """
+     return (
+         # rephrase
+         REPHRASE_QUESTION_PROMPT
+         | rephrase_llm
+         | {"search_query": StrOutputParser()}
+         # retrieve
+         | {
+             "search_query": itemgetter("search_query"),
+             "docs": itemgetter("search_query") | retriever,
+             "question": itemgetter("search_query"),
+         }
+         # respond
+         | {
+             "search_query": itemgetter("search_query"),
+             "docs": itemgetter("docs"),
+             "answer": (
+                 {
+                     "context": (lambda x: _combine_documents(x["docs"])),
+                     "question": itemgetter("question"),
+                 }
+                 | ANSWER_PROMPT
+                 | answer_llm
+                 | StrOutputParser()
+             ),
+         }
+     )
+
+
+ db = get_faiss()
+ retriever = db.as_retriever(
+     search_kwargs={"k": st.session_state.search_result_limit},
+ )
+
+ if st.session_state.anthropic_api_key:
+     rephrase_llm = ChatAnthropic(
+         model="claude-instant-v1"
+         if st.session_state.use_instant_for_rephrase
+         else "claude-2.1",
+         temperature=st.session_state.temperature,
+         max_tokens_to_sample=512,
+         anthropic_api_key=st.session_state.anthropic_api_key,
+     )
+
+     answer_llm = ChatAnthropic(
+         model="claude-2.1",
+         temperature=st.session_state.temperature,
+         max_tokens_to_sample=st.session_state.max_tokens,
+         anthropic_api_key=st.session_state.anthropic_api_key,
+     )
+
+     st.session_state.chain = get_chain(rephrase_llm, answer_llm, retriever)
+
+
+ user_input = st.text_input(
+     "What are you looking for?",
+     value="",
+ )
+
+ doc_md = """## [{name}]({url})
+
+ ### Type
+ {type}
+
+ ### Description
+ {description}
+
+ ### Parent Service Description
+ {parent_service_description}
+
+ ### Fields
+ {fields}
+ """
+
+
+ def display_docs(docs: list[Document]) -> None:
+     missing_value = ""
+     for doc in docs:
+         data = yaml.safe_load(doc.page_content)
+         st.markdown(f"## [{data['name']}]({data['url']})")
+         st.markdown(f"### Type\n{data['type']}")
+         st.markdown("### Description")
+         st.components.v1.html(data.get("description", missing_value))
+         st.markdown("### Parent Service Description")
+         st.components.v1.html(data.get("parent_service_description", missing_value))
+         if data.get("fields", None):
+             st.markdown("### Fields")
+             for field in data["fields"]:
+                 st.markdown(f"- {field}")
+
+
+ if user_input:
+     if st.session_state.chain is not None:
+         result = st.session_state.chain.invoke(dict(question=user_input))
+         st.markdown("# Query")
+         st.markdown(result["search_query"])
+         st.markdown("# Answer")
+         st.markdown(result["answer"])
+         st.markdown("# Documents")
+         display_docs(result["docs"])
+     else:
+         results = retriever.invoke(user_input)
+         display_docs(results)
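`get_faiss()` above deserializes a prebuilt index from raw bytes instead of embedding the corpus at startup. For reference, a bytes file of that shape could be produced with the same embedding model roughly like this; it is a sketch under assumptions about how the dataset was built, with a placeholder corpus, and is not code from this commit:

```python
# Sketch: build a small FAISS index and serialize it to a .faiss.bytes file
# that FAISS.deserialize_from_bytes() can later load (placeholder documents).
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS

embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    encode_kwargs={"normalize_embeddings": True},
)
db = FAISS.from_texts(["name: Example Layer\nurl: https://example.com"], embeddings)
with open("example.faiss.bytes", "wb") as outfile:
    outfile.write(db.serialize_to_bytes())
```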
bumpver.toml ADDED
@@ -0,0 +1,16 @@
+ [bumpver]
+ current_version = "0.0.1"
+ version_pattern = "MAJOR.MINOR.PATCH"
+ commit_message = "bump version {old_version} -> {new_version}"
+ tag_message = "{new_version}"
+ tag_scope = "default"
+ pre_commit_hook = ""
+ post_commit_hook = ""
+ commit = true
+ tag = true
+ push = true
+
+ [bumpver.file_patterns]
+ "bumpver.toml" = [
+     'current_version = "{version}"',
+ ]
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ anthropic==0.7.5
+ faiss-cpu==1.7.4
+ huggingface-hub==0.19.4
+ langchain==0.0.341
+ langsmith==0.0.66
+ openai==1.3.5
+ pydantic==2.5.2
+ PyYAML==6.0.1
+ sentence-transformers==2.2.2
+ streamlit==1.28.2
+ torch==2.1.1 -f https://download.pytorch.org/whl/cpu/torch_stable.html