vSiddi
committed on
Commit
·
8e29341
1
Parent(s):
b14917a
fix files
Browse files- .gitignore +4 -0
- LICENSE +201 -0
- app-content.json +17 -0
- app.py +79 -0
- modules/app_constants.py +56 -0
- modules/app_logger.py +31 -0
- modules/app_page_definitions.py +56 -0
- modules/app_prompt.py +64 -0
- modules/app_researcher.py +159 -0
- modules/app_st_session_utils.py +109 -0
- modules/app_to_vectorstore.py +102 -0
- modules/common_utils.py +118 -0
- modules/database_utils.py +75 -0
- modules/file_utils.py +178 -0
- modules/message_store.py +23 -0
- modules/nav_about.py +121 -0
- modules/nav_file_manager.py +136 -0
- modules/nav_query_docs.py +62 -0
- modules/nav_researcher.py +77 -0
- modules/nav_summarizer.py +113 -0
- pyvenv.cfg +5 -0
- requirements.txt +240 -0
- start_model_server.sh +66 -0
- start_web_ui.sh +31 -0
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.pyc
|
2 |
+
workspace/*
|
3 |
+
*.bak
|
4 |
+
.DS_Store
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
app-content.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"name": "Digital Identity Guidelines",
|
4 |
+
"url": "https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-63-3.pdf",
|
5 |
+
"content_type": "Standards"
|
6 |
+
},
|
7 |
+
{
|
8 |
+
"name": "OWASP Top 10 for LLM Applications",
|
9 |
+
"url": "https://owasp.org/www-project-top-10-for-large-language-model-applications/assets/PDF/OWASP-Top-10-for-LLMs-2023-v1_0_1.pdf",
|
10 |
+
"content_type": "References"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"name": "UAE NESA Regulation",
|
14 |
+
"url": "https://u.ae/-/media/guidelines/Guidelines-2020/UAE-IA-Regulation-v11-1.pdf",
|
15 |
+
"content_type": "Standards"
|
16 |
+
}
|
17 |
+
]
|
app.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu
|
3 |
+
# Import your page modules
|
4 |
+
from modules import nav_about, nav_query_docs, nav_researcher, nav_summarizer, nav_file_manager
|
5 |
+
from modules import app_constants, app_logger, common_utils
|
6 |
+
from modules.message_store import MessageStore
|
7 |
+
|
8 |
+
app_logger = app_logger.app_logger
|
9 |
+
WORKSPACE_DIRECTORY = app_constants.WORKSPACE_DIRECTORY
|
10 |
+
|
11 |
+
# Page configuration
|
12 |
+
st.set_page_config(page_title="ZySec AI", page_icon=":sparkles:", layout="wide")
|
13 |
+
|
14 |
+
# Initialize MessageStore in the session state
|
15 |
+
if 'message_store' not in st.session_state:
|
16 |
+
st.session_state['message_store'] = MessageStore()
|
17 |
+
def request_username():
    """Render the welcome screen and capture the user's display name.

    Shows a title, a text input pre-filled with "Security Ninja" and a
    Submit button. When the button is pressed with a non-blank name, the
    name is stored in ``st.session_state['username']``.

    Returns:
        bool: True if a username was submitted and stored, otherwise False.
    """
    st.title("Welcome to ZySec AI")
    app_logger.info("App started")
    username = st.text_input(
        "How do you want me to call you?",
        value="Security Ninja",
        placeholder="Enter your name",
    )
    submit_button = st.button('Submit')

    # Strip first: a whitespace-only entry is truthy and would otherwise be
    # accepted and stored as a blank display name.
    cleaned_name = username.strip() if username else ""

    if submit_button and cleaned_name:
        st.session_state['username'] = cleaned_name
        return True  # Indicates that a username was submitted
    return False
|
27 |
+
|
28 |
+
def main():
    """Entry point of the Streamlit app: gate on username, then route pages.

    Creates the initial folders, makes sure ``st.session_state['messages']``
    exists, asks for a username when none is stored yet, and otherwise
    renders the sidebar menu and dispatches to the selected page module.
    Any exception raised while rendering a page is logged with its traceback
    and reported to the user through ``st.error``.
    """
    common_utils.setup_initial_folders()
    if 'messages' not in st.session_state:
        st.session_state['messages'] = []

    # Without a username only the welcome form is rendered; rerun once the
    # name has been stored so the full UI shows up immediately.
    if 'username' not in st.session_state or not st.session_state['username']:
        if request_username():
            st.rerun()
        return

    # Sidebar navigation
    with st.sidebar:
        selected = option_menu(
            "ZySec AI",
            ["Private AI", "Playbooks", "Standards", "Policies", "Researcher", "Summarizer","Files", "About"],
            icons=["shield-lock", "book-half", "file-earmark-ruled", "journal-bookmark", "search", "file-text","files", "info-circle"],
            default_index=0,
            menu_icon="cast",
            styles={}
        )
        # NOTE(review): indentation was lost in the source view; the divider
        # is assumed to belong to the sidebar, right under the menu - confirm.
        st.markdown("---")

    try:
        message_store = st.session_state['message_store']

        # Menu label -> zero-argument renderer. Lambdas keep the page call
        # lazy so only the selected page is executed.
        page_renderers = {
            "Private AI": lambda: nav_query_docs.app(message_store,current_page="nav_private_ai"),
            "Playbooks": lambda: nav_query_docs.app(message_store,current_page="nav_playbooks",use_retrieval_chain=True),
            "Standards": lambda: nav_query_docs.app(message_store,current_page="nav_standards",use_retrieval_chain=True),
            "Policies": lambda: nav_query_docs.app(message_store,current_page="nav_policies",use_retrieval_chain=True),
            "Researcher": lambda: nav_researcher.app(message_store),
            "Summarizer": lambda: nav_summarizer.app(),
            "Files": lambda: nav_file_manager.app(),
            "About": lambda: nav_about.app(),
        }

        render_page = page_renderers.get(selected)
        if render_page is not None:
            render_page()

    except Exception as e:
        # Top-level boundary: keep the app alive, but record the traceback so
        # the failure can be diagnosed later instead of only flashing in the UI.
        app_logger.exception("Unhandled error while rendering page %r", selected)
        st.error(f"Looks like there's a gap here; maybe we forgot to add some data!: {e}")
|
77 |
+
|
78 |
+
if __name__ == "__main__":
|
79 |
+
main()
|
modules/app_constants.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from langchain_community.document_loaders import (CSVLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader,
|
3 |
+
UnstructuredFileLoader, UnstructuredMarkdownLoader, UnstructuredHTMLLoader, JSONLoader)
|
4 |
+
from chromadb.config import Settings
|
5 |
+
|
6 |
+
from modules import app_logger
|
7 |
+
|
8 |
+
app_logger = app_logger.app_logger
|
9 |
+
# Use shared_variable in this module
|
10 |
+
openai_api_key = os.environ.get("OPENAI_API_KEY", "NONE")
|
11 |
+
|
12 |
+
# Set default values if environment variables are not found
|
13 |
+
#mongodb_uri = os.environ.get("MONGODB_URI", "mongodb://localhost:27017")
|
14 |
+
local_model_uri = os.environ.get("LOCAL_OPENAI_URI", "http://localhost:8000/v1")
|
15 |
+
#local_model_uri = os.environ.get("LOCAL_OPENAI_URI", None)
|
16 |
+
DOCUMENT_MAP = {
|
17 |
+
".html": UnstructuredHTMLLoader,
|
18 |
+
".txt": TextLoader,
|
19 |
+
".md": UnstructuredMarkdownLoader,
|
20 |
+
".py": TextLoader,
|
21 |
+
".json": JSONLoader,
|
22 |
+
".jsonl": JSONLoader,
|
23 |
+
".pdf": UnstructuredFileLoader,
|
24 |
+
".csv": CSVLoader,
|
25 |
+
".xls": UnstructuredExcelLoader,
|
26 |
+
".xlsx": UnstructuredExcelLoader,
|
27 |
+
".docx": Docx2txtLoader,
|
28 |
+
".doc": Docx2txtLoader,
|
29 |
+
}
|
30 |
+
MODELS_PATH = "./models"
|
31 |
+
EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
|
32 |
+
MODEL_NAME = 'gpt-3.5-turbo'
|
33 |
+
# Constants
|
34 |
+
WORKSPACE_DIRECTORY = './workspace/'
|
35 |
+
|
36 |
+
|
37 |
+
CHROMA_SETTINGS = Settings(
|
38 |
+
anonymized_telemetry=False,
|
39 |
+
is_persistent=True,
|
40 |
+
)
|
41 |
+
INGEST_THREADS = os.cpu_count() or 8
|
42 |
+
|
43 |
+
CHUNK_SIZE = 880
|
44 |
+
CHUNK_OVERLAP = 200
|
45 |
+
PROCESSED_DOCS = 'index_processed.log'
|
46 |
+
SEARCH_COUNT = 5
|
47 |
+
MESSAGE_HISTORY = 4
|
48 |
+
RAG_K = 3
|
49 |
+
RAG_TECHNIQUE = 'refine'
|
50 |
+
SUMMARIZER_BATCH = 3
|
51 |
+
MAX_FILE_SIZE = 10 #not implement
|
52 |
+
LOCAL_PERSISTANT_DB = WORKSPACE_DIRECTORY + "db/"
|
53 |
+
CONTENT_TYPE = ["Policies", "Playbooks", "Standards", "Reference Docs"]
|
54 |
+
SYSTEM_CONTENT_DATA = "app-content.json"
|
55 |
+
SYSTEM_DEPLOYMENT_MODE = 0 #private-0, openai-1, demo-2
|
56 |
+
ZYSEC_DEMO = "http://zysec.is-a-geek.com:8000/v1" #not enabled yet
|
modules/app_logger.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import logging
|
3 |
+
|
4 |
+
|
5 |
+
def setup_logger():
    """Create (or fetch) the application logger named ``'AppLogger'``.

    The root logger is configured through ``logging.basicConfig`` to write
    ERROR-and-above records to ``app.log``. The ``'AppLogger'`` logger gets
    its own file handler (``app.log``) and a console handler, both using a
    formatter that includes the logger name.

    The function is idempotent: calling it again returns the same logger
    without attaching a second pair of handlers.

    Returns:
        logging.Logger: the configured ``'AppLogger'`` logger.
    """
    # Format for our loglines
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                                  datefmt='%Y-%m-%d %H:%M:%S')

    # Setup basic configuration for logging (no-op if the root logger already
    # has handlers).
    logging.basicConfig(filename='app.log',
                        level=logging.ERROR,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    # Create a logger
    logger = logging.getLogger('AppLogger')

    # getLogger returns the same object for the same name, so a repeated call
    # would stack extra handlers and emit every record multiple times.
    if logger.handlers:
        return logger

    # Create handlers (log to file and console)
    file_handler = logging.FileHandler('app.log')
    file_handler.setFormatter(formatter)

    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)

    # Add handlers to the logger
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    # The root logger also writes to app.log; without this each record would
    # land in the file twice (once per handler, in two different formats).
    # The effective level is still inherited from the root logger.
    logger.propagate = False

    return logger
|
30 |
+
|
31 |
+
app_logger = setup_logger()
|
modules/app_page_definitions.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#app_page_definitions.py
|
2 |
+
# Per-page UI text and LLM persona, keyed by page identifier.
# Each entry carries a "title" and "caption"; chat-style pages additionally
# carry a "greeting" and a "system_role" prompt, and some pages list
# lower-case "content" labels.
PAGE_CONFIG = {
    "nav_private_ai": {
        "title": "Private AI",
        "caption": "🔒 Delve into AI-driven cybersecurity insights and strategies.",
        "greeting": "Greetings! I'm ZySec, your dedicated AI assistant in Cyber Security.",
        "system_role": "You are ZySec, an AI Assistant specialized in CyberSecurity. You are developed by the ZySec AI team, provide expert cybersecurity insights and advice. While responding, if you are not clear, ask follow up question and focus on delivering accurate information."
    },
    "nav_standards": {
        "title": "Standards Discovery",
        "caption": "📚 Navigate through industry standards and gain valuable insights.",
        "greeting": "Welcome to Standards Discovery! Let's analyze documents for streamlined insights.",
        "system_role": "As ZySec in Standards Assistance, developed by the ZySec AI team, guide users through complex standards, focusing on cybersecurity nuances. At any point while responding, if you are not clear ask follow up question and focus on delivering accurate information.",
        "content": ["standards","standard","framework","regulatory"]
    },
    "nav_playbooks": {
        "title": "Playbooks Deep-Dive",
        "caption": "📖 Uncover insights within playbooks for informed decision-making.",
        "greeting": "Prepared to explore your playbook? Upload your document to begin.",
        "system_role": "As ZySec, developed by the ZySec AI team, specialize in dissecting playbooks and documents for precise answers and insights. Structure responses clearly, utilizing bullet points to highlight key insights.",
        "content": ["procedures", "playbooks","others","breaches","reference docs"]
    },
    "nav_researcher": {
        "title": "Research Expert",
        "caption": "🌐 Conduct in-depth research and gather information from the web.",
        "greeting": "Hi, I'm ZySec, ready to assist with thorough Internet-based research.",
        "system_role": "As ZySec, embodying the role of a Research Assistant and developed by the ZySec AI team, provide in-depth research support and insights."
    },
    "nav_summarize": {
        "title": "Summarization",
        "caption": "✍️ Transform extensive content into concise summaries effortlessly.",
        "greeting": "Hey there! ZySec here, your expert in content summarization.",
        "system_role": "As ZySec, developed by the ZySec AI team, focus on distilling content into clear, succinct summaries. Organize information in a structured manner."
    },
    "nav_policies": {
        "title": "Policy Expert",
        "caption": "📃 Explore and understand complex policies with AI assistance.",
        "greeting": "Welcome to Policy Analysis! I'm ZySec, here to guide you through policy intricacies.",
        "system_role": "As ZySec, specializing in policy analysis and advice, and developed by the ZySec AI team, use AI to clarify and explain policies.",
        "content": ["policies","policy","guidelines"]
    },
    "nav_file_manager": {
        "title": "File Manager",
        "caption": "📃 Explore the content in your application, enable to maintain focus of ZySec."
    },
    "nav_about": {
        "title": "System Controls",
        "caption": "⚙️ Manage and control system settings including AI model configurations."
    },
    # Generic entry for pages without a dedicated configuration.
    "default": {
        "title": "Welcome to ZySec AI",
        "caption": "🌟 Navigate the world of Cyber Security with AI-driven insights.",
        "greeting": "How can I assist you with Cyber Security today?",
        "system_role": "As ZySec, developed by the ZySec AI team, offer specific insights and guidance on security-related queries."
    }
}
|
modules/app_prompt.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app_combined_prompt.py
|
2 |
+
import modules.app_constants as app_constants # Ensure this is correctly referenced
|
3 |
+
from langchain_openai import ChatOpenAI
|
4 |
+
from langchain.chains import RetrievalQAWithSourcesChain
|
5 |
+
from openai import OpenAI
|
6 |
+
from modules import app_logger, common_utils, app_st_session_utils
|
7 |
+
|
8 |
+
# Use the logger from app_config
|
9 |
+
app_logger = app_logger.app_logger
|
10 |
+
|
11 |
+
# Define a function to query the language model
|
12 |
+
def query_llm(prompt, page="nav_private_ai", retriever=None, message_store=None, use_retrieval_chain=False, last_page=None, username=""):
    """Send ``prompt`` to the language model and return the formatted reply.

    Args:
        prompt: The user's question.
        page: Identifier of the page issuing the query; passed to the
            ``common_utils`` helpers that build the message list.
        retriever: Retriever handed to ``RetrievalQAWithSourcesChain``; only
            used when ``use_retrieval_chain`` is True.
        message_store: Message store passed to the ``common_utils`` helpers.
        use_retrieval_chain: When True, answer through a
            ``RetrievalQAWithSourcesChain``; otherwise call the chat
            completions endpoint directly.
        last_page: Page of the previous query; when it differs from ``page``
            the system role is refreshed via ``common_utils.get_system_role``.
        username: Accepted for interface compatibility; not used here.

    Returns:
        The value returned by ``app_st_session_utils.format_response`` for
        the model's answer (with a trailing "Source: ..." paragraph when the
        retrieval chain reports sources), or an error message string if
        anything raised.
    """
    try:
        # Choose the language model client based on the use_retrieval_chain flag
        if use_retrieval_chain:
            app_logger.info("Using ChatOpenAI with RetrievalQAWithSourcesChain")
            llm = ChatOpenAI(
                model_name=app_constants.MODEL_NAME,
                openai_api_key=app_constants.openai_api_key,
                base_url=app_constants.local_model_uri,
                streaming=True
            )
            qa = RetrievalQAWithSourcesChain.from_chain_type(
                llm=llm,
                chain_type=app_constants.RAG_TECHNIQUE,
                retriever=retriever,
                return_source_documents=False
            )
        else:
            app_logger.info("Using direct OpenAI API call")
            llm = OpenAI(
                base_url=app_constants.local_model_uri,
                api_key=app_constants.openai_api_key
            )

        # Update page messages if there's a change in the page
        if last_page != page:
            app_logger.info(f"Updating messages for new page: {page}")
            common_utils.get_system_role(page, message_store)

        # Construct messages to send to the LLM, excluding timestamps
        messages_to_send = common_utils.construct_messages_to_send(page, message_store, prompt)
        app_logger.debug(messages_to_send)

        # Send the request and pull the answer (and sources) out of the reply
        if use_retrieval_chain:
            response = qa.invoke(prompt)
            raw_msg = response.get('answer')
            # 'sources' may be present but None; treat that like "no sources"
            # instead of failing on None.strip().
            source_info = (response.get('sources') or '').strip()
        else:
            response = llm.chat.completions.create(
                model=app_constants.MODEL_NAME,
                messages=messages_to_send
            )
            raw_msg = response.choices[0].message.content
            source_info = ''

        if source_info:
            # Separate the citation from the answer; previously the label was
            # glued directly onto the last character of the answer text.
            raw_msg = f"{raw_msg or ''}\n\nSource: {source_info}"

        formatted_msg = app_st_session_utils.format_response(raw_msg)

        return formatted_msg

    except Exception as e:
        error_message = f"An error occurred while querying the language model: {e}"
        app_logger.error(error_message)
        return error_message
|
modules/app_researcher.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
import html2text
|
4 |
+
import re
|
5 |
+
import os
|
6 |
+
from modules import app_constants, file_utils, app_logger
|
7 |
+
import json
|
8 |
+
from langchain_openai import ChatOpenAI
|
9 |
+
from langchain.schema import HumanMessage, SystemMessage
|
10 |
+
import spacy
|
11 |
+
from duckduckgo_search import DDGS
|
12 |
+
nlp = spacy.load("en_core_web_sm")
|
13 |
+
|
14 |
+
# Use the logger from app_config
|
15 |
+
app_logger = app_logger.app_logger
|
16 |
+
|
17 |
+
TMP_DIRECTORY = app_constants.WORKSPACE_DIRECTORY + 'tmp'
|
18 |
+
DEFAULT_SEARCH_COUNT = app_constants.SEARCH_COUNT
|
19 |
+
|
20 |
+
def download_and_clean(url, timeout=15):
    """Download a web page and reduce it to a single line of plain text.

    The page is fetched with a browser-like User-Agent, ``script``, ``style``,
    ``img`` and ``a`` elements are removed, the remaining text is passed
    through ``html2text`` and then stripped of every character that is not a
    word character, whitespace, ``<``, ``>``, ``/`` or ``.``. Runs of
    whitespace are collapsed to one space.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host.

    Returns:
        The cleaned text, or None if the request failed (connection error,
        timeout, or non-success HTTP status); the failure is logged.
    """
    try:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0"}
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')
        # Drop elements that carry no readable body text.
        for tag in soup(["script", "style", "img", "a"]):
            tag.extract()

        body_text = soup.get_text()
        h = html2text.HTML2Text()
        h.ignore_links = True
        h.ignore_images = True
        h.ignore_emphasis = True
        h.ignore_tables = True
        clean_text = h.handle(body_text)
        clean_text = re.sub(r'[^\w\s\n<>/\.]+', '', clean_text)  # Include '.' in the allowed characters
        clean_text = re.sub(r'\s+', ' ', clean_text).strip()
        return clean_text

    except requests.exceptions.RequestException as e:
        # Timeouts are RequestException subclasses, so they end up here too.
        app_logger.error(f"Error while downloading and cleaning URL {url}: {str(e)}")
        return None
|
44 |
+
|
45 |
+
def save_notes_to_file(topic, note, source_url, min_words=120, max_words=240):
    """Split a note into sentence-aligned blocks and append them as JSON lines.

    Sentences (as segmented by the module-level spaCy pipeline) are
    accumulated until adding the next one would exceed ``max_words``; the
    accumulated block is then written if it has at least ``min_words`` words.

    Args:
        topic: Research topic; its sanitized form names the ``.jsonl`` file.
        note: Text to split and store.
        source_url: URL recorded alongside every block.
        min_words: Smallest block (in words) that is worth writing.
        max_words: Largest block (in words) allowed.

    Returns:
        Path of the JSONL file the blocks were appended to.
    """
    doc = nlp(note)

    os.makedirs(TMP_DIRECTORY, exist_ok=True)
    sanitized_filename = file_utils.sanitize_filename(topic) + '.jsonl'
    full_path = os.path.join(TMP_DIRECTORY, sanitized_filename)

    text_block = ""
    word_count = 0

    with open(full_path, 'a', encoding='utf-8') as file:
        for sent in doc.sents:
            sentence_word_count = len(sent.text.split())
            if word_count + sentence_word_count > max_words:
                # The block is full: keep it only if it is long enough.
                if word_count >= min_words:
                    data = {"note": text_block, "source_url": source_url}
                    file.write(json.dumps(data) + '\n')
                # Start the next block with the sentence that did not fit.
                text_block = sent.text
                word_count = sentence_word_count
            else:
                text_block = f"{text_block} {sent.text}" if text_block else sent.text
                word_count += sentence_word_count

        # Flush the trailing block using the same minimum as every other
        # block. The previous threshold (300) was above max_words, so the
        # tail of nearly every document was discarded.
        if word_count >= min_words:
            data = {"note": text_block, "source_url": source_url}
            file.write(json.dumps(data) + '\n')

    app_logger.info(f"Notes saved to file {full_path}")
    return full_path
|
91 |
+
|
92 |
+
|
93 |
+
def url_list_downloader(url_list, topic):
    """Download each URL and append its cleaned text to the topic's notes file.

    A failure on one URL is logged and does not stop the remaining ones.

    Returns:
        The notes file path returned by the last successful save, or None
        when nothing was saved.
    """
    latest_notes_path = None
    for page_url in url_list:
        try:
            page_text = download_and_clean(page_url)
            if not page_text:
                continue
            latest_notes_path = save_notes_to_file(topic, page_text, page_url)
        except Exception as e:
            app_logger.error(f"Error during processing for URL {page_url}: {e}")
    return latest_notes_path
|
103 |
+
|
104 |
+
def search_term_ddg(topic,count=DEFAULT_SEARCH_COUNT):
    """Ask the LLM for related keywords and collect DuckDuckGo result URLs.

    Args:
        topic: Subject to research.
        count: Maximum number of results requested per keyword.

    Returns:
        A sorted list of unique result URLs, excluding links to PDF and
        Office documents. An empty list is returned when anything fails.
    """
    try:
        llm = ChatOpenAI(
            model_name=app_constants.MODEL_NAME,
            openai_api_key=app_constants.openai_api_key,
            base_url=app_constants.local_model_uri,
            streaming=True
        )
        prompt = [
            SystemMessage(content="Generate 5 plain keywords in comma separated based on user input. For example ['cat','bat','monkey','donkey','eagel']"),
            HumanMessage(content=topic),
        ]
        response = llm.invoke(prompt)
        if not hasattr(response, 'content'):
            raise ValueError("Invalid response format")

        # The prompt's own example is a bracketed, quoted list, so answers
        # often come back in that shape: strip the decoration and drop
        # keywords that end up empty.
        search_keywords = [
            keyword.strip(" \t\r\n[]'\"") for keyword in response.content.split(',')
        ]
        # Limit the keywords to a maximum of 8.
        search_keywords = [keyword for keyword in search_keywords if keyword][:8]

        skipped_extensions = ('.pdf', '.ppt', '.pptx', '.doc', '.docx')
        urls = set()
        with DDGS(timeout=3) as ddgs:
            for term in search_keywords:
                results = ddgs.text(f"{topic} {term}", max_results=count)
                for result in results or []:
                    url = result['href']
                    # Compare case-insensitively so "REPORT.PDF" is skipped too.
                    if not url.lower().endswith(skipped_extensions):
                        urls.add(url)
        return sorted(urls)

    except Exception as e:
        app_logger.error(f"An error occurred while searching for topic {topic}: {e}")
        return []
|
144 |
+
|
145 |
+
def explore_url_on_internet(topic, count=DEFAULT_SEARCH_COUNT):
    """Research a topic online unless a notes file for it already exists.

    Args:
        topic: Subject to research; its sanitized form names the notes file.
        count: Maximum number of search results requested per keyword.

    Returns:
        Path of the notes file, or None when no page could be saved.
    """
    app_logger.info(f"Starting research on topic {topic}")
    sanitized_filename = file_utils.sanitize_filename(topic)+'.jsonl'
    full_path = os.path.join(TMP_DIRECTORY, sanitized_filename)

    if os.path.exists(full_path):
        # The path is interpolated into the message: passing it as an extra
        # positional argument without a placeholder makes logging fail to
        # format the record.
        app_logger.info(f"File already exists skipping download: {full_path}")
        note_file = full_path
    else:
        url_list = search_term_ddg(topic,count)
        note_file = url_list_downloader(url_list, topic)
    app_logger.info(f"Research on Internet completed for {topic}, file: {note_file}")
    return note_file
|
modules/app_st_session_utils.py
ADDED
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from modules import app_logger, app_prompt
|
3 |
+
import streamlit.components.v1 as components
|
4 |
+
from modules import database_utils,common_utils,app_page_definitions
|
5 |
+
import datetime
|
6 |
+
|
7 |
+
# Use the shared logger exposed by modules.app_logger
|
8 |
+
app_logger = app_logger.app_logger
|
9 |
+
def initialize_session_state(key, default_value):
    """
    Give a session state variable its default value on first use.

    Args:
        key (str): Name of the session state variable.
        default_value (Any): Value stored when the variable is not set yet.
    """
    if key in st.session_state:
        return
    st.session_state[key] = default_value
|
19 |
+
|
20 |
+
def update_session_state(key, value):
    """Store ``value`` under ``key`` in the Streamlit session state."""
    session = st.session_state
    session[key] = value
|
23 |
+
|
24 |
+
def setup_page_session_state(current_page):
    """Seed the session variables a page relies on, keeping existing values."""
    defaults = (
        ('current_page', current_page),
        ('page_loaded', False),
        ('message_store', app_prompt.MessageStore()),
    )
    for state_key, initial_value in defaults:
        initialize_session_state(state_key, initial_value)
|
28 |
+
|
29 |
+
def log_session_info(message):
    """Write a session-related message to the application log at INFO level."""
    try:
        app_logger.info(message)
    except Exception as log_failure:
        app_logger.error(f"Logging error: {log_failure}")
|
35 |
+
|
36 |
+
def manage_message_history(current_page):
    """Swap the visible chat history when the user moves to another page.

    The messages of the page being left are saved into the message store and
    the stored history of ``current_page`` becomes the active message list.

    Args:
        current_page: Identifier of the page that is now displayed.
    """
    try:
        message_store = st.session_state['message_store']
        previous_page = st.session_state['current_page']
        if previous_page != current_page:
            # "messages" may not exist yet when no message was ever added;
            # treat that as an empty history instead of aborting the switch.
            message_store.set_history(previous_page, st.session_state.get("messages", []))
            st.session_state["messages"] = message_store.get_history(current_page)
            st.session_state['current_page'] = current_page
            log_session_info(f"Updated message history for page: {current_page}")
    except Exception as e:
        # Failures are reported at ERROR level so they are not lost among
        # routine INFO records.
        app_logger.error(f"Error managing message history: {e}")
|
47 |
+
|
48 |
+
def display_chat_message(role, content):
    """Render a chat bubble for a user or assistant message.

    Any other role is not rendered; it is only reported in the log.
    """
    if role not in ['user', 'assistant']:
        log_session_info(f"Invalid role '{role}' in display_chat_message")
        return
    bubble = st.chat_message(role)
    bubble.write(content)
|
54 |
+
|
55 |
+
def reset_session_state():
    """Remove every variable from the Streamlit session state."""
    # Snapshot the keys first: the mapping must not change size while it is
    # being iterated.
    existing_keys = list(st.session_state.keys())
    for state_key in existing_keys:
        del st.session_state[state_key]
    # Reinitialize variables if necessary here
|
60 |
+
|
61 |
+
def get_session_data(key, default=None):
    """Return the session state value for ``key``, or ``default`` if unset."""
    session = st.session_state
    return session.get(key, default)
|
64 |
+
|
65 |
+
def reload_page():
    """Reload the browser page by injecting a zero-size script component."""
    js = "window.location.reload();"
    markup = f"<script>{js}</script>"
    components.html(markup, height=0, width=0)
|
68 |
+
|
69 |
+
def initialize_or_retrieve_db(db_path):
    """
    Return the Chroma database for ``db_path``, creating it when needed.

    The database is cached in the session state and is rebuilt only when no
    database is cached yet or when the requested path differs from the
    cached one.

    Args:
        db_path (str): The file path to the database.

    Returns:
        The initialized or cached database object, or None when
        initialization fails.
    """
    # The path is interpolated into the message: a bare extra positional
    # argument with no placeholder makes logging fail to format the record.
    app_logger.info(f"initializing db {db_path}")
    cached_path = st.session_state.get('db_path')
    if 'db_retriever' not in st.session_state or cached_path != db_path:
        # Database not initialized or path has changed
        db_retriever = database_utils.initialize_chroma_db(db_path)
        if db_retriever is None:
            app_logger.error(f"Failed to initialize database at {db_path}")
            return None
        st.session_state['db_retriever'] = db_retriever
        st.session_state['db_path'] = db_path
        app_logger.info(f"Database initialized at {db_path}")
    return st.session_state['db_retriever']
|
92 |
+
|
93 |
+
# Function to format the response
|
94 |
+
def format_response(response):
    """Convert CRLF and lone CR line endings to LF and trim outer whitespace."""
    normalized = response.replace('\r\n', '\n')
    normalized = normalized.replace('\r', '\n')
    return normalized.strip()
|
96 |
+
|
97 |
+
# Add a message to the session state
|
98 |
+
def add_message_to_session(role, content, add_to_history=True):
    """Record a chat message in the session and in the per-page message store.

    Args:
        role: Author of the message; only "user" and "assistant" are stored.
        content: Message text.
        add_to_history: When False the message is not stored anywhere.
    """
    timestamp = datetime.datetime.now()
    message = {"role": role, "content": content, "timestamp": timestamp}
    if "messages" not in st.session_state:
        st.session_state["messages"] = []
    if add_to_history and role in ["user", "assistant"]:
        messages = st.session_state["messages"]
        messages.append(message)
        # Update message_store with the new message
        if 'message_store' in st.session_state:
            current_page = st.session_state.get('current_page', 'default_page')
            message_store = st.session_state['message_store']
            # After a page switch the session list and the stored history can
            # be the very same list object; appending to both would then
            # record the message twice.
            if message_store.get_history(current_page) is not messages:
                message_store.update_message(current_page, 'history', message)
|
109 |
+
|
modules/app_to_vectorstore.py
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
#app_to_vectorstore.py
|
3 |
+
|
4 |
+
import os
|
5 |
+
from langchain_community.vectorstores import Chroma
|
6 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
7 |
+
from langchain.text_splitter import CharacterTextSplitter
|
8 |
+
from modules import common_utils,file_utils
|
9 |
+
from modules import app_logger
|
10 |
+
# Assuming all necessary loader classes are imported
|
11 |
+
|
12 |
+
from modules import app_constants
|
13 |
+
|
14 |
+
app_logger = app_logger.app_logger
|
15 |
+
|
16 |
+
TEMP_DIR = app_constants.WORKSPACE_DIRECTORY + "tmp"
|
17 |
+
DB_DIR = app_constants.WORKSPACE_DIRECTORY + "db"
|
18 |
+
|
19 |
+
processed_files_record = os.path.join(app_constants.WORKSPACE_DIRECTORY, app_constants.PROCESSED_DOCS)
|
20 |
+
|
21 |
+
def load_documents_from_jsonl(file_path, loader_class):
    """Load every record of a JSON Lines file with the given loader class.

    Returns:
        Whatever ``loader.load()`` returns, or None when loading fails (the
        error is logged).
    """
    loader_options = {"json_lines": True, "text_content": False, "jq_schema": '.'}
    try:
        return loader_class(file_path, **loader_options).load()
    except Exception as e:
        app_logger.error(f"Error loading documents from JSONL file {file_path}: {e}")
        return None
|
28 |
+
|
29 |
+
def update_processed_files_record(file_md5,module, file_path):
    """Append one ``md5,module,path`` line to the processed-files record.

    Failures are logged and not raised.
    """
    record_line = f"{file_md5},{module},{file_path}\n"
    try:
        # Append mode creates the record file when it does not exist yet.
        with open(processed_files_record, 'a') as record:
            record.write(record_line)
    except Exception as e:
        app_logger.error(f"Error updating processed files record: {e}")
|
35 |
+
|
36 |
+
def is_file_processed(file_md5):
    """Tell whether a file with this MD5 is listed in the processed record.

    Args:
        file_md5: Hex digest to look for in the first column of the record.

    Returns:
        True when a record line starts with ``file_md5``; False otherwise,
        including when the record file does not exist.
    """
    if not os.path.exists(processed_files_record):
        return False
    with open(processed_files_record, 'r') as file:
        for line in file:
            # partition() never raises, unlike unpacking split(',', 1), which
            # failed on blank lines and on lines without a comma.
            recorded_md5, _, _ = line.strip().partition(',')
            if recorded_md5 == file_md5:
                return True
    return False
|
44 |
+
|
45 |
+
def get_chroma_index(file_path, current_page="nav_playbooks", is_persistent=True):
    """Index a document into a Chroma store unless it was indexed before.

    Args:
        file_path: Document to load, split and embed.
        current_page: Page/module the document belongs to; names the
            persistent store and is written to the processed-files record.
        is_persistent: Store under the ``db`` directory when True, otherwise
            under ``tmp`` in a store named after the file.

    Returns:
        True when the document was indexed; False when it was skipped or
        indexing failed. The return value is always a bool.
    """
    app_logger.info(f"Starting get_chroma_index for {file_path}")
    file_md5 = file_utils.compute_md5(file_path)
    if file_md5 is None:
        # Without a digest the file cannot be tracked in the record.
        app_logger.error(f"Could not compute MD5 for {file_path}. Skipping.")
        return False
    if is_file_processed(file_md5):
        app_logger.info(f"File {file_path} has already been processed. Skipping.")
        return False

    _, file_extension = os.path.splitext(file_path)
    file_extension = file_extension.lower()
    loader_class = app_constants.DOCUMENT_MAP.get(file_extension, None)
    if not loader_class:
        app_logger.error(f"No suitable loader found for file type {file_extension}")
        # Was `return None, False`: a non-empty tuple is truthy, so callers
        # checking the status saw a success.
        return False

    storage_dir = DB_DIR if is_persistent else TEMP_DIR
    if is_persistent:
        base_filename = f"{current_page}_chroma_db"
    else:
        base_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}_chroma_db"
    sanitized_base_filename = file_utils.sanitize_filename(base_filename)
    chroma_persist_directory = os.path.join(storage_dir, sanitized_base_filename)

    try:
        if file_extension == '.jsonl':
            documents = load_documents_from_jsonl(file_path, loader_class)
        else:
            documents = loader_class(file_path).load()

        if not documents:
            app_logger.error(f"No documents loaded from {file_path}.")
            return False

        text_splitter = CharacterTextSplitter(
            chunk_size=app_constants.CHUNK_SIZE,
            chunk_overlap=app_constants.CHUNK_OVERLAP,
        )
        docs = text_splitter.split_documents(documents)
        if not docs:
            app_logger.error(f"No documents to process after splitting from {file_path}.")
            return False

        # Build the embedding model only once there is something to embed.
        embeddings = HuggingFaceEmbeddings(model_name=app_constants.EMBEDDING_MODEL_NAME)
        db = Chroma.from_documents(docs, embeddings, persist_directory=chroma_persist_directory, client_settings=app_constants.CHROMA_SETTINGS)
        db.persist()
        # Record the file only after the index is safely on disk, so a failed
        # persist does not mark the file as processed.
        update_processed_files_record(file_md5,current_page, file_path)
        app_logger.info("Created index and saved to disk")
    except Exception as e:
        app_logger.error(f"Error in get_chroma_index for {file_path}: {e}")
        return False
    app_logger.info("Completed get_chroma_index operation")
    return True
|
modules/common_utils.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from modules import app_constants,app_page_definitions
|
3 |
+
from modules import app_logger
|
4 |
+
# Use the shared logger exposed by modules.app_logger
|
5 |
+
app_logger = app_logger.app_logger
|
6 |
+
work_dir = app_constants.WORKSPACE_DIRECTORY
|
7 |
+
|
8 |
+
def get_system_role(page, message_store):
    """Store the page's configured system role in the message store.

    Pages without a configured "system_role" get a generic default message.
    """
    page_settings = app_page_definitions.PAGE_CONFIG.get(page, {})
    role_text = page_settings.get("system_role", "Default system role message")
    message_store.update_message(page, "system", role_text)
|
11 |
+
|
12 |
+
def get_page_greeting(page_key, username="", files_indexed=None):
    """Return a greeting message for a specific page.

    Args:
        page_key: Page identifier used to look up the configured greeting.
        username: When given, inserted after "Hello" in the greeting.
        files_indexed: Optional sequence of file names appended to the
            greeting as a numbered list. Defaults to None (no list) instead
            of a shared mutable ``[]``.

    Returns:
        The greeting text, or an error description if building it failed.
    """
    try:
        default_greeting = "Hello! How can I assist you today?"
        # Fetch the greeting from page configuration or use the default
        greeting = app_page_definitions.PAGE_CONFIG.get(page_key, {}).get("greeting", default_greeting)

        # Personalize greeting if username is provided
        if username:
            greeting = greeting.replace("Hello", f"Hello {username}")

        # Append the indexed files as a numbered list
        if files_indexed:
            files_list = "\n".join(
                f"{number}. {file}" for number, file in enumerate(files_indexed, start=1)
            )
            additional_message = f"I'm familiar with the following documents:\n{files_list}"
            greeting = f"{greeting}\n\n{additional_message}"

        return greeting
    except Exception as e:
        # Report the problem to the caller as text rather than raising
        return f"Error generating greeting message: {e}"
|
35 |
+
|
36 |
+
def setup_initial_folders():
    """Create the workspace sub-folders and an empty processed-files record.

    Creates ``docs``, ``db`` and ``tmp`` under the workspace directory and
    makes sure the processed-documents record file exists.
    """
    for folder_name in ("docs", "db", "tmp"):
        os.makedirs(os.path.join(work_dir, folder_name), exist_ok=True)

    # Join the path the same way the modules that read and write this record
    # do; plain string concatenation only worked when the workspace path
    # ended with a separator.
    processed_docs_path = os.path.join(work_dir, app_constants.PROCESSED_DOCS)
    if not os.path.exists(processed_docs_path):
        open(processed_docs_path, 'a').close()
|
46 |
+
|
47 |
+
def construct_messages_to_send(page, message_store, prompt):
    """
    Build the message list sent to the language model for one user prompt.

    The list holds, in order: the page's system message and greeting (each
    only when non-empty), up to the last four history messages, and finally
    the current prompt as a user message.

    Args:
        page (str): The current page identifier.
        message_store (MessageStore): Source of the stored messages and history.
        prompt (str): The current user prompt.

    Returns:
        List[Dict[str, str]]: Messages as ``{"role": ..., "content": ...}`` dicts.
    """
    system_text = message_store.get_message(page, "system")
    greeting_text = message_store.get_message(page, "greeting")

    outgoing = []
    for role, text in (("system", system_text), ("assistant", greeting_text)):
        if text:
            outgoing.append({"role": role, "content": text})

    # Last two user/assistant pairs; slicing copes with shorter histories.
    history = message_store.get_history(page)
    outgoing.extend(
        {"role": past["role"], "content": past["content"]} for past in history[-4:]
    )

    outgoing.append({"role": "user", "content": prompt})
    return outgoing
|
85 |
+
|
86 |
+
|
87 |
+
def get_content_mapping_to_module(content_type):
    """Return the page whose configured content types include ``content_type``.

    The comparison ignores case. The first matching page in PAGE_CONFIG wins;
    "nav_playbooks" is returned when no page matches.
    """
    wanted = content_type.lower()
    for page, config in app_page_definitions.PAGE_CONFIG.items():
        if 'content' not in config:
            continue
        known_types = {entry.lower() for entry in config['content']}
        if wanted in known_types:
            return page
    # Default return if no match is found
    return "nav_playbooks"
|
100 |
+
|
101 |
+
def read_processed_log():
    """Collect the file paths listed in the workspace's ``index_processed.log``.

    Each line is expected to look like ``md5,module,path``; lines without a
    comma are ignored.

    Returns:
        A set of the recorded paths. The set is empty when the log is
        missing, and may be partial when reading fails midway.
    """
    processed_paths = set()
    log_file_path = os.path.join(work_dir, 'index_processed.log')

    try:
        with open(log_file_path, 'r') as log_file:
            for line in log_file:
                # Split at most twice so a comma inside the path (the last
                # field) does not truncate it.
                parts = line.strip().split(',', 2)
                if len(parts) > 1:
                    processed_paths.add(parts[-1])
    except FileNotFoundError:
        app_logger.error(f"File not found: {log_file_path}")
    except Exception as e:
        app_logger.error(f"An error occurred while reading the log file: {e}")
    return processed_paths
|
modules/database_utils.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# database_utils.py
|
2 |
+
import os
|
3 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
4 |
+
from langchain_community.vectorstores import Chroma
|
5 |
+
from modules import app_constants, app_logger
|
6 |
+
|
7 |
+
app_logger = app_logger.app_logger
|
8 |
+
|
9 |
+
def initialize_chroma_db(file_path):
    """
    Open the Chroma database at ``file_path``, creating it when absent.

    :param file_path: Directory in which the Chroma database is persisted
    :return: The Chroma database object, or None when initialization fails
    """
    embeddings = HuggingFaceEmbeddings(model_name=app_constants.EMBEDDING_MODEL_NAME)

    try:
        if not os.path.exists(file_path):
            app_logger.info(f"Chroma database not found at {file_path}. Creating a new one.")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
        else:
            app_logger.info(f"Using existing Chroma database at {file_path}.")

        return Chroma(persist_directory=file_path, embedding_function=embeddings, client_settings=app_constants.CHROMA_SETTINGS)
    except Exception as e:
        app_logger.error(f"Failed to initialize Chroma database at {file_path}. Reason: {e}")
        return None
|
35 |
+
|
36 |
+
def get_chroma_db_files(directory):
    """List the entries of ``directory`` whose names end with 'chroma_db'."""
    matching = []
    for entry_name in os.listdir(directory):
        if entry_name.endswith('chroma_db'):
            matching.append(entry_name)
    return matching
|
39 |
+
|
40 |
+
def format_db_name(db_name):
    """Turn a store name such as 'nav_x_chroma_db' into a title like 'Nav X'."""
    spaced = db_name.replace('_', ' ')
    without_suffix = spaced.replace('chroma db', '')
    return without_suffix.title().strip()
|
43 |
+
|
44 |
+
def delete_doc_from_chroma_db(db_path, source_doc):
    """
    Delete every item whose ``source`` metadata equals ``source_doc``.

    :param db_path: Directory in which the Chroma database is persisted
    :param source_doc: The source document identifier to match
    """
    if not os.path.exists(db_path):
        app_logger.error(f"No Chroma database found at {db_path}.")
        return

    embeddings = HuggingFaceEmbeddings(model_name=app_constants.EMBEDDING_MODEL_NAME)
    db = Chroma(persist_directory=db_path, embedding_function=embeddings, client_settings=app_constants.CHROMA_SETTINGS)

    # A Chroma store cannot be iterated directly; query it with a metadata
    # filter and read the matching ids from the result.
    matches = db.get(where={"source": source_doc})
    ids_to_delete = matches.get("ids", [])

    if ids_to_delete:
        db.delete(ids=ids_to_delete)
        db.persist()
        # Normal outcomes are reported at INFO level, not ERROR.
        app_logger.info(f"Deleted {len(ids_to_delete)} items related to '{source_doc}'.")
    else:
        app_logger.info(f"No items found related to '{source_doc}'.")
|
modules/file_utils.py
ADDED
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#file utils.py
|
2 |
+
import os
|
3 |
+
from modules import app_constants, app_to_vectorstore,app_page_definitions,common_utils
|
4 |
+
from modules import app_logger
|
5 |
+
import json
|
6 |
+
import requests
|
7 |
+
import hashlib
|
8 |
+
import re, csv
|
9 |
+
|
10 |
+
# Use the shared logger exposed by modules.app_logger
|
11 |
+
app_logger = app_logger.app_logger
|
12 |
+
work_dir = app_constants.WORKSPACE_DIRECTORY
|
13 |
+
system_content_file = metadata_path=app_constants.SYSTEM_CONTENT_DATA
|
14 |
+
|
15 |
+
def download_file(url, timeout=30):
    """Download ``url`` into the workspace ``docs`` folder.

    The local file name is the sanitized last path segment of the URL.

    Args:
        url: Address of the file to download.
        timeout: Seconds to wait for the server before giving up; without it
            a stalled host would block the caller indefinitely.

    Returns:
        True when the file was saved, False on any failure (which is logged).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        sanitized_filename = sanitize_filename(url.split('/')[-1])
        sanitized_local_path = os.path.join(app_constants.WORKSPACE_DIRECTORY, "docs", sanitized_filename)
        with open(sanitized_local_path, 'wb') as f:
            f.write(response.content)
        app_logger.info(f"File downloaded successfully: {sanitized_local_path}")
        return True
    except Exception as e:
        app_logger.error(f"Failed to download file from {url}. Error: {e}")
        return False
|
28 |
+
|
29 |
+
def index_file(local_path, module):
    """Index a local file into the persistent vector store of ``module``.

    Args:
        local_path: Path of the file to index.
        module: Page/module name the file is indexed under.

    Returns:
        The status reported by ``get_chroma_index``, or False when indexing
        raised an exception.
    """
    try:
        status = app_to_vectorstore.get_chroma_index(local_path,module,True)
    except Exception as e:
        app_logger.error(f"Failed to index file. Error: {e}")
        # Previously `status` stayed unbound here and the function crashed.
        return False

    # The removed `db.persist()` referenced a name that was never defined in
    # this function and always raised; persisting is done inside
    # get_chroma_index.
    if status:
        app_logger.info(f"File indexed successfully: {local_path}")
    else:
        app_logger.info(f"File was not indexed: {local_path}")
    return status
|
38 |
+
|
39 |
+
def compute_md5(file_path):
    """Return the hex MD5 digest of a file, reading it in 4096-byte chunks.

    Returns None (after logging the error) when the file cannot be read.
    """
    digest = hashlib.md5()
    try:
        with open(file_path, "rb") as stream:
            while True:
                block = stream.read(4096)
                if block == b"":
                    break
                digest.update(block)
        return digest.hexdigest()
    except Exception as e:
        app_logger.error(f"Error computing MD5 for {file_path}: {e}")
        return None
|
49 |
+
|
50 |
+
def sanitize_filename(filename):
    """Derive a safe, lowercase file name from a name, path or URL.

    Only the last path component is kept. Spaces and every character other
    than word characters, '-', '_' and '.' become underscores. Results longer
    than 255 characters are shortened while keeping the extension.
    """
    max_length = 255  # Max length can be adjusted

    base_name = os.path.basename(filename)
    underscored = base_name.lower().replace(' ', '_')
    sanitized = re.sub(r'[^\w\-_\.]', '_', underscored)

    if len(sanitized) <= max_length:
        return sanitized

    # Too long: cut the stem so that stem + extension fits the limit.
    _, ext = os.path.splitext(sanitized)
    return sanitized[:max_length - len(ext)] + ext
|
70 |
+
|
71 |
+
def delete_files(work_dir=work_dir):
    """Delete every file and sub-directory below ``work_dir``.

    The tree is walked bottom-up so directories are emptied before they are
    removed. Individual failures are logged and skipped. Afterwards the
    entries for local uploads are removed from the system content file.
    """
    for parent, dir_names, file_names in os.walk(work_dir, topdown=False):
        for file_name in file_names:
            file_path = os.path.join(parent, file_name)
            try:
                os.unlink(file_path)
            except Exception as e:
                app_logger.error(f"Failed to delete {file_path}. Reason: {e}")
            else:
                app_logger.info(f"Deleted file: {file_path}")

        for dir_name in dir_names:
            dir_path = os.path.join(parent, dir_name)
            try:
                os.rmdir(dir_path)
            except Exception as e:
                app_logger.error(f"Failed to delete {dir_path}. Reason: {e}")
            else:
                app_logger.info(f"Deleted directory: {dir_path}")
    remove_local_uploads()
|
89 |
+
|
90 |
+
def save_uploaded_file(uploaded_file, uploads_path, sanitized_filename=None):
    """Write an uploaded file's buffer into ``uploads_path``.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``.
        uploads_path: Destination directory.
        sanitized_filename: File name to use; derived from the upload's own
            name when omitted.

    Returns:
        Path of the written file.
    """
    target_name = sanitized_filename
    if target_name is None:
        target_name = sanitize_filename(uploaded_file.name)
    file_path = os.path.join(uploads_path, target_name)

    with open(file_path, "wb") as destination:
        destination.write(uploaded_file.getbuffer())
    app_logger.info(f"File '{target_name}' uploaded to {uploads_path}")
    return file_path
|
99 |
+
|
100 |
+
def perform_file_operation(resource, operation):
    """Run a "download" or "learn" operation for one content resource.

    Args:
        resource: Mapping with "url", "content_type" and "name" entries.
        operation: "download" fetches the URL into the docs folder; "learn"
            indexes the corresponding local file. Anything else is logged as
            an error.
    """
    url = resource.get("url", "")
    content_type = resource.get("content_type", "")
    file_name = work_dir+"docs/" +sanitize_filename(url)

    if operation == "learn":
        module = common_utils.get_content_mapping_to_module(content_type)
        index_file(file_name, module)
        return

    if operation != "download":
        app_logger.error(f"Unknown operation: {operation}")
        return

    # Download: nothing to do for a resource without a URL.
    if not url:
        return
    if download_file(url):
        app_logger.info(f"File {resource['name']} downloaded successfully.")
    else:
        app_logger.error(f"Failed to download file {resource['name']}.")
|
118 |
+
|
119 |
+
|
120 |
+
def get_indexed_files_for_page(page_id):
    """List the base names of files recorded as indexed for ``page_id``.

    The processed-documents record is read as CSV rows of the form
    ``md5,page,path``; the page column is compared case-insensitively.

    Returns:
        A list of file names, or an empty list when the record cannot be
        read (the reason is logged).
    """
    try:
        filtered_files = []

        with open(os.path.join(work_dir, app_constants.PROCESSED_DOCS), mode='r', newline='', encoding='utf-8') as file:
            csv_reader = csv.reader(file)
            for row in csv_reader:
                # Second column is the page id, third column the file path.
                if len(row) > 2 and row[1].lower() == page_id.lower():
                    filtered_files.append(os.path.basename(row[2]))

        return filtered_files
    except Exception as e:
        # Keep the best-effort behaviour, but leave a trace of the failure
        # instead of swallowing it silently.
        app_logger.error(f"Failed to read indexed files for page {page_id}: {e}")
        return []
|
137 |
+
|
138 |
+
def update_json_file(data, file_path):
    """Overwrite ``file_path`` with ``data`` serialized as indented JSON."""
    serialized = json.dumps(data, indent=4)
    with open(file_path, "w") as target:
        target.write(serialized)
|
141 |
+
|
142 |
+
def load_json_data(file_path):
    """Read ``file_path`` and return its parsed JSON content."""
    with open(file_path, "r") as source:
        raw_text = source.read()
    return json.loads(raw_text)
|
145 |
+
|
146 |
+
def handle_content_update(uploaded_file=None, manual_name="", manual_url="", selected_content_type=""):
    """Register a new content entry in the system content file.

    An uploaded file is saved into the workspace ``docs`` folder and
    registered under its saved path; otherwise ``manual_url`` is registered
    as-is. In both cases the entry name is the sanitized file name.
    """
    system_content_file = app_constants.SYSTEM_CONTENT_DATA
    uploads_directory = os.path.join(work_dir, "docs")
    file_data = load_json_data(system_content_file)

    if uploaded_file:
        filename = sanitize_filename(uploaded_file.name)
        file_path = save_file(uploaded_file, filename, uploads_directory)
    else:
        filename = sanitize_filename(manual_url)
        file_path = manual_url

    file_data.append({"name": filename, "url": file_path, "content_type": selected_content_type})
    update_json_file(file_data, system_content_file)
|
161 |
+
|
162 |
+
def save_file(uploaded_file, filename, directory):
    """Write an uploaded file's bytes to ``directory/filename``.

    Args:
        uploaded_file: Object exposing ``getbuffer()`` that returns the bytes
            to store.
        filename: Name of the file to create inside ``directory``.
        directory: Target directory; created (with parents) when missing.

    Returns:
        str: Path of the written file.

    Raises:
        OSError: If the directory or the file cannot be created.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, filename)
    with open(file_path, "wb") as file:
        file.write(uploaded_file.getbuffer())
    return file_path
|
169 |
+
|
170 |
+
def remove_local_uploads(file_path=app_constants.SYSTEM_CONTENT_DATA):
    """Drop every entry whose ``url`` starts with ``./`` from a JSON list file.

    The file at ``file_path`` is read, filtered and rewritten in place with a
    4-space indent.

    Args:
        file_path: JSON file holding a list of entries, each with a ``url`` key.
    """
    with open(file_path, 'r') as source:
        entries = json.load(source)

    # Keep only the entries that do not point at a local ("./") path.
    kept_entries = []
    for entry in entries:
        if entry['url'].startswith('./'):
            continue
        kept_entries.append(entry)

    with open(file_path, 'w') as target:
        json.dump(kept_entries, target, indent=4)
|
modules/message_store.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Define the MessageStore class
|
2 |
+
class MessageStore:
    """In-memory, per-page store for system, greeting and history messages."""

    def __init__(self):
        # page -> {"system": ..., "greeting": ..., "history": [...]}
        self.messages = {}

    def _page_record(self, page):
        """Return the record for ``page``, creating an empty one on first use."""
        if page not in self.messages:
            self.messages[page] = {"system": None, "greeting": None, "history": []}
        return self.messages[page]

    def update_message(self, page, message_type, message):
        """Set the system/greeting message or append to the page history.

        Any other ``message_type`` only ensures the page record exists.
        """
        record = self._page_record(page)
        if message_type == "history":
            record["history"].append(message)
        elif message_type in ("system", "greeting"):
            record[message_type] = message

    def get_message(self, page, message_type):
        """Return the stored message, or ``""`` for an unknown page or type."""
        try:
            record = self.messages[page]
        except KeyError:
            return ""
        return record.get(message_type, "")

    def get_history(self, page):
        """Return the history list of ``page``, or ``[]`` when there is none."""
        try:
            record = self.messages[page]
        except KeyError:
            return []
        return record.get("history", [])

    def set_history(self, page, history):
        """Replace the history of ``page``, creating the page record if needed."""
        self._page_record(page)["history"] = history
|
modules/nav_about.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from . import app_constants
|
2 |
+
import streamlit as st
|
3 |
+
from modules import app_logger,app_st_session_utils, app_page_definitions
|
4 |
+
|
5 |
+
# Rebind the name to the shared logger instance exposed by the app_logger module
|
6 |
+
app_logger = app_logger.app_logger
|
7 |
+
|
8 |
+
|
9 |
+
def app():
    """Render the About / configuration page.

    Shows the server-mode selector (Private, ZySec Demo, OpenAI) with the
    setting that belongs to the selected mode, writes the selection into
    ``app_constants`` when "Update Configuration" is pressed, displays the
    project information and offers a session reset.
    """
    app_logger.info("Starting Streamlit app - Configuration Tool")
    page_key = "nav_about"
    # Fall back to the default page definition when this page has no entry.
    page_config = app_page_definitions.PAGE_CONFIG.get(page_key, app_page_definitions.PAGE_CONFIG["default"])

    st.title(page_config["title"])
    st.caption(page_config["caption"])

    # --- Server mode selection -------------------------------------------
    st.subheader("Server Mode Selection")
    index_by_mode = {"private": 0, "demo": 1, "openai": 2}
    # Unknown deployment modes pre-select the first option ("Private").
    preselected = index_by_mode.get(app_constants.SYSTEM_DEPLOYMENT_MODE, 0)
    server_mode = st.radio("Select Server Mode", ["Private", "ZySec Demo", "OpenAI"], index=preselected)

    # Only the input that belongs to the selected mode is rendered and filled.
    local_model_uri = None
    remote_model_uri = None
    openai_api_key = None
    update_hint = "Use update configuration for changes to be affected"

    if server_mode == "Private":
        st.markdown("### Local Settings")
        st.markdown("""
        **Private Mode** is for running the model directly on your machine or on a local server.
        This mode is ideal if you have the necessary resources and want to keep data processing in-house.
        You can also use a local instance deployed with a URL endpoint.
        """)
        local_model_uri = st.text_input(
            "Private Model Base URL Endpoint (OpenAI Compatible). Example http://localhost:8000/v1",
            key="local_model_uri",
            value=app_constants.local_model_uri,
        )
        st.info(update_hint)

    elif server_mode == "ZySec Demo":
        st.markdown("### ZySec Demo Settings")
        st.markdown("""
        **ZySec Demo Mode** is designed for users who prefer to use ZySec's resources.
        This mode provides free access to a deployed model managed by the ZySec team,
        subject to availability. It's a great choice for trying out ZySec without any setup.
        """)
        remote_model_uri = st.text_input(
            "Remote Model Base URL Endpoint",
            value=app_constants.ZYSEC_DEMO,
            key="remote_model_uri",
            disabled=True,
        )
        st.info(update_hint)

    elif server_mode == "OpenAI":
        st.markdown("### OpenAI Settings")
        st.markdown("""
        **OpenAI Mode** leverages the OpenAI's Large Language Models (LLM) for processing.
        This mode allows you to integrate OpenAI's powerful AI capabilities while keeping
        the rest of the functionalities security-centric. An OpenAI API key is required.
        """)
        openai_api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            key="openai_api_key",
            value=app_constants.openai_api_key,
        )
        st.markdown(
            "Need an OpenAI API key? [Get it here](https://platform.openai.com/api-keys).",
            unsafe_allow_html=True
        )
        st.info(update_hint)

    # --- Apply the selection to app_constants ----------------------------
    if st.button("Update Configuration"):
        # mode label -> (deployment mode, model base URL, API key); the
        # settings of the modes that were not selected are reset.
        new_settings = {
            "Private": ("private", local_model_uri, "NO-API-KEY-NEEDED"),
            "ZySec Demo": ("demo", remote_model_uri, "NO-API-KEY-NEEDED"),
            "OpenAI": ("openai", None, openai_api_key),
        }.get(server_mode)
        if new_settings is not None:
            deployment_mode, model_uri, api_key = new_settings
            app_constants.SYSTEM_DEPLOYMENT_MODE = deployment_mode
            app_constants.local_model_uri = model_uri
            app_constants.openai_api_key = api_key
            st.info(update_hint)
        st.success("Configuration updated for " + server_mode + " mode.")

    # --- Project information ---------------------------------------------
    with st.expander("About ZySec and the Author"):
        st.markdown("""
        ### About ZySec
        ZySec is at the forefront of integrating **Cyber Security with Artificial Intelligence**, aiming to revolutionize how security professionals engage with technology. This project is driven by the aspiration to blend AI's innovative capabilities with the intricacies of cybersecurity, all while upholding the utmost standards of privacy.

        ZySec is not just a tool; it's a vision to elevate enterprise security functions, harnessing AI to propel these capabilities to new heights. We encourage you to explore our roadmap and see how ZySec is poised to transform the cybersecurity landscape.

        [🔗 View Our Road Map](https://github.com/ZySec-AI/ZySec/blob/main/roadmap.md)

        [🔗 Explore the Project on GitHub](https://github.com/ZySec-AI/ZySec.git)

        [🔗 Contact Us](https://docs.google.com/forms/d/e/1FAIpQLSdkqIjQUoUOorsWXVzgQhJ-vbp1OpN1ZI0u3u8fK_o-UxII2w/viewform)

        ### ZySec 7B Model
        **ZySec-v1-7B** stands as a pivotal innovation for security professionals, leveraging the advanced capabilities of HuggingFace's Zephyr language model series. This AI model is crafted to be an omnipresent cybersecurity ally, offering on-demand, expert guidance in cybersecurity issues. Picture ZySec-7B as an ever-present digital teammate, adept at navigating the complexities of security challenges. ZySec-7B's training spans over 30 unique domains, each enriched with thousands of data points, delivering unparalleled expertise.

        [🔗 Checkout Model on HuggingFace](https://huggingface.co/aihub-app/ZySec-7B-v1)

        ### About the Author - Venkatesh Siddi
        **Venkatesh Siddi** is a seasoned expert in the cybersecurity domain, beyond traditional cybersecurity, Venkatesh is deeply invested in leveraging **Artificial Intelligence and Machine Learning** to tackle complex cybersecurity challenges. He has led multiple projects involving big data, cloud security, and technology design.

        [🔗 Connect with Venkatesh on LinkedIn](https://www.linkedin.com/in/venkycs/)

        """, unsafe_allow_html=True)

    # --- Session reset -----------------------------------------------------
    st.subheader("Clear Session")
    if st.button("Reset Session"):
        # Iterate over a snapshot of the keys because entries are deleted.
        for state_key in list(st.session_state.keys()):
            del st.session_state[state_key]
        # Reload the page so the cleared session is reflected.
        app_st_session_utils.reload_page()
        st.rerun()
|
modules/nav_file_manager.py
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#nav_file_manager.py
|
2 |
+
|
3 |
+
from . import app_constants
|
4 |
+
import streamlit as st
|
5 |
+
from modules import app_logger, app_page_definitions,file_utils,common_utils
|
6 |
+
import os, json
|
7 |
+
|
8 |
+
# Rebind the name to the shared logger instance exposed by the app_logger module
|
9 |
+
app_logger = app_logger.app_logger
|
10 |
+
system_content_file = app_constants.SYSTEM_CONTENT_DATA
|
11 |
+
work_dir = os.path.join(app_constants.WORKSPACE_DIRECTORY, "docs")
|
12 |
+
|
13 |
+
def app():
    """Render the File Manager page.

    Lists every entry of the system content file with "Download" and
    "Learn It" checkboxes, lets the user register new content (file upload or
    manual URL) and offers a button that clears all stored data.
    """
    app_logger.info("Starting Streamlit app - File Manager")
    page_key = "nav_file_manager"

    # Fall back to the default page definition when this page has no entry.
    page_config = app_page_definitions.PAGE_CONFIG.get(page_key, app_page_definitions.PAGE_CONFIG["default"])

    st.title(page_config["title"])
    st.caption(page_config["caption"])

    with open(system_content_file, "r") as file:
        content_data = json.load(file)
    processed_paths = common_utils.read_processed_log()

    # Per-session memory of the last known checkbox values.
    if 'download_states' not in st.session_state:
        st.session_state['download_states'] = {}
    if 'learn_states' not in st.session_state:
        st.session_state['learn_states'] = {}
    download_states = st.session_state['download_states']
    learn_states = st.session_state['learn_states']

    for index, item in enumerate(content_data):
        name = item["name"]
        url = item["url"]
        content_type = item["content_type"]
        # The index keeps widget keys unique even for duplicated names/URLs.
        bare_url = url.replace('http://', '').replace('https://', '')
        unique_identifier = f"{index}_{name}_{bare_url}"

        # One row per item: link, content type, download box, learn box.
        name_col, type_col, download_col, learn_col = st.columns([3, 2, 1, 1])
        name_col.markdown(f"[{name}]({url})")
        type_col.write(content_type)

        local_path = os.path.join(work_dir, file_utils.sanitize_filename(url))
        is_downloaded = os.path.exists(local_path)
        is_learned = local_path in processed_paths

        download_key = f"download_{unique_identifier}"
        learn_key = f"learn_{unique_identifier}"

        # Seed the remembered values the first time an item is seen.
        if download_key not in download_states:
            download_states[download_key] = is_downloaded
        if learn_key not in learn_states:
            learn_states[learn_key] = is_learned

        # Downloading is offered only while the file is missing; learning only
        # for files that exist locally and are not processed yet.
        download_checked = download_col.checkbox(
            "Download",
            value=download_states[download_key],
            key=download_key,
            disabled=is_downloaded,
        )
        learn_checked = learn_col.checkbox(
            "Learn It",
            value=learn_states[learn_key],
            key=learn_key,
            disabled=not is_downloaded or is_learned,
        )

        # React only when a checkbox changed since the previous run.
        if download_checked != download_states[download_key]:
            if download_checked:
                file_utils.perform_file_operation(item, "download")
            download_states[download_key] = download_checked

        if learn_checked != learn_states[learn_key]:
            if learn_checked:
                file_utils.perform_file_operation(item, "learn")
            learn_states[learn_key] = learn_checked

    with st.expander("Manage Content in the System", expanded=True):
        st.markdown("""
        **Content Types:**
        - **Policies:** Guidelines for operations, organizational or industry-wide.
        - **Playbooks:** How-to guides and procedures for operational guidance.
        - **Standards:** Compliance with regulatory or industry best practices.
        - **Reference Docs:** In-depth information like technical manuals and research, however, they go into playbooks.
        """)

        selected_content_type = st.selectbox("Select Content Type", ["Select Type"] + app_constants.CONTENT_TYPE)

        if selected_content_type and selected_content_type != 'Select Type':
            upload_choice = st.radio("Choose an option", ("Upload File", "Enter File Details Manually"))

            if upload_choice == "Upload File":
                uploaded_file = st.file_uploader("Choose a file", type=['pdf', 'docx', 'txt', 'html'])

                if uploaded_file is not None:
                    # Remember handled uploads so reruns do not register them again.
                    if 'processed_files' not in st.session_state:
                        st.session_state['processed_files'] = set()

                    upload_signature = (uploaded_file.name, uploaded_file.size)

                    if upload_signature in st.session_state['processed_files']:
                        st.info("This file has already been processed in this session.")
                    else:
                        with st.spinner("Processing your file..."):
                            file_utils.handle_content_update(uploaded_file=uploaded_file, selected_content_type=selected_content_type)
                            st.session_state['processed_files'].add(upload_signature)
                            st.success("File processed.")

            elif upload_choice == "Enter File Details Manually":
                with st.form("file_details_form"):
                    manual_name = st.text_input("Document Name")
                    manual_url = st.text_input("Download URL")
                    submit_button = st.form_submit_button("Submit")

                    if submit_button and manual_url and manual_name:
                        # Remember submitted details so reruns do not register them again.
                        submission = (manual_name, manual_url)

                        if 'submitted_forms' not in st.session_state:
                            st.session_state['submitted_forms'] = set()

                        if submission in st.session_state['submitted_forms']:
                            st.info("These details have already been submitted in this session.")
                        else:
                            file_utils.handle_content_update(manual_name=manual_name, manual_url=manual_url, selected_content_type=selected_content_type)
                            st.session_state['submitted_forms'].add(submission)
                            st.success("Form details processed.")

    st.write("Using below clear option, you can clear all data in the system and start fresh to index and upload information!")
    if st.button("Clear Data"):
        file_utils.delete_files()
        st.write("All data cleared.")
        st.rerun()
|
modules/nav_query_docs.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from modules import app_logger, app_page_definitions, app_prompt, app_constants, app_st_session_utils,common_utils,file_utils
|
3 |
+
import time
|
4 |
+
# Rebind the name to the shared logger instance exposed by the app_logger module
|
5 |
+
app_logger = app_logger.app_logger
|
6 |
+
|
7 |
+
def app(message_store, current_page="nav_private_ai", use_retrieval_chain=False):
    """Render a chat page, optionally backed by a per-page retrieval database.

    Args:
        message_store: Message store placed into the session state on first
            use; afterwards the instance held by the session is used.
        current_page: Page identifier used for the page configuration, the
            indexed-file lookup and the persistent database name.
        use_retrieval_chain: When true, queries are answered through the
            page's Chroma database retriever; otherwise the LLM is queried
            without a retriever.
    """
    app_logger.info(f"Starting Streamlit app - {current_page}")

    # Fall back to the default page definition when this page has no entry.
    page_config = app_page_definitions.PAGE_CONFIG.get(current_page, app_page_definitions.PAGE_CONFIG["default"])
    files_indexed = file_utils.get_indexed_files_for_page(current_page)

    st.title(page_config["title"])
    st.caption(page_config["caption"])

    # Initialize or update session state variables
    app_st_session_utils.initialize_session_state('current_page', current_page)
    app_st_session_utils.initialize_session_state('page_loaded', False)
    app_st_session_utils.initialize_session_state('message_store', message_store)

    # Stays falsy unless a retrieval database is requested and available.
    db_retriever_playbooks = False
    if use_retrieval_chain:
        persistent_db = app_constants.LOCAL_PERSISTANT_DB + current_page + '_chroma_db'
        db_retriever_playbooks = app_st_session_utils.initialize_or_retrieve_db(persistent_db)

    message_store = st.session_state['message_store']

    # Manage message history and show the greeting
    app_st_session_utils.manage_message_history(current_page)
    greeting_message = common_utils.get_page_greeting(st.session_state['current_page'], st.session_state.get('username', ''), files_indexed)
    st.chat_message("assistant").markdown(greeting_message, unsafe_allow_html=True)

    # Replay the conversation so far
    for message in st.session_state.get("messages", []):
        app_st_session_utils.display_chat_message(message["role"], message["content"])

    prompt = st.chat_input("Let's talk! Enter your query below.")
    if not prompt:
        return

    def _show_and_store(response):
        """Display the assistant reply and record both sides of the exchange."""
        app_st_session_utils.display_chat_message("assistant", response)
        app_st_session_utils.add_message_to_session("user", prompt)
        app_st_session_utils.add_message_to_session("assistant", response)

    st.chat_message("user").write(prompt)
    app_logger.info(f"Processed user prompt: {prompt}")
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    with st.spinner("Processing request..."):
        if not use_retrieval_chain:
            _show_and_store(
                app_prompt.query_llm(prompt, page=current_page, message_store=message_store, retriever=False)
            )
        elif db_retriever_playbooks:
            retriever = db_retriever_playbooks.as_retriever(
                search_type="similarity",
                search_kwargs={"k": app_constants.RAG_K},
            )
            _show_and_store(
                app_prompt.query_llm(
                    prompt,
                    page=current_page,
                    retriever=retriever,
                    message_store=message_store,
                    use_retrieval_chain=use_retrieval_chain,
                )
            )
        else:
            st.error("Unable to initialize the database. Please try again later.")
    processing_time = time.perf_counter() - start_time
    st.info(f"Processing time: {processing_time:.2f} seconds")
|
modules/nav_researcher.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from modules import app_prompt, app_researcher, app_logger, database_utils, app_to_vectorstore, app_page_definitions
|
3 |
+
from modules import app_st_session_utils,app_constants,common_utils # Importing the session utilities module
|
4 |
+
|
5 |
+
# Rebind the name to the shared logger instance exposed by the app_logger module
|
6 |
+
app_logger = app_logger.app_logger
|
7 |
+
|
8 |
+
def app(message_store):
    """Render the internet researcher page.

    Lets the user research a topic on the internet, index the result into a
    temporary Chroma database, pick one of the researched topics and chat
    against it.

    Args:
        message_store: Message store placed into the session state on first
            use.
    """
    app_logger.info("Navigating to nav_researcher page")

    current_page = "nav_researcher"
    # Fall back to the default page definition, as the other pages do, so a
    # missing entry cannot make the subscripts below fail on None.
    page_config = app_page_definitions.PAGE_CONFIG.get(current_page, app_page_definitions.PAGE_CONFIG["default"])

    st.title(page_config["title"])

    # Initialize or update session state variables using session utilities
    app_st_session_utils.initialize_session_state('current_page', current_page)
    app_st_session_utils.initialize_session_state('page_loaded', False)
    app_st_session_utils.initialize_session_state('message_store', message_store)

    st.caption(page_config["caption"])

    topic = st.text_input("Enter Topic for Research", "Threat Management")
    research_button = st.button("Go Research on Internet")

    if research_button and topic:
        with st.spinner('Searching...'):
            try:
                research_notes = app_researcher.explore_url_on_internet(topic, count=app_constants.SEARCH_COUNT)
                app_to_vectorstore.get_chroma_index(research_notes, is_persistent=False)
                app_logger.info("Internet research completed successfully")
                st.success("Internet research completed")
                st.session_state['research_done'] = True
            except Exception as e:
                # Page-level boundary: report the failure and keep the page usable.
                app_logger.error(f"Error during internet research: {e}")
                st.error(f"Error during internet research: {e}")

    TEMP_DIR = app_constants.WORKSPACE_DIRECTORY + "tmp"
    db_files = database_utils.get_chroma_db_files(TEMP_DIR)

    # Map the human-readable names shown in the dropdown to the actual files.
    formatted_db_names = [database_utils.format_db_name(db) for db in db_files]
    name_to_file_map = dict(zip(formatted_db_names, db_files))

    selected_db_formatted = st.selectbox("Pick Researched topic from drop-down and start chatting!", formatted_db_names)

    # With no researched topics the selectbox yields None; looking that up in
    # the map would raise KeyError, so the retriever simply stays unset.
    db_retriever = None
    if selected_db_formatted is not None and selected_db_formatted in name_to_file_map:
        research_notes = TEMP_DIR + '/' + name_to_file_map[selected_db_formatted]
        db_retriever = app_st_session_utils.initialize_or_retrieve_db(research_notes)
    else:
        st.info("No researched topics are available yet. Research a topic above to start chatting.")

    app_st_session_utils.manage_message_history(current_page)

    greeting_message = common_utils.get_page_greeting(st.session_state['current_page'], st.session_state.get('username', ''))
    st.chat_message("assistant").markdown(greeting_message, unsafe_allow_html=True)
    app_st_session_utils.update_session_state('page_loaded', True)

    # Replay the conversation so far
    for message in st.session_state.get("messages", []):
        app_st_session_utils.display_chat_message(message["role"], message["content"])

    prompt = st.chat_input("Let's Talk! Conversation secure and private!")
    if prompt:
        st.chat_message("user").write(prompt)
        with st.spinner("Processing your request..."):
            if db_retriever:
                retriever = db_retriever.as_retriever(
                    search_type="similarity",
                    search_kwargs={"k": app_constants.RAG_K},
                )
                formatted_response = app_prompt.query_llm(
                    prompt,
                    page=current_page,
                    retriever=retriever,
                    message_store=st.session_state['message_store'],
                    use_retrieval_chain=True,
                )
                st.chat_message("assistant").markdown(formatted_response, unsafe_allow_html=True)
                app_st_session_utils.add_message_to_session("user", prompt)
                app_st_session_utils.add_message_to_session("assistant", formatted_response)
                app_logger.info(f"Processed user prompt: {prompt}")
            else:
                st.error("Unable to initialize the database. Please try again later.")
|
modules/nav_summarizer.py
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import os
|
3 |
+
import streamlit as st
|
4 |
+
from langchain_openai import ChatOpenAI
|
5 |
+
from langchain.chains import load_summarize_chain
|
6 |
+
from langchain_community.document_loaders import PyPDFLoader, TextLoader, UnstructuredWordDocumentLoader
|
7 |
+
from langchain.prompts import PromptTemplate
|
8 |
+
from modules import app_page_definitions, app_logger,app_constants,file_utils
|
9 |
+
|
10 |
+
# Rebind the name to the shared logger instance exposed by the app_logger module
|
11 |
+
app_logger = app_logger.app_logger
|
12 |
+
|
13 |
+
# Configurable batch size (4 pages per batch)
|
14 |
+
batch_size = app_constants.SUMMARIZER_BATCH
|
15 |
+
WORKSPACE_DIRECTORY = app_constants.WORKSPACE_DIRECTORY
|
16 |
+
|
17 |
+
|
18 |
+
def process_file(file_path, file_type):
    """Load a document with the loader matching its MIME type and split it.

    Args:
        file_path: Path of the file to load.
        file_type: MIME type of the file; plain text, PDF and DOCX are
            supported.

    Returns:
        The result of the selected loader's ``load_and_split()``.

    Raises:
        ValueError: If ``file_type`` is not one of the supported MIME types.
    """
    docx_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    if file_type not in ("text/plain", "application/pdf", docx_type):
        raise ValueError(f"Unsupported file type: {file_type}")

    # MIME type -> loader class handling that format.
    loader_classes = {
        "text/plain": TextLoader,
        "application/pdf": PyPDFLoader,
        docx_type: UnstructuredWordDocumentLoader,
    }
    loader = loader_classes[file_type](file_path)
    app_logger.info(f"Processing file {file_path} of type {file_type}")
    return loader.load_and_split()
|
29 |
+
|
30 |
+
def app():
    """Render the Summarizer page.

    The user uploads a text, PDF or DOCX document and selects a range of the
    split documents. The range is first summarized batch by batch (shown as
    intermediate results) and then once more as a whole to produce the final
    summary.
    """
    app_logger.info("Starting Streamlit app - Summarizer Tool page")

    # Fall back to the default page definition, as the other pages do, so a
    # missing entry cannot make the subscripts below fail on None.
    page_config = app_page_definitions.PAGE_CONFIG.get("nav_summarize", app_page_definitions.PAGE_CONFIG["default"])

    st.title(page_config["title"])
    st.caption(page_config["caption"])
    st.session_state.current_page = "nav_summarize"

    uploaded_file = st.file_uploader("Upload your document here:", type=['txt', 'pdf', 'docx'], key="file_uploader")

    if uploaded_file is None:
        st.warning("Please upload a document to summarize.")
        app_logger.warning("No document uploaded for summarization")
        return

    file_path = file_utils.save_uploaded_file(uploaded_file, uploads_path=WORKSPACE_DIRECTORY + "/tmp")
    docs = process_file(file_path, uploaded_file.type)

    total_docs = len(docs)
    app_logger.info(f"Total documents processed: {total_docs}")

    if total_docs == 0:
        # Nothing to summarize; avoid invoking the chain on an empty list.
        st.warning("No content could be extracted from the uploaded document.")
        return

    # A range slider needs at least two positions.
    if total_docs > 1:
        doc_range = st.slider("Select document range for summarization", 1, total_docs, (1, total_docs))
    else:
        doc_range = (1, 1)

    progress_bar = st.progress(0)

    if not st.button("Summarize"):
        return

    with st.spinner('Processing... Please wait'):
        llm = ChatOpenAI(
            model_name=app_constants.MODEL_NAME,
            openai_api_key=app_constants.openai_api_key,
            base_url=app_constants.local_model_uri,
            streaming=True
        )

        prompt_template = (
            "Write a concise summary of the following:\n"
            "{text}\n"
            "CONCISE SUMMARY:"
        )
        prompt = PromptTemplate.from_template(prompt_template)

        refine_template = (
            "You are a content writer and your job is to produce a summary of input\n"
            "We have provided an existing summary up to a certain point: {existing_answer}\n"
            "Start and end properly and refine the existing summary "
            "with some more context below.\n"
            "------------\n"
            "{text}\n"
            "------------\n"
            "Given the new context, refine the original summary. "
            "If the context isn't useful, return the original summary."
        )
        refine_prompt = PromptTemplate.from_template(refine_template)

        chain = load_summarize_chain(
            llm=llm,
            chain_type="refine",
            question_prompt=prompt,
            refine_prompt=refine_prompt,
            return_intermediate_steps=True,
            input_key="input_documents",
            output_key="output_text",
        )

        # doc_range is 1-based and inclusive; convert to a half-open index range.
        start_doc, end_doc = doc_range
        first_index = start_doc - 1
        stop_index = min(end_doc, total_docs)
        selected_count = stop_index - first_index

        for i in range(first_index, stop_index, batch_size):
            # Bound each batch by the selected range so documents outside the
            # selection are never summarized.
            batch_docs = docs[i:min(i + batch_size, stop_index)]

            # Progress is relative to the selection, so it reaches 100%.
            progress_bar.progress((i - first_index + len(batch_docs)) / selected_count)

            with st.expander(f"Processing Documents {i + 1} - {i + len(batch_docs)}", expanded=False):
                intermediate_summary = chain.invoke({"input_documents": batch_docs}, return_only_outputs=True)
                st.write(intermediate_summary)

        selected_docs = docs[first_index:stop_index]
        final_summary_response = chain.invoke({"input_documents": selected_docs}, return_only_outputs=True)
        final_summary = final_summary_response.get('output_text', "No summary generated.")
        st.text_area("Final Summary", final_summary, height=300)

    st.success("Summarization Completed!")
    progress_bar.empty()
|
pyvenv.cfg
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
home = /Users/mars/miniconda3/bin
|
2 |
+
include-system-site-packages = false
|
3 |
+
version = 3.11.5
|
4 |
+
executable = /Users/mars/miniconda3/bin/python3.11
|
5 |
+
command = /Users/mars/miniconda3/bin/python -m venv /Volumes/localdisk/projects/zysec.ai
|
requirements.txt
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohttp==3.9.3
|
2 |
+
aiosignal==1.3.1
|
3 |
+
altair==5.2.0
|
4 |
+
annotated-types==0.6.0
|
5 |
+
antlr4-python3-runtime==4.9.3
|
6 |
+
anyio==4.2.0
|
7 |
+
asgiref==3.7.2
|
8 |
+
attrs==23.2.0
|
9 |
+
backoff==2.2.1
|
10 |
+
bcrypt==4.1.2
|
11 |
+
beautifulsoup4==4.12.3
|
12 |
+
blinker==1.7.0
|
13 |
+
blis==0.7.11
|
14 |
+
bs4==0.0.2
|
15 |
+
build==1.0.3
|
16 |
+
cachetools==5.3.2
|
17 |
+
catalogue==2.0.10
|
18 |
+
certifi==2024.2.2
|
19 |
+
cffi==1.16.0
|
20 |
+
chardet==5.2.0
|
21 |
+
charset-normalizer==3.3.2
|
22 |
+
chroma-hnswlib==0.7.3
|
23 |
+
chromadb==0.4.22
|
24 |
+
click==8.1.7
|
25 |
+
cloudpathlib==0.16.0
|
26 |
+
coloredlogs==15.0.1
|
27 |
+
confection==0.1.4
|
28 |
+
contourpy==1.2.0
|
29 |
+
cryptography==42.0.2
|
30 |
+
curl_cffi==0.6.0b9
|
31 |
+
cycler==0.12.1
|
32 |
+
cymem==2.0.8
|
33 |
+
dataclasses-json==0.6.4
|
34 |
+
dataclasses-json-speakeasy==0.5.11
|
35 |
+
Deprecated==1.2.14
|
36 |
+
diskcache==5.6.3
|
37 |
+
distro==1.9.0
|
38 |
+
docx2txt==0.8
|
39 |
+
duckduckgo_search==4.4.3
|
40 |
+
effdet==0.4.1
|
41 |
+
emoji==2.10.1
|
42 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
|
43 |
+
entrypoints==0.4
|
44 |
+
Faker==23.2.1
|
45 |
+
fastapi==0.109.2
|
46 |
+
favicon==0.7.0
|
47 |
+
filelock==3.13.1
|
48 |
+
filetype==1.2.0
|
49 |
+
flatbuffers==23.5.26
|
50 |
+
fonttools==4.48.1
|
51 |
+
frozenlist==1.4.1
|
52 |
+
fsspec==2024.2.0
|
53 |
+
gitdb==4.0.11
|
54 |
+
GitPython==3.1.41
|
55 |
+
google-auth==2.27.0
|
56 |
+
googleapis-common-protos==1.62.0
|
57 |
+
grpcio==1.60.1
|
58 |
+
h11==0.14.0
|
59 |
+
htbuilder==0.6.2
|
60 |
+
html2text==2020.1.16
|
61 |
+
httpcore==1.0.2
|
62 |
+
httptools==0.6.1
|
63 |
+
httpx==0.26.0
|
64 |
+
huggingface-hub==0.20.3
|
65 |
+
humanfriendly==10.0
|
66 |
+
humanize==4.9.0
|
67 |
+
idna==3.6
|
68 |
+
importlib-metadata==6.11.0
|
69 |
+
importlib-resources==6.1.1
|
70 |
+
iopath==0.1.10
|
71 |
+
Jinja2==3.1.3
|
72 |
+
joblib==1.3.2
|
73 |
+
jq==1.6.0
|
74 |
+
jsonpatch==1.33
|
75 |
+
jsonpath-python==1.0.6
|
76 |
+
jsonpointer==2.4
|
77 |
+
jsonschema==4.21.1
|
78 |
+
jsonschema-specifications==2023.12.1
|
79 |
+
kiwisolver==1.4.5
|
80 |
+
kubernetes==29.0.0
|
81 |
+
langchain==0.1.6
|
82 |
+
langchain-community==0.0.19
|
83 |
+
langchain-core==0.1.22
|
84 |
+
langchain-openai==0.0.5
|
85 |
+
langcodes==3.3.0
|
86 |
+
langdetect==1.0.9
|
87 |
+
langsmith==0.0.87
|
88 |
+
layoutparser==0.3.4
|
89 |
+
llama_cpp_python==0.2.53
|
90 |
+
lxml==5.1.0
|
91 |
+
Markdown==3.5.2
|
92 |
+
markdown-it-py==3.0.0
|
93 |
+
markdownlit==0.0.7
|
94 |
+
MarkupSafe==2.1.5
|
95 |
+
marshmallow==3.20.2
|
96 |
+
matplotlib==3.8.2
|
97 |
+
mdurl==0.1.2
|
98 |
+
mmh3==4.1.0
|
99 |
+
monotonic==1.6
|
100 |
+
more-itertools==10.2.0
|
101 |
+
mpmath==1.3.0
|
102 |
+
multidict==6.0.5
|
103 |
+
murmurhash==1.0.10
|
104 |
+
mypy-extensions==1.0.0
|
105 |
+
nest-asyncio==1.6.0
|
106 |
+
networkx==3.2.1
|
107 |
+
nltk==3.8.1
|
108 |
+
numpy==1.26.4
|
109 |
+
oauthlib==3.2.2
|
110 |
+
omegaconf==2.3.0
|
111 |
+
onnx==1.15.0
|
112 |
+
onnxruntime==1.15.1
|
113 |
+
openai==1.12.0
|
114 |
+
opencv-python==4.9.0.80
|
115 |
+
opentelemetry-api==1.22.0
|
116 |
+
opentelemetry-exporter-otlp-proto-common==1.22.0
|
117 |
+
opentelemetry-exporter-otlp-proto-grpc==1.22.0
|
118 |
+
opentelemetry-instrumentation==0.43b0
|
119 |
+
opentelemetry-instrumentation-asgi==0.43b0
|
120 |
+
opentelemetry-instrumentation-fastapi==0.43b0
|
121 |
+
opentelemetry-proto==1.22.0
|
122 |
+
opentelemetry-sdk==1.22.0
|
123 |
+
opentelemetry-semantic-conventions==0.43b0
|
124 |
+
opentelemetry-util-http==0.43b0
|
125 |
+
overrides==7.7.0
|
126 |
+
packaging==23.2
|
127 |
+
pandas==2.2.0
|
128 |
+
pdf2image==1.17.0
|
129 |
+
pdfminer.six==20221105
|
130 |
+
pdfplumber==0.10.4
|
131 |
+
pikepdf==8.12.0
|
132 |
+
pillow==10.2.0
|
133 |
+
pillow_heif==0.15.0
|
134 |
+
pip-review==1.3.0
|
135 |
+
portalocker==2.8.2
|
136 |
+
posthog==3.4.0
|
137 |
+
preshed==3.0.9
|
138 |
+
prometheus_client==0.20.0
|
139 |
+
protobuf==4.25.2
|
140 |
+
pulsar-client==3.4.0
|
141 |
+
pyarrow==15.0.0
|
142 |
+
pyasn1==0.5.1
|
143 |
+
pyasn1-modules==0.3.0
|
144 |
+
pycocotools==2.0.7
|
145 |
+
pycparser==2.21
|
146 |
+
pydantic==2.6.1
|
147 |
+
pydantic-settings==2.1.0
|
148 |
+
pydantic_core==2.16.2
|
149 |
+
pydeck==0.8.1b0
|
150 |
+
Pygments==2.17.2
|
151 |
+
pymdown-extensions==10.7
|
152 |
+
pyparsing==3.1.1
|
153 |
+
pypdf==4.0.1
|
154 |
+
pypdfium2==4.27.0
|
155 |
+
PyPika==0.48.9
|
156 |
+
pyproject_hooks==1.0.0
|
157 |
+
pytesseract==0.3.10
|
158 |
+
python-dateutil==2.8.2
|
159 |
+
python-dotenv==1.0.1
|
160 |
+
python-iso639==2024.2.7
|
161 |
+
python-magic==0.4.27
|
162 |
+
python-multipart==0.0.9
|
163 |
+
pytz==2024.1
|
164 |
+
PyYAML==6.0.1
|
165 |
+
rapidfuzz==3.6.1
|
166 |
+
referencing==0.33.0
|
167 |
+
regex==2023.12.25
|
168 |
+
requests==2.31.0
|
169 |
+
requests-oauthlib==1.3.1
|
170 |
+
rich==13.7.0
|
171 |
+
rpds-py==0.17.1
|
172 |
+
rsa==4.9
|
173 |
+
safetensors==0.4.2
|
174 |
+
scikit-learn==1.4.0
|
175 |
+
scipy==1.12.0
|
176 |
+
sentence-transformers==2.3.1
|
177 |
+
sentencepiece==0.1.99
|
178 |
+
six==1.16.0
|
179 |
+
smart-open==6.4.0
|
180 |
+
smmap==5.0.1
|
181 |
+
sniffio==1.3.0
|
182 |
+
soupsieve==2.5
|
183 |
+
spacy==3.7.2
|
184 |
+
spacy-legacy==3.0.12
|
185 |
+
spacy-loggers==1.0.5
|
186 |
+
SQLAlchemy==2.0.26
|
187 |
+
srsly==2.4.8
|
188 |
+
sse-starlette==2.0.0
|
189 |
+
st-annotated-text==4.0.1
|
190 |
+
starlette==0.36.3
|
191 |
+
starlette-context==0.3.6
|
192 |
+
streamlit==1.31.1
|
193 |
+
streamlit-camera-input-live==0.2.0
|
194 |
+
streamlit-card==1.0.0
|
195 |
+
streamlit-embedcode==0.1.2
|
196 |
+
streamlit-extras==0.4.0
|
197 |
+
streamlit-faker==0.0.3
|
198 |
+
streamlit-image-coordinates==0.1.6
|
199 |
+
streamlit-keyup==0.2.3
|
200 |
+
streamlit-option-menu==0.3.12
|
201 |
+
streamlit-toggle-switch==1.0.2
|
202 |
+
streamlit-vertical-slider==2.5.5
|
203 |
+
sympy==1.12
|
204 |
+
tabulate==0.9.0
|
205 |
+
tenacity==8.2.3
|
206 |
+
thinc==8.2.3
|
207 |
+
threadpoolctl==3.2.0
|
208 |
+
tiktoken==0.5.2
|
209 |
+
timm==0.9.12
|
210 |
+
tokenizers==0.15.2
|
211 |
+
toml==0.10.2
|
212 |
+
toolz==0.12.1
|
213 |
+
torch==2.2.0
|
214 |
+
torchvision==0.17.0
|
215 |
+
tornado==6.4
|
216 |
+
tqdm==4.66.2
|
217 |
+
transformers==4.37.2
|
218 |
+
typer==0.9.0
|
219 |
+
typing-inspect==0.9.0
|
220 |
+
typing_extensions==4.9.0
|
221 |
+
tzdata==2024.1
|
222 |
+
tzlocal==5.2
|
223 |
+
unstructured==0.12.4
|
224 |
+
unstructured-client==0.18.0
|
225 |
+
unstructured-inference==0.7.23
|
226 |
+
unstructured.pytesseract==0.3.12
|
227 |
+
urllib3==2.2.0
|
228 |
+
uvicorn==0.27.1
|
229 |
+
uvloop==0.19.0
|
230 |
+
validators==0.22.0
|
231 |
+
vulture==2.11
|
232 |
+
wasabi==1.1.2
|
233 |
+
watchdog==4.0.0
|
234 |
+
watchfiles==0.21.0
|
235 |
+
weasel==0.3.4
|
236 |
+
websocket-client==1.7.0
|
237 |
+
websockets==12.0
|
238 |
+
wrapt==1.16.0
|
239 |
+
yarl==1.9.4
|
240 |
+
zipp==3.17.0
|
start_model_server.sh
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
# Define model directory and file
|
4 |
+
|
5 |
+
#ZySec-7B-v1.Q2_K.gguf - MISTRAL
|
6 |
+
#ZySec-7B-v1.Q4_K_M.gguf - MISTRAL
|
7 |
+
#ZySec-7B-v1.Q8_0.gguf - MISTRAL
|
8 |
+
|
9 |
+
#ZySec-7B-v2.Q2_K.gguf - GEMMA
|
10 |
+
#ZySec-7B-v2.Q4_K_M.gguf - GEMMA
|
11 |
+
#ZySec-7B-v2.Q8_0.gguf - GEMMA
|
12 |
+
|
13 |
+
model_dir="models/ZySec-AI"
|
14 |
+
model_file="ZySec-7B-v1.Q2_K.gguf"
|
15 |
+
model_path="$model_dir/$model_file"
|
16 |
+
|
17 |
+
# Function to handle SIGINT (Ctrl+C)
|
18 |
+
handle_sigint() {
    # SIGINT (Ctrl+C) handler: stop the model server if this shell knows its
    # PID, then terminate the script.
    echo "SIGINT received, stopping the server and exiting..."
    # server_pid is assigned inside start_model_server. It is empty in this
    # shell when the server was never launched from it (port 8000 already in
    # use, or the function ran in a background subshell); calling `kill`
    # without a PID would only print a usage error.
    if [ -n "${server_pid:-}" ]; then
        kill "$server_pid" 2>/dev/null
    fi
    exit
}
|
23 |
+
|
24 |
+
# Trap SIGINT (Ctrl+C) and execute the handle_sigint function
|
25 |
+
trap handle_sigint SIGINT
|
26 |
+
|
27 |
+
# Step 2: Check for curl and download model file if it doesn't exist
if ! command -v curl &> /dev/null
then
    echo "curl could not be found, please install it."
    exit 1
fi

if [ ! -d "$model_dir" ]; then
    echo "Directory $model_dir does not exist. Creating now."
    mkdir -p "$model_dir" && echo "Directory created." || { echo "Failed to create directory."; exit 1; }
fi

if [ ! -f "$model_path" ]; then
    echo "Model file $model_file does not exist. Downloading now."
    # Write directly to $model_path rather than cd-ing into $model_dir: a `cd`
    # here would change the working directory for the rest of the script and
    # break the relative "./$model_path" used when the server is started.
    # --fail makes curl return non-zero on HTTP errors instead of saving the
    # error page as if it were the model.
    # NOTE(review): $model_file is a "v1" file name while the URL points at the
    # "ZySec-7B-v2-GGUF" repository - confirm the file is published there.
    if curl --fail -L -o "$model_path" "https://huggingface.co/ZySec-AI/ZySec-7B-v2-GGUF/resolve/main/$model_file?download=true"; then
        echo "Download completed."
    else
        echo "Failed to download model."
        # Remove any partial file so the next run retries the download
        # instead of treating the leftover as a valid model.
        rm -f "$model_path"
        exit 1
    fi
else
    echo "Model file $model_file already exists. Skipping download."
fi
|
46 |
+
|
47 |
+
# Function to start or restart the model server
|
48 |
+
start_model_server() {
    # Launch llama_cpp.server for $model_path and block until it exits.
    # Returns early (without starting anything) when something is already
    # listening on TCP port 8000; otherwise exits the script with status 1
    # once the server process ends.

    # Check if port 8000 is already in use
    if lsof -i:8000 -sTCP:LISTEN -t >/dev/null ; then
        echo "Port 8000 is already in use. Assuming the model server is running."
        return
    fi

    echo "Starting model server..."
    # --n_batch was previously passed twice (4, then 200); only the later
    # value took effect, so the dead first occurrence has been dropped.
    # NOTE(review): --n_ctx 8196 looks like a typo for 8192 - confirm the
    # intended context size before changing it.
    python3 -m llama_cpp.server --model "./$model_path" --n_ctx 8196 --n_batch 200 --verbose true --n_gpu_layers 50 --chat_format zephyr &
    # Remember the PID so the SIGINT handler can stop the server.
    server_pid=$!
    wait "$server_pid"

    echo "Model server stopped. Exiting."
    exit 1
}
|
63 |
+
|
64 |
+
# Step 4: Start the model server
# Call the function in this shell instead of as a background job
# (`start_model_server &`): a background subshell keeps its own copy of
# server_pid, so the SIGINT trap installed in this shell would have no PID to
# kill. The function already backgrounds the server process and waits on it,
# so the script still blocks here until the server exits or Ctrl+C is pressed.
start_model_server
|
start_web_ui.sh
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash

# Launcher for the Streamlit web UI: update the checkout, prepare the Python
# virtual environment, install the pinned requirements, then run app.py.

# Step 1: Git pull
echo "Pulling latest changes from Git repository..."
# Best effort: keep going with the local checkout if the pull fails
# (e.g. offline or local modifications), but say so.
git pull || echo "Warning: git pull failed; continuing with the current checkout."

# Step 2: Setup and activate virtual environment
venv_path="zysec"
if [ ! -d "$venv_path" ]; then
    echo "Creating virtual environment 'ZySec'..."
    python3 -m venv "$venv_path" || { echo "Failed to create virtual environment. Exiting."; exit 1; }
fi

echo "Activating virtual environment 'ZySec'..."
source "$venv_path/bin/activate"

# Check if we are in the right virtual environment
if [[ "$VIRTUAL_ENV" != "" && "$VIRTUAL_ENV" == *"$venv_path" ]]; then
    echo "Now in the 'ZySec' virtual environment."
    # Install requirements; report a failure instead of hiding it behind -q.
    pip3 install -r requirements.txt -q || echo "Warning: installing requirements failed; the app may not start correctly."
else
    echo "Failed to activate 'ZySec' virtual environment. Exiting."
    exit 1
fi

# Step 3: Start the Streamlit app
echo "Assuming model instance is running.. you can start it or review settings in about section to connect to remote instance."
echo "Starting Streamlit app..."
streamlit run app.py
|
31 |
+
|