Spaces:
Sleeping
Sleeping
Commit
•
f543029
1
Parent(s):
acc18ac
[WIP]
Browse files- .gitignore +2 -1
- app.py +33 -2
- chroma.py +39 -0
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
local setting.md
|
|
|
|
1 |
+
local setting.md
|
2 |
+
venv
|
app.py
CHANGED
@@ -1,5 +1,36 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
|
3 |
-
|
4 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
|
|
1 |
import streamlit as st

# `streamlit run app.py` executes this file as a top-level script, where a
# relative import raises ImportError (no parent package). Keep the relative
# form for package-style use and fall back to the sibling module otherwise.
try:
    from .chroma import searchengine
except ImportError:
    from chroma import searchengine


def main():
    """Render the Streamlit UI for adding documents to and searching a Chroma collection.

    The sidebar holds two forms: one that adds a document (text plus a
    free-form metadata string) and one that runs a text query. Query hits are
    written to the main page area.
    """
    st.title("ChromaDB Search Engine")

    model_name = "your_model_name"  # Replace with the actual model name
    collection_name = "your_collection_name"  # Replace with the actual collection name

    # NOTE(review): Streamlit re-runs this script on every interaction, so the
    # engine is rebuilt each time; consider caching it if construction is slow.
    search_engine = searchengine(model_name, collection_name)

    st.sidebar.header("Add Document")
    text_input = st.sidebar.text_area("Enter Document Text")
    metadata_input = st.sidebar.text_input("Enter Metadata")
    add_button = st.sidebar.button("Add Document")

    if add_button:
        if not text_input.strip():
            st.sidebar.warning("Please enter some document text before adding.")
        else:
            # Chroma ids must be strings. The id is derived from the current
            # collection size (incremental), as in the original code.
            document_id = str(search_engine.count() + 1)
            # Chroma metadata must be a dict, so wrap the free-form text
            # under a single key instead of passing the bare string.
            metadata = {"metadata": metadata_input}
            search_engine.add(text_input, metadata, document_id)
            st.sidebar.success(f"Document added with ID: {document_id}")

    st.sidebar.header("Search")
    query = st.sidebar.text_input("Enter Search Query")
    search_button = st.sidebar.button("Search")

    if search_button:
        if not query.strip():
            st.sidebar.warning("Please enter a search query.")
        else:
            results = search_engine.query(query)
            st.subheader("Search Results:")

            # The query result is a dict of parallel lists with one inner
            # list per query text; a single query was sent, so read index 0.
            hit_ids = results["ids"][0]
            hit_documents = results["documents"][0]
            hit_metadatas = results["metadatas"][0]

            if not hit_ids:
                st.write("No matching documents found.")

            for doc_id, text, metadata in zip(hit_ids, hit_documents, hit_metadatas):
                st.write(f"Document ID: {doc_id}, Metadata: {metadata}")
                st.write(f"Text: {text}")
                st.markdown("---")


if __name__ == "__main__":
    main()
|
36 |
|
chroma.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from chromadb.utils import embedding_functions
|
2 |
+
from chromadb.config import Settings
|
3 |
+
import chromadb
|
4 |
+
|
5 |
+
class searchengine:
    """Small convenience wrapper around a single Chroma collection.

    Builds a sentence-transformer embedding function for ``model_name``,
    opens (or creates) the collection ``collection_name`` on a default
    ``chromadb.Client()`` and exposes add / query / count / peek helpers.
    """

    def __init__(self, model_name, collection_name):
        """Create the embedding function, the client and the collection.

        Args:
            model_name: Model name handed to
                ``SentenceTransformerEmbeddingFunction``.
            collection_name: Name of the collection to get or create.
        """
        self.sentence_transformer_ef = (
            embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=model_name
            )
        )
        self.chroma_client = chromadb.Client()
        # Attach the embedding function to the collection; previously it was
        # built but never used, so ``model_name`` had no effect on how
        # documents and queries were embedded.
        self.collection = self.chroma_client.get_or_create_collection(
            name=collection_name,
            embedding_function=self.sentence_transformer_ef,
        )

    def add(self, text, metadata, id):
        """Add one document to the collection.

        Args:
            text: Document text.
            metadata: Metadata for the document, forwarded unchanged
                (Chroma expects a dict).
            id: Document identifier; converted with ``str`` because Chroma
                ids are strings.
        """
        self.collection.add(
            documents=[text],
            metadatas=[metadata],
            ids=[str(id)],
        )

    def add_list(self, texts: list, metadatas: list, ids: list):
        """Add several documents in one call.

        Args:
            texts: Document texts.
            metadatas: One metadata entry per text, forwarded unchanged.
            ids: One identifier per text; each is converted with ``str``.
        """
        self.collection.add(
            documents=texts,
            metadatas=metadatas,
            ids=[str(doc_id) for doc_id in ids],
        )

    def query(self, query, number=2):
        """Run a single text query and return the collection's raw result.

        Args:
            query: Query text.
            number: Maximum number of results requested (``n_results``).

        Returns:
            Whatever ``collection.query`` returns for the one query text.
        """
        return self.collection.query(
            query_texts=[query],
            n_results=number,
        )

    def count(self):
        """Return the collection's ``count()`` value."""
        return self.collection.count()

    def peak(self):
        """Return the collection's ``peek()`` result.

        The method name is kept as-is for existing callers. The previous
        version computed the result but never returned it.
        """
        return self.collection.peek()
|