Spaces:
Running
Running
Médéric Hurier (Fmind)
committed on
Commit
·
5c68cc7
1
Parent(s):
beb32ed
Fix configs
Browse files
- app.py +5 -3
- database.py +3 -2
- invoke.yaml +1 -0
- lib.py +10 -5
- packages.txt +0 -1
- tasks/convert.py +1 -0
app.py
CHANGED
@@ -18,15 +18,17 @@ logging.basicConfig(
|
|
18 |
|
19 |
# %% CONFIGS
|
20 |
|
21 |
-
TITLE = "Fmind AI Assistant"
|
22 |
THEME = "glass"
|
23 |
-
|
24 |
-
|
|
|
|
|
25 |
FUNCTION = lib.get_embedding_function()
|
26 |
COLLECTION = CLIENT.get_collection(
|
27 |
name=lib.DATABASE_COLLECTION,
|
28 |
embedding_function=FUNCTION,
|
29 |
)
|
|
|
30 |
EXAMPLES = [
|
31 |
"Who is Médéric Hurier (Fmind)?",
|
32 |
"Is Fmind open to new opportunities?",
|
|
|
18 |
|
19 |
# %% CONFIGS
|
20 |
|
|
|
21 |
THEME = "glass"
|
22 |
+
TITLE = "Fmind Chatbot"
|
23 |
+
|
24 |
+
CLIENT = lib.get_database_client(path=lib.DATABASE_PATH)
|
25 |
+
ENCODING = tiktoken.get_encoding(encoding_name=lib.EMBEDDING_TOKENIZER)
|
26 |
FUNCTION = lib.get_embedding_function()
|
27 |
COLLECTION = CLIENT.get_collection(
|
28 |
name=lib.DATABASE_COLLECTION,
|
29 |
embedding_function=FUNCTION,
|
30 |
)
|
31 |
+
|
32 |
EXAMPLES = [
|
33 |
"Who is Médéric Hurier (Fmind)?",
|
34 |
"Is Fmind open to new opportunities?",
|
database.py
CHANGED
@@ -21,8 +21,9 @@ logging.basicConfig(
|
|
21 |
# %% PARSING
|
22 |
|
23 |
PARSER = argparse.ArgumentParser(description=__doc__)
|
24 |
-
PARSER.add_argument("--database", type=str, required=True)
|
25 |
PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
|
|
|
|
|
26 |
|
27 |
# %% FUNCTIONS
|
28 |
|
@@ -67,7 +68,7 @@ def main(args: list[str] | None = None) -> int:
|
|
67 |
embedding_function = lib.get_embedding_function()
|
68 |
logging.info("Embedding function: %s", embedding_function)
|
69 |
# collection
|
70 |
-
database_collection =
|
71 |
logging.info("Database collection: %s", database_collection)
|
72 |
collection = client.create_collection(
|
73 |
name=database_collection, embedding_function=embedding_function
|
|
|
21 |
# %% PARSING
|
22 |
|
23 |
PARSER = argparse.ArgumentParser(description=__doc__)
|
|
|
24 |
PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
|
25 |
+
PARSER.add_argument("--database", type=str, default=lib.DATABASE_PATH)
|
26 |
+
PARSER.add_argument("--collection", type=str, default=lib.DATABASE_COLLECTION)
|
27 |
|
28 |
# %% FUNCTIONS
|
29 |
|
|
|
68 |
embedding_function = lib.get_embedding_function()
|
69 |
logging.info("Embedding function: %s", embedding_function)
|
70 |
# collection
|
71 |
+
database_collection = opts.collection
|
72 |
logging.info("Database collection: %s", database_collection)
|
73 |
collection = client.create_collection(
|
74 |
name=database_collection, embedding_function=embedding_function
|
invoke.yaml
CHANGED
@@ -5,6 +5,7 @@ run:
|
|
5 |
app:
|
6 |
path: "app.py"
|
7 |
database:
|
|
|
8 |
path: "database"
|
9 |
linkedin:
|
10 |
html: "files/linkedin.html"
|
|
|
5 |
app:
|
6 |
path: "app.py"
|
7 |
database:
|
8 |
+
collection: "resume"
|
9 |
path: "database"
|
10 |
linkedin:
|
11 |
html: "files/linkedin.html"
|
lib.py
CHANGED
@@ -14,15 +14,20 @@ sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
|
|
14 |
import chromadb
|
15 |
from chromadb.utils import embedding_functions
|
16 |
|
17 |
-
# %% TYPINGS
|
18 |
-
|
19 |
-
Collection = chromadb.Collection
|
20 |
-
|
21 |
# %% CONFIGS
|
22 |
|
23 |
DATABASE_COLLECTION = "resume"
|
|
|
|
|
|
|
|
|
|
|
24 |
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
25 |
|
|
|
|
|
|
|
|
|
26 |
# %% FUNCTIONS
|
27 |
|
28 |
|
@@ -36,7 +41,7 @@ def get_database_client(path: str) -> chromadb.API:
|
|
36 |
|
37 |
|
38 |
def get_embedding_function(
|
39 |
-
model_name: str =
|
40 |
) -> embedding_functions.EmbeddingFunction:
|
41 |
"""Get the embedding function for Chroma DB collections."""
|
42 |
return embedding_functions.OpenAIEmbeddingFunction(
|
|
|
14 |
import chromadb
|
15 |
from chromadb.utils import embedding_functions
|
16 |
|
|
|
|
|
|
|
|
|
17 |
# %% CONFIGS
|
18 |
|
19 |
DATABASE_COLLECTION = "resume"
|
20 |
+
DATABASE_PATH = "database"
|
21 |
+
|
22 |
+
EMBEDDING_MODEL = "text-embedding-ada-002"
|
23 |
+
EMBEDDING_TOKENIZER = "cl100k_base"
|
24 |
+
|
25 |
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
26 |
|
27 |
+
# %% TYPINGS
|
28 |
+
|
29 |
+
Collection = chromadb.Collection
|
30 |
+
|
31 |
# %% FUNCTIONS
|
32 |
|
33 |
|
|
|
41 |
|
42 |
|
43 |
def get_embedding_function(
|
44 |
+
model_name: str = EMBEDDING_MODEL, api_key: str = OPENAI_API_KEY
|
45 |
) -> embedding_functions.EmbeddingFunction:
|
46 |
"""Get the embedding function for Chroma DB collections."""
|
47 |
return embedding_functions.OpenAIEmbeddingFunction(
|
packages.txt
CHANGED
@@ -1 +0,0 @@
|
|
1 |
-
# https://huggingface.co/docs/hub/spaces-dependencies
|
|
|
|
tasks/convert.py
CHANGED
@@ -26,6 +26,7 @@ def database(ctx: Context) -> None:
|
|
26 |
ctx.run(
|
27 |
f"""{ctx.venv.python} database.py \
|
28 |
--database={ctx.database.path} \
|
|
|
29 |
{ctx.linkedin.markdown}
|
30 |
"""
|
31 |
)
|
|
|
26 |
ctx.run(
|
27 |
f"""{ctx.venv.python} database.py \
|
28 |
--database={ctx.database.path} \
|
29 |
+
--collection={ctx.database.collection} \
|
30 |
{ctx.linkedin.markdown}
|
31 |
"""
|
32 |
)
|