Médéric Hurier (Fmind) committed on
Commit
5c68cc7
·
1 Parent(s): beb32ed

Fix configs

Browse files
Files changed (6) hide show
  1. app.py +5 -3
  2. database.py +3 -2
  3. invoke.yaml +1 -0
  4. lib.py +10 -5
  5. packages.txt +0 -1
  6. tasks/convert.py +1 -0
app.py CHANGED
@@ -18,15 +18,17 @@ logging.basicConfig(
18
 
19
  # %% CONFIGS
20
 
21
- TITLE = "Fmind AI Assistant"
22
  THEME = "glass"
23
- CLIENT = lib.get_database_client(path="database")
24
- ENCODING = tiktoken.get_encoding("cl100k_base")
 
 
25
  FUNCTION = lib.get_embedding_function()
26
  COLLECTION = CLIENT.get_collection(
27
  name=lib.DATABASE_COLLECTION,
28
  embedding_function=FUNCTION,
29
  )
 
30
  EXAMPLES = [
31
  "Who is Médéric Hurier (Fmind)?",
32
  "Is Fmind open to new opportunities?",
 
18
 
19
  # %% CONFIGS
20
 
 
21
  THEME = "glass"
22
+ TITLE = "Fmind Chatbot"
23
+
24
+ CLIENT = lib.get_database_client(path=lib.DATABASE_PATH)
25
+ ENCODING = tiktoken.get_encoding(encoding_name=lib.EMBEDDING_TOKENIZER)
26
  FUNCTION = lib.get_embedding_function()
27
  COLLECTION = CLIENT.get_collection(
28
  name=lib.DATABASE_COLLECTION,
29
  embedding_function=FUNCTION,
30
  )
31
+
32
  EXAMPLES = [
33
  "Who is Médéric Hurier (Fmind)?",
34
  "Is Fmind open to new opportunities?",
database.py CHANGED
@@ -21,8 +21,9 @@ logging.basicConfig(
21
  # %% PARSING
22
 
23
  PARSER = argparse.ArgumentParser(description=__doc__)
24
- PARSER.add_argument("--database", type=str, required=True)
25
  PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
 
 
26
 
27
  # %% FUNCTIONS
28
 
@@ -67,7 +68,7 @@ def main(args: list[str] | None = None) -> int:
67
  embedding_function = lib.get_embedding_function()
68
  logging.info("Embedding function: %s", embedding_function)
69
  # collection
70
- database_collection = lib.DATABASE_COLLECTION
71
  logging.info("Database collection: %s", database_collection)
72
  collection = client.create_collection(
73
  name=database_collection, embedding_function=embedding_function
 
21
  # %% PARSING
22
 
23
  PARSER = argparse.ArgumentParser(description=__doc__)
 
24
  PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
25
+ PARSER.add_argument("--database", type=str, default=lib.DATABASE_PATH)
26
+ PARSER.add_argument("--collection", type=str, default=lib.DATABASE_COLLECTION)
27
 
28
  # %% FUNCTIONS
29
 
 
68
  embedding_function = lib.get_embedding_function()
69
  logging.info("Embedding function: %s", embedding_function)
70
  # collection
71
+ database_collection = opts.collection
72
  logging.info("Database collection: %s", database_collection)
73
  collection = client.create_collection(
74
  name=database_collection, embedding_function=embedding_function
invoke.yaml CHANGED
@@ -5,6 +5,7 @@ run:
5
  app:
6
  path: "app.py"
7
  database:
 
8
  path: "database"
9
  linkedin:
10
  html: "files/linkedin.html"
 
5
  app:
6
  path: "app.py"
7
  database:
8
+ collection: "resume"
9
  path: "database"
10
  linkedin:
11
  html: "files/linkedin.html"
lib.py CHANGED
@@ -14,15 +14,20 @@ sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
14
  import chromadb
15
  from chromadb.utils import embedding_functions
16
 
17
- # %% TYPINGS
18
-
19
- Collection = chromadb.Collection
20
-
21
  # %% CONFIGS
22
 
23
  DATABASE_COLLECTION = "resume"
 
 
 
 
 
24
  OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
25
 
 
 
 
 
26
  # %% FUNCTIONS
27
 
28
 
@@ -36,7 +41,7 @@ def get_database_client(path: str) -> chromadb.API:
36
 
37
 
38
  def get_embedding_function(
39
- model_name: str = "text-embedding-ada-002", api_key: str = OPENAI_API_KEY
40
  ) -> embedding_functions.EmbeddingFunction:
41
  """Get the embedding function for Chroma DB collections."""
42
  return embedding_functions.OpenAIEmbeddingFunction(
 
14
  import chromadb
15
  from chromadb.utils import embedding_functions
16
 
 
 
 
 
17
  # %% CONFIGS
18
 
19
  DATABASE_COLLECTION = "resume"
20
+ DATABASE_PATH = "database"
21
+
22
+ EMBEDDING_MODEL = "text-embedding-ada-002"
23
+ EMBEDDING_TOKENIZER = "cl100k_base"
24
+
25
  OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
26
 
27
+ # %% TYPINGS
28
+
29
+ Collection = chromadb.Collection
30
+
31
  # %% FUNCTIONS
32
 
33
 
 
41
 
42
 
43
  def get_embedding_function(
44
+ model_name: str = EMBEDDING_MODEL, api_key: str = OPENAI_API_KEY
45
  ) -> embedding_functions.EmbeddingFunction:
46
  """Get the embedding function for Chroma DB collections."""
47
  return embedding_functions.OpenAIEmbeddingFunction(
packages.txt CHANGED
@@ -1 +0,0 @@
1
- # https://huggingface.co/docs/hub/spaces-dependencies
 
 
tasks/convert.py CHANGED
@@ -26,6 +26,7 @@ def database(ctx: Context) -> None:
26
  ctx.run(
27
  f"""{ctx.venv.python} database.py \
28
  --database={ctx.database.path} \
 
29
  {ctx.linkedin.markdown}
30
  """
31
  )
 
26
  ctx.run(
27
  f"""{ctx.venv.python} database.py \
28
  --database={ctx.database.path} \
29
+ --collection={ctx.database.collection} \
30
  {ctx.linkedin.markdown}
31
  """
32
  )