HonestAnnie committed on
Commit
331b253
1 Parent(s): 92ed022

Jetzt läuft alles!!

Browse files
Files changed (1) hide show
  1. app.py +42 -46
app.py CHANGED
@@ -4,6 +4,12 @@ import chromadb
4
  from sentence_transformers import SentenceTransformer
5
  import spaces
6
 
 
 
 
 
 
 
7
  @spaces.GPU
8
  def get_embeddings(queries, task):
9
  model = SentenceTransformer("Linq-AI-Research/Linq-Embed-Mistral", use_auth_token=os.getenv("HF_TOKEN"))
@@ -11,59 +17,45 @@ def get_embeddings(queries, task):
11
  query_embeddings = model.encode(prompts)
12
  return query_embeddings
13
 
14
- # Initialize a persistent Chroma client and retrieve collection
15
- client = chromadb.PersistentClient(path="./chroma")
16
- collection_de = client.get_collection(name="phil_de")
17
- collection_en = client.get_collection(name="phil_en")
18
- authors_list_de = ["Ludwig Wittgenstein", "Sigmund Freud", "Marcus Aurelius", "Friedrich Nietzsche", "Epiktet", "Ernst Jünger", "Georg Christoph Lichtenberg", "Balthasar Gracian", "Hannah Arendt", "Erich Fromm", "Albert Camus"]
19
- authors_list_en = ["Friedrich Nietzsche", "Joscha Bach"]
20
-
21
  def query_chroma(collection, embedding, authors):
22
- try:
23
- where_filter = {"author": {"$in": authors}} if authors else {}
24
- # Directly use the embedding provided, already in list format suitable for the query
25
- results = collection.query(
26
- query_embeddings=[embedding.tolist()], # Ensure embedding is properly formatted
27
- n_results=10,
28
- where=where_filter,
29
- include=["documents", "metadatas", "distances"]
30
- )
31
-
32
- ids = results.get('ids', [[]])[0]
33
- metadatas = results.get('metadatas', [[]])[0]
34
- documents = results.get('documents', [[]])[0]
35
- distances = results.get('distances', [[]])[0]
36
-
37
- formatted_results = []
38
- for id_, metadata, document_text, distance in zip(ids, metadatas, documents, distances):
39
- result_dict = {
40
- "id": id_,
41
- "author": metadata.get('author', 'Unknown author'),
42
- "book": metadata.get('book', 'Unknown book'),
43
- "section": metadata.get('section', 'Unknown section'),
44
- "title": metadata.get('title', 'Untitled'),
45
- "text": document_text,
46
- "distance": distance
47
- }
48
- formatted_results.append(result_dict)
49
-
50
- return formatted_results
51
- except Exception as e:
52
- return [{"error": str(e)}]
53
 
54
- def update_authors(database):
55
- return gr.update(choices=authors_list_de if database == "German" else authors_list_en)
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
 
58
 
59
  with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }") as demo:
60
- gr.Markdown("Enter your query, filter authors (default is all), click **Search** to search.")
61
- database_inp = gr.Dropdown(label="Database", choices=["English", "German"], value="German")
62
  author_inp = gr.Dropdown(label="Authors", choices=authors_list_de, multiselect=True)
63
- inp = gr.Textbox(label="Query", placeholder="Enter questions separated by semicolons...")
64
  btn = gr.Button("Search")
65
  results = gr.State()
66
 
 
 
 
67
  database_inp.change(
68
  fn=lambda database: update_authors(database),
69
  inputs=[database_inp],
@@ -91,8 +83,12 @@ with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; bo
91
  def display_accordion(data):
92
  for query, res in data:
93
  with gr.Accordion(query, open=False) as acc:
94
- markdown_contents = "\n".join(f"**{r['author']}, {r['book']}**\n\n{r['text']}" for r in res)
95
- with gr.Column():
96
- gr.Markdown(value=markdown_contents, elem_classes="custom-markdown")
 
 
 
 
97
 
98
  demo.launch()
 
4
  from sentence_transformers import SentenceTransformer
5
  import spaces
6
 
7
+ client = chromadb.PersistentClient(path="./chroma")
8
+ collection_de = client.get_collection(name="phil_de")
9
+ collection_en = client.get_collection(name="phil_en")
10
+ authors_list_de = ["Ludwig Wittgenstein", "Sigmund Freud", "Marcus Aurelius", "Friedrich Nietzsche", "Epiktet", "Ernst Jünger", "Georg Christoph Lichtenberg", "Balthasar Gracian", "Hannah Arendt", "Erich Fromm", "Albert Camus"]
11
+ authors_list_en = ["Friedrich Nietzsche", "Joscha Bach"]
12
+
13
  @spaces.GPU
14
  def get_embeddings(queries, task):
15
  model = SentenceTransformer("Linq-AI-Research/Linq-Embed-Mistral", use_auth_token=os.getenv("HF_TOKEN"))
 
17
  query_embeddings = model.encode(prompts)
18
  return query_embeddings
19
 
 
 
 
 
 
 
 
20
  def query_chroma(collection, embedding, authors):
21
+ results = collection.query(
22
+ query_embeddings=[embedding.tolist()],
23
+ n_results=10,
24
+ where={"author": {"$in": authors}} if authors else {},
25
+ include=["documents", "metadatas", "distances"]
26
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
+ ids = results.get('ids', [[]])[0]
29
+ metadatas = results.get('metadatas', [[]])[0]
30
+ documents = results.get('documents', [[]])[0]
31
+ distances = results.get('distances', [[]])[0]
32
 
33
+ formatted_results = []
34
+ for id_, metadata, document_text, distance in zip(ids, metadatas, documents, distances):
35
+ result_dict = {
36
+ "id": id_,
37
+ "author": metadata.get('author', 'Unknown author'),
38
+ "book": metadata.get('book', 'Unknown book'),
39
+ "section": metadata.get('section', 'Unknown section'),
40
+ "title": metadata.get('title', 'Untitled'),
41
+ "text": document_text,
42
+ "distance": distance
43
+ }
44
+ formatted_results.append(result_dict)
45
 
46
+ return formatted_results
47
 
48
  with gr.Blocks(css=".custom-markdown { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }") as demo:
49
+ gr.Markdown("Enter your query, filter authors (default is all), click **Search** to search. Delimit multiple queries with semicola; since there is a search-quota for each user (based on IP) it makes sense to query in batches (if you enjoy querying that is).")
50
+ database_inp = gr.Dropdown(label="Database", choices=["German", "English"], value="German")
51
  author_inp = gr.Dropdown(label="Authors", choices=authors_list_de, multiselect=True)
52
+ inp = gr.Textbox(label="Query", placeholder="Wie kann ich gesund leben und bedeutet Gesundheit für jeden das gleiche?; Why is life so difficult and aren't there any shortcuts?")
53
  btn = gr.Button("Search")
54
  results = gr.State()
55
 
56
+ def update_authors(database):
57
+ return gr.update(choices=authors_list_de if database == "German" else authors_list_en)
58
+
59
  database_inp.change(
60
  fn=lambda database: update_authors(database),
61
  inputs=[database_inp],
 
83
  def display_accordion(data):
84
  for query, res in data:
85
  with gr.Accordion(query, open=False) as acc:
86
+ for result in res:
87
+ with gr.Column():
88
+ author = result.get('author', 'Unknown author')
89
+ book = result.get('book', 'Unknown book')
90
+ text = result.get('text')
91
+ markdown_contents = f"**{author}, {book}**\n\n{text}"
92
+ gr.Markdown(value=markdown_contents, elem_classes="custom-markdown")
93
 
94
  demo.launch()