alexkueck committed on
Commit
5d84e6e
·
verified ·
1 Parent(s): 7fc9240

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -26
app.py CHANGED
@@ -140,43 +140,43 @@ def search_documents(query):
140
  similarities = cosine_similarity(query_embedding, text_embeddings).flatten()
141
 
142
  # Sortieren nach Relevanz
143
- sorted_indices = similarities.argsort()[::-1]
144
- results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
145
- results = []
 
146
  relevant_text = ""
147
  relevant_docs = {}
148
  num_pages_per_doc = [len(doc['pages']) for doc in documents]
149
  cumulative_pages = [sum(num_pages_per_doc[:i+1]) for i in range(len(num_pages_per_doc))]
150
 
151
  for i in related_docs_indices:
152
- if cosine_similarities[i] > 0:
153
- doc_index = next(idx for idx, cumulative in enumerate(cumulative_pages) if i < cumulative)
154
- page_index = i if doc_index == 0 else i - cumulative_pages[doc_index-1]
155
- doc = documents[doc_index]
156
- page = doc['pages'][page_index]
157
- page_content = page['content']
158
- header_content = page.get('header', '')
159
 
160
- # Überprüfen, ob der Suchtext in der Überschrift oder im Seiteninhalt enthalten ist
161
- index_in_content = page_content.lower().find(query.lower())
162
- index_in_header = header_content.lower().find(query.lower())
163
 
164
- if index_in_content != -1 or index_in_header != -1:
165
- # Erstellen Sie einen Snippet für die Suchergebnisse
166
- start = max(0, index_in_content - 400) if index_in_content != -1 else 0
167
- end = min(len(page_content), index_in_content + 400) if index_in_content != -1 else len(page_content)
168
- snippet = f"Aus <span class='doc-name'>{doc['file']}</span> (Seite <span class='page-number'>{page_index + 1}</span>):<br>"
169
 
170
- # Fügen Sie die Überschrift hinzu, falls vorhanden
171
- if header_content:
172
- snippet += f"<b>Überschrift:</b> {header_content}<br>"
173
 
174
- snippet += f"...{page_content[start:end]}...<br><br>"
175
- relevant_text += snippet
176
 
177
- if doc['file'] not in relevant_docs:
178
- relevant_docs[doc['file']] = []
179
- relevant_docs[doc['file']].append(snippet)
180
 
181
  results = list(relevant_docs.keys())
182
  return results, relevant_text
 
140
  similarities = cosine_similarity(query_embedding, text_embeddings).flatten()
141
 
142
  # Sortieren nach Relevanz
143
+ related_docs_indices = similarities.argsort()[::-1]
144
+ #results = [documents[i]['file'] for i in sorted_indices if similarities[i] > 0]
145
+
146
+ results=[]
147
  relevant_text = ""
148
  relevant_docs = {}
149
  num_pages_per_doc = [len(doc['pages']) for doc in documents]
150
  cumulative_pages = [sum(num_pages_per_doc[:i+1]) for i in range(len(num_pages_per_doc))]
151
 
152
  for i in related_docs_indices:
153
+ doc_index = next(idx for idx, cumulative in enumerate(cumulative_pages) if i < cumulative)
154
+ page_index = i if doc_index == 0 else i - cumulative_pages[doc_index-1]
155
+ doc = documents[doc_index]
156
+ page = doc['pages'][page_index]
157
+ page_content = page['content']
158
+ header_content = page.get('header', '')
 
159
 
160
+ # Überprüfen, ob der Suchtext in der Überschrift oder im Seiteninhalt enthalten ist
161
+ index_in_content = page_content.lower().find(query.lower())
162
+ index_in_header = header_content.lower().find(query.lower())
163
 
164
+ if index_in_content != -1 or index_in_header != -1:
165
+ # Erstellen Sie einen Snippet für die Suchergebnisse
166
+ start = max(0, index_in_content - 400) if index_in_content != -1 else 0
167
+ end = min(len(page_content), index_in_content + 400) if index_in_content != -1 else len(page_content)
168
+ snippet = f"Aus <span class='doc-name'>{doc['file']}</span> (Seite <span class='page-number'>{page_index + 1}</span>):<br>"
169
 
170
+ # Fügen Sie die Überschrift hinzu, falls vorhanden
171
+ if header_content:
172
+ snippet += f"<b>Überschrift:</b> {header_content}<br>"
173
 
174
+ snippet += f"...{page_content[start:end]}...<br><br>"
175
+ relevant_text += snippet
176
 
177
+ if doc['file'] not in relevant_docs:
178
+ relevant_docs[doc['file']] = []
179
+ relevant_docs[doc['file']].append(snippet)
180
 
181
  results = list(relevant_docs.keys())
182
  return results, relevant_text