Spaces:
Paused
Paused
Carlos Rosas
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -45,7 +45,7 @@ db = lancedb.connect("content 5/lancedb_data")
|
|
45 |
table = db.open_table("sciencev4")
|
46 |
|
47 |
def hybrid_search(text):
|
48 |
-
results = table.search(text, query_type="hybrid").limit(
|
49 |
|
50 |
document = []
|
51 |
document_html = []
|
@@ -88,10 +88,21 @@ class pleiasBot:
|
|
88 |
eos_token_id=tokenizer.eos_token_id
|
89 |
)
|
90 |
|
91 |
-
# Decode
|
92 |
generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
|
93 |
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
|
96 |
return generated_text, fiches_html
|
97 |
|
@@ -102,7 +113,6 @@ class pleiasBot:
|
|
102 |
return None, None
|
103 |
|
104 |
def format_references(text):
|
105 |
-
# New ref format pattern
|
106 |
ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>'
|
107 |
|
108 |
parts = []
|
@@ -118,8 +128,8 @@ def format_references(text):
|
|
118 |
ref_id = match.group(1) # The source ID
|
119 |
ref_text = match.group(2).strip() # The reference text
|
120 |
|
121 |
-
# Create tooltip HTML
|
122 |
-
tooltip_html = f'<span class="tooltip"
|
123 |
parts.append(tooltip_html)
|
124 |
|
125 |
current_pos = match.end()
|
@@ -131,7 +141,7 @@ def format_references(text):
|
|
131 |
return ''.join(parts)
|
132 |
|
133 |
# Initialize the pleiasBot
|
134 |
-
|
135 |
|
136 |
# CSS for styling
|
137 |
css = """
|
@@ -195,10 +205,9 @@ css = """
|
|
195 |
border-color: #fff transparent transparent transparent;
|
196 |
}
|
197 |
"""
|
198 |
-
|
199 |
# Gradio interface
|
200 |
def gradio_interface(user_message):
|
201 |
-
response, sources =
|
202 |
return response, sources
|
203 |
|
204 |
# Create Gradio app
|
|
|
45 |
table = db.open_table("sciencev4")
|
46 |
|
47 |
def hybrid_search(text):
|
48 |
+
results = table.search(text, query_type="hybrid").limit(5).to_pandas()
|
49 |
|
50 |
document = []
|
51 |
document_html = []
|
|
|
88 |
eos_token_id=tokenizer.eos_token_id
|
89 |
)
|
90 |
|
91 |
+
# Decode the generated text
|
92 |
generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
|
93 |
|
94 |
+
# Split the text into analysis and answer sections
|
95 |
+
parts = generated_text.split("<|source_analysis_end|>")
|
96 |
+
if len(parts) == 2:
|
97 |
+
analysis = parts[0].strip()
|
98 |
+
answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
|
99 |
+
|
100 |
+
# Format with section titles
|
101 |
+
formatted_text = f'<div class="section-title">Sources Analysis</div>\n\n{analysis}\n\n<div class="section-title">Answer</div>\n\n{answer}'
|
102 |
+
else:
|
103 |
+
formatted_text = generated_text
|
104 |
+
|
105 |
+
generated_text = '<h2 style="text-align:center">Réponse</h3>\n<div class="generation">' + format_references(formatted_text) + "</div>"
|
106 |
fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
|
107 |
return generated_text, fiches_html
|
108 |
|
|
|
113 |
return None, None
|
114 |
|
115 |
def format_references(text):
|
|
|
116 |
ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>'
|
117 |
|
118 |
parts = []
|
|
|
128 |
ref_id = match.group(1) # The source ID
|
129 |
ref_text = match.group(2).strip() # The reference text
|
130 |
|
131 |
+
# Create tooltip HTML with source ID in bold
|
132 |
+
tooltip_html = f'<span class="tooltip">[{ref_number}]<span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>'
|
133 |
parts.append(tooltip_html)
|
134 |
|
135 |
current_pos = match.end()
|
|
|
141 |
return ''.join(parts)
|
142 |
|
143 |
# Initialize the pleiasBot
|
144 |
+
pleias_bot = pleiasBot()
|
145 |
|
146 |
# CSS for styling
|
147 |
css = """
|
|
|
205 |
border-color: #fff transparent transparent transparent;
|
206 |
}
|
207 |
"""
|
|
|
208 |
# Gradio interface
|
209 |
def gradio_interface(user_message):
|
210 |
+
response, sources = pleias_bot.predict(user_message)
|
211 |
return response, sources
|
212 |
|
213 |
# Create Gradio app
|