Carlos Rosas committed on
Commit
73600fd
·
verified ·
1 Parent(s): cd95bca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -9
app.py CHANGED
@@ -45,7 +45,7 @@ db = lancedb.connect("content 5/lancedb_data")
45
  table = db.open_table("sciencev4")
46
 
47
  def hybrid_search(text):
48
- results = table.search(text, query_type="hybrid").limit(6).to_pandas()
49
 
50
  document = []
51
  document_html = []
@@ -88,10 +88,21 @@ class pleiasBot:
88
  eos_token_id=tokenizer.eos_token_id
89
  )
90
 
91
- # Decode only the new tokens
92
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
93
 
94
- generated_text = '<h2 style="text-align:center">Réponse</h3>\n<div class="generation">' + format_references(generated_text) + "</div>"
 
 
 
 
 
 
 
 
 
 
 
95
  fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
96
  return generated_text, fiches_html
97
 
@@ -102,7 +113,6 @@ class pleiasBot:
102
  return None, None
103
 
104
  def format_references(text):
105
- # New ref format pattern
106
  ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>'
107
 
108
  parts = []
@@ -118,8 +128,8 @@ def format_references(text):
118
  ref_id = match.group(1) # The source ID
119
  ref_text = match.group(2).strip() # The reference text
120
 
121
- # Create tooltip HTML
122
- tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_number}:</strong> {ref_text}</span></span>'
123
  parts.append(tooltip_html)
124
 
125
  current_pos = match.end()
@@ -131,7 +141,7 @@ def format_references(text):
131
  return ''.join(parts)
132
 
133
  # Initialize the pleiasBot
134
- cassandre_bot = pleiasBot()
135
 
136
  # CSS for styling
137
  css = """
@@ -195,10 +205,9 @@ css = """
195
  border-color: #fff transparent transparent transparent;
196
  }
197
  """
198
-
199
  # Gradio interface
200
  def gradio_interface(user_message):
201
- response, sources = cassandre_bot.predict(user_message)
202
  return response, sources
203
 
204
  # Create Gradio app
 
45
  table = db.open_table("sciencev4")
46
 
47
  def hybrid_search(text):
48
+ results = table.search(text, query_type="hybrid").limit(5).to_pandas()
49
 
50
  document = []
51
  document_html = []
 
88
  eos_token_id=tokenizer.eos_token_id
89
  )
90
 
91
+ # Decode the generated text
92
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
93
 
94
+ # Split the text into analysis and answer sections
95
+ parts = generated_text.split("<|source_analysis_end|>")
96
+ if len(parts) == 2:
97
+ analysis = parts[0].strip()
98
+ answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
99
+
100
+ # Format with section titles
101
+ formatted_text = f'<div class="section-title">Sources Analysis</div>\n\n{analysis}\n\n<div class="section-title">Answer</div>\n\n{answer}'
102
+ else:
103
+ formatted_text = generated_text
104
+
105
+ generated_text = '<h2 style="text-align:center">Réponse</h3>\n<div class="generation">' + format_references(formatted_text) + "</div>"
106
  fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
107
  return generated_text, fiches_html
108
 
 
113
  return None, None
114
 
115
  def format_references(text):
 
116
  ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>'
117
 
118
  parts = []
 
128
  ref_id = match.group(1) # The source ID
129
  ref_text = match.group(2).strip() # The reference text
130
 
131
+ # Create tooltip HTML with source ID in bold
132
+ tooltip_html = f'<span class="tooltip">[{ref_number}]<span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>'
133
  parts.append(tooltip_html)
134
 
135
  current_pos = match.end()
 
141
  return ''.join(parts)
142
 
143
  # Initialize the pleiasBot
144
+ pleias_bot = pleiasBot()
145
 
146
  # CSS for styling
147
  css = """
 
205
  border-color: #fff transparent transparent transparent;
206
  }
207
  """
 
208
  # Gradio interface
209
  def gradio_interface(user_message):
210
+ response, sources = pleias_bot.predict(user_message)
211
  return response, sources
212
 
213
  # Create Gradio app