Carlos Rosas committed on
Commit
70e7937
·
verified ·
1 Parent(s): c7a2ff9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -32
app.py CHANGED
@@ -77,11 +77,11 @@ class pleiasBot:
77
  fiches, fiches_html = hybrid_search(user_message)
78
 
79
  detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
80
-
81
  # Convert inputs to tensor
82
  input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
83
  attention_mask = torch.ones_like(input_ids)
84
-
85
  try:
86
  output = model.generate(
87
  input_ids,
@@ -95,7 +95,7 @@ class pleiasBot:
95
  pad_token_id=tokenizer.pad_token_id,
96
  eos_token_id=tokenizer.eos_token_id
97
  )
98
-
99
  # Decode the generated text
100
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
101
 
@@ -105,11 +105,8 @@ class pleiasBot:
105
  analysis = parts[0].strip()
106
  answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
107
 
108
- # Add the prompt display to analysis section
109
- prompt_display = f'<div class="generation" style="background-color: #f5f5f5; padding: 1em; margin-bottom: 1em; font-family: monospace; white-space: pre-wrap;">{detailed_prompt}</div>'
110
-
111
  # Format each section with matching h2 titles
112
- analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n' + prompt_display + '<div class="generation">' + format_references(analysis) + "</div>"
113
  answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
114
  else:
115
  analysis_text = ""
@@ -117,40 +114,40 @@ class pleiasBot:
117
 
118
  fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
119
  return analysis_text, answer_text, fiches_html
120
-
121
  except Exception as e:
122
  print(f"Error during generation: {str(e)}")
123
  import traceback
124
  traceback.print_exc()
125
  return None, None, None
 
 
 
126
 
127
- def format_references(text):
128
- ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*' # Modified pattern to include the period and whitespace after ref
 
 
 
 
 
 
129
 
130
- parts = []
131
- current_pos = 0
132
- ref_number = 1
133
-
134
- for match in re.finditer(ref_pattern, text):
135
- # Add text before the reference
136
- text_before = text[current_pos:match.start()].rstrip()
137
- parts.append(text_before)
138
-
139
- # Extract reference components
140
- ref_id = match.group(1)
141
- ref_text = match.group(2).strip()
142
-
143
- # Add the reference, keeping the existing structure but adding <br> where whitespace was
144
- tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
145
- parts.append(tooltip_html)
146
-
147
- current_pos = match.end()
148
- ref_number += 1
149
 
150
- # Add any remaining text
151
- parts.append(text[current_pos:])
 
152
 
153
- return ''.join(parts)
 
 
 
 
 
 
154
 
155
  # Initialize the pleiasBot
156
  pleias_bot = pleiasBot()
 
77
  fiches, fiches_html = hybrid_search(user_message)
78
 
79
  detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
80
+
81
  # Convert inputs to tensor
82
  input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
83
  attention_mask = torch.ones_like(input_ids)
84
+
85
  try:
86
  output = model.generate(
87
  input_ids,
 
95
  pad_token_id=tokenizer.pad_token_id,
96
  eos_token_id=tokenizer.eos_token_id
97
  )
98
+
99
  # Decode the generated text
100
  generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
101
 
 
105
  analysis = parts[0].strip()
106
  answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
107
 
 
 
 
108
  # Format each section with matching h2 titles
109
+ analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n<div class="generation">' + format_references(analysis) + "</div>"
110
  answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
111
  else:
112
  analysis_text = ""
 
114
 
115
  fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
116
  return analysis_text, answer_text, fiches_html
117
+
118
  except Exception as e:
119
  print(f"Error during generation: {str(e)}")
120
  import traceback
121
  traceback.print_exc()
122
  return None, None, None
123
+
124
def format_references(text):
    """Turn ``<ref>`` citation markers in *text* into numbered HTML tooltips.

    Each ``<ref name="ID">"QUOTE"</ref>`` marker is replaced by a
    ``<span class="tooltip">`` that displays ``[n]`` (numbered from 1 in
    order of appearance) and nests a ``tooltiptext`` span holding the id
    and the quote. Whitespace between the preceding text and the marker is
    dropped so the number sits directly against the cited words.

    When the marker is immediately followed by a period, the period and any
    whitespace after it are replaced by ``.<br>``. A marker that is not
    followed by a period is still converted and numbered; the text after it
    is left untouched.

    Args:
        text: String that may contain ``<ref>`` markers.

    Returns:
        The string with every marker replaced by its tooltip HTML. Text
        outside the markers is passed through as-is (it is NOT escaped).
    """
    # Function-scope import: keeps the block self-contained without touching
    # the file's import section.
    from html import escape

    # Group 3 (period + trailing whitespace) is optional so that citations
    # followed by a comma, or placed mid-sentence, are not left as raw tags.
    ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>(\.\s*)?'

    parts = []
    current_pos = 0

    for ref_number, match in enumerate(re.finditer(ref_pattern, text), start=1):
        # Text before the reference, without the whitespace that preceded it.
        parts.append(text[current_pos:match.start()].rstrip())

        # The id and quote end up as HTML element content, so escape the
        # markup-significant characters; quote=False leaves apostrophes and
        # double quotes byte-identical.
        ref_id = escape(match.group(1), quote=False)
        ref_text = escape(match.group(2).strip(), quote=False)

        # Re-emit the period (plus a line break) only when one was consumed.
        terminator = ".<br>" if match.group(3) else ""

        parts.append(
            f'<span class="tooltip"><strong>[{ref_number}]</strong>'
            f'<span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>'
            f'{terminator}'
        )

        current_pos = match.end()

    # Whatever follows the last reference (or the whole text if there is none).
    parts.append(text[current_pos:])

    return ''.join(parts)
151
 
152
  # Initialize the pleiasBot
153
  pleias_bot = pleiasBot()