gutgut

Paused

App Files Community

Carlos Rosas committed on Nov 28, 2024

Commit

70e7937

verified ·

1 Parent(s): c7a2ff9

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -32

app.py CHANGED Viewed

@@ -77,11 +77,11 @@ class pleiasBot:
         fiches, fiches_html = hybrid_search(user_message)
         detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
         # Convert inputs to tensor
         input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
         attention_mask = torch.ones_like(input_ids)
         try:
             output = model.generate(
                 input_ids,
@@ -95,7 +95,7 @@ class pleiasBot:
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
             # Decode the generated text
             generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
@@ -105,11 +105,8 @@ class pleiasBot:
                 analysis = parts[0].strip()
                 answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
-                # Add the prompt display to analysis section
-                prompt_display = f'<div class="generation" style="background-color: #f5f5f5; padding: 1em; margin-bottom: 1em; font-family: monospace; white-space: pre-wrap;">{detailed_prompt}</div>'
                 # Format each section with matching h2 titles
-                analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n' + prompt_display + '<div class="generation">' + format_references(analysis) + "</div>"
                 answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
             else:
                 analysis_text = ""
@@ -117,40 +114,40 @@ class pleiasBot:
             fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
             return analysis_text, answer_text, fiches_html
         except Exception as e:
             print(f"Error during generation: {str(e)}")
             import traceback
             traceback.print_exc()
             return None, None, None
-    def format_references(text):
-        ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*'  # Modified pattern to include the period and whitespace after ref
-        parts = []
-        current_pos = 0
-        ref_number = 1
-        for match in re.finditer(ref_pattern, text):
-            # Add text before the reference
-            text_before = text[current_pos:match.start()].rstrip()
-            parts.append(text_before)
-            # Extract reference components
-            ref_id = match.group(1)
-            ref_text = match.group(2).strip()
-            # Add the reference, keeping the existing structure but adding <br> where whitespace was
-            tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
-            parts.append(tooltip_html)
-            current_pos = match.end()
-            ref_number += 1
-        # Add any remaining text
-        parts.append(text[current_pos:])
-        return ''.join(parts)
 # Initialize the pleiasBot
 pleias_bot = pleiasBot()

         fiches, fiches_html = hybrid_search(user_message)
         detailed_prompt = f"""<|query_start|>{user_message}<|query_end|>\n{fiches}\n<|source_analysis_start|>"""
         # Convert inputs to tensor
         input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
         attention_mask = torch.ones_like(input_ids)
         try:
             output = model.generate(
                 input_ids,
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
             # Decode the generated text
             generated_text = tokenizer.decode(output[0][len(input_ids[0]):])
                 analysis = parts[0].strip()
                 answer = parts[1].replace("<|answer_start|>", "").replace("<|answer_end|>", "").strip()
                 # Format each section with matching h2 titles
+                analysis_text = '<h2 style="text-align:center">Analyse des sources</h2>\n<div class="generation">' + format_references(analysis) + "</div>"
                 answer_text = '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + format_references(answer) + "</div>"
             else:
                 analysis_text = ""
             fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
             return analysis_text, answer_text, fiches_html
         except Exception as e:
             print(f"Error during generation: {str(e)}")
             import traceback
             traceback.print_exc()
             return None, None, None
+def format_references(text):
+    ref_pattern = r'<ref name="([^"]+)">"([^"]+)"</ref>\.\s*'  # Modified pattern to include the period and whitespace after ref
+    parts = []
+    current_pos = 0
+    ref_number = 1
+    for match in re.finditer(ref_pattern, text):
+        # Add text before the reference
+        text_before = text[current_pos:match.start()].rstrip()
+        parts.append(text_before)
+        # Extract reference components
+        ref_id = match.group(1)
+        ref_text = match.group(2).strip()
+        # Add the reference, keeping the existing structure but adding <br> where whitespace was
+        tooltip_html = f'<span class="tooltip"><strong>[{ref_number}]</strong><span class="tooltiptext"><strong>{ref_id}</strong>: {ref_text}</span></span>.<br>'
+        parts.append(tooltip_html)
+        current_pos = match.end()
+        ref_number += 1
+    # Add any remaining text
+    parts.append(text[current_pos:])
+    return ''.join(parts)
 # Initialize the pleiasBot
 pleias_bot = pleiasBot()