gutgut

Paused

App Files Files Community

Carlos Rosas committed on Nov 18, 2024

Commit

aa38253

verified ·

1 Parent(s): 8088280

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -16

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import transformers
 import re
 from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
-from vllm import LLM, SamplingParams
 import torch
 import gradio as gr
 import json
@@ -15,15 +14,29 @@ import pandas as pd
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Define variables
-temperature = 0.7
 max_new_tokens = 3000
 top_p = 0.95
-repetition_penalty = 1.2
-model_name = "PleIAs/llama-reasoning-rag"
-# Initialize vLLM
-llm = LLM(model_name, max_model_len=8128)
 # Connect to the LanceDB database
 db = lancedb.connect("content 5/lancedb_data")
@@ -37,7 +50,6 @@ def hybrid_search(text):
     for _, row in results.iterrows():
         hash_id = str(row['hash'])
         title = row['section']
-        #content = row['text'][:100] + "..."  # Truncate the text for preview
         content = row['text']
         document.append(f"**{hash_id}**\n{title}\n{content}")
@@ -53,16 +65,37 @@ class CassandreChatBot:
     def predict(self, user_message):
         fiches, fiches_html = hybrid_search(user_message)
-        sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_new_tokens, presence_penalty=repetition_penalty, stop=["#END#"])
         detailed_prompt = f"""### Query ###\n{user_message}\n\n### Source ###\n{fiches}\n\n### Analysis ###\n"""
-        prompts = [detailed_prompt]
-        outputs = llm.generate(prompts, sampling_params, use_tqdm=False)
-        generated_text = outputs[0].outputs[0].text
-        generated_text = '<h2 style="text-align:center">Réponse</h3>\n<div class="generation">' + format_references(generated_text) + "</div>"
-        fiches_html = '<h2 style="text-align:center">Sources</h3>\n' + fiches_html
-        return generated_text, fiches_html
 def format_references(text):
     ref_start_marker = '<ref text="'
@@ -104,7 +137,7 @@ def format_references(text):
 # Initialize the CassandreChatBot
 cassandre_bot = CassandreChatBot()
-# CSS for styling
 css = """
 .generation {
     margin-left:2em;

 import transformers
 import re
 from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
 import torch
 import gradio as gr
 import json
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Define variables
+temperature = 0.4
 max_new_tokens = 3000
 top_p = 0.95
+repetition_penalty = 1.0
+min_new_tokens = 1000
+early_stopping = False
+model_name = "PleIAs/Pleias-Rag"
+# Get Hugging Face token from environment variable
+hf_token = os.environ.get('HF_TOKEN')
+if not hf_token:
+    raise ValueError("Please set the HF_TOKEN environment variable")
+# Initialize model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)
+model.to(device)
+# Set tokenizer configuration
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.pad_token_id = tokenizer.eos_token_id
+tokenizer.eos_token = "<|end_of_text|>"
 # Connect to the LanceDB database
 db = lancedb.connect("content 5/lancedb_data")
     for _, row in results.iterrows():
         hash_id = str(row['hash'])
         title = row['section']
         content = row['text']
         document.append(f"**{hash_id}**\n{title}\n{content}")
     def predict(self, user_message):
         """Generate an HTML answer and an HTML source list for a query.

         Retrieves source passages with ``hybrid_search``, builds the
         ``### Query ### / ### Source ### / ### Analysis ###`` prompt, runs
         greedy generation with the module-level model, and wraps the
         generated text (after ``format_references``) in HTML.

         Args:
             user_message: The user's question, inserted verbatim into the
                 prompt and passed to ``hybrid_search``.

         Returns:
             A ``(answer_html, sources_html)`` tuple of strings, or
             ``(None, None)`` if generation or formatting raised.
         """
         fiches, fiches_html = hybrid_search(user_message)
         detailed_prompt = f"""### Query ###\n{user_message}\n\n### Source ###\n{fiches}\n\n### Analysis ###\n"""

         # Tokenize the prompt and move it to the same device as the model.
         input_ids = tokenizer.encode(detailed_prompt, return_tensors="pt").to(device)
         # Single unpadded sequence, so every position is a real token.
         attention_mask = torch.ones_like(input_ids)
         prompt_length = input_ids.shape[-1]

         try:
             # Greedy decoding (do_sample=False). `temperature` is not passed:
             # it only applies to sampling and would be ignored here with a
             # warning.
             output = model.generate(
                 input_ids,
                 attention_mask=attention_mask,
                 max_new_tokens=max_new_tokens,
                 do_sample=False,
                 early_stopping=early_stopping,
                 min_new_tokens=min_new_tokens,
                 repetition_penalty=repetition_penalty,
                 pad_token_id=tokenizer.pad_token_id,
                 eos_token_id=tokenizer.eos_token_id,
             )

             # For a causal LM, generate() returns prompt + continuation.
             # Keep only the continuation so the query and the retrieved
             # sources are not echoed back inside the answer panel.
             new_tokens = output[0][prompt_length:]
             generated_text = tokenizer.decode(new_tokens)

             # The closing tags now match the opening <h2> tags.
             generated_text = (
                 '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">'
                 + format_references(generated_text)
                 + "</div>"
             )
             fiches_html = '<h2 style="text-align:center">Sources</h2>\n' + fiches_html
             return generated_text, fiches_html
         except Exception as e:
             # Best-effort boundary for the UI: log the failure with its
             # traceback and return empty outputs instead of raising.
             print(f"Error during generation: {str(e)}")
             import traceback
             traceback.print_exc()
             return None, None
 def format_references(text):
     ref_start_marker = '<ref text="'
 # Initialize the CassandreChatBot
 cassandre_bot = CassandreChatBot()
+# CSS for styling
 css = """
 .generation {
     margin-left:2em;