xavierbarbier committed
Commit 7e8d37f · Parent: 765d679

Update app.py

Files changed (1)
  1. app.py +28 -30
app.py CHANGED
@@ -1,48 +1,46 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
+import os
+from langchain.llms import CTransformers
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-)
+
+MODEL_PATH = 'TheBloke/Mistral-7B-Instruct-v0.1-GGUF'
+
+# Some basic configurations for the model
+config = {
+    "max_new_tokens": 1000,
+    "context_length": 1000,
+    "repetition_penalty": 1.1,
+    "temperature": 0.5,
+    "top_k": 50,
+    "top_p": 0.9,
+    "stream": True,
+    "threads": int(os.cpu_count() / 2)
+}
 
 model_name = "mistralai/Mistral-7B-Instruct-v0.1"
 
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    load_in_4bit=True,
-    quantization_config=bnb_config,
-    torch_dtype=torch.bfloat16,
-    device_map="auto",
-    trust_remote_code=True,
-)
-
-device = "cuda"
+# We use Langchain's CTransformers llm class to load our quantized model
+llm = CTransformers(model=MODEL_PATH,
+                    config=config)
+
+# Tokenizer for Mistral-7B-Instruct from HuggingFace
+tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
 
 def greet(input_text):
     question = input_text
 
     prompt = f"""<s>[INST] Le contexte est l'assurance maladie en France[/INST]
     {question}</s>
     [INST] Rédige un email courtois de réponse en français à la question [/INST]"""
 
-    messages = [
-        {"role": "user", "content": question},
-        {"role": "assistant", "content": "le contexte est l'assurance maladie en France"},
-        {"role": "user", "content": "Rédige un email courtois de réponse en français à la question"}
-    ]
-
-    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
-
-    model_inputs = encodeds.to(device)
-
-    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
-    decoded = tokenizer.batch_decode(generated_ids)
-
-    answer = decoded[0].split("[/INST]")[2].replace("</s>", "").replace("[Votre nom]", "").replace("[nom]", "")
+    answer = llm(prompt)
+
+    answer = answer.replace("</s>", "").replace("[Votre nom]", "").replace("[nom]", "")
 
     return answer
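
The commit swaps the 4-bit BitsAndBytes GPU path for a GGUF model served through LangChain's CTransformers wrapper, and drops the chat-template generation in favour of a single formatted prompt string. As committed, CTransformers receives only the repo id, so the ctransformers backend decides which .gguf file to fetch from TheBloke/Mistral-7B-Instruct-v0.1-GGUF. For reference, the wrapper also accepts model_file and model_type to pin an exact quantization; a minimal sketch, assuming the Q4_K_M file that TheBloke's repos usually ship (the filename is an assumption, not something this commit specifies):

from langchain.llms import CTransformers

llm = CTransformers(
    model="TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",  # assumed filename; any quant in the repo works
    model_type="mistral",  # architecture hint for the ctransformers backend
    config=config,
)

Pinning the file keeps the Space reproducible across rebuilds, since the repo hosts several quantization levels.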
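The hunk ends at greet()'s return, so the Gradio wiring is unchanged and outside the diff. Presumably the rest of app.py launches the interface along these lines (a sketch of the surrounding, unseen code, not part of this commit):

import gradio as gr

# Expose greet() as a simple text-in / text-out web UI
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()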