0xdev23 committed
Commit 0a7ba53 • 1 Parent(s): 3e21f25

Update app.py

Files changed (1)
  1. app.py +40 -0
app.py CHANGED
@@ -0,0 +1,40 @@
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, BitsAndBytesConfig
+ import torch
+ from langchain.llms import HuggingFacePipeline
+ import gradio as gr
+
+ origin_model_path = "mistralai/Mistral-7B-Instruct-v0.1"
+ model_path = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+ )
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     trust_remote_code=True,
+     quantization_config=bnb_config,
+     device_map="auto",
+ )
+ tokenizer = AutoTokenizer.from_pretrained(origin_model_path)
+
+ text_generation_pipeline = transformers.pipeline(
+     model=model,
+     tokenizer=tokenizer,
+     task="text-generation",
+     eos_token_id=tokenizer.eos_token_id,
+     pad_token_id=tokenizer.eos_token_id,
+     repetition_penalty=1.1,
+     return_full_text=True,
+     max_new_tokens=100,
+     temperature=0.5,
+     do_sample=True,
+ )
+ mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
+
+ def get_response(message, history):
+     return mistral_llm.invoke(message)
+
+ demo = gr.ChatInterface(get_response)
+
+ demo.launch()
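As written, get_response discards the history argument that gr.ChatInterface supplies, so each turn is answered without conversational context. A minimal sketch of one way to fold it back in, assuming gradio's default tuple-style history and the chat template that ships with the Mistral-Instruct tokenizer (get_response_with_history is a hypothetical name):

    def get_response_with_history(message, history):
        # gradio passes history as [user_message, assistant_message] pairs
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})
        # Render the turns into Mistral's [INST] ... [/INST] prompt format
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        return mistral_llm.invoke(prompt)

With history folded into the prompt, return_full_text=False is the more usual pipeline setting, since return_full_text=True echoes the ever-growing prompt back into each reply.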
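As a quick sanity check of the 4-bit load, one could also query the model's memory footprint and invoke the wrapped pipeline directly before wiring up the UI (a sketch; get_memory_footprint is a standard transformers utility):

    print(f"Model footprint: ~{model.get_memory_footprint() / 1e9:.1f} GB")
    print(mistral_llm.invoke("Summarize NF4 quantization in one sentence."))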