rodrigomasini committed on
Commit ebabf70 · 1 Parent(s): 7783f31

Create app_v2.py

Files changed (1)
  1. app_v2.py +56 -0
app_v2.py ADDED
@@ -0,0 +1,56 @@
+ import os
+
+ import streamlit as st
+ from transformers import AutoTokenizer
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+ from huggingface_hub import snapshot_download
+
+ cwd = os.getcwd()
+ cachedir = os.path.join(cwd, 'cache')
+
+ # Check if the directory exists before creating it
+ if not os.path.exists(cachedir):
+     os.mkdir(cachedir)
+
+ os.environ['HF_HOME'] = cachedir
+
+ local_folder = os.path.join(cachedir, "model")
+
+ quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
+
+ # Check whether the model is already downloaded; the repo ships a
+ # quantized .safetensors checkpoint (hence use_safetensors=True below),
+ # so test for that file instead of pytorch_model.bin
+ model_basename = "Jackson2-4bit-128g-GPTQ"
+ model_path = os.path.join(local_folder, model_basename + '.safetensors')
+ if not os.path.isfile(model_path):
+     snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=True)
+
+ use_strict = False
+ use_triton = False
+
+ # Load tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
+
+ quantize_config = BaseQuantizeConfig(
+     bits=4,
+     group_size=128,
+     desc_act=False
+ )
+
+ # model_basename is resolved relative to the model directory, so pass
+ # only the checkpoint name, not an absolute path
+ model = AutoGPTQForCausalLM.from_quantized(
+     local_folder,
+     use_safetensors=True,
+     strict=use_strict,
+     model_basename=model_basename,
+     device="cuda:0",
+     use_triton=use_triton,
+     quantize_config=quantize_config
+ )
+
+ # hf_device_map is only set when the model is loaded via accelerate's
+ # device_map; guard the lookup so the app does not crash without it
+ st.write(getattr(model, "hf_device_map", "no hf_device_map available"))
+
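Note: app_v2.py loads the tokenizer and model but never calls them. Below is a minimal sketch of how they could be exercised in the same Streamlit app; the widget labels, prompt template, and generation parameters are illustrative assumptions, not part of this commit.

# Hypothetical follow-up sketch (not part of commit ebabf70): run one
# generation through the model loaded in app_v2.py. The prompt template
# and sampling settings below are assumptions for illustration.
user_text = st.text_area("Text to formalize")

if st.button("Generate") and user_text:
    prompt = f"USER: {user_text}\nASSISTANT:"  # assumed chat template
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
    # Decode only the tokens generated after the prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    st.write(tokenizer.decode(new_tokens, skip_special_tokens=True))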