rodrigomasini committed on
Commit ebabf70 · 1 Parent(s): 7783f31

Create app_v2.py

Files changed (1)
  1. app_v2.py +56 -0
app_v2.py ADDED
@@ -0,0 +1,56 @@
+ import os
+
+ import streamlit as st
+ from transformers import AutoTokenizer
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+ from huggingface_hub import snapshot_download
+
+ cwd = os.getcwd()
+ cachedir = os.path.join(cwd, 'cache')
+
+ # Check if the directory exists before creating it
+ if not os.path.exists(cachedir):
+     os.mkdir(cachedir)
+
+ os.environ['HF_HOME'] = cachedir
+
+ local_folder = os.path.join(cachedir, "model")
+
+ quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
+
+ # Check whether the model is already downloaded; the repo ships a
+ # quantized .safetensors checkpoint (hence use_safetensors=True below),
+ # so test for that file instead of pytorch_model.bin
+ model_basename = "Jackson2-4bit-128g-GPTQ"
+ model_path = os.path.join(local_folder, model_basename + '.safetensors')
+ if not os.path.isfile(model_path):
+     snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=True)
+
+ use_strict = False
+ use_triton = False
+
+ # Load tokenizer and model
+ tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
+
+ quantize_config = BaseQuantizeConfig(
+     bits=4,
+     group_size=128,
+     desc_act=False
+ )
+
+ # model_basename is resolved relative to the model directory, so pass
+ # only the checkpoint name, not an absolute path
+ model = AutoGPTQForCausalLM.from_quantized(
+     local_folder,
+     use_safetensors=True,
+     strict=use_strict,
+     model_basename=model_basename,
+     device="cuda:0",
+     use_triton=use_triton,
+     quantize_config=quantize_config
+ )
+
+ # hf_device_map is only set when the model is loaded via accelerate's
+ # device_map; guard the lookup so the app does not crash without it
+ st.write(getattr(model, "hf_device_map", "no hf_device_map available"))
+
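Note: app_v2.py loads the tokenizer and model but never calls them. Below is a minimal sketch of how they could be exercised in the same Streamlit app; the widget labels, prompt template, and generation parameters are illustrative assumptions, not part of this commit.

# Hypothetical follow-up sketch (not part of commit ebabf70): run one
# generation through the model loaded in app_v2.py. The prompt template
# and sampling settings below are assumptions for illustration.
user_text = st.text_area("Text to formalize")

if st.button("Generate") and user_text:
    prompt = f"USER: {user_text}\nASSISTANT:"  # assumed chat template
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
    # Decode only the tokens generated after the prompt
    new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
    st.write(tokenizer.decode(new_tokens, skip_special_tokens=True))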