robert committed on
Commit
8a84eb2
·
1 Parent(s): 146bc3a

Changing the model loading to 4-bit

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -18,8 +18,9 @@ from transformers import (
18
  )
19
 
20
  tokenizer = AutoTokenizer.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
21
- model = AutoModelForCausalLM.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
22
- model = model.to("cuda:0")
 
23
 
24
 
25
  class OAAPIKey(BaseModel):
@@ -44,7 +45,7 @@ class StopOnSequence(StoppingCriteria):
44
  return False
45
  return (
46
  (
47
- input_ids[0, -self.sequence_len:]
48
  == torch.tensor(self.sequence_ids, device=input_ids.device)
49
  )
50
  .all()
@@ -52,7 +53,7 @@ class StopOnSequence(StoppingCriteria):
52
  )
53
 
54
 
55
- @spaces.GPU(duration=42)
56
  def spaces_model_predict(message: str, history: list[tuple[str, str]]):
57
  history_transformer_format = history + [[message, ""]]
58
  stop = StopOnSequence("<|human|>", tokenizer)
@@ -135,7 +136,7 @@ with gr.Blocks() as demo:
135
  label="Please enter your message",
136
  interactive=True,
137
  multiselect=False,
138
- allow_custom_value=True
139
  )
140
 
141
  with gr.Row():
 
18
  )
19
 
20
  tokenizer = AutoTokenizer.from_pretrained("ContextualAI/archangel_sft-kto_llama13b")
21
+ model = AutoModelForCausalLM.from_pretrained(
22
+ "ContextualAI/archangel_sft-kto_llama13b", device_map="auto", load_in_4bit=True
23
+ )
24
 
25
 
26
  class OAAPIKey(BaseModel):
 
45
  return False
46
  return (
47
  (
48
+ input_ids[0, -self.sequence_len :]
49
  == torch.tensor(self.sequence_ids, device=input_ids.device)
50
  )
51
  .all()
 
53
  )
54
 
55
 
56
+ @spaces.GPU(duration=54)
57
  def spaces_model_predict(message: str, history: list[tuple[str, str]]):
58
  history_transformer_format = history + [[message, ""]]
59
  stop = StopOnSequence("<|human|>", tokenizer)
 
136
  label="Please enter your message",
137
  interactive=True,
138
  multiselect=False,
139
+ allow_custom_value=True,
140
  )
141
 
142
  with gr.Row():