Hermes-3-Llama-3.2-3B

Running on Zero

App Files Community

vilarin committed on Aug 16, 2024

Commit

063316d

•

1 Parent(s): d875b4e

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -26

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from threading import Thread
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_LIST = "THUDM/glm-4-9b-chat, THUDM/glm-4-9b-chat-1m, THUDM/codegeex4-all-9b"
 #MODELS = os.environ.get("MODELS")
 #MODEL_NAME = MODELS.split("/")[-1]
@@ -26,7 +26,7 @@ CSS = """
 """
 model_chat = AutoModelForCausalLM.from_pretrained(
-        "THUDM/glm-4-9b-chat",
         torch_dtype=torch.bfloat16,
         low_cpu_mem_usage=True,
         trust_remote_code=True,
@@ -34,17 +34,9 @@ model_chat = AutoModelForCausalLM.from_pretrained(
 tokenizer_chat = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat",trust_remote_code=True)
-model_code = AutoModelForCausalLM.from_pretrained(
-    "THUDM/codegeex4-all-9b",
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True
-).to(0).eval()
-tokenizer_code = AutoTokenizer.from_pretrained("THUDM/codegeex4-all-9b", trust_remote_code=True)
 @spaces.GPU
-def stream_chat(message: str, history: list, temperature: float, max_length: int, choice: str):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
@@ -54,12 +46,6 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int
     print(f"Conversation is -\n{conversation}")
-    if choice == "glm-4-9b-chat":
-        tokenizer = tokenizer_chat
-        model = model_chat
-    else:
-        model = model_code
-        tokenizer = tokenizer_code
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -71,6 +57,7 @@ def stream_chat(message: str, history: list, temperature: float, max_length: int
         top_k=1,
         temperature=temperature,
         repetition_penalty=1.2,
     )
     gen_kwargs = {**input_ids, **generate_kwargs}
@@ -97,24 +84,18 @@ with gr.Blocks(css=CSS) as demo:
                 minimum=0,
                 maximum=1,
                 step=0.1,
-                value=0.8,
                 label="Temperature",
                 render=False,
             ),
             gr.Slider(
                 minimum=128,
-                maximum=8192,
                 step=1,
-                value=1024,
                 label="Max Length",
                 render=False,
             ),
-            gr.Radio(
-                ["glm-4-9b-chat", "codegeex4-all-9b"],
-                value="glm-4-9b-chat",
-                label="Load Model",
-                render=False,
-            ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],

 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+MODEL_LIST = "THUDM/LongWriter-glm4-9b"
 #MODELS = os.environ.get("MODELS")
 #MODEL_NAME = MODELS.split("/")[-1]
 """
 model_chat = AutoModelForCausalLM.from_pretrained(
+        "THUDM/LongWriter-glm4-9b",
         torch_dtype=torch.bfloat16,
         low_cpu_mem_usage=True,
         trust_remote_code=True,
 tokenizer_chat = AutoTokenizer.from_pretrained("THUDM/glm-4-9b-chat",trust_remote_code=True)
 @spaces.GPU
+def stream_chat(message: str, history: list, temperature: float, max_length: int):
     print(f'message is - {message}')
     print(f'history is - {history}')
     conversation = []
     print(f"Conversation is -\n{conversation}")
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
         top_k=1,
         temperature=temperature,
         repetition_penalty=1.2,
+        num_beams=1,
     )
     gen_kwargs = {**input_ids, **generate_kwargs}
                 minimum=0,
                 maximum=1,
                 step=0.1,
+                value=0.5,
                 label="Temperature",
                 render=False,
             ),
             gr.Slider(
                 minimum=128,
+                maximum=32768,
                 step=1,
+                value=4096,
                 label="Max Length",
                 render=False,
             ),
         ],
         examples=[
             ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],