vilarin committed on
Commit
417f21a
1 Parent(s): bbd8145

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -9
app.py CHANGED
@@ -1,7 +1,4 @@
1
  import os
2
-
3
- #remove this if in CPU
4
- import spaces
5
  import threading
6
  import time
7
  import subprocess
@@ -21,8 +18,10 @@ OLLAMA_SERVICE_THREAD.start()
21
 
22
  print("Giving ollama serve a moment")
23
  time.sleep(10)
 
24
  # Modify the model to what you want
25
- model = "gemma2:27b"
 
26
  subprocess.run(f"~/ollama pull {model}", shell=True)
27
 
28
 
@@ -32,7 +31,7 @@ from ollama import Client
32
  client = Client(host='http://localhost:11434', timeout=120)
33
 
34
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
35
- MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-27b-it")
36
  MODEL_NAME = MODEL_ID.split("/")[-1]
37
 
38
  TITLE = "<h1><center>ollama-Chat</center></h1>"
@@ -42,7 +41,7 @@ DESCRIPTION = f"""
42
  <center>
43
  <p>Feel free to test models with ollama.
44
  <br>
45
- Easy to modify and running models even in CPU.
46
  </p>
47
  </center>
48
  """
@@ -59,7 +58,6 @@ h3 {
59
  }
60
  """
61
 
62
- # Remove this if in CPU
63
 
64
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
65
 
@@ -84,8 +82,6 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
84
  'top_k': top_k,
85
  'repeat_penalty': penalty,
86
  'low_vram': True,
87
- 'main_gpu': 0,
88
- 'num_gpu': 1,
89
  },
90
  )
91
 
 
1
  import os
 
 
 
2
  import threading
3
  import time
4
  import subprocess
 
18
 
19
  print("Giving ollama serve a moment")
20
  time.sleep(10)
21
+
22
  # Modify the model to what you want
23
+ model = "gemma2"
24
+
25
  subprocess.run(f"~/ollama pull {model}", shell=True)
26
 
27
 
 
31
  client = Client(host='http://localhost:11434', timeout=120)
32
 
33
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
34
+ MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
35
  MODEL_NAME = MODEL_ID.split("/")[-1]
36
 
37
  TITLE = "<h1><center>ollama-Chat</center></h1>"
 
41
  <center>
42
  <p>Feel free to test models with ollama.
43
  <br>
44
+ Easy to modify and running models you want.
45
  </p>
46
  </center>
47
  """
 
58
  }
59
  """
60
 
 
61
 
62
  def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
63
 
 
82
  'top_k': top_k,
83
  'repeat_penalty': penalty,
84
  'low_vram': True,
 
 
85
  },
86
  )
87