vilarin committed on
Commit 86bea01
1 Parent(s): da59244

Update app.py

Files changed (1): app.py (+13, -9)
app.py CHANGED
@@ -1,4 +1,6 @@
 import os
+
+# Remove this if running on CPU
 import spaces
 import threading
 import time
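The hunk below opens with the context line OLLAMA_SERVICE_THREAD.start(), which this diff never shows being defined. For orientation, a minimal sketch of that pattern, launching ollama serve on a background thread before pulling a model, assuming (as the commit's "~/ollama pull" call suggests) that the ollama binary sits in the home directory:

import subprocess
import threading

# Assumption: the ollama binary was downloaded to ~/ and serves on the
# default port 11434, matching the Client(host=...) call later in the file.
def ollama_service():
    subprocess.run("~/ollama serve", shell=True)

OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service, daemon=True)
OLLAMA_SERVICE_THREAD.start()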
@@ -18,26 +20,28 @@ OLLAMA_SERVICE_THREAD.start()
 
 print("Giving ollama serve a moment")
 time.sleep(10)
-subprocess.run("~/ollama pull gemma2", shell=True)
+# Change this to the model you want
+model = "gemma2:27b"
+subprocess.run(f"~/ollama pull {model}", shell=True)
 
 
 import copy
 import gradio as gr
 from ollama import Client
-client = Client(host='http://localhost:11434', timeout=60)
+client = Client(host='http://localhost:11434', timeout=120)
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
+MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-27b-it")
 MODEL_NAME = MODEL_ID.split("/")[-1]
 
-TITLE = "<h1><center>Chatbox</center></h1>"
+TITLE = "<h1><center>ollama-Chat</center></h1>"
 
 DESCRIPTION = f"""
 <h3>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
 <center>
-<p>Gemma is the large language model built by Google.
+<p>Feel free to test models with ollama.
 <br>
-Feel free to test without log.
+Easy to modify; it runs models even on CPU.
 </p>
 </center>
 """
@@ -54,10 +58,10 @@ h3 {
 }
 """
 
+# Remove this decorator if running on CPU
 @spaces.GPU()
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
-    print(f'message is - {message}')
-    print(f'history is - {history}')
+
     conversation = []
     for prompt, answer in history:
         conversation.extend([
@@ -69,7 +73,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
     print(f"Conversation is -\n{conversation}")
 
     response = client.chat(
-        model="gemma2",
+        model=model,
         messages=conversation,
         stream=True,
        options={
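The diff cuts off inside options={...}, so the exact option keys in the committed file are not visible here. A hedged sketch of how stream_chat plausibly continues, mapping the function's parameters onto standard ollama option names and yielding the growing reply the way a Gradio streaming handler expects:

from ollama import Client

client = Client(host='http://localhost:11434', timeout=120)
model = "gemma2:27b"

def stream_chat(message, history, temperature, max_new_tokens, top_p, top_k, penalty):
    # Replay prior turns, as the visible loop in the diff does.
    conversation = []
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})
    response = client.chat(
        model=model,
        messages=conversation,
        stream=True,
        options={
            # Standard ollama options; assumptions, since the diff truncates here.
            "num_predict": max_new_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
            "repeat_penalty": penalty,
        },
    )
    buffer = ""
    for chunk in response:
        # Each streamed chunk carries an incremental piece of the answer.
        buffer += chunk["message"]["content"]
        yield buffer  # Gradio renders each yielded string as the partial reply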
 