vilarin committed on
Commit
27dc368
·
verified ·
1 Parent(s): c434f82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -23
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import threading
4
  import time
@@ -18,19 +17,16 @@ OLLAMA_SERVICE_THREAD.start()
18
 
19
  print("Giving ollama serve a moment")
20
  time.sleep(10)
21
- subprocess.run("~/ollama run gemma2", shell=True)
22
 
23
 
24
  import copy
25
  import gradio as gr
26
- import spaces
27
- from llama_index.llms.ollama import Ollama
28
- import llama_index
29
- from llama_index.core.llms import ChatMessage
30
 
31
 
32
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
33
- MODEL_ID = "google/gemma-2-27b-it"
34
  MODEL_NAME = MODEL_ID.split("/")[-1]
35
 
36
  TITLE = "<h1><center>Chatbox</center></h1>"
@@ -56,33 +52,30 @@ h3 {
56
  text-align: center;
57
  }
58
  """
59
- @spaces.GPU()
60
  def stream_chat(message: str, history: list, temperature: float, context_window: int, top_p: float, top_k: int, penalty: float):
61
  print(f'message is - {message}')
62
  print(f'history is - {history}')
63
  conversation = []
64
  for prompt, answer in history:
65
  conversation.extend([
66
- ChatMessage(
67
- role="user", content=prompt
68
- ),
69
- ChatMessage(role="assistant", content=answer),
70
  ])
71
- messages = [ChatMessage(role="user", content=message)]
72
 
73
  print(f"Conversation is -\n{conversation}")
74
 
75
- llm = Ollama(model="gemma2", request_timeout=60.0)
76
- resp = llm.chat(
77
- messages = messages,
78
- chat_history = conversation,
79
- top_p=top_p,
80
- top_k=top_k,
81
- repeat_penalty=penalty,
82
- context_window=context_window,
83
  )
84
-
85
- return resp
 
 
 
 
86
 
87
 
88
  chatbot = gr.Chatbot(height=600)
 
 
1
  import os
2
  import threading
3
  import time
 
17
 
18
  print("Giving ollama serve a moment")
19
  time.sleep(10)
20
+ subprocess.run("~/ollama pull gemma2", shell=True)
21
 
22
 
23
  import copy
24
  import gradio as gr
25
+ import ollama
 
 
 
26
 
27
 
28
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
29
+ MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
30
  MODEL_NAME = MODEL_ID.split("/")[-1]
31
 
32
  TITLE = "<h1><center>Chatbox</center></h1>"
 
52
  text-align: center;
53
  }
54
  """
 
55
  def stream_chat(message: str, history: list, temperature: float, context_window: int, top_p: float, top_k: int, penalty: float):
56
  print(f'message is - {message}')
57
  print(f'history is - {history}')
58
  conversation = []
59
  for prompt, answer in history:
60
  conversation.extend([
61
+ {"role": "user", "content": prompt},
62
+ {"role": "assistant", "content": answer},
 
 
63
  ])
64
+ conversation.append({"role": "user", "content": message})
65
 
66
  print(f"Conversation is -\n{conversation}")
67
 
68
+ response = ollama.chat(
69
+ model="gemma2",
70
+ messages=conversation,
71
+ stream=True
 
 
 
 
72
  )
73
+
74
+ message = ""
75
+ for chunk in response:
76
+ message += chunk["message"]["content"]
77
+ yield "", message
78
+
79
 
80
 
81
  chatbot = gr.Chatbot(height=600)