|
import gradio as gr |
|
from ollama import chat, ChatResponse |
|
import subprocess |
|
import time |
|
|
|
# Ollama model tag: pulled once at startup, then used for every chat request.
model_id = "deepseek-r1:1.5b"
|
|
|
def interact(message: str, history: list):
    """Stream a reply from the Ollama model, splitting out <think> content.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list
        Prior turns as role/content dicts (gradio ``type="messages"`` format).

    Yields
    ------
    list
        ``[thinking ChatMessage, visible text so far]`` after each streamed
        chunk, so gradio can render the chain-of-thought in a collapsible
        "Thinking Cloud" box separately from the final answer.
    """
    # Forward only the keys ollama's chat API expects. Gradio history entries
    # can carry extra fields (e.g. the metadata on the thinking ChatMessage we
    # yielded earlier) that should not be re-sent to the model.
    chat_history = [
        {"role": msg["role"], "content": msg["content"]} for msg in history
    ]
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    thinking_response = gr.ChatMessage(content="", metadata={"title": "Thinking Cloud"})
    thinking = False

    for chunk in response:
        bit = chunk["message"]["content"]
        # DeepSeek-R1 wraps its chain-of-thought in <think>...</think>;
        # assumes each tag arrives as its own streamed token — TODO confirm.
        if bit == "<think>":
            thinking = True
            continue
        if bit == "</think>":
            thinking = False
            continue

        if thinking:
            thinking_response.content += bit
        else:
            text_response += bit
        yield [thinking_response, text_response]
|
|
|
# Chat UI wired to the streaming interact() handler. type="messages" keeps
# history as role/content dicts rather than (user, bot) tuples.
interface = gr.ChatInterface(
    interact,
    type="messages",
    title="Deepseek-R1 Chat Interface",
    description="Model: Deepseek R1: 1.5B params",
)
|
|
|
if __name__ == "__main__":
    print("\n\nStarting Ollama...\n\n")
    # Keep the server handle so the background process can be torn down when
    # the UI exits instead of being orphaned.
    ollama_server = subprocess.Popen(["ollama", "serve"])
    time.sleep(10)  # TODO: poll the server for readiness instead of a fixed sleep
    print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
    # check=True: abort startup if the pull fails rather than launching a UI
    # bound to a model that never downloaded.
    subprocess.run(["ollama", "pull", model_id], check=True)
    time.sleep(5)
    print("\n\nDeepseek-R1 started successfully!!\n\n")
    try:
        interface.launch(server_name="0.0.0.0", server_port=7860)
    finally:
        ollama_server.terminate()