"""Gradio chat UI for a local DeepSeek-R1 model served by Ollama.

Starts an ``ollama serve`` subprocess, pulls the model, and exposes a
streaming chat interface that separates the model's <think>...</think>
reasoning stream from its final answer.
"""

import subprocess
import time

import gradio as gr
from ollama import chat, ChatResponse

# Ollama model tag to serve and chat with.
model_id = "deepseek-r1:1.5b"

# DeepSeek-R1 streams its chain-of-thought wrapped in these sentinel tokens.
THINK_OPEN = "<think>"
THINK_CLOSE = "</think>"


def interact(message: str, history: list):
    """Stream a model reply, splitting "thinking" output from the answer.

    Args:
        message: The user's latest message.
        history: Prior conversation as role/content message dicts
            (``type="messages"`` format supplied by ``gr.ChatInterface``).

    Yields:
        A two-element list: a ``gr.ChatMessage`` accumulating the
        chain-of-thought (rendered under the "Thinking Cloud" title),
        followed by the accumulated answer text so far.
    """
    # Copy so we never mutate the history list Gradio passed in.
    chat_history = list(history)
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    thinking_response = gr.ChatMessage(
        content="",
        metadata={"title": "Thinking Cloud"},
    )
    thinking = False
    for chunk in response:
        bit = chunk["message"]["content"]
        # BUG FIX: both branches previously compared ``bit`` against ""
        # (the <think> sentinel tags had been lost), so the elif was
        # unreachable and the first empty chunk locked the stream into
        # "thinking" mode. Toggle on the actual DeepSeek-R1 tag tokens.
        if bit == THINK_OPEN:
            thinking = True
            continue
        elif bit == THINK_CLOSE:
            thinking = False
            continue
        if thinking:
            thinking_response.content += bit
        else:
            text_response += bit
        # Yield both messages every chunk so the UI updates live.
        yield [thinking_response, text_response]


interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Deepseek-R1 Chat Interface",
    description="Model: Deepseek R1: 1.5B params",
)

if __name__ == "__main__":
    print("\n\nStarting Ollama...\n\n")
    # Best-effort startup: the sleeps give the server / model pull time
    # to finish before the UI starts issuing requests.
    subprocess.Popen(["ollama", "serve"])
    time.sleep(10)
    print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
    subprocess.run(["ollama", "pull", model_id])
    time.sleep(5)
    print("\n\nDeepseek-R1 started successfully!!\n\n")
    interface.launch(server_name="0.0.0.0", server_port=7860)