sapthesh committed
Commit f4c88e6 · verified · 1 Parent(s): ded2ab4

Update app.py

Files changed (1)
  1. app.py +8 -15
app.py CHANGED
@@ -2,30 +2,23 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "deepseek-ai/DeepSeek-V3"
+
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True) # device_map="auto" for GPU if available, ADD trust_remote_code=True
 
-def predict(message, history):
-    conversation = []
-    for user_msg, bot_response in history:
-        conversation.append({"role": "user", "content": user_msg})
-        if bot_response: # Only add bot response if it exists
-            conversation.append({"role": "assistant", "content": bot_response})
-    conversation.append({"role": "user", "content": message})
 
-    inputs = tokenizer.apply_chat_template(conversation=conversation, tokenizer=tokenizer, return_tensors="pt").to("cuda" if model.device.type == 'cuda' else "cpu") # Move input to GPU if model is on GPU
-    outputs = model.generate(**inputs, max_new_tokens=512) # Adjust max_new_tokens as needed
+def predict(message, history):
+    inputs = tokenizer.apply_chat_template(conversation=history + [{"role": "user", "content": message}], tokenize=True, return_tensors="pt").to("cuda")
+    outputs = model.generate(inputs, max_new_tokens=50) # Adjust max_new_tokens as needed
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Basic cleanup (you might need more sophisticated cleaning)
-    response = response.replace("<|assistant|>", "").strip()
     return response
 
 iface = gr.ChatInterface(
     fn=predict,
-    inputs=gr.Chatbox(placeholder="Type a message..."),
-    outputs=gr.Chatbot(),
+    inputs=gr.Chatbox(label="Chat with DeepSeek-V3"),
+    outputs=gr.Chatbot(label="DeepSeek-V3"),
     title="DeepSeek-V3 Chatbot",
-    description="Chat with the DeepSeek-V3 model.",
+    description="Chat with the DeepSeek-V3 model from Hugging Face.",
 )
+
 iface.launch()
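
Note on the updated code: as committed, app.py likely still fails to run. gr.Chatbox is not a Gradio component, gr.ChatInterface does not accept inputs=/outputs= arguments (it creates its own textbox and chatbot), .to("cuda") crashes on CPU-only hardware, and history only arrives as a list of {"role": ..., "content": ...} dicts when the interface is created with type="messages" (by default it is a list of [user, bot] pairs). Below is a minimal runnable sketch with those fixes, assuming reasonably recent gradio and transformers releases; everything else follows the commit:

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/DeepSeek-V3"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)

def predict(message, history):
    # With type="messages", history is already a list of {"role", "content"}
    # dicts, so the new user turn can simply be appended.
    conversation = history + [{"role": "user", "content": message}]
    # add_generation_prompt=True appends the assistant header so the model
    # continues as the assistant instead of echoing the conversation.
    inputs = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=512)  # adjust as needed
    # Decode only the newly generated tokens, not the prompt; this replaces the
    # old "<|assistant|>" string-cleanup hack.
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

iface = gr.ChatInterface(
    fn=predict,
    type="messages",
    title="DeepSeek-V3 Chatbot",
    description="Chat with the DeepSeek-V3 model from Hugging Face.",
)

iface.launch()

If custom labels are wanted, ChatInterface takes chatbot= and textbox= components (for example chatbot=gr.Chatbot(label="DeepSeek-V3")) rather than inputs=/outputs= arguments.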