sapthesh committed
Commit f4c88e6 · verified · 1 Parent(s): ded2ab4

Update app.py

Files changed (1)
  1. app.py +8 -15
app.py CHANGED
@@ -2,30 +2,23 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "deepseek-ai/DeepSeek-V3"
+
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True) # device_map="auto" for GPU if available, ADD trust_remote_code=True
 
-def predict(message, history):
-    conversation = []
-    for user_msg, bot_response in history:
-        conversation.append({"role": "user", "content": user_msg})
-        if bot_response: # Only add bot response if it exists
-            conversation.append({"role": "assistant", "content": bot_response})
-    conversation.append({"role": "user", "content": message})
 
-    inputs = tokenizer.apply_chat_template(conversation=conversation, tokenizer=tokenizer, return_tensors="pt").to("cuda" if model.device.type == 'cuda' else "cpu") # Move input to GPU if model is on GPU
-    outputs = model.generate(**inputs, max_new_tokens=512) # Adjust max_new_tokens as needed
+def predict(message, history):
+    inputs = tokenizer.apply_chat_template(conversation=history + [{"role": "user", "content": message}], tokenize=True, return_tensors="pt").to("cuda")
+    outputs = model.generate(inputs, max_new_tokens=50) # Adjust max_new_tokens as needed
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Basic cleanup (you might need more sophisticated cleaning)
-    response = response.replace("<|assistant|>", "").strip()
     return response
 
 iface = gr.ChatInterface(
     fn=predict,
-    inputs=gr.Chatbox(placeholder="Type a message..."),
-    outputs=gr.Chatbot(),
+    inputs=gr.Chatbox(label="Chat with DeepSeek-V3"),
+    outputs=gr.Chatbot(label="DeepSeek-V3"),
     title="DeepSeek-V3 Chatbot",
-    description="Chat with the DeepSeek-V3 model.",
+    description="Chat with the DeepSeek-V3 model from Hugging Face.",
 )
+
 iface.launch()
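
Note on the updated code: as committed, app.py likely still fails to run. gr.Chatbox is not a Gradio component, gr.ChatInterface does not accept inputs=/outputs= arguments (it creates its own textbox and chatbot), .to("cuda") crashes on CPU-only hardware, and history only arrives as a list of {"role": ..., "content": ...} dicts when the interface is created with type="messages" (by default it is a list of [user, bot] pairs). Below is a minimal runnable sketch with those fixes, assuming reasonably recent gradio and transformers releases; everything else follows the commit:

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "deepseek-ai/DeepSeek-V3"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)

def predict(message, history):
    # With type="messages", history is already a list of {"role", "content"}
    # dicts, so the new user turn can simply be appended.
    conversation = history + [{"role": "user", "content": message}]
    # add_generation_prompt=True appends the assistant header so the model
    # continues as the assistant instead of echoing the conversation.
    inputs = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
    with torch.no_grad():
        outputs = model.generate(inputs, max_new_tokens=512)  # adjust as needed
    # Decode only the newly generated tokens, not the prompt; this replaces the
    # old "<|assistant|>" string-cleanup hack.
    return tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)

iface = gr.ChatInterface(
    fn=predict,
    type="messages",
    title="DeepSeek-V3 Chatbot",
    description="Chat with the DeepSeek-V3 model from Hugging Face.",
)

iface.launch()

If custom labels are wanted, ChatInterface takes chatbot= and textbox= components (for example chatbot=gr.Chatbot(label="DeepSeek-V3")) rather than inputs=/outputs= arguments.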