nguyenbh committed
Commit 29a4795 · Parent: 38ff03d
Files changed (2)
  1. app.py +116 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,116 @@
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
+ from threading import Thread
+ import torch
+ import os
+
+
+ hf_token = os.getenv("YOUR_HF_TOKEN")
+
+ # Load model and tokenizer
+ print("Loading model and tokenizer...")
+ model_path = "microsoft/Phi-4-mini-instruct"  # Can be changed to a local path, e.g. "./Phi-4-Mini-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(
+     model_path,
+     padding_side="left",
+     token=hf_token,
+     trust_remote_code=True
+ )
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     device_map="auto",
+     attn_implementation="flash_attention_2",
+     torch_dtype="auto",
+     token=hf_token,
+     trust_remote_code=True
+ )
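+ # Note: flash_attention_2 assumes a CUDA GPU with the flash-attn package
+ # installed; attn_implementation="eager" is a safe fallback otherwise.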
+
+ # Create pipeline for easier inference
+ pipe = pipeline(
+     "text-generation",
+     model=model,
+     tokenizer=tokenizer,
+ )
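+ # Passing a list of {"role", "content"} dicts to this pipeline makes it apply
+ # the model's chat template automatically before generation.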
+
+ print("Model and tokenizer loaded successfully!")
+
+ # Format chat history into the messages format
+ def format_chat_history(message, history):
+     messages = [
+         {"role": "system", "content": "You are a helpful AI assistant."}
+     ]
+
+     # Add chat history
+     for user_msg, assistant_msg in history:
+         messages.append({"role": "user", "content": user_msg})
+         messages.append({"role": "assistant", "content": assistant_msg})
+
+     # Add current message
+     messages.append({"role": "user", "content": message})
+
+     return messages
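+ # Example: history=[["Hi", "Hello!"]] and message="Thanks" produces the system
+ # message, then user "Hi", assistant "Hello!", and finally user "Thanks".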
+
+ # Streaming response generator
+ def predict(message, history):
+     messages = format_chat_history(message, history)
+
+     generation_args = {
+         "max_new_tokens": 1024,
+         "return_full_text": False,
+         "temperature": 0.001,
+         "top_p": 1.0,
+         "do_sample": True,
+         "streamer": None,  # Will be set below once the streamer is created
+     }
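+ # With temperature this close to zero, sampling is effectively greedy;
+ # raise it (e.g. to 0.7) for more varied responses.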
+
+     # Accumulate the response as tokens stream in
+     partial_message = ""
+
+     # Create a TextIteratorStreamer for streaming generation
+     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+     generation_args["streamer"] = streamer
+
+     # Run generation in a background thread so we can stream from this one
+     thread = Thread(target=pipe, args=(messages,), kwargs=generation_args)
+     thread.start()
+
+     # Stream the response
+     for new_text in streamer:
+         partial_message += new_text
+         yield history + [[message, partial_message]]
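+ # The generation thread pushes decoded text into the streamer's queue, so
+ # this loop yields a progressively longer response for Gradio to display.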
+
+ # Create the Gradio interface
+ css = """
+ .chatbot-container {max-width: 800px; margin: auto;}
+ .chat-header {text-align: center; margin-bottom: 20px;}
+ """
+
+ with gr.Blocks(css=css) as demo:
+     gr.HTML("<div class='chat-header'><h1>Phi-4 Mini Chatbot</h1></div>")
+
+     with gr.Column(elem_classes="chatbot-container"):
+         chatbot = gr.Chatbot(height=400)
+         msg = gr.Textbox(placeholder="Type your message here...", label="Input")
+         clear = gr.Button("Clear Conversation")
+
+     msg.submit(predict, [msg, chatbot], [chatbot], queue=True, api_name="chat").then(
+         lambda: "", None, [msg]
+     )
+     clear.click(lambda: None, None, chatbot, queue=False)
+
+ # Launch the app
+ demo.launch(share=True)  # Set share=False if you don't want a public link
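+ # Note: Hugging Face Spaces ignores share=True; the flag only matters when
+ # running the app locally.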
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ transformers==4.49.0
+ gradio==5.14.0
+ torch          # required by app.py (import torch, model inference)
+ accelerate     # required for device_map="auto"