import gradio as gr
from huggingface_hub import InferenceClient
# import torch
# from transformers import pipeline
import time
from prometheus_client import start_http_server, Counter, Summary
from typing import Iterable
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
# Prometheus metrics
REQUEST_COUNTER = Counter('app_requests_total', 'Total number of requests')
SUCCESSFUL_REQUESTS = Counter('app_successful_requests_total', 'Total number of successful requests')
FAILED_REQUESTS = Counter('app_failed_requests_total', 'Total number of failed requests')
REQUEST_DURATION = Summary('app_request_duration_seconds', 'Time spent processing request')
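# These metrics are exposed for Prometheus scraping by the HTTP server started in __main__ (port 8000)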
# import os
# from dotenv import load_dotenv
# load_dotenv()
#
# HF_ACCESS = os.getenv("HF_ACCESS")
# Inference client setup
client = InferenceClient(model="mistralai/Mistral-Small-Instruct-2409",
# token=HF_ACCESS
)
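# A private HF token could be passed to the client; the commented-out HF_ACCESS env-var setup above shows one way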
# pipe = pipeline("text-generation", "microsoft/Phi-3-mini-4k-instruct", torch_dtype=torch.bfloat16, device_map="auto")
# Global flag to handle cancellation
stop_inference = False
def respond(
message,
history: list[tuple[str, str]],
system_message="You are a friendly and playful cat. Answer all user queries clearly and engagingly",
max_tokens=512,
temperature=0.7,
top_p=0.95,
use_local_model=False,
):
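    """Generate a streamed chat response.

    Yields the updated chat history (a list of (user, assistant) tuples) as tokens
    arrive from the Hugging Face Inference API, so the Gradio Chatbot updates live.
    """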
system_message += " You also love puns and add 'meow' at the end of every response."
global stop_inference
stop_inference = False # Reset cancellation flag
    REQUEST_COUNTER.inc()  # Increment request counter
    request_start = time.perf_counter()  # Start timing the request
try:
# Initialize history if it's None
if history is None:
history = []
# API-based inference
messages = [{"role": "system", "content": system_message}]
for val in history:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "user", "content": message})
response = ""
        # Stream tokens from the API as they arrive
        for message_chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,  # streaming is required to iterate over chunks
            temperature=temperature,
            top_p=top_p,
        ):
            if stop_inference:
                response = "Inference cancelled."
                yield history + [(message, response)]
                return
            token = message_chunk.choices[0].delta.content
            if token:  # the final chunk may carry no content
                response += token
            yield history + [(message, response)]  # Yield history + new response
SUCCESSFUL_REQUESTS.inc() # Increment successful request counter
except Exception as e:
FAILED_REQUESTS.inc() # Increment failed request counter
yield history + [(message, f"Error: {str(e)}")]
    finally:
        REQUEST_DURATION.observe(time.perf_counter() - request_start)  # Record request duration
def cancel_inference():
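    """Signal the running respond() generator to stop streaming."""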
global stop_inference
stop_inference = True
# Custom CSS for a fancy look
custom_css = """
#main-container {
background-color: #FFC0CB;
    background-image: url('file=image.jpg');
font-family: 'Arial', sans-serif;
}
.gradio-container {
max-width: 700px;
margin: 0 auto;
padding: 20px;
background: #FFC0CB;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
border-radius: 10px;
}
.gr-button {
background-color: #4CAF50;
color: white;
border: none;
border-radius: 5px;
padding: 10px 20px;
cursor: pointer;
transition: background-color 0.3s ease;
}
.gr-button:hover {
background-color: #45a049;
}
.gr-slider input {
color: #4CAF50;
}
.gr-chat {
font-size: 16px;
}
#title {
text-align: center;
font-size: 2em;
margin-bottom: 20px;
color: #333;
}
"""
class UI_design(Base):
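    """Custom Gradio theme built on themes.Base (emerald/blue palette, Quicksand font)."""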
def __init__(
self,
*,
primary_hue: colors.Color | str = colors.emerald,
secondary_hue: colors.Color | str = colors.blue,
neutral_hue: colors.Color | str = colors.blue,
spacing_size: sizes.Size | str = sizes.spacing_md,
radius_size: sizes.Size | str = sizes.radius_md,
text_size: sizes.Size | str = sizes.text_lg,
font: fonts.Font
| str
| Iterable[fonts.Font | str] = (
fonts.GoogleFont("Quicksand"),
"ui-sans-serif",
"sans-serif",
),
font_mono: fonts.Font
| str
| Iterable[fonts.Font | str] = (
fonts.GoogleFont("IBM Plex Mono"),
"ui-monospace",
"monospace",
),
):
super().__init__(
primary_hue=primary_hue,
secondary_hue=secondary_hue,
neutral_hue=neutral_hue,
spacing_size=spacing_size,
radius_size=radius_size,
text_size=text_size,
font=font,
font_mono=font_mono,
)
super().set(
body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
button_primary_text_color="white",
button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
slider_color="*secondary_300",
slider_color_dark="*secondary_600",
block_title_text_weight="600",
block_border_width="3px",
block_shadow="*shadow_drop_lg",
button_shadow="*shadow_drop_lg",
button_large_padding="32px",
)
ui_design = UI_design()
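# Note: the custom theme is instantiated but the Blocks below currently applies custom_css instead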
# Define the interface
# with gr.Blocks(theme=ui_design) as demo:
with gr.Blocks(css=custom_css) as demo:
gr.Markdown("<h1 style='text-align: center;'> 😸 Meowthamatical AI Chatbot 😸</h1>")
gr.Markdown(" Welcome to the Cat & Math Chatbot! Whether you're here to sharpen your math skills or just enjoy some cat-themed fun, we're excited to make learning a little more pawsome!!")
# with gr.Row():
# with gr.Column():
# with gr.Tabs() as input_tabs:
# with gr.Tab("Sketch"):
# input_sketchpad = gr.Sketchpad(type="pil", label="Sketch", layers=False)
#
# input_text = gr.Textbox(label="input your question")
#
# with gr.Row():
# # with gr.Column():
# # clear_btn = gr.ClearButton(
# # [input_sketchpad, input_text])
# with gr.Column():
# submit_btn = gr.Button("Submit", variant="primary")
with gr.Row():
        system_message = gr.Textbox(value="You are a friendly and playful cat who loves to help users learn math.", label="System message", interactive=True)
use_local_model = gr.Checkbox(label="Use Local Model", value=False)
# button_1 = gr.Button("Submit", variant="primary")
with gr.Row():
max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
chat_history = gr.Chatbot(label="Chat")
user_input = gr.Textbox(show_label=False, placeholder="Type your message here...")
    cancel_button = gr.Button("Cancel Inference", variant="stop")  # "stop" renders the red/destructive button style
# Adjusted to ensure history is maintained and passed correctly
user_input.submit(respond, [user_input, chat_history, system_message, max_tokens, temperature, top_p, use_local_model], chat_history)
# user_input.submit(respond,
# [user_input, chat_history, system_message, 512, 0.8, 0.95, use_local_model],
# chat_history)
cancel_button.click(cancel_inference)
if __name__ == "__main__":
start_http_server(8000) # Expose metrics on port 8000
    demo.launch(share=False)  # share=True is not supported on HF Spaces