# Hugging Face Spaces banner: Running on Zero
import gradio as gr | |
import torch | |
import json | |
import uuid | |
import os | |
import time | |
import pytz | |
from datetime import datetime | |
from transformers import ( | |
AutoModelForCausalLM, | |
AutoTokenizer, | |
TextIteratorStreamer, | |
) | |
from threading import Thread | |
from huggingface_hub import CommitScheduler | |
from pathlib import Path | |
import spaces | |
# Install the libstdc++6 system package at startup (return code is not checked).
os.system("apt-get update && apt-get install -y libstdc++6")

# Hugging Face access token; a missing HF_TOKEN raises KeyError right here.
token = os.environ["HF_TOKEN"]

# Single source of truth for the checkpoint used by both model and tokenizer.
MODEL_ID = "large-traversaal/Phi-4-Hindi"

# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the causal LM in bfloat16 and move it onto the selected device.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=token,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to(device)

# Tokenizer for the same checkpoint; its EOS id is the only stop token.
tok = AutoTokenizer.from_pretrained(MODEL_ID, token=token)
terminators = [tok.eos_token_id]

# Local folder holding the per-process JSON-lines chat log.
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"

# Background uploader that pushes the log folder to a dataset repo.
# NOTE(review): huggingface_hub documents `every` in minutes, so 0.01 would
# be roughly 0.6 s between commits -- confirm this cadence is intended.
scheduler = CommitScheduler(
    repo_id="DrishtiSharma/phi-gradio-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=0.01,
    token=token,
)

# Fixed timezone used for log timestamps.
timezone = pytz.timezone("UTC")
def _build_conversation(message, history):
    """Convert pair-style chat history plus the new message into role/content dicts."""
    conversation = []
    for turn in history or []:
        conversation.append({"role": "user", "content": turn[0]})
        # A turn without a reply yet contributes only its user part.
        if turn[1] is not None:
            conversation.append({"role": "assistant", "content": turn[1]})
    conversation.append({"role": "user", "content": message})
    return conversation


# NOTE(review): `spaces` is imported by this file but never used here; if this
# runs on ZeroGPU hardware, confirm whether `chat` needs `@spaces.GPU`.
def chat(message, history, temperature, do_sample, max_tokens, top_p):
    """Stream a model reply for `message` and append the exchange to the log file.

    Args:
        message: The new user message.
        history: Previous turns as indexable (user, assistant) pairs; an
            assistant entry of None is skipped. None or empty means no history.
        temperature: Sampling temperature; 0 forces greedy decoding.
        do_sample: Whether sampling was requested in the UI.
        max_tokens: Maximum number of new tokens to generate.
        top_p: Nucleus-sampling probability mass (used only when sampling).

    Yields:
        The reply text accumulated so far, once per streamed chunk.

    Side effects:
        Appends one JSON line describing the exchange to `log_file` while
        holding `scheduler.lock`. This also happens when the stream is cut
        short (consumer stopped iterating, or the streamer raised).
    """
    start_time = time.time()
    timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")

    conversation_history = _build_conversation(message, history)
    prompt = tok.apply_chat_template(
        conversation_history, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tok([prompt], return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(
        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    # Sampling is only active when requested AND the temperature is non-zero.
    use_sampling = bool(do_sample) and temperature != 0
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        # Slider values may arrive as floats; generate() needs an integer.
        max_new_tokens=int(max_tokens),
        do_sample=use_sampling,
        eos_token_id=terminators,
    )
    if use_sampling:
        # Sampling-only knobs are passed only in sampling mode, so greedy
        # decoding is not handed a zero temperature or an unused top_p.
        generate_kwargs.update(temperature=temperature, top_p=top_p)

    # generate() blocks, so it runs in a worker thread while we drain the streamer.
    generation_thread = Thread(target=model.generate, kwargs=generate_kwargs)
    generation_thread.start()

    partial_text = ""
    try:
        for new_text in streamer:
            partial_text += new_text
            yield partial_text
    finally:
        # Log in `finally` so cancelled or failed streams are recorded too,
        # with whatever text had been produced up to that point.
        response_time = round(time.time() - start_time, 2)
        log_data = {
            "timestamp": timestamp,
            "input": message,
            "output": partial_text,
            "response_time": response_time,
            "temperature": temperature,
            "do_sample": do_sample,
            "max_tokens": max_tokens,
            "top_p": top_p
        }
        # The scheduler lock keeps the uploader from reading a half-written line.
        with scheduler.lock:
            with log_file.open("a", encoding="utf-8") as f:
                f.write(json.dumps(log_data, ensure_ascii=False) + "\n")
def clear_chat():
    """Return two fresh empty lists: one for the chatbot display, one for its stored value."""
    cleared_display = []
    cleared_value = []
    return cleared_display, cleared_value
def export_chat(history):
    """Write the chat history to a text file and return its path for download.

    Args:
        history: Sequence of indexable (user, bot) pairs. A bot entry of None
            (reply not produced yet) is written as an empty string.

    Returns:
        The path "chat_history.txt", or None when there is nothing to export.
    """
    if not history:
        return None  # No chat history to export
    file_path = "chat_history.txt"
    # Explicit UTF-8: the chats contain Hindi text, which the platform's
    # default encoding is not guaranteed to be able to represent.
    with open(file_path, "w", encoding="utf-8") as f:
        for msg in history:
            bot_text = "" if msg[1] is None else msg[1]
            f.write(f"User: {msg[0]}\nBot: {bot_text}\n")
    return file_path
# Gradio UI
# NOTE(review): the nesting below is reconstructed; the original indentation
# was lost, so confirm the placement of the button row and the click handlers.

# Example prompts offered under the chat box (one single-item list per example).
example_prompts = [
    ["What is the English translation of: 'इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था'?"],
    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?"],
    ["how do you play fetch? A) throw the object for the dog to get and bring back to you. B) get the object and bring it back to the dog."],
]

with gr.Blocks(theme="soft") as demo:
    with gr.Row():
        # Left column: generation settings forwarded to `chat` as extra inputs.
        with gr.Column(scale=1):
            gr.Markdown("#### ⚙️🛠 Configure Settings")
            temperature = gr.Slider(
                minimum=0, maximum=1, step=0.1, value=0.1,
                label="Temperature", interactive=True,
            )
            do_sample = gr.Checkbox(label="Sampling", value=True, interactive=True)
            max_tokens = gr.Slider(
                minimum=128, maximum=4096, step=1, value=1024,
                label="Max new tokens", interactive=True,
            )
            top_p = gr.Slider(
                minimum=0.0, maximum=1.0, value=0.1, step=0.2,
                label="Top-p Sampling", interactive=True,
            )

        # Right column: the chat itself plus the clear/export buttons.
        with gr.Column(scale=3):
            gr.Markdown("# 💬 Chat With Phi-4-Hindi")
            chat_interface = gr.ChatInterface(
                fn=chat,
                examples=example_prompts,
                additional_inputs=[temperature, do_sample, max_tokens, top_p],
                stop_btn="⏹ Stop",
                description="Test App",
                theme="default",
            )
            with gr.Row():
                clear_btn = gr.Button("🧹 Clear Chat", variant="secondary")
                export_btn = gr.Button("📥 Export Chat", variant="secondary")

    # Connect buttons to their functions
    clear_btn.click(
        fn=clear_chat,
        outputs=[chat_interface.chatbot, chat_interface.chatbot_value],
    )
    # The File component is created here, so this is where the download appears.
    download_target = gr.File()
    export_btn.click(
        fn=export_chat,
        inputs=[chat_interface.chatbot],
        outputs=[download_target],
    )  # Exports chat

demo.launch()