"""Gradio chat app for the Phi-4-Hindi model with streamed replies and logging.

The script loads the model and tokenizer once at start-up, serves a
``gr.ChatInterface`` with adjustable generation settings, and appends one JSON
record per completed reply to a log file that a ``CommitScheduler`` uploads to
a Hugging Face dataset repository.
"""

import json
import os
import time
import uuid
from datetime import datetime
from pathlib import Path
from threading import Thread

import gradio as gr
import pytz
import spaces
import torch
from huggingface_hub import CommitScheduler
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)

from theme4 import fast_rtc_theme

# Install the libstdc++6 system package at start-up.
# NOTE(review): presumably needed by a native dependency of the runtime image
# -- confirm it is still required.
os.system("apt-get update && apt-get install -y libstdc++6")

# Load HF token from the environment (raises KeyError when HF_TOKEN is unset).
token = os.environ["HF_TOKEN"]

MODEL_ID = "large-traversaal/Phi-4-Hindi"

# Load Model and Tokenizer
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    token=token,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)
tok = AutoTokenizer.from_pretrained(MODEL_ID, token=token)
terminators = [tok.eos_token_id]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Set up logging folder and CommitScheduler
log_folder = Path("logs")
log_folder.mkdir(parents=True, exist_ok=True)
# One log file per process. Despite the ".json" suffix, the file holds one JSON
# object per line (see _append_log).
log_file = log_folder / f"chat_log_{uuid.uuid4()}.json"

# NOTE(review): per the huggingface_hub documentation `every` is expressed in
# minutes, so 0.01 schedules a push roughly every 0.6 s -- confirm that this
# frequency is intended.
scheduler = CommitScheduler(
    repo_id="DrishtiSharma/phi-gradio-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=0.01,
    token=token,
)

# Fixed timezone
timezone = pytz.timezone("UTC")


def _build_conversation(message, history):
    """Convert pair-style chat history plus the new message to role/content dicts."""
    conversation = []
    for user_text, bot_text in ((item[0], item[1]) for item in history):
        conversation.append({"role": "user", "content": user_text})
        # A turn without a reply yet contributes only its user message.
        if bot_text is not None:
            conversation.append({"role": "assistant", "content": bot_text})
    conversation.append({"role": "user", "content": message})
    return conversation


def _append_log(record):
    """Append *record* as one JSON line to the log file under the scheduler lock."""
    # Holding scheduler.lock keeps the write from interleaving with an upload.
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")


@spaces.GPU(duration=60)
def chat(message, history, temperature, do_sample, max_tokens, top_p):
    """Stream the model's reply to *message* and log the finished exchange.

    Args:
        message: The new user message.
        history: Previous turns as ``[user_text, bot_text]`` pairs; ``bot_text``
            may be ``None``.
        temperature: Sampling temperature; ``0`` forces greedy decoding.
        do_sample: Whether sampling was requested in the UI.
        max_tokens: Upper bound on newly generated tokens.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The reply accumulated so far, once per chunk produced by the streamer.
    """
    start_time = time.time()
    timestamp = datetime.now(timezone).strftime("%Y-%m-%d %H:%M:%S %Z")

    conversation_history = _build_conversation(message, history)
    prompt = tok.apply_chat_template(
        conversation_history, tokenize=False, add_generation_prompt=True
    )
    model_inputs = tok([prompt], return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(
        tok, timeout=20.0, skip_prompt=True, skip_special_tokens=True
    )

    # A temperature of 0 is not usable for sampling, so it means greedy decoding.
    use_sampling = bool(do_sample) and temperature != 0
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=use_sampling,
        eos_token_id=terminators,
    )
    if use_sampling:
        # Sampling-only knobs are passed only when they take effect; handing
        # them to a greedy run makes transformers complain about unused flags.
        generate_kwargs.update(temperature=temperature, top_p=top_p)

    # generate() blocks until completion, so it runs in a worker thread while
    # this generator drains the streamer.
    generation_thread = Thread(target=model.generate, kwargs=generate_kwargs)
    generation_thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        yield partial_text

    # The streamer is exhausted, so generation is finishing; reap the thread.
    generation_thread.join()

    response_time = round(time.time() - start_time, 2)
    _append_log(
        {
            "timestamp": timestamp,
            "input": message,
            "output": partial_text,
            "response_time": response_time,
            "temperature": temperature,
            "do_sample": do_sample,
            "max_tokens": max_tokens,
            "top_p": top_p,
        }
    )


# Function to clear chat history
def clear_chat():
    """Return two empty lists: one for the chatbot display, one for its state."""
    return [], []


# Function to export chat history as a downloadable file
def export_chat(history):
    """Write *history* to ``chat_history.txt`` and return that path.

    Args:
        history: Chat turns as ``[user_text, bot_text]`` pairs.

    Returns:
        The path of the written file, or ``None`` when there is no history.
    """
    if not history:
        return None  # No chat history to export

    file_path = "chat_history.txt"
    # Explicit UTF-8: the conversations contain Hindi text, which a non-UTF-8
    # default locale encoding cannot represent.
    with open(file_path, "w", encoding="utf-8") as f:
        for msg in history:
            # A turn without a reply is exported with an empty bot line
            # instead of the literal text "None".
            bot_text = "" if msg[1] is None else msg[1]
            f.write(f"User: {msg[0]}\nBot: {bot_text}\n")
    return file_path


# Gradio UI
with gr.Blocks(theme=fast_rtc_theme) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("#### ⚙️🛠 Configure Settings")
            temperature = gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.1,
                label="Temperature",
                interactive=True,
            )
            do_sample = gr.Checkbox(label="Sampling", value=True, interactive=True)
            max_tokens = gr.Slider(
                minimum=128,
                maximum=4096,
                step=1,
                value=1024,
                label="Max new tokens",
                interactive=True,
            )
            top_p = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=0.1,
                step=0.2,
                label="Top-p Sampling",
                interactive=True,
            )

        with gr.Column(scale=3):
            gr.Markdown("# 💬 Chat With Phi-4-Hindi")
            chat_interface = gr.ChatInterface(
                fn=chat,
                examples=[
                    ["What is the English translation of: 'इस मॉडल को हिंदी और अंग्रेजी डेटा पर प्रशिक्षित किया गया था'?"],
                    ["टिम अपने 3 बच्चों को ट्रिक या ट्रीटिंग के लिए ले जाता है। वे 4 घंटे बाहर रहते हैं। हर घंटे वे x घरों में जाते हैं। हर घर में हर बच्चे को 3 ट्रीट मिलते हैं। उसके बच्चों को कुल 180 ट्रीट मिलते हैं। अज्ञात चर x का मान क्या है?"],
                    ["how do you play fetch? A) throw the object for the dog to get and bring back to you. B) get the object and bring it back to the dog."],
                ],
                additional_inputs=[temperature, do_sample, max_tokens, top_p],
                stop_btn="⏹ Stop",
                description="Test App",
            )

            with gr.Row():
                clear_btn = gr.Button("🧹 Clear Chat", variant="primary")
                export_btn = gr.Button("📥 Export Chat", variant="primary")

            # Connect buttons to their functions
            clear_btn.click(
                fn=clear_chat,
                outputs=[chat_interface.chatbot, chat_interface.chatbot_value],
            )
            # Exports chat
            export_btn.click(
                fn=export_chat,
                inputs=[chat_interface.chatbot],
                outputs=[gr.File()],
            )

if __name__ == "__main__":
    demo.launch()