"""Gradio chat UI ("Zaki platform") for the LLAMA3.2 Virtual Doctor GGUF model."""

import os
import shutil
import time

# Cache locations must be exported BEFORE the Hugging Face download machinery
# is used, otherwise the model lands in the default home-directory cache.
os.environ["HF_HOME"] = "/app/.hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/.transformers_cache"

import gradio as gr
from llama_cpp import Llama

# Download (or reuse from cache) the quantised virtual-doctor model.
llm = Llama.from_pretrained(
    repo_id="alibidaran/LLAMA3.2-Virtual_doctor_GGUF",
    filename="unsloth.Q8_0.gguf",
)


def user(user_message, history):
    """Append the user's message to the chat history.

    Returns ("", updated_history): the empty string clears the input
    textbox, and the history gains one OpenAI-style message dict.
    """
    return "", history + [{"role": "user", "content": user_message}]


def respond(history):
    """Generate the assistant reply and stream it character by character.

    NOTE: the prompt template (including the "Asistant" spelling) matches
    the format the model was fine-tuned with — do not "fix" the spelling.
    Yields the updated history after each character for a typewriter effect.
    """
    text = f" ###Human: {history[-1]['content']} ###Asistant: "
    # echo=False: return only the completion. With echo=True the
    # "###Human: ... ###Asistant:" prompt leaked into the chat window.
    response = llm(text, max_tokens=512, echo=False)
    reply = response["choices"][0]["text"]
    print(reply)
    history.append({"role": "assistant", "content": ""})
    for character in reply:
        history[-1]["content"] += character
        time.sleep(0.02)  # small delay so the reply streams visibly
        yield history


with gr.Blocks() as demo:
    gr.Markdown("# Welcome to Zaki platform")
    with gr.Tab("Chat Interface"):
        gr.HTML("<div style='text-align: center;'><h2>Virtual Doctor</h2></div>")
        chatbot = gr.Chatbot(type="messages")
        msg = gr.Textbox()
        btn = gr.Button("Send")
        clear = gr.ClearButton([msg, chatbot])
        # First record the user turn (fast, unqueued), then stream the reply.
        btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            respond, chatbot, chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)