"""Gradio chat demo for the TraVisionLM Turkish visual language model."""

import re
import time
from threading import Thread

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor, TextIteratorStreamer

DEVICE = "cuda:0"

processor = AutoProcessor.from_pretrained(
    "ucsahin/TraVisionLM-base", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    "ucsahin/TraVisionLM-base", trust_remote_code=True
)
# NOTE(review): the object-detection model is loaded and moved to the GPU but
# is not referenced anywhere else in this file; kept so that the module-level
# name and its side effects stay unchanged.
model_od = AutoModelForCausalLM.from_pretrained(
    "ucsahin/TraVisionLM-Object-Detection-v2", trust_remote_code=True
)

model.to(DEVICE)
model_od.to(DEVICE)


def _latest_image_path(message, history):
    """Return the path of the image to use for this turn, or ``None``.

    An image attached to the current message wins. Otherwise the history is
    scanned for entries whose first element is a tuple (that is how uploaded
    files are recognised here) and the last such entry's first item is used.
    """
    if message.files:
        return message.files[-1].path

    image_path = None
    for turn in history:
        if isinstance(turn[0], tuple):
            # Keep overwriting so that the most recent upload is selected.
            image_path = turn[0][0]
    return image_path


@spaces.GPU
def bot_streaming(message, history, max_tokens, temperature, top_p, top_k, repetition_penalty):
    """Stream the model's answer to a text + image chat message.

    Args:
        message: Multimodal chat message; its ``text`` and ``files``
            attributes are read.
        history: Previous chat turns, searched for an earlier image when the
            current message has none.
        max_tokens: Passed to ``generate`` as ``max_new_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus sampling threshold.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Repetition penalty passed to ``generate``.

    Yields:
        The text generated so far, with the leading prompt characters cut off.

    Raises:
        gr.Error: If neither the message nor the history contains an image.
    """
    image_path = _latest_image_path(message, history)
    if image_path is None:
        # gr.Error is an exception: it has to be raised to reach the user.
        raise gr.Error("Lütfen önce bir resim yükleyin.")

    prompt = f"{message.text}"
    image = Image.open(image_path).convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to(DEVICE)

    streamer = TextIteratorStreamer(processor, skip_special_tokens=True)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )

    # generate() blocks until it finishes, so it runs in a worker thread while
    # this generator consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    # Assumes the decoded stream starts with the prompt followed by a newline;
    # that many characters are dropped from the front of the output.
    prompt_length = len(f"{prompt}\n")

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        time.sleep(0.04)
        yield buffer[prompt_length:]


gr.set_static_paths(paths=["static/images/"])
logo_path = "static/images/logo-color-v2.png"

PLACEHOLDER = """

Resim yükleyin ve bir soru sorun

"""

with gr.Accordion("Generation parameters", open=False) as parameter_accordion:
    max_tokens_item = gr.Slider(64, 1024, value=512, step=64, label="Max tokens")
    temperature_item = gr.Slider(0.1, 2, value=0.6, step=0.1, label="Temperature")
    top_p_item = gr.Slider(0, 1.0, value=0.9, step=0.05, label="Top_p")
    top_k_item = gr.Slider(0, 100, value=50, label="Top_k")
    repeat_penalty_item = gr.Slider(0, 2, value=1.2, label="Repeat penalty")

demo = gr.ChatInterface(
    title="TraVisionLM - Turkish Visual Language Model",
    description="",
    fn=bot_streaming,
    chatbot=gr.Chatbot(placeholder=PLACEHOLDER, scale=1),
    additional_inputs=[
        max_tokens_item,
        temperature_item,
        top_p_item,
        top_k_item,
        repeat_penalty_item,
    ],
    additional_inputs_accordion=parameter_accordion,
    stop_btn="Stop Generation",
    multimodal=True,
)

# TODO: add an "Object Detection (Obje Tespiti)" tab next to the chat tab.

demo.launch(max_file_size="5mb")