Spaces:
Running
Running
import numpy as np

from .llm import LLM
from .localizer import build_localizer
from .utils import image_w_box
class LangGround:
    """Language-guided grounding: a localizer proposes objects, an LLM selects among them.

    The localizer built by ``build_localizer`` is stored as ``self.loc`` and
    the ``LLM`` wrapper as ``self.llm``; ``localize`` chains the two.
    """

    def __init__(self, loc_model="owl", llm_model="Qwen/Qwen2.5-7B-Instruct"):
        """Build the localizer and the LLM from their model identifiers.

        Args:
            loc_model: Identifier forwarded to ``build_localizer``.
            llm_model: Identifier forwarded to ``LLM``.
        """
        self.loc = build_localizer(loc_model)
        self.llm = LLM(llm_model)

    def localize(self, frame, question, **kwargs):
        """Detect objects in ``frame`` and keep those the LLM selects for ``question``.

        Args:
            frame: Image-like input; converted with ``np.array`` before use.
            question: Passed unchanged to ``self.llm.answer``.
            **kwargs: Only ``threshold`` is read (default ``0.5``); it is
                passed as the second positional argument to
                ``self.loc.localize``.

        Returns:
            A 3-tuple ``(texts, all_box_image, llm_box_image)``:
            ``texts`` is a list of ``(item, str(index))`` pairs obtained by
            enumerating the LLM answer; ``all_box_image`` is
            ``image_w_box`` applied to every detection; ``llm_box_image`` is
            ``image_w_box`` applied to only the detections whose key is
            contained in the LLM answer.
        """
        frame = np.array(frame)
        threshold = kwargs.get("threshold", 0.5)

        # The result is used as a mapping below (.keys() / .items());
        # presumably label -> box(es) -- TODO confirm against the localizer.
        objxbox = self.loc.localize(frame, threshold)

        # The LLM is offered the detection keys as candidates.
        # NOTE(review): assumes the answer is a collection of keys (it is
        # enumerated and used with ``in``) -- confirm against LLM.answer.
        locobjs = self.llm.answer(question, objxbox.keys())

        # Keep only detections the LLM answer contains.
        locobjxbox = {k: v for k, v in objxbox.items() if k in locobjs}

        all_box_image = image_w_box(frame, objxbox)
        llm_box_image = image_w_box(frame, locobjxbox)

        # Pair each item of the LLM answer with its position, as a string.
        texts = [(text, str(idx)) for idx, text in enumerate(locobjs)]
        return texts, all_box_image, llm_box_image