Spaces:
Sleeping
Sleeping
File size: 1,744 Bytes
79c1479 6e61211 0a5203f c5797b7 0306e1b 24eb62a 9c010ec 80ba3ac c5797b7 79c1479 c5797b7 e727785 24eb62a e727785 24eb62a e727785 0306e1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
from transformers import BlipProcessor, BlipForQuestionAnswering
from transformers.utils import logging
class Inference:
    """Visual-question-answering over two fine-tuned BLIP checkpoints.

    Loads a shared BLIP processor plus two fine-tuned
    ``BlipForQuestionAnswering`` models (Saffal / Control Net fashion
    fine-tunes) and dispatches between them by option name.

    NOTE: constructing this class downloads model weights from the
    Hugging Face Hub on first use.
    """

    def __init__(self):
        # One processor serves both models: both fine-tunes derive from
        # the same "Salesforce/blip-vqa-base" checkpoint.
        self.blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.blip_model_saffal = BlipForQuestionAnswering.from_pretrained("wiusdy/blip_pretrained_saffal_fashion_finetuning")
        self.blip_model_control_net = BlipForQuestionAnswering.from_pretrained("wiusdy/blip_pretrained_control_net_fashion_finetuning")
        logging.set_verbosity_info()
        self.logger = logging.get_logger("transformers")

    def inference(self, options, image, text):
        """Answer *text* about *image* with the model selected by *options*.

        Args:
            options: Model selector — ``"Blip Saffal"`` or ``"Blip Control Net"``.
            image: Input image (any format accepted by the BLIP processor).
            text: The question to ask about the image.

        Returns:
            The generated answer string, or ``None`` when *options* does
            not name a known model (a warning is logged).
        """
        if options == "Blip Saffal":
            self.logger.info("Running inference for model BLIP Saffal")
            return self._generate(self.blip_model_saffal, image, text)
        if options == "Blip Control Net":
            self.logger.info("Running inference for model BLIP Control Net")
            return self._generate(self.blip_model_control_net, image, text)
        # Previously an unknown option fell through and returned None
        # silently; keep the None return for compatibility but say why.
        self.logger.warning("Unknown model option: %s", options)
        return None

    def _generate(self, model, image, text):
        """Run one BLIP generate/decode round-trip with the shared processor.

        Shared by both model branches — the two previous private methods
        were identical except for the model attribute they used.
        """
        encoding = self.blip_processor(image, text, return_tensors="pt")
        out = model.generate(**encoding)
        return self.blip_processor.decode(out[0], skip_special_tokens=True)