Spaces:

root-sajjan
/

backend_image_detection

Running

App Files Files Community

backend_image_detection / model.py

root-sajjan

sending image urls too

9af57d5 verified about 2 months ago

raw

history blame contribute delete

7.99 kB

	import torch
	from pathlib import Path
	from transformers import CLIPProcessor, CLIPModel
	from PIL import Image, ImageDraw
	import pytesseract
	import requests
	import os
	from llm import inference, upload_image

	import re


	# Directory (relative to the current working directory) where YOLOModel.predict
	# writes one JPEG crop per detection as "crop_<idx>.jpg".
	cropped_images_dir = "cropped_images"
	# Create the directory at import time; exist_ok=True makes repeated imports harmless.
	os.makedirs(cropped_images_dir, exist_ok=True)

	# Load YOLO model
	class YOLOModel:
	    """
	    YOLOv5 detector whose detections are enriched with LLM metadata and OCR text.

	    ``predict`` is the main entry point: it runs the detector, crops every
	    detection, uploads the crop, asks the LLM helper about it, runs OCR on it
	    and returns one dictionary per detection. ``predict_clip`` is an optional
	    brand classifier that ``predict`` does not call.
	    """

	    # Checkpoint used when CLIP is loaded on demand by ``predict_clip``.
	    CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

	    def __init__(self, model_path="yolov5s.pt", force_reload=True):
	        """
	        Load the YOLOv5 model through ``torch.hub``.

	        :param model_path: Weights file handed to the hub's ``"custom"`` entry point.
	        :param force_reload: Forwarded to ``torch.hub.load``. ``True`` (the default,
	            matching the previous hard-coded value) discards the cached hub
	            repository and fetches it again on every construction; pass
	            ``False`` to reuse the cache.
	        """
	        # Replace torch.hub's private fork check with a no-op.
	        # NOTE(review): presumably a workaround for hub validation failures
	        # (e.g. GitHub API rate limits) -- confirm it is still required.
	        torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
	        self.model = torch.hub.load(
	            "ultralytics/yolov5",
	            "custom",
	            path=model_path,
	            force_reload=force_reload,
	        )
	        # CLIP is optional; it is loaded lazily by predict_clip so that
	        # constructing the detector does not pay for it.
	        self.clip_model = None
	        self.clip_processor = None

	    def _ensure_clip_loaded(self):
	        """Load the CLIP model and processor the first time they are needed."""
	        # getattr keeps this safe on instances created without __init__.
	        if getattr(self, "clip_model", None) is None:
	            self.clip_model = CLIPModel.from_pretrained(self.CLIP_CHECKPOINT)
	        if getattr(self, "clip_processor", None) is None:
	            self.clip_processor = CLIPProcessor.from_pretrained(self.CLIP_CHECKPOINT)

	    def predict_clip(self, image, brand_names):
	        """
	        Predict the most probable brand using CLIP.

	        :param image: Image handed to the CLIP processor as ``images``.
	            NOTE(review): the argmax below is taken over the flattened
	            probabilities, so this assumes a single image -- confirm.
	        :param brand_names: Candidate brand names used as the text prompts.
	        :return: Tuple ``(brand, probability)`` for the best-scoring candidate,
	            or ``("Unknown", 0.0)`` when no candidates are given.
	        """
	        if not brand_names:
	            return "Unknown", 0.0

	        self._ensure_clip_loaded()
	        inputs = self.clip_processor(
	            text=brand_names,
	            images=image,
	            return_tensors="pt",
	            padding=True,
	        )
	        # Inference only: no gradients are needed.
	        with torch.no_grad():
	            outputs = self.clip_model(**inputs)
	        # Convert logits to probabilities over the candidate brands.
	        probs = outputs.logits_per_image.softmax(dim=1)
	        best_idx = probs.argmax().item()
	        return brand_names[best_idx], probs[0, best_idx].item()

	    def predict_text(self, image):
	        """
	        Run OCR on an image, best-effort.

	        :param image: Object exposing ``convert('L')`` (e.g. a PIL image).
	        :return: The recognised text with surrounding whitespace stripped, or
	            an empty string if anything fails.
	        """
	        try:
	            # OCR is run on a grayscale version of the image.
	            grayscale = image.convert('L')
	            text = pytesseract.image_to_string(grayscale)
	            return text.strip()
	        except Exception as e:
	            # Deliberately broad: OCR is optional enrichment and must never
	            # abort a prediction. The failure is reported, not hidden.
	            print(f"Error during text prediction: {e}")
	            return ""

	    @staticmethod
	    def _remove_stale_crops(directory, valid_indices):
	        """
	        Delete ``crop_<n>.jpg`` files in ``directory`` whose ``n`` is not valid.

	        Files that do not match the ``crop_*.jpg`` pattern, or whose index part
	        is not an integer, are left untouched.
	        """
	        for filename in os.listdir(directory):
	            if not (filename.startswith("crop_") and filename.endswith(".jpg")):
	                continue
	            try:
	                file_idx = int(filename.split("_")[1].split(".")[0])
	            except ValueError:
	                # Not one of our numbered crops.
	                continue
	            if file_idx not in valid_indices:
	                os.remove(os.path.join(directory, filename))
	                print(f"Deleted excess file: {filename}")

	    def predict(self, image_path):
	        """
	        Run YOLO inference on an image and enrich every detection.

	        For each detection the region is cropped, saved under
	        ``cropped_images_dir`` as ``crop_<idx>.jpg``, uploaded through
	        ``upload_image.upload_image_to_imgbb``, described by
	        ``inference.get_name`` and passed through OCR.

	        :param image_path: Path to the input image
	        :return: List with one dict per detection, holding the keys
	            ``category``, ``bbox``, ``confidence``, ``category_llm``,
	            ``predicted_brand``, ``price``, ``details``, ``detected_text``
	            and ``image_url``.
	        :raises KeyError: If the LLM result lacks one of the keys ``brand``,
	            ``model``, ``price`` or ``description`` that are read from it.
	        """
	        results = self.model(image_path)
	        # convert() returns a new image, so the file can be closed immediately.
	        with Image.open(image_path) as source:
	            image = source.convert("RGB")
	        # Annotations go on a separate copy so that crops taken from `image`
	        # never contain boxes or labels drawn for earlier detections.
	        # NOTE(review): the annotated copy is currently neither saved nor returned.
	        annotated = image.copy()
	        draw = ImageDraw.Draw(annotated)
	        predictions = results.pandas().xyxy[0]  # Get predictions as pandas DataFrame
	        print(f'YOLO predictions:\n\n{predictions}')

	        output = []
	        written_indices = set()
	        for idx, row in predictions.iterrows():
	            category = row['name']
	            confidence = row['confidence']
	            bbox = [row["xmin"], row["ymin"], row["xmax"], row["ymax"]]

	            # Crop the detected region from the clean image and save it.
	            # NOTE(review): file names depend only on the detection index, so
	            # concurrent predict() calls would overwrite each other's crops.
	            cropped_image = image.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
	            cropped_image_path = os.path.join(cropped_images_dir, f"crop_{idx}.jpg")
	            cropped_image.save(cropped_image_path, "JPEG")
	            written_indices.add(idx)

	            # Upload the crop to get a URL that can be passed to the LLM.
	            print('Uploading now to image url')
	            image_url = upload_image.upload_image_to_imgbb(cropped_image_path)
	            print(f'Image URL received as{image_url}')

	            # Ask the LLM about the crop; the result is indexed by the keys
	            # "brand", "model", "price" and "description" below.
	            result_llms = inference.get_name(image_url, category)
	            llm_brand = result_llms["brand"]
	            llm_model = result_llms["model"]

	            detected_text = self.predict_text(cropped_image)
	            print(f'Details:{detected_text}')
	            print(f'Predicted brand: {llm_model}')

	            # Draw bounding box and label on the annotated copy.
	            draw.rectangle(bbox, outline="red", width=3)
	            draw.text((bbox[0], bbox[1] - 10), f'{llm_brand}', fill="red")

	            # Key names are part of the response contract and kept as-is:
	            # "category_llm" carries the LLM's "brand" field and
	            # "predicted_brand" carries its "model" field.
	            output.append({
	                "category": category,
	                "bbox": bbox,
	                "confidence": confidence,
	                "category_llm": llm_brand,
	                "predicted_brand": llm_model,
	                "price": result_llms["price"],
	                "details": result_llms["description"],
	                "detected_text": detected_text,
	                "image_url": image_url,
	            })

	        # Remove numbered crops left over from earlier runs with more detections.
	        self._remove_stale_crops(cropped_images_dir, written_indices)

	        return output