Spaces:

akhil2808
/

MongoDBpixtralOCR

Running

App Files Files Community

MongoDBpixtralOCR / app.py

akhil2808

Upload 4 files

ba395cf verified 4 months ago

raw

history blame contribute delete

3.25 kB

	import base64
	import os
	import time
	import uuid
	from io import BytesIO

	import gradio as gr
	import requests
	from PIL import Image
	from pymongo import MongoClient

	# MongoDB setup
	# The connection string embeds a username and password, so it is read from
	# the environment (e.g. a deployment secret) rather than kept in source.
	# NOTE: any credentials that were ever committed to this file should be rotated.
	_mongo_uri = os.environ.get("MONGODB_URI")
	if not _mongo_uri:
	    raise RuntimeError(
	        "Set the MONGODB_URI environment variable to the MongoDB connection string."
	    )

	mongo_client = MongoClient(_mongo_uri)
	db = mongo_client['bajaj']
	collection = db['client']  # chat_with_pixtral inserts one document per request here

	# Function to encode the image to base64
	def encode_image(img, image_format="PNG"):
	    """Serialize an image in memory and return the bytes as base64 text.

	    Args:
	        img: Object exposing ``save(fp, format=...)``; the Gradio input in
	            this file is configured with ``type="pil"``.
	        image_format: Format name passed to ``img.save``. Defaults to
	            ``"PNG"``, the format this function has always produced.

	    Returns:
	        The serialized image, base64-encoded, as a ``str``.
	    """
	    # The context manager releases the in-memory buffer as soon as the
	    # encoded text has been built.
	    with BytesIO() as buffered:
	        img.save(buffered, format=image_format)
	        return base64.b64encode(buffered.getvalue()).decode("utf-8")

	# Chat function with Pixtral and MongoDB saving
	def _ask_pixtral(base64_png, user_question, api_key):
	    """Send the question and the PNG image to Pixtral; return the reply or an error text."""
	    api = "https://api.hyperbolic.xyz/v1/chat/completions"

	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}",
	    }

	    payload = {
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": user_question},
	                    {
	                        "type": "image_url",
	                        # encode_image produces PNG bytes, so the data URL
	                        # must declare image/png rather than image/jpeg.
	                        "image_url": {"url": f"data:image/png;base64,{base64_png}"},
	                    },
	                ],
	            }
	        ],
	        "model": "mistralai/Pixtral-12B-2409",
	        "max_tokens": 2048,
	        "temperature": 0.7,
	        "top_p": 0.9,
	    }

	    try:
	        # A timeout keeps a stalled connection from hanging the UI forever.
	        response = requests.post(api, headers=headers, json=payload, timeout=120)
	    except requests.RequestException as exc:
	        return f"API request failed: {exc}"

	    # Process API response
	    if response.status_code != 200:
	        return f"API request failed: {response.status_code} - {response.text}"

	    try:
	        return response.json()['choices'][0]['message']['content']
	    except (ValueError, KeyError, IndexError, TypeError):
	        # Body was not JSON, or did not have the expected choices/message shape.
	        return "Response format is incorrect"


	def chat_with_pixtral(uploaded_file, mrn_number, user_question):
	    """Ask Pixtral about an uploaded image and store the answer in MongoDB.

	    Args:
	        uploaded_file: Image from the Gradio image input, or ``None`` when
	            nothing was uploaded.
	        mrn_number: MRN text entered by the user; stored as given.
	        user_question: Question text sent to the model alongside the image.

	    Returns:
	        The model's reply, an error description when the API call fails or
	        is not configured, or a usage hint when the image or MRN is missing.
	    """
	    # Guard clause: both an image and a non-blank MRN are required.
	    # ``or ""`` lets a missing (None) MRN take the same path as a blank one.
	    if uploaded_file is None or not (mrn_number or "").strip():
	        return "Upload your image, enter MRN number, and enter your question."

	    # The API key is a secret and is read from the environment instead of
	    # being kept in source.
	    api_key = os.environ.get("HYPERBOLIC_API_KEY")
	    if not api_key:
	        return "API request failed: HYPERBOLIC_API_KEY environment variable is not set"

	    base64_img = encode_image(uploaded_file)
	    assistant_response = _ask_pixtral(base64_img, user_question, api_key)

	    # Save the result to MongoDB with the specified format. Error texts from
	    # a failed API call are stored as well, under the same 'ocr_result' key.
	    document = {
	        'mrn_number': mrn_number,
	        'ocr_result': assistant_response,  # This will be the OCR/API result
	        'unique_id': str(uuid.uuid4()),  # unique ID for the request
	        'got_mode': "plain texts OCR",
	        'timestamp': time.time(),
	    }
	    collection.insert_one(document)  # Insert the document into MongoDB

	    return assistant_response

	# Gradio GUI
	# Input widgets, listed in the order chat_with_pixtral takes its arguments:
	# the image, the MRN number, then the question.
	input_components = [
	    gr.Image(type="pil", label="Upload Your Image"),
	    gr.Textbox(label="Enter MRN Number"),
	    gr.Textbox(label="Please enter your question"),
	]

	iface = gr.Interface(
	    fn=chat_with_pixtral,
	    inputs=input_components,
	    outputs="text",
	    title="Pixtral Image Chat",
	    description="Upload your Image, enter MRN number, and get insights out of the Image",
	)

	# Start the app; share=True asks Gradio for a public share link as well.
	iface.launch(share=True)