Spaces:

NexaAIDev
/

omnivlm-dpo-demo

Running

App Files Files Community

omnivlm-dpo-demo / app.py

PerryCheng614

delete load dotenv logic

da2a633 5 days ago

raw

history blame

3.88 kB

	import gradio as gr
	import requests
	import json
	import base64
	from PIL import Image
	import io
	import os

	API_KEY = os.getenv("API_KEY")
	if not API_KEY:
	raise ValueError("API_KEY environment variable must be set")

	def process_image_stream(image_path, prompt, max_tokens=512):
	"""
	Process image with streaming response via HTTP
	"""
	if not image_path:
	yield "Please upload an image first."
	return

	try:
	# Read and prepare image file
	with open(image_path, 'rb') as img_file:
	files = {
	'image': ('image.jpg', img_file, 'image/jpeg')
	}
	data = {
	'prompt': prompt,
	'task': 'instruct',
	'max_tokens': max_tokens
	}
	headers = {
	'X-API-Key': API_KEY
	}

	# Make streaming request
	response = requests.post(
	'https://nexa-omni.nexa4ai.com/process-image/',
	files=files,
	data=data,
	headers=headers,
	stream=True
	)

	if response.status_code != 200:
	yield f"Error: Server returned status code {response.status_code}"
	return

	# Initialize response and token counter
	response_text = ""
	token_count = 0

	# Process the streaming response
	for line in response.iter_lines():
	if line:
	line = line.decode('utf-8')
	if line.startswith('data: '):
	try:
	data = json.loads(line[6:]) # Skip 'data: ' prefix
	if data["status"] == "generating":
	# Skip first three tokens if they match specific patterns
	if token_count < 3 and data["token"] in [" ", " \n", "\n", "<\|im_start\|>", "assistant"]:
	token_count += 1
	continue
	response_text += data["token"]
	yield response_text
	elif data["status"] == "complete":
	break
	elif data["status"] == "error":
	yield f"Error: {data['error']}"
	break
	except json.JSONDecodeError:
	continue

	except Exception as e:
	yield f"Error processing request: {str(e)}"

	# Create Gradio interface
	demo = gr.Interface(
	fn=process_image_stream,
	inputs=[
	gr.Image(type="filepath", label="Upload Image"),
	gr.Textbox(
	label="Question",
	placeholder="Ask a question about the image...",
	value="Describe this image"
	),
	gr.Slider(
	minimum=50,
	maximum=200,
	value=200,
	step=1,
	label="Max Tokens"
	)
	],
	outputs=gr.Textbox(label="Response", interactive=False),
	title="NEXA OmniVLM-968M",
	description=f"""
	Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>

	*Model updated on Nov 21, 2024\n
	Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
	""",
	examples=[
	["example_images/example_1.jpg", "What kind of cat is this?", 128],
	["example_images/example_2.jpg", "What color is this dress? ", 128],
	["example_images/example_3.jpg", "What is this image about?", 128],
	]
	)

	if __name__ == "__main__":
	demo.queue().launch(server_name="0.0.0.0", server_port=7860)