Spaces:

ignitariumcloud
/

idefics2

Runtime error

App Files Files Community

idefics2 / app_NWrk.py

arjunanand13

Rename app.py to app_NWrk.py

79f2932 verified 5 months ago

raw

history blame contribute delete

5.85 kB

	import gradio as gr
	from transformers import AutoProcessor, Idefics2ForConditionalGeneration
	import re
	import time
	from PIL import Image
	import torch
	import spaces
	import subprocess
	from peft import LoraConfig
	from transformers import BitsAndBytesConfig
	import os

	# Install flash-attn at startup without compiling CUDA kernels.
	# The skip flag is merged into a copy of the current environment: passing a
	# bare dict as `env` would replace the whole environment, dropping PATH and
	# virtualenv variables, so the `pip` executable might not be found.
	subprocess.run(
	    "pip install flash-attn --no-build-isolation",
	    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
	    shell=True,
	)

	# Target device for the full-precision (non-LoRA) model.
	DEVICE = "cuda:0"
	# Adapter mode switches: QLoRA loads the base model 4-bit quantized; plain
	# LoRA loads it in fp16 and enables DoRA on the adapter instead.
	USE_LORA = False
	USE_QLORA = True

	# Processor for the idefics2-8b checkpoint, with image splitting disabled.
	processor = AutoProcessor.from_pretrained(
	    "HuggingFaceM4/idefics2-8b",
	    do_image_splitting=False,
	)

	# NOTE(review): in the visible part of this file `model` and `processor` are
	# not referenced by model_inference(), which calls the hosted `client`
	# instead -- confirm whether this local load is still required.
	if USE_QLORA or USE_LORA:
	    lora_config = LoraConfig(
	        r=8,
	        lora_alpha=8,
	        lora_dropout=0.1,
	        # Regex selecting the projection layers inside the text model, the
	        # modality projection and the perceiver resampler. It must be a raw
	        # string with unescaped `|` alternation and `.*` wildcards; the
	        # previous value used `\|` (a literal pipe) and single-character
	        # `.` prefixes, so it could not match any module name.
	        target_modules=r".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$",
	        use_dora=not USE_QLORA,
	        init_lora_weights="gaussian",
	    )
	    # 4-bit NF4 quantization only applies in QLoRA mode.
	    bnb_config = (
	        BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_quant_type="nf4",
	            bnb_4bit_compute_dtype=torch.float16,
	        )
	        if USE_QLORA
	        else None
	    )
	    model = Idefics2ForConditionalGeneration.from_pretrained(
	        "HuggingFaceM4/idefics2-8b",
	        torch_dtype=torch.float16,
	        quantization_config=bnb_config,
	    )
	    model.add_adapter(lora_config)
	    model.enable_adapters()
	else:
	    # Full fp16 model with flash-attention 2, moved to the configured GPU.
	    model = Idefics2ForConditionalGeneration.from_pretrained(
	        "HuggingFaceM4/idefics2-8b",
	        torch_dtype=torch.float16,
	        _attn_implementation="flash_attention_2",
	    ).to(DEVICE)

	import gradio as gr
	from huggingface_hub import InferenceApi
	import base64
	from PIL import Image
	import io

	# Inference client bound to the idefics2-8b model repo; model_inference()
	# calls it with the request payload and reads the generated text from the result.
	client = InferenceApi("HuggingFaceM4/idefics2-8b")

	def image_to_base64(image):
	buffered = io.BytesIO()
	image.save(buffered, format="JPEG")
	img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
	return img_str

	def model_inference(image, text):
	    """Send an image and a prompt to the hosted model and return its answer.

	    Args:
	        image: Image supplied by the Gradio image input, or ``None`` when
	            nothing was uploaded.
	        text: Prompt string from the Gradio textbox.

	    Returns:
	        The ``generated_text`` field of the inference response.

	    Raises:
	        gr.Error: If no image was provided or the service returned an error
	            payload.
	    """
	    # Without an upload Gradio passes None; fail with a readable message
	    # instead of an AttributeError inside the encoder.
	    if image is None:
	        raise gr.Error("Please upload an image before generating.")

	    inputs = {
	        "inputs": {
	            "text": text,
	            "image": image_to_base64(image),
	        }
	    }

	    result = client(inputs)
	    print(result)  # kept so the raw response stays visible in the app logs

	    # NOTE(review): the response may arrive as a one-element list of dicts
	    # rather than a bare dict -- unwrap defensively; confirm against the API.
	    if isinstance(result, list) and result:
	        result = result[0]
	    # Surface service-side failures instead of an opaque KeyError.
	    if isinstance(result, dict) and "error" in result:
	        raise gr.Error(f"Inference failed: {result['error']}")

	    return result["generated_text"]

	# Build the demo UI: a title, one row with an input column (image + prompt)
	# and an output column, a Generate button, and clickable examples.
	with gr.Blocks(css="""
	    .input_image, .prompt_input {
	        background-color: lightgrey;
	    }
	""") as demo:
	    gr.Markdown("## IDEFICS2 Demo")

	    # Create a row with two columns of equal size
	    with gr.Row():
	        with gr.Column():
	            # Labelled input fields
	            image_input = gr.Image(label="Upload Image", type="pil", elem_classes=["input_image"])  # ,height=240, width=320
	            query_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...", elem_classes=["prompt_input"])
	        with gr.Column():
	            # Output textbox
	            output = gr.Textbox(label="Model Output", interactive=True, placeholder="Output will be displayed here...", elem_classes=["prompt_input"])

	    # Button to submit the inputs for model inference
	    submit_btn = gr.Button("Generate")
	    submit_btn.click(model_inference, inputs=[image_input, query_input], outputs=output)

	    # Example inputs for quick testing
	    examples = [
	        ["american_football.png", "Explain in detail what is depicted in the picture"],
	        ["bike.png", "Describe in detail what you see in this image."],
	        ["finance.png", "Describe in detail everything you see in the image."],
	        ["science.png", "Extract all visible text in the image, keeping the format."],
	        ["spirituality.png", "Extract all text from the image, preserving its format."],
	    ]
	    gr.Examples(examples=examples, inputs=[image_input, query_input], outputs=output)

	# Launch the Gradio app with debugging enabled
	demo.launch(debug=True)

	# with gr.Blocks(css=".input_image {max-width: 100%; border: 1px solid #ccc; box-shadow: 0 0 10px #ccc; margin-bottom: 10px;} .output_textbox {min-height: 100px;}") as demo:
	# gr.Markdown("## Enhanced IDEFICS2 Demo")
	# with gr.Row():
	# with gr.Column(scale=1):
	# image_input = gr.Image(label="Upload Image", type="pil", height=240, width=320)
	# query_input = gr.Textbox(label="Enter Prompt", placeholder="Type your prompt here...")
	# with gr.Column(scale=1):
	# output = gr.Textbox(label="Model Output", interactive=True, placeholder="Output will be displayed here...")

	# submit_btn = gr.Button("Generate")
	# submit_btn.click(model_inference, inputs=[image_input, query_input], outputs=output)

	# examples = [
	# ["american_football.png", "Explain in detail what is depicted in the picture"],
	# ["bike.png", "Explore the image closely and describe in detail what you discover."],
	# ["finance.png", "Provide a detailed description of everything you see in the image."],
	# ["science.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."],
	# ["spirituality.png", "Please perform optical character recognition (OCR) on the uploaded image. Extract all text visible in the image accurately. Ensure to capture the text in its entirety and maintain the formatting as closely as possible to how it appears in the image. After extracting the text, display it in a clear and readable format, making sure that any special characters or symbols are also accurately represented. Provide the extracted text as output."]
	# ]
	# gr.Examples(examples=examples, inputs=[image_input, query_input], outputs=output)

	# demo.launch(debug=True)