Commit d59f119 (verified) · committed by J-LAB · Parent: 9c53151

Update app.py

Files changed (1):
  app.py  +15 -38
app.py CHANGED
@@ -7,27 +7,15 @@ from PIL import Image
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-models = {
-    'J-LAB/Florence_2_B_FluxiAI_Product_Caption': AutoModelForCausalLM.from_pretrained('J-LAB/Florence_2_B_FluxiAI_Product_Caption', trust_remote_code=True).to("cuda").eval(),
-    'J-LAB/Florence_2_L_FluxiAI_Product_Caption': AutoModelForCausalLM.from_pretrained('J-LAB/Florence_2_L_FluxiAI_Product_Caption', trust_remote_code=True).to("cuda").eval()
-}
-
-processors = {
-    'J-LAB/Florence_2_B_FluxiAI_Product_Caption': AutoProcessor.from_pretrained('J-LAB/Florence_2_B_FluxiAI_Product_Caption', trust_remote_code=True),
-    'J-LAB/Florence_2_L_FluxiAI_Product_Caption': AutoProcessor.from_pretrained('J-LAB/Florence_2_L_FluxiAI_Product_Caption', trust_remote_code=True)
-}
+model_id = 'J-LAB/Florence_2_B_FluxiAI_Product_Caption'
+model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
+processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 
 DESCRIPTION = "# [Florence-2 Product Describe by Fluxi IA](https://huggingface.co/microsoft/Florence-2-large)"
 
 @spaces.GPU
-def run_example(task_prompt, image, text_input=None, model_id='J-LAB/Florence-Idesire'):
-    model = models[model_id]
-    processor = processors[model_id]
-    if text_input is None:
-        prompt = task_prompt
-    else:
-        prompt = task_prompt + text_input
-    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+def run_example(task_prompt, image):
+    inputs = processor(text=task_prompt, images=image, return_tensors="pt").to("cuda")
     generated_ids = model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
@@ -43,18 +31,12 @@ def run_example(task_prompt, image, text_input=None, model_id='J-LAB/Florence-Id
         image_size=(image.width, image.height)
     )
     return parsed_answer
-
-def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
-    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
-    if task_prompt == 'Product Caption':
-        task_prompt = '<PC>'
-        results = run_example(task_prompt, image, model_id=model_id)
-    elif task_prompt == 'More Detailed Caption':
-        task_prompt = '<MORE_DETAILED_CAPTION>'
-        results = run_example(task_prompt, image, model_id=model_id)
-    else:
-        return "", None  # Return empty string and None for unknown task prompts
 
+def process_image(image):
+    image = Image.fromarray(image)  # Convert NumPy array to PIL Image
+    task_prompt = '<PC>'
+    results = run_example(task_prompt, image)
+
     # Remove the key and get the text value
     if results and task_prompt in results:
         output_text = results[task_prompt]
@@ -71,27 +53,22 @@ css = """
   height: 500px;
   overflow: auto;
   border: 1px solid #ccc;
+  padding: 10px;
+  background-color: #f9f9f9;
 }
 """
 
-single_task_list = [
-    'Product Caption', 'More Detailed Caption'
-]
-
 with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     with gr.Tab(label="Florence-2 Image Captioning"):
         with gr.Row():
             with gr.Column():
                 input_img = gr.Image(label="Input Picture")
-                model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='J-LAB/Florence_2_B_FluxiAI_Product_Caption')
-                task_type = gr.Radio(choices=['Single task', 'Cascased task'], label='Task type selector', value='Single task')
-                task_prompt = gr.Dropdown(choices=single_task_list, label="Task Prompt", value="Caption")
-                text_input = gr.Textbox(label="Text Input (optional)")
                 submit_btn = gr.Button(value="Submit")
             with gr.Column():
-                output_text = gr.HTML(label="Output Text")
+                with gr.Box():
+                    output_text = gr.HTML(label="Output Text", elem_id="output")
 
-    submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text])
+    submit_btn.click(process_image, [input_img], [output_text])
 
 demo.launch(debug=True)
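
For orientation, the hunks above reduce app.py to a single fixed model and a single '<PC>' (Product Caption) task. The sketch below reconstructs that simplified inference path; only the lines visible in the diff are verbatim, while the generation kwargs, the decode/post-processing step, and the final return of process_image fall outside the hunk boundaries and are assumed here to follow the standard Florence-2 sample code. The @spaces.GPU decorator and the Gradio UI are omitted.

# Minimal sketch of the inference path after this commit (not the full app.py).
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = 'J-LAB/Florence_2_B_FluxiAI_Product_Caption'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to("cuda").eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

def run_example(task_prompt, image):
    # Build multimodal inputs and generate; only input_ids/pixel_values appear in the diff,
    # the remaining kwargs are assumed from the usual Florence-2 examples.
    inputs = processor(text=task_prompt, images=image, return_tensors="pt").to("cuda")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,   # assumed: not visible in the hunks
        num_beams=3,           # assumed: not visible in the hunks
    )
    # Florence-2's remote-code processor decodes the output and parses it into a {task: text} dict.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer

def process_image(image):
    image = Image.fromarray(image)  # Gradio hands the input over as a NumPy array
    task_prompt = '<PC>'            # the fine-tuned "Product Caption" task token
    results = run_example(task_prompt, image)
    # Strip the task key and return only the caption text for the gr.HTML component.
    if results and task_prompt in results:
        return results[task_prompt]
    return ""

On the UI side, the added elem_id="output" presumably ties the gr.HTML component to the scrolling rule declared in the css string (height: 500px, overflow: auto, plus the new padding and background-color); the selector itself lies outside the hunk, so that linkage is an assumption.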