J-LAB committed on
Commit
5ae9be1
·
verified ·
1 Parent(s): 84d0e49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -42
app.py CHANGED
@@ -39,29 +39,27 @@ def fig_to_pil(fig):
39
  return Image.open(buf)
40
 
41
  @spaces.GPU
42
- def run_example(task_prompt, image, text_input=None, model_id='J-LAB/Florence-Idesire'):
43
- model = models[model_id]
44
- processor = processors[model_id]
45
- if text_input is None:
46
- prompt = task_prompt
 
 
 
 
 
 
 
 
 
47
  else:
48
- prompt = task_prompt + text_input
49
- inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
50
- generated_ids = model.generate(
51
- input_ids=inputs["input_ids"],
52
- pixel_values=inputs["pixel_values"],
53
- max_new_tokens=1024,
54
- early_stopping=False,
55
- do_sample=False,
56
- num_beams=3,
57
- )
58
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
59
- parsed_answer = processor.post_process_generation(
60
- generated_text,
61
- task=task_prompt,
62
- image_size=(image.width, image.height)
63
- )
64
- return parsed_answer
65
 
66
  def plot_bbox(image, data):
67
  fig, ax = plt.subplots()
@@ -117,27 +115,7 @@ def draw_ocr_bboxes(image, prediction):
117
  fill=color)
118
  return image
119
 
120
- def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
121
- image = Image.fromarray(image) # Convert NumPy array to PIL Image
122
- if task_prompt == 'Product Caption':
123
- task_prompt = '<PC>'
124
- results = run_example(task_prompt, image, model_id=model_id)
125
- elif task_prompt == 'More Detailed Caption':
126
- task_prompt = '<MORE_DETAILED_CAPTION>'
127
- results = run_example(task_prompt, image, model_id=model_id)
128
- else:
129
- return "", None # Return empty string and None for unknown task prompts
130
 
131
- # Remove the key and get the text value
132
- if results and task_prompt in results:
133
- output_text = results[task_prompt]
134
- else:
135
- output_text = ""
136
-
137
- # Convert newline characters to HTML line breaks
138
- output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
139
-
140
- return output_text, None
141
 
142
 
143
  css = """
 
39
  return Image.open(buf)
40
 
41
  @spaces.GPU
42
+ def process_image(image, task_prompt, text_input=None, model_id='J-LAB/Florence_2_B_FluxiAI_Product_Caption'):
43
+ image = Image.fromarray(image) # Convert NumPy array to PIL Image
44
+ if task_prompt == 'Product Caption':
45
+ task_prompt = '<PC>'
46
+ results = run_example(task_prompt, image, model_id=model_id)
47
+ elif task_prompt == 'More Detailed Caption':
48
+ task_prompt = '<MORE_DETAILED_CAPTION>'
49
+ results = run_example(task_prompt, image, model_id=model_id)
50
+ else:
51
+ return "", None # Return empty string and None for unknown task prompts
52
+
53
+ # Remove the key and get the text value
54
+ if results and task_prompt in results:
55
+ output_text = results[task_prompt]
56
  else:
57
+ output_text = ""
58
+
59
+ # Convert newline characters to HTML line breaks
60
+ output_text = output_text.replace("\n\n", "<br><br>").replace("\n", "<br>")
61
+
62
+ return output_text, None
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  def plot_bbox(image, data):
65
  fig, ax = plt.subplots()
 
115
  fill=color)
116
  return image
117
 
 
 
 
 
 
 
 
 
 
 
118
 
 
 
 
 
 
 
 
 
 
 
119
 
120
 
121
  css = """