aifeifei798 committed
Commit 7cbb5f2 · verified · 1 Parent(s): 20991a6

Update app.py

Files changed (1)
  1. app.py +49 -58
app.py CHANGED
@@ -1,62 +1,54 @@
+ import base64
+ from io import BytesIO
+ import os
+ from mistralai import Mistral
+ import re
  from PIL import Image
  import gradio as gr
- from transformers import (
-     AutoProcessor,
-     AutoModelForCausalLM,
- )
- import torch
- import subprocess
-
- subprocess.run(
-     "pip install flash-attn --no-build-isolation",
-     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-     shell=True,
- )
-
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-
- Florence_models = AutoModelForCausalLM.from_pretrained(
-     "microsoft/Florence-2-large",
-     torch_dtype=torch_dtype,
-     trust_remote_code=True).to(device)
-
- Florence_processors = AutoProcessor.from_pretrained(
-     "microsoft/Florence-2-large", trust_remote_code=True)
-
-
-
- def feifeiflorence(
-     image,
-     progress=gr.Progress(track_tqdm=True),
- ):
-     image = Image.fromarray(image)
-     task_prompt = "<MORE_DETAILED_CAPTION>"
-
-     if image.mode != "RGB":
-         image = image.convert("RGB")
-
-     inputs = Florence_processors(text=task_prompt,
-                                  images=image,
-                                  return_tensors="pt").to(device, torch_dtype)
-
-     generated_ids = Florence_models.generate(
-         input_ids=inputs["input_ids"],
-         pixel_values=inputs["pixel_values"],
-         max_new_tokens=1024,
-         num_beams=3,
-         do_sample=False,
-     )
-     generated_text = Florence_processors.batch_decode(
-         generated_ids, skip_special_tokens=False)[0]
-     parsed_answer = Florence_processors.post_process_generation(
-         generated_text,
-         task=task_prompt,
-         image_size=(image.width, image.height))
-     out_text=parsed_answer["<MORE_DETAILED_CAPTION>"]
-     width, height = image.size
-     return out_text,f"width={width} height={height}"
-
+
+ api_key = os.getenv("MISTRAL_API_KEY")
+ Mistralclient = Mistral(api_key=api_key)
+
+ def encode_image(image_path):
+     """Encode the image to base64."""
+     try:
+         # Open the image file
+         image = Image.open(image_path).convert("RGB")
+
+         # Convert the image to an in-memory JPEG byte stream
+         buffered = BytesIO()
+         image.save(buffered, format="JPEG")
+         img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+
+         return img_str
+     except FileNotFoundError:
+         print(f"Error: The file {image_path} was not found.")
+         return None
+     except Exception as e:  # catch-all error handling
+         print(f"Error: {e}")
+         return None
+
+ def feifeichat(image):
+     model = "pixtral-large-2411"
+     # Encode the input image so it can be sent inline as a base64 data URL
+     base64_image = encode_image(image)
+     # Define the messages for the chat
+     messages = [{
+         "role": "user",
+         "content": [
+             {
+                 "type": "text",
+                 "text": "Describe this image in detail in English."
+             },
+             {
+                 "type": "image_url",
+                 "image_url": f"data:image/jpeg;base64,{base64_image}",
+             },
+         ],
+     }]
+
+     # Stream the completion and concatenate the text chunks
+     partial_message = ""
+     for chunk in Mistralclient.chat.stream(model=model, messages=messages):
+         if chunk.data.choices[0].delta.content is not None:
+             partial_message += chunk.data.choices[0].delta.content
+     return partial_message
 
 
  with gr.Blocks() as demo:
@@ -68,9 +60,8 @@ with gr.Blocks() as demo:
      submit_btn = gr.Button(value="Submit")
      with gr.Column():
          output_text = gr.Textbox(label="Output Text")
-         output_img_text = gr.Textbox(label="Output width and height")
 
 
-     submit_btn.click(feifeiflorence, [input_img], [output_text, output_img_text])
+     submit_btn.click(feifeichat, [input_img], [output_text])
 
  demo.launch()
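
Note: the new handler already calls Mistralclient.chat.stream and names its accumulator partial_message, so a streaming UI is a natural follow-up. Below is a minimal sketch, not part of this commit, assuming the mistralai v1 client above and that input_img passes a file path (that component's definition is outside the shown hunks); Gradio renders each yielded value in the Textbox as it arrives:

# Hypothetical streaming variant (illustrative only); reuses encode_image and
# Mistralclient defined in app.py above.
def feifeichat_streaming(image):
    base64_image = encode_image(image)  # assumes `image` is a file path
    messages = [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image in detail in English."},
            {"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_image}"},
        ],
    }]
    partial_message = ""
    for chunk in Mistralclient.chat.stream(model="pixtral-large-2411", messages=messages):
        if chunk.data.choices[0].delta.content is not None:
            partial_message += chunk.data.choices[0].delta.content
            yield partial_message  # Gradio updates the Textbox incrementally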