File size: 3,882 Bytes
76578bc
70a6a62
76578bc
 
 
 
70a6a62
76578bc
70a6a62
 
 
 
 
76578bc
70a6a62
76578bc
 
 
 
 
 
70a6a62
 
 
 
 
 
 
 
 
 
 
 
 
76578bc
70a6a62
 
 
 
 
 
 
 
76578bc
70a6a62
 
 
 
6e0c709
70a6a62
6e0c709
76578bc
70a6a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e0c709
76578bc
 
70a6a62
76578bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6978e20
f651569
6978e20
f651569
3388a44
76578bc
 
 
6e0c709
 
 
76578bc
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import gradio as gr
import requests
import json
import base64
from PIL import Image
import io
import os

API_KEY = os.getenv("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY environment variable must be set")

def process_image_stream(image_path, prompt, max_tokens=512):
    """
    Process image with streaming response via HTTP
    """
    if not image_path:
        yield "Please upload an image first."
        return
    
    try:
        # Read and prepare image file
        with open(image_path, 'rb') as img_file:
            files = {
                'image': ('image.jpg', img_file, 'image/jpeg')
            }
            data = {
                'prompt': prompt,
                'task': 'instruct',
                'max_tokens': max_tokens
            }
            headers = {
                'X-API-Key': API_KEY
            }
            
            # Make streaming request
            response = requests.post(
                'https://nexa-omni.nexa4ai.com/process-image/',
                files=files,
                data=data,
                headers=headers,
                stream=True
            )
            
            if response.status_code != 200:
                yield f"Error: Server returned status code {response.status_code}"
                return

            # Initialize response and token counter
            response_text = ""
            token_count = 0
            
            # Process the streaming response
            for line in response.iter_lines():
                if line:
                    line = line.decode('utf-8')
                    if line.startswith('data: '):
                        try:
                            data = json.loads(line[6:])  # Skip 'data: ' prefix
                            if data["status"] == "generating":
                                # Skip first three tokens if they match specific patterns
                                if token_count < 3 and data["token"] in [" ", " \n", "\n", "<|im_start|>", "assistant"]:
                                    token_count += 1
                                    continue
                                response_text += data["token"]
                                yield response_text
                            elif data["status"] == "complete":
                                break
                            elif data["status"] == "error":
                                yield f"Error: {data['error']}"
                                break
                        except json.JSONDecodeError:
                            continue
                
    except Exception as e:
        yield f"Error processing request: {str(e)}"

# Create Gradio interface
demo = gr.Interface(
    fn=process_image_stream,
    inputs=[
        gr.Image(type="filepath", label="Upload Image"),
        gr.Textbox(
            label="Question", 
            placeholder="Ask a question about the image...",
            value="Describe this image"
        ),
        gr.Slider(
            minimum=50,
            maximum=200,
            value=200,
            step=1,
            label="Max Tokens"
        )
    ],
    outputs=gr.Textbox(label="Response", interactive=False),
    title="NEXA OmniVLM-968M",
    description=f"""
    Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>

    *Model updated on Nov 21, 2024\n
    Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
    """,
    examples=[
        ["example_images/example_1.jpg", "What kind of cat is this?", 128],
        ["example_images/example_2.jpg", "What color is this dress? ", 128],
        ["example_images/example_3.jpg", "What is this image about?", 128],
    ]
)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)