fffiloni committed on
Commit
cd87c75
•
1 Parent(s): 024a61c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +166 -0
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import json
4
+ import re
5
+ from gradio_client import Client
6
+
7
+
8
def get_caption_from_kosmos(image_in):
    """Return a detailed, literal caption for an image via the Kosmos-2 Space.

    The second item of the remote result is opened as a JSON file. The first
    element of every entry in that file is treated as a text fragment; the
    fragments are joined with single spaces, and the leading
    "Describe this image in detail:" instruction is stripped.

    Args:
        image_in: File path or URL of the image to describe.

    Returns:
        The caption text. When the expected instruction prefix is absent,
        the whole reconstructed sentence is returned instead, so the caller
        always receives a string.
    """
    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")

    kosmos2_result = kosmos2_client.predict(
        image_in,  # str (filepath or URL to image) in 'Test Image' Image component
        "Detailed",  # str in 'Description Type' Radio component
        fn_index=4,
    )
    print(f"KOSMOS2 RETURNS: {kosmos2_result}")

    with open(kosmos2_result[1], "r", encoding="utf-8") as f:
        data = json.load(f)

    full_sentence = " ".join(sublist[0] for sublist in data)

    # DOTALL lets the capture span line breaks, so a multi-line caption still
    # matches instead of falling through to the "not found" branch.
    match = re.search(
        r"^Describe this image in detail:\s*(.*)$",
        full_sentence,
        flags=re.DOTALL,
    )
    if match:
        description = match.group(1)
        print(description)
    else:
        # Previously `description` was left unbound here and the return
        # statement raised UnboundLocalError; fall back to the raw text.
        print("Unable to locate valid description.")
        description = full_sentence

    return description
49
+
50
def get_caption_from_MD(image_in):
    """Ask the moondream1 Space to describe the image as a fictional character.

    Args:
        image_in: File path of the image sent to the Space's 'image' component.

    Returns:
        Whatever the Space's "/answer_question" endpoint returns for the
        fixed question; the value is also printed for logging.
    """
    moondream_client = Client("https://vikhyatk-moondream1.hf.space/")
    question = "Describe character like if it was fictional"
    answer = moondream_client.predict(
        image_in,
        question,
        api_name="/answer_question",
    )
    print(answer)
    return answer
59
+
60
+
61
+ import re
62
+ import torch
63
+ from transformers import pipeline
64
+
65
# Text-generation pipeline shared by every request; built once at import time.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
66
+
67
@spaces.GPU(enable_queue=True)
def get_llm_idea(user_prompt):
    """Generate text from the Zephyr pipeline for the given user prompt.

    The prompt is laid out as system, user and assistant turns, each of the
    first two closed with ``</s>``, and ends with the ``<|assistant|>``
    marker so generation starts directly at the assistant's reply.

    Args:
        user_prompt: Text placed in the user turn (here, an image caption).

    Returns:
        The raw result of calling ``pipe``; the caller reads
        ``outputs[0]["generated_text"]`` from it.
    """
    # System prompt placeholder: intentionally left blank in this version.
    agent_maker_sys = ""

    # The trailing "<|assistant|>" marker was missing before, so the caller's
    # "<|system|>...<|assistant|>" cleanup only worked when the model happened
    # to emit the marker itself.
    prompt = (
        f"<|system|>\n{agent_maker_sys}</s>\n"
        f"<|user|>\n{user_prompt}</s>\n"
        "<|assistant|>\n"
    )
    outputs = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
    )
    return outputs
83
+
84
+
85
def infer(image_in, cap_type):
    """Caption an image, then ask the LLM to build a system from that caption.

    Args:
        image_in: File path of the reference image.
        cap_type: "Fictional" to caption with ``get_caption_from_MD`` or
            "Literal" to caption with ``get_caption_from_kosmos``.

    Returns:
        A ``(caption, suggestion)`` tuple: the caption that was used as the
        user prompt, and the generated text with the
        ``<|system|>...<|assistant|>`` span removed and leading newlines
        stripped.

    Raises:
        gr.Error: If no image was provided or ``cap_type`` is not one of the
            two supported values.
    """
    if image_in is None:
        raise gr.Error("Please provide an image first.")

    # Validate before any remote call: previously an unexpected value left
    # `user_prompt` unbound and crashed later with UnboundLocalError.
    if cap_type not in ("Fictional", "Literal"):
        raise gr.Error(f"Unsupported caption type: {cap_type!r}")

    gr.Info("Getting image description...")
    if cap_type == "Fictional":
        user_prompt = get_caption_from_MD(image_in)
    else:
        user_prompt = get_caption_from_kosmos(image_in)

    gr.Info("Building a system according to the image caption ...")
    outputs = get_llm_idea(user_prompt)

    # Drop everything from the system marker up to the assistant marker,
    # leaving only the assistant's reply.
    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)

    print(f"SUGGESTED LLM: {cleaned_text}")

    return user_prompt, cleaned_text.lstrip("\n")
102
+
103
# Page metadata. A stray trailing comma used to turn `title` into a
# one-element tuple; it is now a plain string.
title = "Magic Card Generator"
description = ""

# Custom CSS passed to gr.Blocks: centers the main column and caps its width.
css = """
#col-container{
    margin: 0 auto;
    max-width: 780px;
    text-align: left;
}
/* fix examples gallery width on mobile */
div#component-14 > .gallery > .gallery-item > .container > img {
    width: auto!important;
}
"""
117
+
118
# Assemble the Gradio interface: inputs on the left, generated text on the right.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
        <p style="text-align: center;">{description}</p>
        """)

        with gr.Row():
            # Left column: image upload, caption mode and the trigger button.
            with gr.Column():
                image_in = gr.Image(label="Image reference", type="filepath", elem_id="image-in")
                cap_type = gr.Radio(
                    label="Caption type",
                    choices=["Literal", "Fictional"],
                    value="Fictional",
                )
                submit_btn = gr.Button("Make LLM system from my pic !")

            # Right column: the caption that was used and the LLM suggestion.
            with gr.Column():
                caption = gr.Textbox(label="Image caption", elem_id="image-caption")
                result = gr.Textbox(
                    label="Suggested System",
                    lines=6,
                    max_lines=30,
                    elem_id="suggested-system-prompt",
                )

    # Wire the button to the inference function.
    submit_btn.click(fn=infer, inputs=[image_in, cap_type], outputs=[caption, result])

demo.queue().launch(show_api=False, show_error=True)