Spaces:

fffiloni
/

magic-card-generator

Sleeping

App Files Files Community

fffiloni committed on Apr 15

Commit

cd87c75

•

1 Parent(s): 024a61c

Create app.py

Browse files

Files changed (1) hide show

app.py +166 -0

app.py ADDED Viewed

	@@ -0,0 +1,166 @@

+import gradio as gr
+import spaces
+import json
+import re
+from gradio_client import Client
+def get_caption_from_kosmos(image_in):
+    kosmos2_client = Client("https://ydshieh-kosmos-2.hf.space/")
+    kosmos2_result = kosmos2_client.predict(
+        image_in,	# str (filepath or URL to image) in 'Test Image' Image component
+        "Detailed",	# str in 'Description Type' Radio component
+        fn_index=4
+    )
+    print(f"KOSMOS2 RETURNS: {kosmos2_result}")
+    with open(kosmos2_result[1], 'r') as f:
+        data = json.load(f)
+    reconstructed_sentence = []
+    for sublist in data:
+        reconstructed_sentence.append(sublist[0])
+    full_sentence = ' '.join(reconstructed_sentence)
+    #print(full_sentence)
+    # Find the pattern matching the expected format ("Describe this image in detail:" followed by optional space and then the rest)...
+    pattern = r'^Describe this image in detail:\s*(.*)$'
+    # Apply the regex pattern to extract the description text.
+    match = re.search(pattern, full_sentence)
+    if match:
+        description = match.group(1)
+        print(description)
+    else:
+        print("Unable to locate valid description.")
+    # Find the last occurrence of "."
+    #last_period_index = full_sentence.rfind('.')
+    # Truncate the string up to the last period
+    #truncated_caption = full_sentence[:last_period_index + 1]
+    # print(truncated_caption)
+    #print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
+    return description
+def get_caption_from_MD(image_in):
+    client = Client("https://vikhyatk-moondream1.hf.space/")
+    result = client.predict(
+		image_in,	# filepath  in 'image' Image component
+		"Describe character like if it was fictional",	# str  in 'Question' Textbox component
+		api_name="/answer_question"
+    )
+    print(result)
+    return result
+import re
+import torch
+from transformers import pipeline
+pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta", torch_dtype=torch.bfloat16, device_map="auto")
+@spaces.GPU(enable_queue=True)
+def get_llm_idea(user_prompt):
+    agent_maker_sys = f"""
+"""
+    instruction = f"""
+<|system|>
+{agent_maker_sys}</s>
+<|user|>
+"""
+    prompt = f"{instruction.strip()}\n{user_prompt}</s>"
+    #print(f"PROMPT: {prompt}")
+    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
+    return outputs
+def infer(image_in, cap_type):
+    gr.Info("Getting image description...")
+    if cap_type == "Fictional" :
+        user_prompt = get_caption_from_MD(image_in)
+    elif cap_type == "Literal" :
+        user_prompt = get_caption_from_kosmos(image_in)
+    gr.Info("Building a system according to the image caption ...")
+    outputs = get_llm_idea(user_prompt)
+    pattern = r'\<\|system\|\>(.*?)\<\|assistant\|\>'
+    cleaned_text = re.sub(pattern, '', outputs[0]["generated_text"], flags=re.DOTALL)
+    print(f"SUGGESTED LLM: {cleaned_text}")
+    return user_prompt, cleaned_text.lstrip("\n")
+title = f"Magic Card Generator",
+description = f""
+css = """
+#col-container{
+    margin: 0 auto;
+    max-width: 780px;
+    text-align: left;
+}
+/* fix examples gallery width on mobile */
+div#component-14 > .gallery > .gallery-item > .container > img {
+    width: auto!important;
+}
+"""
+with gr.Blocks(css=css) as demo:
+    with gr.Column(elem_id="col-container"):
+        gr.HTML(f"""
+        <h2 style="text-align: center;">LLM Agent from a Picture</h2>
+        <p style="text-align: center;">{description}</p>
+        """)
+        with gr.Row():
+            with gr.Column():
+                image_in = gr.Image(
+                    label = "Image reference",
+                    type = "filepath",
+                    elem_id = "image-in"
+                )
+                cap_type = gr.Radio(
+                    label = "Caption type",
+                    choices = [
+                        "Literal",
+                        "Fictional"
+                    ],
+                    value = "Fictional"
+                )
+                submit_btn = gr.Button("Make LLM system from my pic !")
+            with gr.Column():
+                caption = gr.Textbox(
+                    label = "Image caption",
+                    elem_id = "image-caption"
+                )
+                result = gr.Textbox(
+                    label = "Suggested System",
+                    lines = 6,
+                    max_lines = 30,
+                    elem_id = "suggested-system-prompt"
+                )
+    submit_btn.click(
+        fn = infer,
+        inputs = [
+            image_in,
+            cap_type
+        ],
+        outputs =[
+            caption,
+            result
+        ]
+    )
+demo.queue().launch(show_api=False, show_error=True)