rbn2008k committed on
Commit
eb1e0c3
·
verified ·
1 Parent(s): 8b4a19c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -13,7 +13,6 @@ from openai import OpenAI
13
  from telethon import TelegramClient, events
14
  from PIL import Image
15
  from huggingface_hub import InferenceClient
16
- from transformers import AutoProcessor, AutoTokenizer
17
  import pymongo
18
  from pymongo import MongoClient
19
 
@@ -100,40 +99,42 @@ def update_chat_history(user_id, role, content):
100
  store_message_in_mongo(user_id, role, content)
101
 
102
  # Encode image to base64
103
- def encode_local_image(image):
104
- pil_image = Image.open(image)
105
- buffer = BytesIO()
106
- pil_image.save(buffer, format="JPEG")
107
- return f"data:image/jpeg;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"
 
 
108
 
109
  # Describe image using the model with error handling
110
- def describe_image(image_path, query=''):
111
- image_string = encode_local_image(image_path)
112
-
113
- messages = [
114
- {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": f"{os.getenv('USER_PROMPT')}\n{query}" }]}
115
- ]
116
-
117
- prompt_with_template = idefics_processor.apply_chat_template(
118
- messages, add_generation_prompt=True, chat_template=os.getenv('CHAT_TEMPLATE')
 
 
 
 
 
 
 
119
  )
 
120
 
121
- prompt_with_images = prompt_with_template.replace("<image>", f"![]({image_string})")
122
-
123
- payload = {
124
- "inputs": prompt_with_images,
125
- "parameters": {"return_full_text": False, "max_new_tokens": 2048},
126
- }
127
-
128
  try:
129
- response = idefics_client.post(json=payload)
130
- # Check if the response is empty or not valid JSON
131
- if response.status_code != 200 or not response.text:
132
- raise ValueError(f"Invalid response: {response.status_code}, {response.text}")
133
- return response.text
134
  except Exception as e:
135
- print(f"Error during image description: {e}")
136
- return "Unable to describe the image due to an error."
137
 
138
  # Telegram bot client
139
  client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
@@ -207,7 +208,7 @@ async def handle_message(event):
207
  # If an image is sent, describe the image
208
  photo = await event.download_media()
209
  image_description = describe_image(photo, user_message)
210
- user_message += f"\n\nContent of the image: {image_description}"
211
 
212
  # Ignore command messages to prevent double processing
213
  if user_message.startswith('/start') or user_message.startswith('/help') or user_message.startswith('/reset'):
 
13
  from telethon import TelegramClient, events
14
  from PIL import Image
15
  from huggingface_hub import InferenceClient
 
16
  import pymongo
17
  from pymongo import MongoClient
18
 
 
99
  store_message_in_mongo(user_id, role, content)
100
 
101
  # Encode image to base64
102
def encode_local_image(image_path):
    """Encode the image file at *image_path* as a base64 PNG string.

    Args:
        image_path: Path to an image file on disk (e.g. the file returned
            by Telethon's ``download_media``).

    Returns:
        The PNG-encoded image as an ASCII base64 string (no data-URI prefix;
        the caller prepends ``data:image/png;base64,``).
    """
    # BUG FIX: Image.fromarray expects a numpy array, but callers pass a
    # file path (the photo downloaded by the Telegram handler) — open the
    # file instead, as the previous revision of this helper did.
    im = Image.open(image_path)
    buffered = BytesIO()
    im.save(buffered, format="PNG")
    image_bytes = buffered.getvalue()
    image_base64 = base64.b64encode(image_bytes).decode('ascii')
    return image_base64
109
 
110
  # Describe image using the model with error handling
111
def inference_calling_idefics(image_path, question=""):
    """Ask the idefics2 chat model a question about a local image.

    Args:
        image_path: Path to the image file to describe.
        question: Optional user question appended after the system prompt.

    Returns:
        The model's generated text (whatever ``text_generation`` returns).
    """
    system_prompt = os.getenv('USER_PROMPT')
    model_id = "HuggingFaceM4/idefics2-8b-chatty"
    client = InferenceClient(model=model_id)
    # BUG FIX: the helper defined in this file is encode_local_image;
    # base64_encoded_image does not exist and raised NameError.
    image_base64 = encode_local_image(image_path)
    image_info = f"data:image/png;base64,{image_base64}"

    # Include the system prompt before the user question
    prompt = f"{system_prompt}\n![]({image_info})\n{question}\n\n"

    # do_sample=True for varied responses; cap output length.
    # BUG FIX: a comma was missing between do_sample and temperature,
    # which made this call a SyntaxError.
    response = client.text_generation(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.2,
    )
    return response
129
 
130
# Function to generate answers using the idefics model with system prompt and max_tokens
def describe_image(image_path, question=""):
    """Describe an image via the idefics model, with error handling.

    Args:
        image_path: Path to the image file to describe.
        question: Optional user question to pass along with the image.

    Returns:
        The model's answer, or a fallback message if inference fails.
    """
    try:
        answer = inference_calling_idefics(image_path, question)
        return answer
    except Exception as e:
        print(e)
        # BUG FIX: original read `return answer = "..."`, which is a
        # SyntaxError (assignment inside a return statement).
        return "Error while seeing the image."
138
 
139
  # Telegram bot client
140
  client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
 
208
  # If an image is sent, describe the image
209
  photo = await event.download_media()
210
  image_description = describe_image(photo, user_message)
211
+ user_message += f"\n\nI sent you an image. Content of the image: {image_description}"
212
 
213
  # Ignore command messages to prevent double processing
214
  if user_message.startswith('/start') or user_message.startswith('/help') or user_message.startswith('/reset'):