rbn2008k committed on
Commit
eb1e0c3
·
verified ·
1 Parent(s): 8b4a19c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -30
app.py CHANGED
@@ -13,7 +13,6 @@ from openai import OpenAI
13
  from telethon import TelegramClient, events
14
  from PIL import Image
15
  from huggingface_hub import InferenceClient
16
- from transformers import AutoProcessor, AutoTokenizer
17
  import pymongo
18
  from pymongo import MongoClient
19
 
@@ -100,40 +99,42 @@ def update_chat_history(user_id, role, content):
100
  store_message_in_mongo(user_id, role, content)
101
 
102
  # Encode image to base64
103
- def encode_local_image(image):
104
- pil_image = Image.open(image)
105
- buffer = BytesIO()
106
- pil_image.save(buffer, format="JPEG")
107
- return f"data:image/jpeg;base64,{base64.b64encode(buffer.getvalue()).decode('utf-8')}"
 
 
108
 
109
  # Describe image using the model with error handling
110
- def describe_image(image_path, query=''):
111
- image_string = encode_local_image(image_path)
112
-
113
- messages = [
114
- {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": f"{os.getenv('USER_PROMPT')}\n{query}" }]}
115
- ]
116
-
117
- prompt_with_template = idefics_processor.apply_chat_template(
118
- messages, add_generation_prompt=True, chat_template=os.getenv('CHAT_TEMPLATE')
 
 
 
 
 
 
 
119
  )
 
120
 
121
- prompt_with_images = prompt_with_template.replace("<image>", f"![]({image_string})")
122
-
123
- payload = {
124
- "inputs": prompt_with_images,
125
- "parameters": {"return_full_text": False, "max_new_tokens": 2048},
126
- }
127
-
128
  try:
129
- response = idefics_client.post(json=payload)
130
- # Check if the response is empty or not valid JSON
131
- if response.status_code != 200 or not response.text:
132
- raise ValueError(f"Invalid response: {response.status_code}, {response.text}")
133
- return response.text
134
  except Exception as e:
135
- print(f"Error during image description: {e}")
136
- return "Unable to describe the image due to an error."
137
 
138
  # Telegram bot client
139
  client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
@@ -207,7 +208,7 @@ async def handle_message(event):
207
  # If an image is sent, describe the image
208
  photo = await event.download_media()
209
  image_description = describe_image(photo, user_message)
210
- user_message += f"\n\nContent of the image: {image_description}"
211
 
212
  # Ignore command messages to prevent double processing
213
  if user_message.startswith('/start') or user_message.startswith('/help') or user_message.startswith('/reset'):
 
13
  from telethon import TelegramClient, events
14
  from PIL import Image
15
  from huggingface_hub import InferenceClient
 
16
  import pymongo
17
  from pymongo import MongoClient
18
 
 
99
  store_message_in_mongo(user_id, role, content)
100
 
101
  # Encode image to base64
102
def encode_local_image(image_path):
    """Encode the image file at *image_path* as a base64 PNG string.

    Args:
        image_path: Path to an image file on disk (e.g. the file returned
            by Telethon's ``download_media``).

    Returns:
        The PNG-encoded image as an ASCII base64 string (no data-URI prefix;
        the caller prepends ``data:image/png;base64,``).
    """
    # BUG FIX: Image.fromarray expects a numpy array, but callers pass a
    # file path (the photo downloaded by the Telegram handler) — open the
    # file instead, as the previous revision of this helper did.
    im = Image.open(image_path)
    buffered = BytesIO()
    im.save(buffered, format="PNG")
    image_bytes = buffered.getvalue()
    image_base64 = base64.b64encode(image_bytes).decode('ascii')
    return image_base64
109
 
110
  # Describe image using the model with error handling
111
def inference_calling_idefics(image_path, question=""):
    """Ask the idefics2 chat model a question about a local image.

    Args:
        image_path: Path to the image file to describe.
        question: Optional user question appended after the system prompt.

    Returns:
        The model's generated text (whatever ``text_generation`` returns).
    """
    system_prompt = os.getenv('USER_PROMPT')
    model_id = "HuggingFaceM4/idefics2-8b-chatty"
    client = InferenceClient(model=model_id)
    # BUG FIX: the helper defined in this file is encode_local_image;
    # base64_encoded_image does not exist and raised NameError.
    image_base64 = encode_local_image(image_path)
    image_info = f"data:image/png;base64,{image_base64}"

    # Include the system prompt before the user question
    prompt = f"{system_prompt}\n![]({image_info})\n{question}\n\n"

    # do_sample=True for varied responses; cap output length.
    # BUG FIX: a comma was missing between do_sample and temperature,
    # which made this call a SyntaxError.
    response = client.text_generation(
        prompt,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.2,
    )
    return response
129
 
130
# Function to generate answers using the idefics model with system prompt and max_tokens
def describe_image(image_path, question=""):
    """Describe an image via the idefics model, with error handling.

    Args:
        image_path: Path to the image file to describe.
        question: Optional user question to pass along with the image.

    Returns:
        The model's answer, or a fallback message if inference fails.
    """
    try:
        answer = inference_calling_idefics(image_path, question)
        return answer
    except Exception as e:
        print(e)
        # BUG FIX: original read `return answer = "..."`, which is a
        # SyntaxError (assignment inside a return statement).
        return "Error while seeing the image."
138
 
139
  # Telegram bot client
140
  client = TelegramClient('bot', api_id, api_hash).start(bot_token=bot_token)
 
208
  # If an image is sent, describe the image
209
  photo = await event.download_media()
210
  image_description = describe_image(photo, user_message)
211
+ user_message += f"\n\nI sent you an image. Content of the image: {image_description}"
212
 
213
  # Ignore command messages to prevent double processing
214
  if user_message.startswith('/start') or user_message.startswith('/help') or user_message.startswith('/reset'):