|
import base64
|
|
import requests
|
|
import os
|
|
from openai import OpenAI
|
|
from tqdm import tqdm
|
|
import time
|
|
import sys
|
|
|
|
|
|
# Command-line handling: the first positional argument is the folder that
# holds the images to caption.
if len(sys.argv) < 2:
    # sys.exit(<str>) writes the message to stderr and exits with status 1,
    # keeping usage errors out of the regular stdout output.
    sys.exit("Please, provide the path to image folder.")

image_dir = sys.argv[1]

# Fail fast on a wrong path instead of erroring later in os.listdir(),
# after the model server has already been contacted.
if not os.path.isdir(image_dir):
    sys.exit(f"Image folder not found: {image_dir}")
|
|
|
|
# Connection settings for the OpenAI-compatible endpoint on localhost.
# NOTE(review): "EMPTY" looks like a placeholder key for a server that does
# not check credentials - confirm against the server configuration.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

# Use the id of the first model the endpoint reports and show it to the user.
_served_models = client.models.list().data
model_type = _served_models[0].id
print(f'model_type: {model_type}')
|
|
|
|
|
|
def encode_image(image_path):
    """Return the contents of the file at *image_path* as base64 text.

    The file is read in binary mode in one go; the base64 bytes are decoded
    to a ``str`` so the result can be embedded in a data URL.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
|
|
|
|
|
|
|
# Tag files are read from txt_dir (one "<image stem>.txt" per image);
# generated captions are written to maintxt_dir under the same file name.
txt_dir = './txt/'
maintxt_dir = './maintxt/'

# Rebound to the current image's path on every loop iteration.
image_path = ''

os.makedirs(maintxt_dir, exist_ok=True)

# Collect the JPEG images to process. os.listdir() returns entries in
# arbitrary order, so sort them to make runs reproducible, and keep regular
# files only so a directory that happens to be named "*.jpg" is not opened
# as an image.
image_files = sorted(
    name
    for name in os.listdir(image_dir)
    if name.lower().endswith(('.jpg', '.jpeg'))
    and os.path.isfile(os.path.join(image_dir, name))
)

total_files = len(image_files)
start_time = time.time()
|
|
|
|
def _format_eta(seconds):
    """Format a non-negative duration in seconds as zero-padded HH:MM:SS."""
    hours, rest = divmod(int(seconds), 3600)
    minutes, secs = divmod(rest, 60)
    return f'{hours:02d}:{minutes:02d}:{secs:02d}'


def _request_caption(base64_image, tags):
    """Request a caption for one image from the chat-completions endpoint.

    Args:
        base64_image: The JPEG file contents as base64 text.
        tags: Tag text for the image; inserted verbatim into the prompt.

    Returns:
        The text of the first choice with leading whitespace removed, or an
        empty string when the response carries no text content.
    """
    chat_response = client.chat.completions.create(
        # Use the model id discovered from the endpoint rather than a
        # hard-coded checkpoint path, so the script follows whatever model
        # the server is actually serving.
        model=model_type,
        messages=[{
            "role": "user",
            "content": [
                # The prompt wording is kept exactly as it was.
                # NOTE(review): a fine-tuned model may depend on this exact
                # text - confirm before rewording.
                {"type": "text", "text": f"Make a caption that describe this image. Here is the tags for this image: {tags}"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}"
                    },
                },
            ],
        }],
        # Extra request-body fields beyond the standard API parameters.
        # NOTE(review): these look like vLLM sampling options (beam search
        # with best_of=5) - confirm against the server in use.
        extra_body={'repetition_penalty': 1.05, 'top_k': -1, 'top_p': 1, 'temperature': 0, 'use_beam_search': True, 'best_of': 5},
    )
    content = chat_response.choices[0].message.content
    # message.content may be None; treat that as an empty caption instead of
    # crashing on .lstrip().
    return (content or '').lstrip()


total_elapsed_time = 0  # seconds spent waiting on caption requests
processed_files = 0     # images that were actually captioned

# The context manager closes the bar even if an iteration raises.
with tqdm(total=total_files, unit='file', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]') as progress_bar:
    for handled, image_file in enumerate(image_files, start=1):
        image_path = os.path.join(image_dir, image_file)
        stem = os.path.splitext(image_file)[0]
        txt_file = os.path.join(txt_dir, stem + '.txt')
        output_file = os.path.join(maintxt_dir, stem + '.txt')

        # Read the tags as UTF-8, matching the encoding used for the output
        # file. A missing tag file skips this image instead of aborting the
        # whole batch.
        try:
            with open(txt_file, 'r', encoding='utf-8') as f:
                tags = f.read().strip()
        except FileNotFoundError:
            print(f"\n\nFile {image_file}\nSkipped: tag file not found: {txt_file}", file=sys.stderr)
            progress_bar.update(1)
            continue

        base64_image = encode_image(image_path)

        # Only the request itself is timed; file reading/encoding is excluded.
        step_start_time = time.time()
        content = _request_caption(base64_image, tags)
        step_time = time.time() - step_start_time

        total_elapsed_time += step_time
        processed_files += 1

        # ETA = average request time so far * number of images still to go.
        remaining_time = (total_elapsed_time / processed_files) * (total_files - handled)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)

        print(f"\n\nFile {image_file}\nProcessing time: {step_time:.2f} seconds\n{content}")
        print(f"Response saved to file: {output_file}")

        progress_bar.update(1)
        progress_bar.set_postfix(remaining=_format_eta(remaining_time), refresh=True)

print("All images processed.")
print(f"Total time: {time.time() - start_time:.2f} seconds")