Desm0nt's picture
Upload new_captioner.py
bf4ab95 verified
raw
history blame
3.76 kB
import base64
import requests
import os
from openai import OpenAI
from tqdm import tqdm
import time
import sys
# Check that a command-line argument was supplied.
if len(sys.argv) < 2:
    print("Please, provide the path to image folder.")
    sys.exit(1)

# Get the path to image dir from command line.
image_dir = sys.argv[1]

# Fail fast on a bad path: without this check an invalid folder would only
# surface as a raw traceback after the server has already been contacted.
if not os.path.isdir(image_dir):
    print(f"Image folder not found: {image_dir}")
    sys.exit(1)

# Connection settings for an OpenAI-compatible endpoint on localhost.
# NOTE(review): "EMPTY" looks like a placeholder key -- presumably the local
# server does not check it; confirm before pointing this at another host.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

# Report the id of the first model the server lists.
available_models = client.models.list().data
if not available_models:
    # Indexing an empty list would raise IndexError; give a readable message.
    print(f"No models are served at {openai_api_base}.")
    sys.exit(1)
model_type = available_models[0].id
print(f'model_type: {model_type}')
# Function to encode the image
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode and read in full; the base64 bytes are
    then decoded as UTF-8 so the result is a ``str`` rather than ``bytes``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
# Batch captioning: for every JPEG in `image_dir`, read its tag list from
# `txt_dir`, send the tags and the image to the chat-completions endpoint,
# and write the returned caption to `maintxt_dir` under the same base name.

# Directories
# dir with tags captions from wd tagger
txt_dir = './txt/'
# dir with result captions
maintxt_dir = './maintxt/'
image_path = ''

# Ensure the output directory exists
os.makedirs(maintxt_dir, exist_ok=True)

# Get list of all JPEG images in the directory
image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg'))]
total_files = len(image_files)

start_time = time.time()
total_elapsed_time = 0
processed_files = 0
skipped_files = 0

# The context manager closes the bar even if an iteration raises.
with tqdm(total=total_files, unit='file', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]') as progress_bar:
    # Process all images in the image directory
    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        base_name = os.path.splitext(image_file)[0]
        txt_file = os.path.join(txt_dir, base_name + '.txt')
        output_file = os.path.join(maintxt_dir, base_name + '.txt')

        # Read tags from the corresponding txt file. The encoding is explicit
        # so it matches the UTF-8 used for the output file on every platform.
        try:
            with open(txt_file, 'r', encoding='utf-8') as f:
                tags = f.read().strip()
        except FileNotFoundError:
            # One image without a tag file should not abort the whole batch.
            tqdm.write(f"\n\nFile {image_file}\nTags file not found: {txt_file} (skipped)")
            skipped_files += 1
            progress_bar.update(1)
            continue

        base64_image = encode_image(image_path)

        step_start_time = time.time()
        # NOTE(review): the model name is fixed here, while the id reported by
        # the server is stored in `model_type` at startup and only printed --
        # confirm the two are meant to be the same.
        chat_response = client.chat.completions.create(
            model="./phi3_v14_800-merged",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Make a caption that describe this image. Here is the tags for this image: {tags}"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        },
                    },
                ],
            }],
            extra_body={'repetition_penalty': 1.05, 'top_k': -1, 'top_p': 1, 'temperature': 0, 'use_beam_search': True, 'best_of': 5},
        )
        step_end_time = time.time()

        step_time = step_end_time - step_start_time
        total_elapsed_time += step_time
        # Average request time so far multiplied by the number of images
        # still waiting (the current one and any skipped ones are excluded).
        files_left = total_files - processed_files - skipped_files - 1
        remaining_time = (total_elapsed_time / (processed_files + 1)) * files_left

        # Convert remaining time to hours, minutes and seconds
        remaining_hours = int(remaining_time // 3600)
        remaining_minutes = int((remaining_time % 3600) // 60)
        remaining_seconds = int(remaining_time % 60)

        # Extract the content from the response. `content` can be None, so
        # fall back to an empty string before stripping leading whitespace.
        content = (chat_response.choices[0].message.content or '').lstrip()

        # Write the content to the output file
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)

        # tqdm.write prints without corrupting the active progress bar.
        tqdm.write(f"\n\nFile {image_file}\nProcessing time: {step_time:.2f} seconds\n{content}")
        tqdm.write(f"Response saved to file: {output_file}")

        processed_files += 1
        progress_bar.update(1)
        progress_bar.set_postfix(remaining=f'{remaining_hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}', refresh=True)

print("All images processed.")
if skipped_files:
    print(f"Skipped {skipped_files} image(s) without a tags file.")
print(f"Total time: {time.time() - start_time:.2f} seconds")