DucHaiten
/

IMageDucHaiten

ONNX

Safetensors

English

image-processing

deep-learning

Model card · Files and versions · Community

DucHaiten committed on Oct 8, 2024

Commit

3734881

verified ·

1 Parent(s): 9f86577

Update image_to_caption.py

Browse files

Files changed (1) hide show

image_to_caption.py +72 -42

image_to_caption.py CHANGED Viewed

@@ -10,6 +10,7 @@ from transformers import AutoModelForCausalLM, LlamaTokenizer
 import json
 import traceback
 import math
 torch.set_grad_enabled(False)
@@ -125,7 +126,7 @@ def update_and_save_config():
         'temperature': temperature_var.get(),
         'top_k': top_k_var.get(),
         'top_p': float(top_p_value) if top_p_value is not None else None,
-        'bit_precision': bit_precision_var.get(),  # Hợp nhất cả precision và bit
         'thread_count': thread_count_var.get(),
         'batch_size': batch_size_var.get(),
         'prepend_text': prepend_text_var.get(),
@@ -150,7 +151,7 @@ def load_config_from_json():
                 top_k_var.set(config_entry.get('top_k', 50))
                 top_p_var.set(config_entry.get('top_p', 0.95))
                 bit_precision_var.set(config_entry.get('bit_precision', 8))  # Tải bit_precision
-                thread_count_var.set(config_entry.get('thread_count', 4))
                 batch_size_var.set(config_entry.get('batch_size', 1))
                 prepend_text_var.set(config_entry.get('prepend_text', ''))
                 append_text_var.set(config_entry.get('append_text', ''))
@@ -290,7 +291,7 @@ def open_image_to_caption():
     temperature_var = tk.DoubleVar(value=1.0)
     top_k_var = tk.IntVar(value=50)
     top_p_var = tk.DoubleVar(value=0.95)
-    thread_count_var = tk.IntVar(value=4)
     precision_var = tk.IntVar(value=1)
     batch_size_var = tk.IntVar(value=1)
     prepend_text_var = tk.StringVar()
@@ -482,7 +483,7 @@ def generate_caption(image_path, save_directory, q):
         load_model()
         filename = os.path.splitext(os.path.basename(image_path))[0]
-        caption_file_path = os.path.join(save_directory, f"{filename}.txt")
         # Kiểm tra các lựa chọn của người dùng
         if os.path.exists(caption_file_path):
@@ -497,10 +498,21 @@ def generate_caption(image_path, save_directory, q):
         else:
             existing_caption = ""
         image = PILImage.open(image_path).convert('RGB')
         if not isinstance(image, PILImage.Image):
             raise ValueError(f"Expected image to be of type PIL.Image.Image, but got {type(image)}")
         inputs = model.build_conversation_input_ids(
             tokenizer,
             query=prompt_var.get(),
@@ -510,14 +522,14 @@ def generate_caption(image_path, save_directory, q):
         # Điều chỉnh dtype dựa trên bit_precision
         if bit_precision_var.get() == 32:
-            image_tensor = inputs['images'][0].to('cuda').to(torch.float32)
         else:
-            image_tensor = inputs['images'][0].to('cuda').to(torch.float16)
         inputs = {
-            'input_ids': inputs['input_ids'].unsqueeze(0).to('cuda'),
-            'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to('cuda'),
-            'attention_mask': inputs['attention_mask'].unsqueeze(0).to('cuda'),
             'images': [[image_tensor]],
         }
@@ -530,7 +542,8 @@ def generate_caption(image_path, save_directory, q):
             "num_beams": precision_var.get()
         }
-        with torch.no_grad():
             outputs = model.generate(**inputs, **gen_kwargs)
             outputs = outputs[:, inputs['input_ids'].shape[1]:]
             new_caption = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -541,7 +554,7 @@ def generate_caption(image_path, save_directory, q):
             file.write(final_caption)
         q.put(image_path)
-        torch.cuda.empty_cache()
     except torch.cuda.OutOfMemoryError as e:
         torch.cuda.empty_cache()
         error_message = f"CUDA OutOfMemoryError: {traceback.format_exc()}"
@@ -553,45 +566,55 @@ def generate_caption(image_path, save_directory, q):
         print(error_message)
         q.put(error_message)
         error_messages.append(error_message)
 def worker(save_directory, num_threads, batch_size):
     try:
         progress.set(0)
-        threads = []
         num_batches = math.ceil(len(selected_files) / batch_size)
-        batch_size_per_thread = max(1, batch_size // num_threads)  # Số ảnh mỗi luồng xử lý trong một batch
-        for batch_index in range(num_batches):
-            if stop_processing:
-                break
-            start_index = batch_index * batch_size
-            end_index = min(start_index + batch_size, len(selected_files))
-            batch = selected_files[start_index:end_index]
-            # Chia ảnh trong batch cho các luồng
-            for i in range(0, len(batch), batch_size_per_thread):
-                thread_batch = batch[i:i + batch_size_per_thread]
-                thread = threading.Thread(target=generate_captions_for_batch, args=(thread_batch, save_directory, q))
-                threads.append(thread)
-                thread.start()
-            # Đợi các luồng trong batch hiện tại hoàn thành
-            for thread in threads:
-                thread.join()
-            threads.clear()
         q.put(None)
     except Exception as e:
         if not stop_processing:
-            q.put(e)
 def generate_captions_for_batch(batch, save_directory, q):
     for image_path in batch:
         generate_caption(image_path, save_directory, q)
 def update_progress():
     try:
         completed = 0
@@ -758,7 +781,8 @@ def update_image_preview(content_canvas):
             file_label = tk.Label(caption_frame, text=os.path.basename(file_path), font=('Helvetica', 12), wraplength=300, justify="left")
             file_label.grid(row=i*2, column=1, padx=5, pady=5, sticky="nsew")
-            caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
             if os.path.exists(caption_file):
                 with open(caption_file, 'r', encoding='utf-8') as file:
                     caption_text = file.read()
@@ -817,7 +841,8 @@ def go_to_page(page_number, content_canvas):
         messagebox.showerror("Invalid Input", "Please enter a valid integer for the page number.")
 def save_caption(file_path, caption_text):
-    output_path = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
     try:
         with open(output_path, 'w', encoding='utf-8') as file:
             file.write(caption_text.strip())
@@ -840,7 +865,8 @@ def search_captions():
     update_image_preview(content_canvas)
 def search_score(file_path, search_term):
-    caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
     try:
         if os.path.exists(caption_file):
            with open(caption_file, 'r', encoding='utf-8') as file:
@@ -866,7 +892,8 @@ def add_to_captions(position):
         return
     for file_path in selected_files:
-        caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
         if os.path.exists(caption_file):
             with open(caption_file, 'r+', encoding='utf-8') as file:
                 caption_text = file.read()
@@ -889,7 +916,8 @@ def delete_keyword_from_captions():
         return
     for file_path in selected_files:
-        caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
         if os.path.exists(caption_file):
             with open(caption_file, 'r+', encoding='utf-8') as file:
                 caption_text = file.read().lower().replace(keyword, "")
@@ -910,7 +938,8 @@ def delete_images_with_keyword():
     files_to_delete = []
     for file_path in selected_files:
-        caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
         if os.path.exists(caption_file):
             with open(caption_file, 'r', encoding='utf-8') as file:
                 caption_text = file.read().lower()
@@ -920,7 +949,8 @@ def delete_images_with_keyword():
     for file_path in files_to_delete:
         try:
             os.remove(file_path)
-            caption_file = os.path.join(save_directory, f"{os.path.basename(file_path)}_caption.txt")
             if os.path.exists(caption_file):
                os.remove(caption_file)
         except Exception as e:

 import json
 import traceback
 import math
+from concurrent.futures import ThreadPoolExecutor, as_completed
 torch.set_grad_enabled(False)
         'temperature': temperature_var.get(),
         'top_k': top_k_var.get(),
         'top_p': float(top_p_value) if top_p_value is not None else None,
+        'bit_precision': bit_precision_var.get(),  # Tải bit_precision
         'thread_count': thread_count_var.get(),
         'batch_size': batch_size_var.get(),
         'prepend_text': prepend_text_var.get(),
                 top_k_var.set(config_entry.get('top_k', 50))
                 top_p_var.set(config_entry.get('top_p', 0.95))
                 bit_precision_var.set(config_entry.get('bit_precision', 8))  # Tải bit_precision
+                thread_count_var.set(config_entry.get('thread_count', 1))
                 batch_size_var.set(config_entry.get('batch_size', 1))
                 prepend_text_var.set(config_entry.get('prepend_text', ''))
                 append_text_var.set(config_entry.get('append_text', ''))
     temperature_var = tk.DoubleVar(value=1.0)
     top_k_var = tk.IntVar(value=50)
     top_p_var = tk.DoubleVar(value=0.95)
+    thread_count_var = tk.IntVar(value=1)
     precision_var = tk.IntVar(value=1)
     batch_size_var = tk.IntVar(value=1)
     prepend_text_var = tk.StringVar()
         load_model()
         filename = os.path.splitext(os.path.basename(image_path))[0]
+        caption_file_path = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
         # Kiểm tra các lựa chọn của người dùng
         if os.path.exists(caption_file_path):
         else:
             existing_caption = ""
+        # Xử lý ảnh trên CPU trước khi chuyển lên GPU
         image = PILImage.open(image_path).convert('RGB')
         if not isinstance(image, PILImage.Image):
             raise ValueError(f"Expected image to be of type PIL.Image.Image, but got {type(image)}")
+        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        # Kiểm tra nếu bit_precision là 4 hoặc 8
+        if bit_precision_var.get() in [4, 8]:
+            # Không sử dụng `.to()` cho mô hình khi đang ở chế độ 4-bit hoặc 8-bit
+            pass
+        else:
+            model.to(device)
+        # Xử lý dtype và inputs tương ứng
         inputs = model.build_conversation_input_ids(
             tokenizer,
             query=prompt_var.get(),
         # Điều chỉnh dtype dựa trên bit_precision
         if bit_precision_var.get() == 32:
+            image_tensor = inputs['images'][0].to(device).to(torch.float32)
         else:
+            image_tensor = inputs['images'][0].to(device).to(torch.float16)
         inputs = {
+            'input_ids': inputs['input_ids'].unsqueeze(0).to(device),
+            'token_type_ids': inputs['token_type_ids'].unsqueeze(0).to(device),
+            'attention_mask': inputs['attention_mask'].unsqueeze(0).to(device),
             'images': [[image_tensor]],
         }
             "num_beams": precision_var.get()
         }
+        # Sử dụng torch.amp.autocast để cải thiện hiệu suất trên GPU
+        with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.float16 if bit_precision_var.get() != 32 else torch.float32):
             outputs = model.generate(**inputs, **gen_kwargs)
             outputs = outputs[:, inputs['input_ids'].shape[1]:]
             new_caption = tokenizer.decode(outputs[0], skip_special_tokens=True)
             file.write(final_caption)
         q.put(image_path)
     except torch.cuda.OutOfMemoryError as e:
         torch.cuda.empty_cache()
         error_message = f"CUDA OutOfMemoryError: {traceback.format_exc()}"
         print(error_message)
         q.put(error_message)
         error_messages.append(error_message)
+    finally:
+        if stop_processing or bit_precision_var.get() not in [4, 8]:
+            model.to('cpu')
+            torch.cuda.empty_cache()
 def worker(save_directory, num_threads, batch_size):
     try:
         progress.set(0)
         num_batches = math.ceil(len(selected_files) / batch_size)
+        batch_size_per_thread = max(1, batch_size // num_threads)
+        def process_batch(thread_batch):
+            generate_captions_for_batch(thread_batch, save_directory, q)
+        with ThreadPoolExecutor(max_workers=num_threads) as executor:
+            for batch_index in range(num_batches):
+                if stop_processing:
+                    break
+                start_index = batch_index * batch_size
+                end_index = min(start_index + batch_size, len(selected_files))
+                batch = selected_files[start_index:end_index]
+                futures = []
+                for i in range(0, len(batch), batch_size_per_thread):
+                    thread_batch = batch[i:i + batch_size_per_thread]
+                    futures.append(executor.submit(process_batch, thread_batch))
+                # Đợi các công việc trong batch hiện tại hoàn thành
+                for future in as_completed(futures):
+                    try:
+                        future.result()  # Xử lý lỗi nếu có xảy ra trong quá trình xử lý batch
+                    except Exception as e:
+                        q.put(f"Error processing batch: {e}")
+                        if stop_processing:
+                            break
         q.put(None)
     except Exception as e:
         if not stop_processing:
+            q.put(f"Worker encountered an error: {e}")
 def generate_captions_for_batch(batch, save_directory, q):
     for image_path in batch:
         generate_caption(image_path, save_directory, q)
 def update_progress():
     try:
         completed = 0
             file_label = tk.Label(caption_frame, text=os.path.basename(file_path), font=('Helvetica', 12), wraplength=300, justify="left")
             file_label.grid(row=i*2, column=1, padx=5, pady=5, sticky="nsew")
+            filename = os.path.splitext(os.path.basename(file_path))[0]
+            caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
             if os.path.exists(caption_file):
                 with open(caption_file, 'r', encoding='utf-8') as file:
                     caption_text = file.read()
         messagebox.showerror("Invalid Input", "Please enter a valid integer for the page number.")
 def save_caption(file_path, caption_text):
+    filename = os.path.splitext(os.path.basename(file_path))[0]
+    output_path = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
     try:
         with open(output_path, 'w', encoding='utf-8') as file:
             file.write(caption_text.strip())
     update_image_preview(content_canvas)
 def search_score(file_path, search_term):
+    filename = os.path.splitext(os.path.basename(file_path))[0]
+    caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
     try:
         if os.path.exists(caption_file):
            with open(caption_file, 'r', encoding='utf-8') as file:
         return
     for file_path in selected_files:
+        filename = os.path.splitext(os.path.basename(file_path))[0]
+        caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
         if os.path.exists(caption_file):
             with open(caption_file, 'r+', encoding='utf-8') as file:
                 caption_text = file.read()
         return
     for file_path in selected_files:
+        filename = os.path.splitext(os.path.basename(file_path))[0]
+        caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
         if os.path.exists(caption_file):
             with open(caption_file, 'r+', encoding='utf-8') as file:
                 caption_text = file.read().lower().replace(keyword, "")
     files_to_delete = []
     for file_path in selected_files:
+        filename = os.path.splitext(os.path.basename(file_path))[0]
+        caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
         if os.path.exists(caption_file):
             with open(caption_file, 'r', encoding='utf-8') as file:
                 caption_text = file.read().lower()
     for file_path in files_to_delete:
         try:
             os.remove(file_path)
+            filename = os.path.splitext(os.path.basename(file_path))[0]
+            caption_file = os.path.join(save_directory, f"{filename}.txt")  # Thay đổi tên tệp caption
             if os.path.exists(caption_file):
                os.remove(caption_file)
         except Exception as e: