Upload 2 files
Browse files- LICENSE +21 -0
- phi_captioning_example.py +86 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) Microsoft Corporation.
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
phi_captioning_example.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

# Pin inference to a single GPU; must be set before torch initializes CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

from swift.tuners import Swift  # ms-swift toolkit for fine-tuning and inference


from swift.llm import (
    get_model_tokenizer, get_template, inference, ModelType,
    get_default_template_type, inference_stream
)
from swift.utils import seed_everything
import torch
from tqdm import tqdm
import time

# --- Model setup ------------------------------------------------------------
model_type = ModelType.phi3_vision_128k_instruct  # model type
template_type = get_default_template_type(model_type)
print(f'template_type: {template_type}')

model_path = "./phi3-1476"  # LoRA adapter path (untested with a merged checkpoint)
model, tokenizer = get_model_tokenizer(model_type, torch.bfloat16, model_kwargs={'device_map': 'auto'})
# Generation parameters. Greedy decoding (do_sample=False) empirically gives
# better captions here than sampling.
model.generation_config.max_new_tokens = 1256
model.generation_config.do_sample = False
#model.generation_config.top_p = 0.7
#model.generation_config.temperature = 0.3
model = Swift.from_pretrained(model, model_path, "lora", inference_mode=True)
template = get_template(template_type, tokenizer)
#seed_everything(6321)

# --- Paths ------------------------------------------------------------------
image_dir = './images/'     # path to images
txt_dir = './tags/'         # path to txt files with tags (from danbooru or WD_Tagger)
maintxt_dir = './maintxt/'  # path for resulting natural-language captions

# Fix: the output directory may not exist yet; the original crashed on the
# first open() in that case.
os.makedirs(maintxt_dir, exist_ok=True)

# Collect images. Generalized from lowercase '.jpg' only to the common
# extensions, case-insensitively.
image_extensions = ('.jpg', '.jpeg', '.png', '.webp')
image_files = [f for f in os.listdir(image_dir)
               if f.lower().endswith(image_extensions)]

total_files = len(image_files)
start_time = time.time()

progress_bar = tqdm(total=total_files, unit='file', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}{postfix}]')
total_elapsed_time = 0
processed_files = 0

# --- Main captioning cycle --------------------------------------------------
for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)

    # Guard clauses instead of nested ifs; fix: the progress bar now advances
    # for skipped files too, so the count and ETA stay honest.
    if not os.path.exists(image_path):
        print(f"Image {image_file} not found.")
        progress_bar.update(1)
        continue

    txt_file = os.path.splitext(image_file)[0] + '.txt'
    txt_path = os.path.join(txt_dir, txt_file)

    if not os.path.exists(txt_path):
        print(f"File {txt_file} doesn't exist.")
        progress_bar.update(1)
        continue

    with open(txt_path, 'r', encoding='utf-8') as f:
        tags = f.read().strip()

    # Prompt with the image reference and the pre-extracted tag list.
    text = f'<img>{image_path}</img> Make a caption that describe this image. Here is the tags describing image: {tags}\n Find the relevant character\'s names in the tags and use it.'
    print(text)

    step_start_time = time.time()
    # Fix: the original passed do_sample=True together with temperature=0,
    # which is self-contradictory (and rejected by newer transformers) and
    # contradicted the greedy generation_config set above. Use greedy decoding.
    response, history = inference(model, template, text, do_sample=False, repetition_penalty=1.05)
    step_time = time.time() - step_start_time
    total_elapsed_time += step_time
    processed_files += 1

    # Rough ETA from the mean per-file time so far.
    remaining_time = (total_elapsed_time / processed_files) * (total_files - processed_files)
    remaining_hours = int(remaining_time // 3600)
    remaining_minutes = int((remaining_time % 3600) // 60)
    remaining_seconds = int(remaining_time % 60)
    # Fix: the original posted a bare '\n' as the postfix, discarding the
    # ETA it had just computed.
    progress_bar.set_postfix(
        remaining=f'{remaining_hours:02d}:{remaining_minutes:02d}:{remaining_seconds:02d}',
        refresh=False)

    print(f"\n\n\nFile {image_file}\nConsumed time: {step_time:.2f} s\n{response}")

    # Build the output file name for the caption (same stem as the image).
    output_file = os.path.splitext(image_file)[0] + '.txt'
    output_path = os.path.join(maintxt_dir, output_file)

    # Write the caption to the output file.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(response)

    print(f"Caption saved in file: {output_file} \n")
    progress_bar.update(1)

progress_bar.close()
|