import os
import gradio as gr
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from transformers import AutoProcessor, AutoModelForCausalLM
#import spaces
import re
from PIL import Image
import io
import json
import logging
# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
import subprocess
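# Install flash-attn at runtime (a common Hugging Face Spaces workaround);
# the env var skips its CUDA kernel build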
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
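# Load the Florence-2 captioner on CPU; trust_remote_code is required for its custom modeling code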
model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to("cpu").eval()
processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
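# Download the GGUF checkpoint (overridable via the REPO_ID / MODEL_FILE env vars)
# and load it with llama-cpp-python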
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "mradermacher/DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored-GGUF"),
        filename=os.environ.get("MODEL_FILE", "DarkIdol-Llama-3.1-8B-Instruct-1.0-Uncensored.Q4_K_M.gguf"),
    ),
    n_ctx=2048,
    n_gpu_layers=100,  # change n_gpu_layers if you have more or less VRAM
    chat_format="llama-3",
)
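# Caption the first uploaded file with Florence-2 using its <DESCRIPTION> task prompt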
def run_pic(image):
    image = Image.open(image[0])
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."
    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")
    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text, task=task_prompt, image_size=(image.width, image.height)
    )
    return parsed_answer["<DESCRIPTION>"]
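# Streaming handler for gr.ChatInterface: caption uploaded images, otherwise chat with the LLM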
def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    in_text = message["text"]
    in_files = message["files"]
    if in_files:
        try:
            picoutput = f"sends a picture that contains the following: {run_pic(in_files)}"
            yield picoutput
        except Exception as e:
            logger.error(f"Captioning failed: {e}")
            yield "only picture"
    else:
        temp = ""
        # Create system_prompt as a dictionary
        system_prompt = {"role": "system", "content": system_message}
        # Create history_prompt as a list of dictionaries
        history_prompt = []
        for interaction in history:
            user_part = {"role": "user", "content": str(interaction[0])}
            assistant_part = {"role": "assistant", "content": str(interaction[1])}
            history_prompt.extend([user_part, assistant_part])
        # Create user_input_part as a dictionary
        user_input_part = {"role": "user", "content": str(in_text)}
        # Construct input_prompt as a list of dictionaries
        input_prompt = [system_prompt] + history_prompt + [user_input_part]
        logger.debug(f"Input Prompt: {input_prompt}")
        output = llm.create_chat_completion(
            input_prompt,
            temperature=temperature,
            top_p=top_p,
            top_k=40,
            repeat_penalty=1.1,
            max_tokens=max_tokens,
            stop=[
                "<|prompter|>",
                "<|endoftext|>",
                "<|endoftext|> \n",
                "ASSISTANT:",
                "USER:",
                "SYSTEM:",
                "<|start_header_id|>",
                "<|eot_id|>",
                "<im_end>",
                "<|im_end|>",
            ],
            stream=True,
        )
        for out in output:
            stream = copy.deepcopy(out)
            logger.debug(f"Stream: {stream}")
            if "delta" in stream["choices"][0] and "content" in stream["choices"][0]["delta"]:
                temp += stream["choices"][0]["delta"]["content"]
                yield temp
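# Multimodal chat UI; the additional inputs are passed to generate_text after message and history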
demo = gr.ChatInterface(
    generate_text,
    multimodal=True,
    title="Florence-DarkIdol",
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(
            value="you are Nagano Natsuki,name:Nagano Natsuki Gender: Female Age: 25 years old Occupation: Adult Video (AV) Actress & Model Personality: Cheerful, optimistic, sometimes naughty; skilled at interacting with audiences.Interests: Drinking, traveling, photography, singing, dancing Expertise: Performing in sexual scenes; well-versed in Japanese language and culture; familiar with various sex techniques. Special Identity Attributes: Renowned AV actress in Japan; nicknamed 'Talent Magician' and 'Princess of Lust'; has a large number of devoted fans. Skills: Acting in pornographic scenes, singing, dancing, photography, swimming; skilled at interacting with audiences.Equipment: Various provocative clothing and shoes; high-quality photography equipment",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.5, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()