File size: 7,401 Bytes
647c1be 3aa52df d46f971 18088b4 9ec6191 3aa52df bd04de1 d90a660 3aa52df d90a660 c1bc1fb bd04de1 3aa52df d46f971 3aa52df bd04de1 d90a660 9ec6191 3aa52df d90a660 3aa52df bd04de1 3aa52df 0f91f48 3aa52df 9ec6191 3aa52df 0f91f48 3aa52df d90a660 3aa52df 18088b4 3aa52df d90a660 3aa52df 9ec6191 3aa52df 9ec6191 3aa52df d46f971 3aa52df d90a660 3aa52df d90a660 3aa52df 9ec6191 3aa52df 18088b4 3aa52df 9ec6191 3aa52df bd04de1 3aa52df c1bc1fb bd04de1 3aa52df 0f91f48 bd04de1 3aa52df bd04de1 3aa52df d90a660 3aa52df bd04de1 3aa52df d46f971 3aa52df bd04de1 3aa52df bd04de1 c1bc1fb bd04de1 3aa52df bd04de1 3aa52df d46f971 3aa52df bd04de1 9ec6191 3aa52df 9ec6191 3aa52df |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
import io
import os

import requests
from tqdm import tqdm
from datasets import load_dataset
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, Input, Concatenate, Embedding, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import joblib
from PIL import UnidentifiedImageError, Image
import gradio as gr
# Optimized Constants
# Passed as `maxlen` to pad_sequences and used as the width of the model's
# text input.
MAX_TEXT_LENGTH = 100
# Output dimension of the Embedding layer in the text branch.
EMBEDDING_DIM = 50
# Side length, in pixels, of the square images fed to the network.
IMAGE_SIZE = 160
# Mini-batch size passed to model.fit during training.
BATCH_SIZE = 64
# Store model examples
# Maps a model name (the dataset's 'Model' field) to the URL of one example
# image. Filled by load_and_preprocess_data and read by get_recommendations.
model_examples = {}
def load_and_preprocess_data(subset_size=20000):
    """Load a shuffled, NSFW-filtered subset of the Civitai dataset.

    Side effect: for every model name in the subset, the URL of the first
    image seen for it is recorded in the module-level ``model_examples``
    dict (existing entries are left untouched).

    Args:
        subset_size: Maximum number of rows sampled from the ``train`` split
            *before* NSFW rows are removed, so the returned subset may be
            smaller than this.

    Returns:
        The filtered dataset subset.
    """
    # Load dataset
    dataset = load_dataset("thefcraft/civitai-stable-diffusion-337k")
    train_split = dataset['train']
    # Clamp the request so that asking for more rows than the split holds
    # does not make select() index past the end.
    row_count = min(subset_size, len(train_split))
    dataset_subset = train_split.shuffle(seed=42).select(range(row_count))
    # Filter out NSFW content
    dataset_subset = dataset_subset.filter(lambda x: not x['nsfw'])
    # Store example images for each model (first URL seen wins)
    for item in dataset_subset:
        model_examples.setdefault(item['Model'], item['url'])
    return dataset_subset
def process_text_data(dataset_subset):
    """Build the padded token matrix used as the model's text input.

    Every sample is given the same placeholder text, "default prompt"; the
    dataset's own prompt fields are not read here.

    Returns:
        A ``(padded_sequences, tokenizer)`` tuple, where the sequences are
        padded to ``MAX_TEXT_LENGTH`` and the tokenizer was fitted on the
        placeholder texts.
    """
    placeholder_texts = ["default prompt" for _sample in dataset_subset]
    text_tokenizer = Tokenizer(num_words=10000)
    text_tokenizer.fit_on_texts(placeholder_texts)
    padded = pad_sequences(
        text_tokenizer.texts_to_sequences(placeholder_texts),
        maxlen=MAX_TEXT_LENGTH,
    )
    return padded, text_tokenizer
def download_image(url):
    """Fetch *url* and return it as a PIL image, or ``None`` on failure.

    Best-effort helper: network errors, non-2xx responses and payloads that
    Pillow cannot decode all yield ``None`` instead of raising.
    """
    try:
        response = requests.get(url, timeout=5)
        response.raise_for_status()
        # Decode the body that was just downloaded rather than issuing a
        # second request (which previously had no timeout or status check).
        img = Image.open(io.BytesIO(response.content))
        # Image.open is lazy; force decoding here so a truncated or corrupt
        # payload is reported as None rather than failing later in the UI.
        img.load()
        return img
    except (
        requests.RequestException,
        UnidentifiedImageError,
        Image.DecompressionBombError,
        OSError,
        ValueError,
    ):
        return None
def process_image_data(dataset_subset):
    """Download each sample's image and convert it to a ResNet50 input array.

    Every image is written to the ``civitai_images`` directory (created if
    missing), reloaded at ``IMAGE_SIZE`` x ``IMAGE_SIZE`` and passed through
    ``preprocess_input``. Samples that cannot be downloaded or decoded, or
    whose response is not served as an image, are skipped.

    Returns:
        A ``(images, valid_indices)`` tuple: the stacked image arrays and the
        positions within ``dataset_subset`` of the samples that were kept,
        in the same order.
    """
    image_dir = 'civitai_images'
    os.makedirs(image_dir, exist_ok=True)
    image_data = []
    valid_indices = []
    for idx, sample in enumerate(tqdm(dataset_subset)):
        try:
            # Kept inside the try so a row with a missing or malformed URL
            # is skipped like any other bad sample instead of aborting.
            img_url = sample['url']
            img_path = os.path.join(image_dir, os.path.basename(img_url))
            response = requests.get(img_url, timeout=5)
            response.raise_for_status()
            # A response without a Content-Type header is treated as
            # "not an image" explicitly.
            if 'image' not in response.headers.get('Content-Type', ''):
                continue
            with open(img_path, 'wb') as f:
                f.write(response.content)
            img = image.load_img(img_path, target_size=(IMAGE_SIZE, IMAGE_SIZE))
            img_array = preprocess_input(image.img_to_array(img))
        except Exception:
            # Deliberately broad: one bad sample must not stop a long
            # best-effort download pass.
            continue
        image_data.append(img_array)
        valid_indices.append(idx)
    return np.array(image_data), valid_indices
def create_multimodal_model(num_words, num_classes):
    """Assemble the two-branch (image + text) classification network.

    Args:
        num_words: Vocabulary size given to the text Embedding layer.
        num_classes: Number of units in the final softmax layer.

    Returns:
        An uncompiled Keras ``Model`` taking ``[image, text]`` inputs.
    """
    # Image branch: ImageNet ResNet50 with global average pooling; every
    # layer except the last ten is frozen.
    image_input = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    backbone = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    for frozen_layer in backbone.layers[:-10]:
        frozen_layer.trainable = False
    image_features = backbone(image_input)

    # Text branch: embedding -> flatten -> dense projection.
    text_input = Input(shape=(MAX_TEXT_LENGTH,))
    embedded_tokens = Embedding(num_words, EMBEDDING_DIM)(text_input)
    flat_tokens = Flatten()(embedded_tokens)
    text_features = Dense(128, activation='relu')(flat_tokens)

    # Fusion head over the concatenated features.
    merged = Concatenate()([image_features, text_features])
    hidden = Dense(256, activation='relu')(merged)
    class_probabilities = Dense(num_classes, activation='softmax')(hidden)
    return Model(inputs=[image_input, text_input], outputs=class_probabilities)
def train_model():
    """Train the image+text classifier end to end and save its artifacts.

    Loads the dataset subset, builds the text and image inputs, fits the
    model for 3 epochs with a 20% validation split, then writes
    ``multimodal_model.keras``, ``tokenizer.pkl``, ``label_encoder.pkl`` and
    ``model_examples.pkl`` to the current working directory.

    Returns:
        A ``(model, tokenizer, label_encoder)`` tuple.

    Raises:
        ValueError: If no image could be downloaded, leaving nothing to
            train on.
    """
    dataset_subset = load_and_preprocess_data()
    text_data_padded, tokenizer = process_text_data(dataset_subset)
    image_data, valid_indices = process_image_data(dataset_subset)
    if not valid_indices:
        # Fail with a clear message instead of an opaque error further down.
        raise ValueError("No images could be downloaded; nothing to train on.")
    # Keep only the text rows and labels whose image was obtained.
    text_data_padded = text_data_padded[valid_indices]
    model_names = [dataset_subset[i]['Model'] for i in valid_indices]
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(model_names)
    model = create_multimodal_model(
        # Take the vocabulary size from the tokenizer so the embedding table
        # cannot drift from the tokenizer's configuration.
        num_words=tokenizer.num_words,
        num_classes=len(label_encoder.classes_)
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    model.fit(
        [image_data, text_data_padded],
        encoded_labels,
        batch_size=BATCH_SIZE,
        epochs=3,
        validation_split=0.2
    )
    model.save('multimodal_model.keras')
    joblib.dump(tokenizer, 'tokenizer.pkl')
    joblib.dump(label_encoder, 'label_encoder.pkl')
    # Save model examples
    joblib.dump(model_examples, 'model_examples.pkl')
    return model, tokenizer, label_encoder
def get_recommendations(image_input, model, tokenizer, label_encoder, top_k=5):
    """Return the best-matching models for an image, with example images.

    The image is resized to ``IMAGE_SIZE`` and paired with the fixed text
    "default prompt" before prediction. Example image URLs are looked up in
    the module-level ``model_examples`` dict; a model with no known example,
    or whose example cannot be downloaded, is left out of the result.

    Args:
        image_input: Image to classify (a PIL image or array-like), or None.
        model: Trained model taking ``[image, text]`` inputs.
        tokenizer: Fitted text tokenizer.
        label_encoder: Encoder mapping class indices back to model names.
        top_k: Maximum number of classes to consider.

    Returns:
        A list of ``(model_name, confidence, example_image)`` tuples ordered
        from most to least confident; empty when there is no input image or
        ``top_k`` is not positive.
    """
    if image_input is None or top_k <= 0:
        # No image (e.g. a submit with nothing uploaded) or nothing asked for.
        return []
    if isinstance(image_input, Image.Image):
        # The network takes exactly three channels; RGBA or grayscale
        # uploads would otherwise not match the model's input shape.
        image_input = image_input.convert('RGB')
    img_array = image.img_to_array(image_input)
    img_array = tf.image.resize(img_array, (IMAGE_SIZE, IMAGE_SIZE))
    img_array = preprocess_input(img_array)
    img_array = np.expand_dims(img_array, axis=0)
    # Use default text input
    text_sequence = tokenizer.texts_to_sequences(["default prompt"])
    text_padded = pad_sequences(text_sequence, maxlen=MAX_TEXT_LENGTH)
    scores = model.predict([img_array, text_padded])[0]
    # Reverse first, then truncate: the `[-top_k:]` form would return every
    # class for top_k == 0 because `[-0:]` is the full slice.
    top_indices = np.argsort(scores)[::-1][:top_k]
    # Decode all labels in a single call instead of once per index.
    top_names = label_encoder.inverse_transform(top_indices)
    recommendations = []
    for idx, model_name in zip(top_indices, top_names):
        example_url = model_examples.get(model_name)
        if example_url is None:
            continue
        example_image = download_image(example_url)
        if example_image is not None:
            recommendations.append((model_name, scores[idx], example_image))
    return recommendations
def create_gradio_interface():
    """Load the saved artifacts and build the Gradio recommendation UI.

    Reads ``multimodal_model.keras``, ``tokenizer.pkl``, ``label_encoder.pkl``
    and ``model_examples.pkl`` from the current working directory.

    Returns:
        A ``gr.Interface`` with one image input, one textbox output and five
        example-image outputs.
    """
    model = tf.keras.models.load_model('multimodal_model.keras')
    # NOTE(security): joblib.load unpickles its input; these files must be
    # the trusted artifacts written by train_model.
    tokenizer = joblib.load('tokenizer.pkl')
    label_encoder = joblib.load('label_encoder.pkl')
    # get_recommendations reads the module-level model_examples dict, which
    # is empty in a process that skipped training. Without restoring it from
    # disk, no recommendation would ever have an example image.
    model_examples.update(joblib.load('model_examples.pkl'))
    num_examples = 5

    def predict(img):
        """Map an uploaded image to the textbox text and example images."""
        if img is None:
            return [""] + [None] * num_examples
        recommendations = get_recommendations(
            img, model, tokenizer, label_encoder, top_k=num_examples
        )
        result_text = "".join(
            f"Model: {model_name}\n" for model_name, _conf, _img in recommendations
        )
        result_images = [example_img for _name, _conf, example_img in recommendations]
        # Gradio expects exactly one value per output component, so pad the
        # unused image slots when fewer recommendations came back.
        result_images += [None] * (num_examples - len(result_images))
        return [result_text] + result_images

    outputs = [gr.Textbox(label="Recommended Models")] + [
        gr.Image(label=f"Example {i+1}") for i in range(num_examples)
    ]
    interface = gr.Interface(
        fn=predict,
        inputs=gr.Image(type="pil", label="Upload Image"),
        outputs=outputs,
        title="AI Model Recommendation System",
        description="Upload an image to get model recommendations with examples"
    )
    return interface
if __name__ == "__main__":
    # Train only when no saved model exists. The interface always reloads
    # its artifacts from disk, so train_model's return value is not needed.
    if not os.path.exists('multimodal_model.keras'):
        print("Training new model...")
        train_model()
        print("Training completed!")
    else:
        print("Loading existing model...")
    interface = create_gradio_interface()
    interface.launch()