diffusers-gallery-bot

Running on CPU Upgrade

App Files Community

radames committed on Oct 20, 2023

Commit

3280cce

•

1 Parent(s): 03258aa

new

Browse files

Files changed (1) hide show

app.py +26 -9

app.py CHANGED Viewed

@@ -40,6 +40,7 @@ CLASSIFIER_URL = (
 )
 ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
 s3 = boto3.client(
     service_name="s3",
@@ -120,12 +121,22 @@ def get_yaml_data(text_content):
             print(exc)
     return {}
-async def find_image_in_model_card(text):
-    image_regex = re.compile(r"https?://\S+(?:png|jpg|jpeg|webp)")
-    urls = re.findall(image_regex, text)
-    if not urls:
         return []
     async with aiohttp.ClientSession() as session:
         tasks = [
             asyncio.ensure_future(upload_resize_image_url(session, image_url))
@@ -188,9 +199,10 @@ async def sync_data():
     with open(DB_FOLDER / "models.json", "w") as f:
         json.dump(all_models, f)
     # with open(DB_FOLDER / "models.json", "r") as f:
-    #     new_models = json.load(f)
     new_models_ids = [model["id"] for model in all_models]
     # get existing models
     with database.get_db() as db:
@@ -212,7 +224,7 @@ async def sync_data():
         print("Parsing model card")
         model_card_data = get_yaml_data(model_card)
         print("Finding images in model card")
-        images = await find_image_in_model_card(model_card)
         classifier = run_classifier(images)
         print(images, classifier)
@@ -257,7 +269,7 @@ async def sync_data():
             print("Parsing model card")
             model_card_data = get_yaml_data(model_card)
             print("Finding images in model card")
-            images = await find_image_in_model_card(model_card)
             classifier = run_classifier(images)
             model_data["images"] = images
             model_data["class"] = classifier
@@ -322,6 +334,7 @@ class Style(str, Enum):
     s3D = "3d"
     realistic = "realistic"
     nsfw = "nsfw"
 @app.get("/api/models")
@@ -344,9 +357,13 @@ def get_page(
         style_query = "json_extract(data, '$.class.3d') > 0.1 AND isNFSW = false"
     elif style == Style.realistic:
         style_query = "json_extract(data, '$.class.real_life') > 0.1 AND isNFSW = false"
     elif style == Style.nsfw:
         style_query = "isNFSW = true"
     with database.get_db() as db:
         cursor = db.cursor()
         cursor.execute(
@@ -359,7 +376,7 @@ def get_page(
                             json_extract(data, '$.class.explicit') > 0.3 OR json_extract(data, '$.class.suggestive') > 0.3 AS isNFSW
                         FROM models
                     ) AS subquery
-                    WHERE (? IS NULL AND likes > 3 OR ? IS NOT NULL)
                     AND {style_query}
                     AND (? IS NULL OR EXISTS (
                             SELECT 1
@@ -368,7 +385,7 @@ def get_page(
                         ))
                     ORDER BY {sort_query}
                     LIMIT {MAX_PAGE_SIZE} OFFSET {(page - 1) * MAX_PAGE_SIZE};
-        """,
             (tag, tag, tag, tag),
         )
         results = cursor.fetchall()

 )
 ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
+BLOCKED_MODELS_REGEX = re.compile(r"(CyberHarem)", re.IGNORECASE)
 s3 = boto3.client(
     service_name="s3",
             print(exc)
     return {}
+async def find_image_in_model_card(text, model_id):
+    base_url = f"https://huggingface.co/{model_id}/resolve/main/"
+    image_regex = re.compile(r"!\[.*\]\((.*?\.(png|jpg|jpeg|gif|bmp|webp))\)|src=\"(.*?\.(png|jpg|jpeg|gif|bmp|webp))\">", re.IGNORECASE)
+    matches = image_regex.findall(text)
+    urls = []
+    for match in matches:
+        for url in match:
+            if url:
+                if not url.startswith("http") and not url.startswith("https"):
+                    url = base_url + url
+                urls.append(url)
+    if len(urls) == 0:
         return []
+    print(urls)
     async with aiohttp.ClientSession() as session:
         tasks = [
             asyncio.ensure_future(upload_resize_image_url(session, image_url))
     with open(DB_FOLDER / "models.json", "w") as f:
         json.dump(all_models, f)
     # with open(DB_FOLDER / "models.json", "r") as f:
+    #     all_models = json.load(f)
     new_models_ids = [model["id"] for model in all_models]
+    new_models_ids = [model_id for model_id in new_models_ids if not re.match(BLOCKED_MODELS_REGEX, model_id)]
     # get existing models
     with database.get_db() as db:
         print("Parsing model card")
         model_card_data = get_yaml_data(model_card)
         print("Finding images in model card")
+        images = await find_image_in_model_card(model_card, model_id)
         classifier = run_classifier(images)
         print(images, classifier)
             print("Parsing model card")
             model_card_data = get_yaml_data(model_card)
             print("Finding images in model card")
+            images = await find_image_in_model_card(model_card, model_id)
             classifier = run_classifier(images)
             model_data["images"] = images
             model_data["class"] = classifier
     s3D = "3d"
     realistic = "realistic"
     nsfw = "nsfw"
+    lora = "lora"
 @app.get("/api/models")
         style_query = "json_extract(data, '$.class.3d') > 0.1 AND isNFSW = false"
     elif style == Style.realistic:
         style_query = "json_extract(data, '$.class.real_life') > 0.1 AND isNFSW = false"
+    elif style == Style.lora:
+        style_query = "json_extract(data, '$.meta.tags') LIKE '%lora%' AND isNFSW = false"
     elif style == Style.nsfw:
         style_query = "isNFSW = true"
     with database.get_db() as db:
         cursor = db.cursor()
         cursor.execute(
                             json_extract(data, '$.class.explicit') > 0.3 OR json_extract(data, '$.class.suggestive') > 0.3 AS isNFSW
                         FROM models
                     ) AS subquery
+                    WHERE (? IS NULL AND likes > 1 OR ? IS NOT NULL)
                     AND {style_query}
                     AND (? IS NULL OR EXISTS (
                             SELECT 1
                         ))
                     ORDER BY {sort_query}
                     LIMIT {MAX_PAGE_SIZE} OFFSET {(page - 1) * MAX_PAGE_SIZE};
+        """,
             (tag, tag, tag, tag),
         )
         results = cursor.fetchall()