lenamerkli committed
Commit d2f3f0b · verified · 1 Parent(s): 5f47c2d

Initial Commit

Files changed (13)
  1. .gitattributes +1 -0
  2. animal.json +170 -0
  3. gluten.json +46 -0
  4. grammar.gbnf +3 -0
  5. llm.Q4_K_M.gguf +3 -0
  6. main.py +240 -0
  7. milk.json +57 -0
  8. prompt_claude.md +8 -0
  9. prompt_llm.md +11 -0
  10. prompt_vision.md +1 -0
  11. sometimes_animal.json +86 -0
  12. utils.py +139 -0
  13. vision_model.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ llm.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
animal.json ADDED
@@ -0,0 +1,170 @@
+ [
+ "Aal",
+ "Austern",
+ "Bienenwachs",
+ "Bratwurst",
+ "Butter",
+ "Buttereinfett",
+ "Butterfett",
+ "Butterreinfett",
+ "Chochellin",
+ "Chochineal",
+ "E120",
+ "E441",
+ "E469",
+ "E542",
+ "E631",
+ "E901",
+ "E904",
+ "E913",
+ "E966",
+ "Ei",
+ "Eier",
+ "Ente",
+ "Entenfleisch",
+ "Feta",
+ "Fisch",
+ "Fischmehl",
+ "Fischsauce",
+ "Fischschnitzel",
+ "Fleisch",
+ "Forelle",
+ "Frischkäse",
+ "Frischmilch",
+ "Frosch",
+ "Froschschenkel",
+ "Gans",
+ "Geflügel",
+ "Gelatine",
+ "Ghee",
+ "Gruyère",
+ "Gulasch",
+ "Hackfleisch",
+ "Halloumi",
+ "Hartkäse",
+ "Hase",
+ "Hasenfleisch",
+ "Hausenblase",
+ "Hering",
+ "Hirsch",
+ "Hirschfleisch",
+ "Honig",
+ "Huhn",
+ "Hund",
+ "Hunde",
+ "Hundefleisch",
+ "Hundemilch",
+ "Hühnerei",
+ "Hühnerfleisch",
+ "Insekten",
+ "Isinglass",
+ "Joghurt",
+ "Kabeljau",
+ "Kalb",
+ "Kalbfleisch",
+ "Kalbsfleisch",
+ "Kaninchen",
+ "Kanninchenfleisch",
+ "Karmin",
+ "Kasein",
+ "Katze",
+ "Katzen",
+ "Katzenfleisch",
+ "Kaviar",
+ "Kefir",
+ "Knochenphosphat",
+ "Kollagen",
+ "Krabben",
+ "Kuh",
+ "Kuhmilch",
+ "Kuhmilchpermeat",
+ "Kuhmilchproteine",
+ "Kuhmilchpulver",
+ "Kuhmilchreis",
+ "Kuhmilchschokolade",
+ "Kuhmilchzucker",
+ "Käse",
+ "L-Cysteine",
+ "Lab",
+ "Lachs",
+ "Lactose",
+ "Lactose-Monohydrat",
+ "Laktit",
+ "Laktose",
+ "Lamm",
+ "Lammfleisch",
+ "Leber",
+ "Leder",
+ "Magermilch",
+ "Magermilchpulver",
+ "Milch",
+ "Milcherzeugnis",
+ "Milchpermeat",
+ "Milchproteine",
+ "Milchpulver",
+ "Milchreis",
+ "Milchschokolade",
+ "Milchserum",
+ "Milchzucker",
+ "Molke",
+ "Parmesan",
+ "Pferd",
+ "Pferde",
+ "Pferdeblut",
+ "Pferdefleisch",
+ "Quark",
+ "Rahm",
+ "Rahmschokolade",
+ "Reh",
+ "Rehfleisch",
+ "Rind",
+ "Rindfleisch",
+ "Rohmilch",
+ "Rohmilchpulver",
+ "Salami",
+ "Sardelle",
+ "Sardellen",
+ "Schaf",
+ "Schaffleisch",
+ "Schafmilch",
+ "Schafmilchpermeat",
+ "Schafmilchproteine",
+ "Schafmilchpulver",
+ "Schafmilchreis",
+ "Schafmilchschokolade",
+ "Schafmilchzucker",
+ "Schafsfleisch",
+ "Schellack",
+ "Schinken",
+ "Schwein",
+ "Schweinefleisch",
+ "Schweinefleischbrühe",
+ "Schweinehack",
+ "Schweineschnitzel",
+ "Speck",
+ "Speckwürfel",
+ "Thunfisch",
+ "Trüffel",
+ "Truthahn",
+ "Truthahnfleisch",
+ "Vollmilch",
+ "Vollmilchproteine",
+ "Vollmilchpulver",
+ "Vollmilchreis",
+ "Vollmilchschokolade",
+ "Vollmilchzucker",
+ "Wild",
+ "Worcestershiresauce",
+ "Wurst",
+ "Wurstfleisch",
+ "Yak",
+ "Ziege",
+ "Ziegenfleisch",
+ "Ziegenmilch",
+ "Ziegenmilchpermeat",
+ "Ziegenmilchproteine",
+ "Ziegenmilchpulver",
+ "Ziegenmilchreis",
+ "Ziegenmilchschokolade",
+ "Ziegenmilchzucker"
+ ]
gluten.json ADDED
@@ -0,0 +1,46 @@
+ [
+ "Weizen",
+ "Weizenmehl",
+ "Weizenvollkorn",
+ "Weizenvollkornmehl",
+ "Hartweizengriess",
+ "Dinkel",
+ "Dinkelmehl",
+ "Dinkelvollkorn",
+ "Dinkelvollkornmehl",
+ "Roggen",
+ "Roggenmehl",
+ "Roggenvollkorn",
+ "Roggenvollkornmehl",
+ "Gerste",
+ "Gerstenmehl",
+ "Gerstenvollkorn",
+ "Gerstenvollkornmehl",
+ "Kamut",
+ "Kamutmehl",
+ "Kamutvollkorn",
+ "Kamutvollkornmehl",
+ "Grünkern",
+ "Grünkernmehl",
+ "Grünkernvollkorn",
+ "Grünkernvollkornmehl",
+ "Emmer",
+ "Emmermehl",
+ "Emmervollkorn",
+ "Emmervollkornmehl",
+ "Triticale",
+ "Triticalemehl",
+ "Triticalevollkorn",
+ "Triticalevollkornmehl",
+ "Gluten",
+ "Glutenmehl",
+ "Glutenvollkorn",
+ "Glutenvollkornmehl",
+ "Einkorn",
+ "Einkornmehl",
+ "Einkornvollkorn",
+ "Einkornvollkornmehl",
+ "Mehl",
+ "Vollkorn",
+ "Vollkornmehl"
+ ]
grammar.gbnf ADDED
@@ -0,0 +1,3 @@
+ root ::= "```json\n{\n \"Zutaten\": [" strings "],\n \"Verunreinigungen\": [" strings "]\n}\n```"
+ strings ::= (string ", ")* string?
+ string ::= "\"" [^\n"\\]* "\""
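utils.py reads this grammar into `GRAMMAR` as a raw string, but main.py keeps `grammar=GRAMMAR` commented out. As a minimal sketch (not part of this commit), llama-cpp-python would first compile the GBNF text into a `LlamaGrammar` object before it can constrain generation; model path and message content below are placeholders:

```python
from llama_cpp import Llama, LlamaGrammar

with open('grammar.gbnf', 'r', encoding='utf-8') as f:
    grammar = LlamaGrammar.from_string(f.read())  # compile the GBNF rules above

llm = Llama(model_path='llm.Q4_K_M.gguf', n_gpu_layers=0)
result = llm.create_chat_completion(
    messages=[{'role': 'user', 'content': 'Weizenmehl, Zucker, Butter'}],
    max_tokens=1024,
    temperature=0,
    grammar=grammar,  # forces output into the fenced-JSON shape defined by root
)
print(result['choices'][0]['message']['content'])
```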
llm.Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a403d8cedbb26dd3f2e8e88abe08acef99d27b017c8b553571f16d7a3c5c417d
+ size 397804960
main.py ADDED
@@ -0,0 +1,240 @@
+ import numpy as np
+ import torch
+ import math
+ import easyocr
+ import cv2
+ import os
+ import base64
+ import json
+ import requests
+ from llama_cpp import Llama
+ from PIL import Image
+ from dotenv import load_dotenv
+
+ from utils import *
+
+ load_dotenv()
+
+ SCALE_FACTOR = 4
+ MAX_SIZE = 5_000_000
+ MAX_SIDE = 8_000
+ # ENGINE = ['easyocr']
+ # ENGINE = ['anthropic', 'claude-3-5-sonnet-20240620']
+ ENGINE = ['llama_cpp/v2/vision', 'qwen-vl-next_b2583']
+
+
+ def main() -> None:
+     model_weights = torch.load(relative_path('vision_model.pt'))
+     model = NeuralNet()
+     model.load_state_dict(model_weights)
+     model.to(DEVICE)
+     model.eval()
+     with torch.no_grad():
+         file_path = input('Enter file path: ')
+         with Image.open(file_path) as image:
+             image_size = image.size
+             image = image.resize(IMAGE_SIZE, Image.Resampling.LANCZOS)
+             image = TRANSFORM(image).to(DEVICE)
+         output = model(image).tolist()[0]
+         data = {
+             'top': {
+                 'left': {
+                     'x': output[0] * image_size[0],
+                     'y': output[1] * image_size[1],
+                 },
+                 'right': {
+                     'x': output[2] * image_size[0],
+                     'y': output[3] * image_size[1],
+                 },
+             },
+             'bottom': {
+                 'left': {
+                     'x': output[4] * image_size[0],
+                     'y': output[5] * image_size[1],
+                 },
+                 'right': {
+                     'x': output[6] * image_size[0],
+                     'y': output[7] * image_size[1],
+                 },
+             },
+             'curvature': {
+                 'top': {
+                     'x': output[8] * image_size[0],
+                     'y': output[9] * image_size[1],
+                 },
+                 'bottom': {
+                     'x': output[10] * image_size[0],
+                     'y': output[11] * image_size[1],
+                 },
+             },
+         }
+         print(f"{data=}")
+         image = cv2.imread(file_path)
+         size_x = ((data['top']['right']['x'] - data['top']['left']['x']) +
+                   (data['bottom']['right']['x'] - data['bottom']['left']['x'])) / 2
+         size_y = ((data['top']['right']['y'] - data['top']['left']['y']) +
+                   (data['bottom']['right']['y'] - data['bottom']['left']['y'])) / 2
+         margin_x = size_x * MARGIN
+         margin_y = size_y * MARGIN
+         points = np.array([
+             (max(data['top']['left']['x'] - margin_x, 0),
+              max(data['top']['left']['y'] - margin_y, 0)),
+             (min(data['top']['right']['x'] + margin_x, image_size[0]),
+              max(data['top']['right']['y'] - margin_y, 0)),
+             (min(data['bottom']['right']['x'] + margin_x, image_size[0]),
+              min(data['bottom']['right']['y'] + margin_y, image_size[1])),
+             (max(data['bottom']['left']['x'] - margin_x, 0),
+              min(data['bottom']['left']['y'] + margin_y, image_size[1])),
+             (data['curvature']['top']['x'],
+              max(data['curvature']['top']['y'] - margin_y, 0)),
+             (data['curvature']['bottom']['x'],
+              min(data['curvature']['bottom']['y'] + margin_y, image_size[1])),
+         ], dtype=np.float32)
+         points_float: list[list[float]] = points.tolist()
+         max_height = int(max([  # y: top left - bottom left, top right - bottom right, curvature top - curvature bottom
+             abs(points_float[0][1] - points_float[3][1]),
+             abs(points_float[1][1] - points_float[2][1]),
+             abs(points_float[4][1] - points_float[5][1]),
+         ])) * SCALE_FACTOR
+         max_width = int(max([  # x: top left - top right, bottom left - bottom right
+             abs(points_float[0][0] - points_float[1][0]),
+             abs(points_float[3][0] - points_float[2][0]),
+         ])) * SCALE_FACTOR
+         destination_points = np.array([
+             [0, 0],
+             [max_width - 1, 0],
+             [max_width - 1, max_height - 1],
+             [0, max_height - 1],
+             [max_width // 2, 0],
+             [max_width // 2, max_height - 1],
+         ], dtype=np.float32)
+         homography, _ = cv2.findHomography(points, destination_points)
+         warped_image = cv2.warpPerspective(image, homography, (max_width, max_height))
+         cv2.imwrite('_warped_image.png', warped_image)
+         del data
+         if ENGINE[0] == 'easyocr':
+             reader = easyocr.Reader(['de', 'fr', 'en'], gpu=True)
+             result = reader.readtext('_warped_image.png')
+             # os.remove('_warped_image.png')
+             text = '\n'.join([r[1] for r in result])
+             ingredients = {}
+         elif ENGINE[0] == 'anthropic':
+             decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
+             # os.remove('_warped_image.png')
+             with open('_warped_image.webp', 'rb') as f:
+                 base64_image = base64.b64encode(f.read()).decode('utf-8')
+             response = requests.post(
+                 url='https://api.anthropic.com/v1/messages',
+                 headers={
+                     'x-api-key': os.environ['ANTHROPIC_API_KEY'],
+                     'anthropic-version': '2023-06-01',
+                     'content-type': 'application/json',
+                 },
+                 data=json.dumps({
+                     'model': ENGINE[1],
+                     'max_tokens': 1024,
+                     'messages': [
+                         {
+                             'role': 'user', 'content': [
+                                 {
+                                     'type': 'image',
+                                     'source': {
+                                         'type': 'base64',
+                                         'media_type': 'image/webp',
+                                         'data': base64_image,
+                                     },
+                                 },
+                                 {
+                                     'type': 'text',
+                                     'text': PROMPT_CLAUDE,
+                                 },
+                             ],
+                         },
+                     ],
+                 }),
+             )
+             # os.remove('_warped_image.webp')
+             try:
+                 data = response.json()
+                 ingredients = json.loads('{' + data['content'][0]['text'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
+             except Exception as e:
+                 print(data)
+                 raise e
+             text = ''
+         elif ENGINE[0] == 'llama_cpp/v2/vision':
+             decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
+             # os.remove('_warped_image.png')
+             response = requests.post(
+                 url='http://127.0.0.1:11434/llama_cpp/v2/vision',
+                 headers={
+                     'x-version': '2024-05-21',
+                     'content-type': 'application/json',
+                 },
+                 data=json.dumps({
+                     'task': PROMPT_VISION,
+                     'model': ENGINE[1],
+                     'image_path': relative_path('_warped_image.webp'),
+                 }),
+             )
+             # os.remove('_warped_image.webp')
+             text: str = response.json()['text']
+             ingredients = {}
+         else:
+             raise ValueError(f'Unknown engine: {ENGINE[0]}')
+         if text != '':
+             if DEVICE == 'cuda':
+                 n_gpu_layers = -1
+             else:
+                 n_gpu_layers = 0
+             llm = Llama(
+                 model_path=relative_path('llm.Q4_K_M.gguf'),
+                 n_gpu_layers=n_gpu_layers,
+             )
+             llm_result = llm.create_chat_completion(
+                 messages=[
+                     {
+                         'role': 'system',
+                         'content': SYSTEM_PROMPT,
+                     },
+                     {
+                         'role': 'user',
+                         'content': PROMPT_LLM.replace('{{old_data}}', text),
+                     },
+                 ],
+                 max_tokens=1024,
+                 temperature=0,
+                 # grammar=GRAMMAR,
+             )
+             try:
+                 ingredients = json.loads(
+                     '{' + llm_result['choices'][0]['message']['content'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
+             except Exception as e:
+                 print(f"{llm_result=}")
+                 raise e
+         animal_ingredients = [item for item in ingredients['Zutaten'] if item in ANIMAL]
+         sometimes_animal_ingredients = [item for item in ingredients['Zutaten'] if item in SOMETIMES_ANIMAL]
+         milk_ingredients = ([item for item in ingredients['Zutaten'] if item in MILK]
+                             + [item for item in ingredients['Verunreinigungen'] if item in MILK])
+         gluten_ingredients = ([item for item in ingredients['Zutaten'] if item in GLUTEN]
+                               + [item for item in ingredients['Verunreinigungen'] if item in GLUTEN])
+         print('=' * 64)
+         print('Zutaten: ' + ', '.join(ingredients['Zutaten']))
+         print('=' * 64)
+         print('Kann Spuren von ' + ', '.join(ingredients['Verunreinigungen']) + ' enthalten.')
+         print('=' * 64)
+         print('Gefundene tierische Zutaten: '
+               + (', '.join(animal_ingredients) if len(animal_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene potenzielle tierische Zutaten: '
+               + (', '.join(sometimes_animal_ingredients) if len(sometimes_animal_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene Milchprodukte: ' + (', '.join(milk_ingredients) if len(milk_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene Gluten: ' + (', '.join(gluten_ingredients) if len(gluten_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print(LEGAL_NOTICE)
+         print('=' * 64)
+
+
+ if __name__ == '__main__':
+     main()
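Both the Anthropic branch and the local-LLM branch recover the ingredient object by slicing the reply between its first `{` and last `}` before calling `json.loads`, which tolerates the ```json fences the prompts ask for. A minimal standalone sketch of that extraction step; the helper name `extract_json` is illustrative and not part of this commit:

```python
import json

def extract_json(reply: str) -> dict:
    # Keep everything between the first '{' and the last '}' of the model
    # reply, so surrounding code fences or extra prose are ignored.
    body = reply.split('{', 1)[-1].rsplit('}', 1)[0]
    return json.loads('{' + body + '}')

example = '```json\n{\n "Zutaten": ["Weizenmehl"],\n "Verunreinigungen": ["Milch"]\n}\n```'
print(extract_json(example))  # {'Zutaten': ['Weizenmehl'], 'Verunreinigungen': ['Milch']}
```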
milk.json ADDED
@@ -0,0 +1,57 @@
+ [
+ "Butter",
+ "E966",
+ "Frischkäse",
+ "Frischmilch",
+ "Hartkäse",
+ "Joghurt",
+ "Kefir",
+ "Kuhmilch",
+ "Kuhmilchpermeat",
+ "Kuhmilchproteine",
+ "Kuhmilchpulver",
+ "Kuhmilchreis",
+ "Kuhmilchschokolade",
+ "Kuhmilchzucker",
+ "Käse",
+ "Lactose",
+ "Lactose-Monohydrat",
+ "Laktose",
+ "Magermilch",
+ "Magermilchpulver",
+ "Milch",
+ "Milcherzeugnis",
+ "Milchpermeat",
+ "Milchproteine",
+ "Milchpulver",
+ "Milchreis",
+ "Milchschokolade",
+ "Milchserum",
+ "Milchzucker",
+ "Molke",
+ "Quark",
+ "Rahm",
+ "Rahmschokolade",
+ "Rohmilch",
+ "Rohmilchpulver",
+ "Schafmilch",
+ "Schafmilchpermeat",
+ "Schafmilchproteine",
+ "Schafmilchpulver",
+ "Schafmilchreis",
+ "Schafmilchschokolade",
+ "Schafmilchzucker",
+ "Vollmilch",
+ "Vollmilchproteine",
+ "Vollmilchpulver",
+ "Vollmilchreis",
+ "Vollmilchschokolade",
+ "Vollmilchzucker",
+ "Ziegenmilch",
+ "Ziegenmilchpermeat",
+ "Ziegenmilchproteine",
+ "Ziegenmilchpulver",
+ "Ziegenmilchreis",
+ "Ziegenmilchschokolade",
+ "Ziegenmilchzucker"
+ ]
prompt_claude.md ADDED
@@ -0,0 +1,8 @@
+ Im Bild ist die Rückseite einer Verpackung zu sehen. Auf dieser stehen die Zutaten (auch Inhaltsstoffe genannt) und wahrscheinlich auch die Verunreinigungen (auch "Enthält" oder "Kann Spuren von enthalten" genannt) geschrieben. Formatiere diese als JSON:
+ ```json
+ {
+ "Zutaten": ["Zutat1", "Zutat2"],
+ "Verunreinigungen": ["Verunreinigung1", "Verunreinigung2"]
+ }
+ ```
+ Stelle sicher, dass die drei "`" am Anfang und am Ende vorhanden sind. Prozentangaben sind wegzulassen. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
prompt_llm.md ADDED
@@ -0,0 +1,11 @@
+ ```
+ {{old_data}}
+ ```
+ Diese Daten stammen von einer OCR-Engine und enthalten wahrscheinlich Fehler. Formatiere sie als JSON:
+ ```json
+ {
+ "Zutaten": ["Zutat1", "Zutat2"],
+ "Verunreinigungen": ["Verunreinigung1", "Verunreinigung2"]
+ }
+ ```
+ Stelle sicher, dass die drei "`" am Anfang und am Ende vorhanden sind. Prozentangaben sind wegzulassen. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
prompt_vision.md ADDED
@@ -0,0 +1 @@
+ Im Bild ist die Rückseite einer Verpackung zu sehen. Auf dieser stehen die Zutaten (auch Inhaltsstoffe genannt) und wahrscheinlich auch die Verunreinigungen (auch "Enthält" oder "Kann Spuren von enthalten" genannt) geschrieben. Liste diese auf. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
sometimes_animal.json ADDED
@@ -0,0 +1,86 @@
+ [
+ "E101",
+ "E104",
+ "E153",
+ "E160",
+ "E161",
+ "E236",
+ "E237",
+ "E238",
+ "E252",
+ "E270",
+ "E304",
+ "E322",
+ "E325",
+ "E326",
+ "E327",
+ "E328",
+ "E329",
+ "E422",
+ "E430",
+ "E431",
+ "E432",
+ "E433",
+ "E434",
+ "E435",
+ "E436",
+ "E442",
+ "E445",
+ "E470",
+ "E471",
+ "E472",
+ "E473",
+ "E474",
+ "E475",
+ "E476",
+ "E477",
+ "E478",
+ "E479",
+ "E481",
+ "E482",
+ "E483",
+ "E484",
+ "E491",
+ "E492",
+ "E493",
+ "E494",
+ "E495",
+ "E570",
+ "E572",
+ "E585",
+ "E626",
+ "E627",
+ "E628",
+ "E629",
+ "E630",
+ "E631",
+ "E632",
+ "E633",
+ "E634",
+ "E635",
+ "E636",
+ "E637",
+ "E640",
+ "E910",
+ "E920",
+ "E921",
+ "E966",
+ "E1000",
+ "E1105",
+ "E1518",
+ "Milchsäure",
+ "Omega-3",
+ "Vitamin D2",
+ "Vitamin D3",
+ "Bier",
+ "Wein",
+ "Pommes",
+ "Pommes Frites",
+ "Marshmallows",
+ "Gummy Bärchen",
+ "Schokolade",
+ "Trüffel",
+ "Kaugummi",
+ "Pesto",
+ "Worcestershire Sauce"
+ ]
utils.py ADDED
@@ -0,0 +1,139 @@
+ import torch
+ import torchvision
+ import os
+ import json
+ from PIL import Image
+ from datetime import datetime
+
+
+ __all__ = [
+     'current_time',
+     'relative_path',
+     'NeuralNet',
+     'DEVICE',
+     'IMAGE_SIZE',
+     'TRANSFORM',
+     'MARGIN',
+     'GRID_SIZE',
+     'decrease_size',
+     'PROMPT_LLM',
+     'PROMPT_CLAUDE',
+     'PROMPT_VISION',
+     'EOS',
+     'GRAMMAR',
+     'SYSTEM_PROMPT',
+     'ANIMAL',
+     'SOMETIMES_ANIMAL',
+     'MILK',
+     'GLUTEN',
+     'LEGAL_NOTICE',
+ ]
+
+
+ MARGIN = 0.1
+ GRID_SIZE = 4096
+ DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ IMAGE_SIZE = (224, 224)
+ TRANSFORM = torchvision.transforms.Compose([
+     torchvision.transforms.ToTensor(),
+ ])
+ with open('prompt_llm.md', 'r', encoding='utf-8') as _f:
+     PROMPT_LLM = _f.read()
+ with open('prompt_claude.md', 'r', encoding='utf-8') as _f:
+     PROMPT_CLAUDE = _f.read()
+ with open('prompt_vision.md', 'r', encoding='utf-8') as _f:
+     PROMPT_VISION = _f.read()
+ EOS = '\n<|im_end|>'
+ SYSTEM_PROMPT = 'Du bist ein hilfreicher Assistent.'
+ with open('grammar.gbnf', 'r', encoding='utf-8') as _f:
+     GRAMMAR = _f.read()
+ with open('animal.json', 'r', encoding='utf-8') as _f:
+     ANIMAL = json.load(_f)
+ with open('sometimes_animal.json', 'r', encoding='utf-8') as _f:
+     SOMETIMES_ANIMAL = json.load(_f)
+ with open('milk.json', 'r', encoding='utf-8') as _f:
+     MILK = json.load(_f)
+ with open('gluten.json', 'r', encoding='utf-8') as _f:
+     GLUTEN = json.load(_f)
+
+
+ LEGAL_NOTICE = ('Dieses Programm ist nur für Forschungszwecke gedacht. Fehler können nicht ausgeschlossen werden und '
+                 'sind wahrscheinlich vorhanden. Die Erkennung von Zutaten und Verunreinigungen ist nur zum schnellen '
+                 'Aussortieren und nicht zum Überprüfen gedacht.')
+
+
+ def current_time() -> str:
+     return datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+
+ def relative_path(string: str) -> str:
+     return os.path.join(os.path.dirname(__file__), string)
+
+
+ class NeuralNet(torch.nn.Module):
+     def __init__(self):
+         super(NeuralNet, self).__init__()
+
+         # Load pre-trained ResNet model
+         self.backbone = torchvision.models.resnet18(pretrained=True)
+
+         # Modify the last layer to output 12 values
+         self.backbone.fc = torch.nn.Linear(self.backbone.fc.in_features, 12)
+
+         # Add a custom head for key-point detection
+         self.head = torch.nn.Sequential(
+             torch.nn.Conv2d(512, 256, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(256, 128, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(128, 64, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(64, 12, kernel_size=1),
+             torch.nn.AdaptiveAvgPool2d(1)
+         )
+
+     def forward(self, x):
+         # Check if we need to unsqueeze
+         if len(x.shape) == 3:  # Shape [C, H, W]
+             x = x.unsqueeze(0)  # Shape [1, C, H, W]
+
+         # Resize input to match ResNet input size if necessary
+         if x.shape[-2:] != (224, 224):
+             x = torch.nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
+
+         # Pass input through the backbone
+         x = self.backbone.conv1(x)
+         x = self.backbone.bn1(x)
+         x = self.backbone.relu(x)
+         x = self.backbone.maxpool(x)
+
+         x = self.backbone.layer1(x)
+         x = self.backbone.layer2(x)
+         x = self.backbone.layer3(x)
+         x = self.backbone.layer4(x)
+
+         # Pass input through the custom head
+         x = self.head(x)
+
+         # Flatten the output
+         x = x.view(x.size(0), -1)
+
+         return x
+
+
+ def decrease_size(input_path, output_path, max_size, max_side):
+     with Image.open(input_path) as img:
+         original_size = os.path.getsize(input_path)
+         width, height = img.size
+         if original_size <= max_size and width <= max_side and height <= max_side:
+             img.save(output_path, format=output_path.split('.')[-1].upper())
+             print("Image is already below the maximum size.")
+             return
+         while width > 24 and height > 24:
+             img_resized = img.resize((width, height), Image.Resampling.LANCZOS)
+             img_resized.save(output_path, format=output_path.split('.')[-1].upper())
+             if os.path.getsize(output_path) <= max_size and width <= max_side and height <= max_side:
+                 print(f"Reduced image size to {os.path.getsize(output_path)} bytes.")
+                 break
+             width, height = int(width * 0.9), int(height * 0.9)
+         if os.path.getsize(output_path) > max_size:
+             raise ValueError("Could not reduce PNG size below max_size by reducing resolution.")
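For reference, a short usage sketch of `decrease_size` as main.py calls it: the function re-saves the image, shrinking both sides by 10% per iteration until the output is below `max_size` bytes and `max_side` pixels per side. The file names below are placeholders:

```python
from utils import decrease_size

# Convert a large PNG scan into a WEBP that fits the 5 MB / 8000 px limits used in main.py.
decrease_size('label_scan.png', 'label_scan.webp', max_size=5_000_000, max_side=8_000)
```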
vision_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cff3b777fa2d32b7e317838db03bf5bd99033b949ee987feb6ea7b052f3cb029
+ size 51013558