lenamerkli committed
Commit d2f3f0b · verified · 1 Parent(s): 5f47c2d

Initial Commit

Files changed (13)
  1. .gitattributes +1 -0
  2. animal.json +170 -0
  3. gluten.json +46 -0
  4. grammar.gbnf +3 -0
  5. llm.Q4_K_M.gguf +3 -0
  6. main.py +240 -0
  7. milk.json +57 -0
  8. prompt_claude.md +8 -0
  9. prompt_llm.md +11 -0
  10. prompt_vision.md +1 -0
  11. sometimes_animal.json +86 -0
  12. utils.py +139 -0
  13. vision_model.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ llm.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
animal.json ADDED
@@ -0,0 +1,170 @@
+ [
+ "Aal",
+ "Austern",
+ "Bienenwachs",
+ "Bratwurst",
+ "Butter",
+ "Buttereinfett",
+ "Butterfett",
+ "Butterreinfett",
+ "Chochellin",
+ "Chochineal",
+ "E120",
+ "E441",
+ "E469",
+ "E542",
+ "E631",
+ "E901",
+ "E904",
+ "E913",
+ "E966",
+ "Ei",
+ "Eier",
+ "Ente",
+ "Entenfleisch",
+ "Feta",
+ "Fisch",
+ "Fischmehl",
+ "Fischsauce",
+ "Fischschnitzel",
+ "Fleisch",
+ "Forelle",
+ "Frischkäse",
+ "Frischmilch",
+ "Frosch",
+ "Froschschenkel",
+ "Gans",
+ "Geflügel",
+ "Gelatine",
+ "Ghee",
+ "Gruyère",
+ "Gulasch",
+ "Hackfleisch",
+ "Halloumi",
+ "Hartkäse",
+ "Hase",
+ "Hasenfleisch",
+ "Hausenblase",
+ "Hering",
+ "Hirsch",
+ "Hirschfleisch",
+ "Honig",
+ "Huhn",
+ "Hund",
+ "Hunde",
+ "Hundefleisch",
+ "Hundemilch",
+ "Hühnerei",
+ "Hühnerfleisch",
+ "Insekten",
+ "Isinglass",
+ "Joghurt",
+ "Kabeljau",
+ "Kalb",
+ "Kalbfleisch",
+ "Kalbsfleisch",
+ "Kaninchen",
+ "Kanninchenfleisch",
+ "Karmin",
+ "Kasein",
+ "Katze",
+ "Katzen",
+ "Katzenfleisch",
+ "Kaviar",
+ "Kefir",
+ "Knochenphosphat",
+ "Kollagen",
+ "Krabben",
+ "Kuh",
+ "Kuhmilch",
+ "Kuhmilchpermeat",
+ "Kuhmilchproteine",
+ "Kuhmilchpulver",
+ "Kuhmilchreis",
+ "Kuhmilchschokolade",
+ "Kuhmilchzucker",
+ "Käse",
+ "L-Cysteine",
+ "Lab",
+ "Lachs",
+ "Lactose",
+ "Lactose-Monohydrat",
+ "Laktit",
+ "Laktose",
+ "Lamm",
+ "Lammfleisch",
+ "Leber",
+ "Leder",
+ "Magermilch",
+ "Magermilchpulver",
+ "Milch",
+ "Milcherzeugnis",
+ "Milchpermeat",
+ "Milchproteine",
+ "Milchpulver",
+ "Milchreis",
+ "Milchschokolade",
+ "Milchserum",
+ "Milchzucker",
+ "Molke",
+ "Parmesan",
+ "Pferd",
+ "Pferde",
+ "Pferdeblut",
+ "Pferdefleisch",
+ "Quark",
+ "Rahm",
+ "Rahmschokolade",
+ "Reh",
+ "Rehfleisch",
+ "Rind",
+ "Rindfleisch",
+ "Rohmilch",
+ "Rohmilchpulver",
+ "Salami",
+ "Sardelle",
+ "Sardellen",
+ "Schaf",
+ "Schaffleisch",
+ "Schafmilch",
+ "Schafmilchpermeat",
+ "Schafmilchproteine",
+ "Schafmilchpulver",
+ "Schafmilchreis",
+ "Schafmilchschokolade",
+ "Schafmilchzucker",
+ "Schafsfleisch",
+ "Schellack",
+ "Schinken",
+ "Schwein",
+ "Schweinefleisch",
+ "Schweinefleischbrühe",
+ "Schweinehack",
+ "Schweineschnitzel",
+ "Speck",
+ "Speckwürfel",
+ "Thunfisch",
+ "Trüffel",
+ "Truthahn",
+ "Truthahnfleisch",
+ "Vollmilch",
+ "Vollmilchproteine",
+ "Vollmilchpulver",
+ "Vollmilchreis",
+ "Vollmilchschokolade",
+ "Vollmilchzucker",
+ "Wild",
+ "Worcestershiresauce",
+ "Wurst",
+ "Wurstfleisch",
+ "Yak",
+ "Ziege",
+ "Ziegenfleisch",
+ "Ziegenmilch",
+ "Ziegenmilchpermeat",
+ "Ziegenmilchproteine",
+ "Ziegenmilchpulver",
+ "Ziegenmilchreis",
+ "Ziegenmilchschokolade",
+ "Ziegenmilchzucker"
+ ]
gluten.json ADDED
@@ -0,0 +1,46 @@
+ [
+ "Weizen",
+ "Weizenmehl",
+ "Weizenvollkorn",
+ "Weizenvollkornmehl",
+ "Hartweizengriess",
+ "Dinkel",
+ "Dinkelmehl",
+ "Dinkelvollkorn",
+ "Dinkelvollkornmehl",
+ "Roggen",
+ "Roggenmehl",
+ "Roggenvollkorn",
+ "Roggenvollkornmehl",
+ "Gerste",
+ "Gerstenmehl",
+ "Gerstenvollkorn",
+ "Gerstenvollkornmehl",
+ "Kamut",
+ "Kamutmehl",
+ "Kamutvollkorn",
+ "Kamutvollkornmehl",
+ "Grünkern",
+ "Grünkernmehl",
+ "Grünkernvollkorn",
+ "Grünkernvollkornmehl",
+ "Emmer",
+ "Emmermehl",
+ "Emmervollkorn",
+ "Emmervollkornmehl",
+ "Triticale",
+ "Triticalemehl",
+ "Triticalevollkorn",
+ "Triticalevollkornmehl",
+ "Gluten",
+ "Glutenmehl",
+ "Glutenvollkorn",
+ "Glutenvollkornmehl",
+ "Einkorn",
+ "Einkornmehl",
+ "Einkornvollkorn",
+ "Einkornvollkornmehl",
+ "Mehl",
+ "Vollkorn",
+ "Vollkornmehl"
+ ]
grammar.gbnf ADDED
@@ -0,0 +1,3 @@
+ root ::= "```json\n{\n \"Zutaten\": [" strings "],\n \"Verunreinigungen\": [" strings "]\n}\n```"
+ strings ::= (string ", ")* string?
+ string ::= "\"" [^\n"\\]* "\""
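utils.py reads this grammar into `GRAMMAR` as a raw string, but main.py keeps `grammar=GRAMMAR` commented out. As a minimal sketch (not part of this commit), llama-cpp-python would first compile the GBNF text into a `LlamaGrammar` object before it can constrain generation; model path and message content below are placeholders:

```python
from llama_cpp import Llama, LlamaGrammar

with open('grammar.gbnf', 'r', encoding='utf-8') as f:
    grammar = LlamaGrammar.from_string(f.read())  # compile the GBNF rules above

llm = Llama(model_path='llm.Q4_K_M.gguf', n_gpu_layers=0)
result = llm.create_chat_completion(
    messages=[{'role': 'user', 'content': 'Weizenmehl, Zucker, Butter'}],
    max_tokens=1024,
    temperature=0,
    grammar=grammar,  # forces output into the fenced-JSON shape defined by root
)
print(result['choices'][0]['message']['content'])
```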
llm.Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a403d8cedbb26dd3f2e8e88abe08acef99d27b017c8b553571f16d7a3c5c417d
+ size 397804960
main.py ADDED
@@ -0,0 +1,240 @@
+ import numpy as np
+ import torch
+ import math
+ import easyocr
+ import cv2
+ import os
+ import base64
+ import json
+ import requests
+ from llama_cpp import Llama
+ from PIL import Image
+ from dotenv import load_dotenv
+
+ from utils import *
+
+ load_dotenv()
+
+ SCALE_FACTOR = 4
+ MAX_SIZE = 5_000_000
+ MAX_SIDE = 8_000
+ # ENGINE = ['easyocr']
+ # ENGINE = ['anthropic', 'claude-3-5-sonnet-20240620']
+ ENGINE = ['llama_cpp/v2/vision', 'qwen-vl-next_b2583']
+
+
+ def main() -> None:
+     model_weights = torch.load(relative_path('vision_model.pt'))
+     model = NeuralNet()
+     model.load_state_dict(model_weights)
+     model.to(DEVICE)
+     model.eval()
+     with torch.no_grad():
+         file_path = input('Enter file path: ')
+         with Image.open(file_path) as image:
+             image_size = image.size
+             image = image.resize(IMAGE_SIZE, Image.Resampling.LANCZOS)
+             image = TRANSFORM(image).to(DEVICE)
+         output = model(image).tolist()[0]
+         data = {
+             'top': {
+                 'left': {
+                     'x': output[0] * image_size[0],
+                     'y': output[1] * image_size[1],
+                 },
+                 'right': {
+                     'x': output[2] * image_size[0],
+                     'y': output[3] * image_size[1],
+                 },
+             },
+             'bottom': {
+                 'left': {
+                     'x': output[4] * image_size[0],
+                     'y': output[5] * image_size[1],
+                 },
+                 'right': {
+                     'x': output[6] * image_size[0],
+                     'y': output[7] * image_size[1],
+                 },
+             },
+             'curvature': {
+                 'top': {
+                     'x': output[8] * image_size[0],
+                     'y': output[9] * image_size[1],
+                 },
+                 'bottom': {
+                     'x': output[10] * image_size[0],
+                     'y': output[11] * image_size[1],
+                 },
+             },
+         }
+         print(f"{data=}")
+         image = cv2.imread(file_path)
+         size_x = ((data['top']['right']['x'] - data['top']['left']['x']) +
+                   (data['bottom']['right']['x'] - data['bottom']['left']['x'])) / 2
+         size_y = ((data['top']['right']['y'] - data['top']['left']['y']) +
+                   (data['bottom']['right']['y'] - data['bottom']['left']['y'])) / 2
+         margin_x = size_x * MARGIN
+         margin_y = size_y * MARGIN
+         points = np.array([
+             (max(data['top']['left']['x'] - margin_x, 0),
+              max(data['top']['left']['y'] - margin_y, 0)),
+             (min(data['top']['right']['x'] + margin_x, image_size[0]),
+              max(data['top']['right']['y'] - margin_y, 0)),
+             (min(data['bottom']['right']['x'] + margin_x, image_size[0]),
+              min(data['bottom']['right']['y'] + margin_y, image_size[1])),
+             (max(data['bottom']['left']['x'] - margin_x, 0),
+              min(data['bottom']['left']['y'] + margin_y, image_size[1])),
+             (data['curvature']['top']['x'],
+              max(data['curvature']['top']['y'] - margin_y, 0)),
+             (data['curvature']['bottom']['x'],
+              min(data['curvature']['bottom']['y'] + margin_y, image_size[1])),
+         ], dtype=np.float32)
+         points_float: list[list[float]] = points.tolist()
+         max_height = int(max([  # y: top left - bottom left, top right - bottom right, curvature top - curvature bottom
+             abs(points_float[0][1] - points_float[3][1]),
+             abs(points_float[1][1] - points_float[2][1]),
+             abs(points_float[4][1] - points_float[5][1]),
+         ])) * SCALE_FACTOR
+         max_width = int(max([  # x: top left - top right, bottom left - bottom right
+             abs(points_float[0][0] - points_float[1][0]),
+             abs(points_float[3][0] - points_float[2][0]),
+         ])) * SCALE_FACTOR
+         destination_points = np.array([
+             [0, 0],
+             [max_width - 1, 0],
+             [max_width - 1, max_height - 1],
+             [0, max_height - 1],
+             [max_width // 2, 0],
+             [max_width // 2, max_height - 1],
+         ], dtype=np.float32)
+         homography, _ = cv2.findHomography(points, destination_points)
+         warped_image = cv2.warpPerspective(image, homography, (max_width, max_height))
+         cv2.imwrite('_warped_image.png', warped_image)
+         del data
+         if ENGINE[0] == 'easyocr':
+             reader = easyocr.Reader(['de', 'fr', 'en'], gpu=True)
+             result = reader.readtext('_warped_image.png')
+             # os.remove('_warped_image.png')
+             text = '\n'.join([r[1] for r in result])
+             ingredients = {}
+         elif ENGINE[0] == 'anthropic':
+             decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
+             # os.remove('_warped_image.png')
+             with open('_warped_image.webp', 'rb') as f:
+                 base64_image = base64.b64encode(f.read()).decode('utf-8')
+             response = requests.post(
+                 url='https://api.anthropic.com/v1/messages',
+                 headers={
+                     'x-api-key': os.environ['ANTHROPIC_API_KEY'],
+                     'anthropic-version': '2023-06-01',
+                     'content-type': 'application/json',
+                 },
+                 data=json.dumps({
+                     'model': ENGINE[1],
+                     'max_tokens': 1024,
+                     'messages': [
+                         {
+                             'role': 'user', 'content': [
+                                 {
+                                     'type': 'image',
+                                     'source': {
+                                         'type': 'base64',
+                                         'media_type': 'image/webp',
+                                         'data': base64_image,
+                                     },
+                                 },
+                                 {
+                                     'type': 'text',
+                                     'text': PROMPT_CLAUDE,
+                                 },
+                             ],
+                         },
+                     ],
+                 }),
+             )
+             # os.remove('_warped_image.webp')
+             try:
+                 data = response.json()
+                 ingredients = json.loads('{' + data['content'][0]['text'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
+             except Exception as e:
+                 print(data)
+                 raise e
+             text = ''
+         elif ENGINE[0] == 'llama_cpp/v2/vision':
+             decrease_size('_warped_image.png', '_warped_image.webp', MAX_SIZE, MAX_SIDE)
+             # os.remove('_warped_image.png')
+             response = requests.post(
+                 url='http://127.0.0.1:11434/llama_cpp/v2/vision',
+                 headers={
+                     'x-version': '2024-05-21',
+                     'content-type': 'application/json',
+                 },
+                 data=json.dumps({
+                     'task': PROMPT_VISION,
+                     'model': ENGINE[1],
+                     'image_path': relative_path('_warped_image.webp'),
+                 }),
+             )
+             # os.remove('_warped_image.webp')
+             text: str = response.json()['text']
+             ingredients = {}
+         else:
+             raise ValueError(f'Unknown engine: {ENGINE[0]}')
+         if text != '':
+             if DEVICE == 'cuda':
+                 n_gpu_layers = -1
+             else:
+                 n_gpu_layers = 0
+             llm = Llama(
+                 model_path=relative_path('llm.Q4_K_M.gguf'),
+                 n_gpu_layers=n_gpu_layers,
+             )
+             llm_result = llm.create_chat_completion(
+                 messages=[
+                     {
+                         'role': 'system',
+                         'content': SYSTEM_PROMPT,
+                     },
+                     {
+                         'role': 'user',
+                         'content': PROMPT_LLM.replace('{{old_data}}', text),
+                     },
+                 ],
+                 max_tokens=1024,
+                 temperature=0,
+                 # grammar=GRAMMAR,
+             )
+             try:
+                 ingredients = json.loads(
+                     '{' + llm_result['choices'][0]['message']['content'].split('{', 1)[-1].rsplit('}', 1)[0] + '}')
+             except Exception as e:
+                 print(f"{llm_result=}")
+                 raise e
+         animal_ingredients = [item for item in ingredients['Zutaten'] if item in ANIMAL]
+         sometimes_animal_ingredients = [item for item in ingredients['Zutaten'] if item in SOMETIMES_ANIMAL]
+         milk_ingredients = ([item for item in ingredients['Zutaten'] if item in MILK]
+                             + [item for item in ingredients['Verunreinigungen'] if item in MILK])
+         gluten_ingredients = ([item for item in ingredients['Zutaten'] if item in GLUTEN]
+                               + [item for item in ingredients['Verunreinigungen'] if item in GLUTEN])
+         print('=' * 64)
+         print('Zutaten: ' + ', '.join(ingredients['Zutaten']))
+         print('=' * 64)
+         print('Kann Spuren von ' + ', '.join(ingredients['Verunreinigungen']) + ' enthalten.')
+         print('=' * 64)
+         print('Gefundene tierische Zutaten: '
+               + (', '.join(animal_ingredients) if len(animal_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene potenzielle tierische Zutaten: '
+               + (', '.join(sometimes_animal_ingredients) if len(sometimes_animal_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene Milchprodukte: ' + (', '.join(milk_ingredients) if len(milk_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print('Gefundene Gluten: ' + (', '.join(gluten_ingredients) if len(gluten_ingredients) > 0 else 'keine'))
+         print('=' * 64)
+         print(LEGAL_NOTICE)
+         print('=' * 64)
+
+
+ if __name__ == '__main__':
+     main()
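Both the Anthropic branch and the local-LLM branch recover the ingredient object by slicing the reply between its first `{` and last `}` before calling `json.loads`, which tolerates the ```json fences the prompts ask for. A minimal standalone sketch of that extraction step; the helper name `extract_json` is illustrative and not part of this commit:

```python
import json

def extract_json(reply: str) -> dict:
    # Keep everything between the first '{' and the last '}' of the model
    # reply, so surrounding code fences or extra prose are ignored.
    body = reply.split('{', 1)[-1].rsplit('}', 1)[0]
    return json.loads('{' + body + '}')

example = '```json\n{\n "Zutaten": ["Weizenmehl"],\n "Verunreinigungen": ["Milch"]\n}\n```'
print(extract_json(example))  # {'Zutaten': ['Weizenmehl'], 'Verunreinigungen': ['Milch']}
```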
milk.json ADDED
@@ -0,0 +1,57 @@
+ [
+ "Butter",
+ "E966",
+ "Frischkäse",
+ "Frischmilch",
+ "Hartkäse",
+ "Joghurt",
+ "Kefir",
+ "Kuhmilch",
+ "Kuhmilchpermeat",
+ "Kuhmilchproteine",
+ "Kuhmilchpulver",
+ "Kuhmilchreis",
+ "Kuhmilchschokolade",
+ "Kuhmilchzucker",
+ "Käse",
+ "Lactose",
+ "Lactose-Monohydrat",
+ "Laktose",
+ "Magermilch",
+ "Magermilchpulver",
+ "Milch",
+ "Milcherzeugnis",
+ "Milchpermeat",
+ "Milchproteine",
+ "Milchpulver",
+ "Milchreis",
+ "Milchschokolade",
+ "Milchserum",
+ "Milchzucker",
+ "Molke",
+ "Quark",
+ "Rahm",
+ "Rahmschokolade",
+ "Rohmilch",
+ "Rohmilchpulver",
+ "Schafmilch",
+ "Schafmilchpermeat",
+ "Schafmilchproteine",
+ "Schafmilchpulver",
+ "Schafmilchreis",
+ "Schafmilchschokolade",
+ "Schafmilchzucker",
+ "Vollmilch",
+ "Vollmilchproteine",
+ "Vollmilchpulver",
+ "Vollmilchreis",
+ "Vollmilchschokolade",
+ "Vollmilchzucker",
+ "Ziegenmilch",
+ "Ziegenmilchpermeat",
+ "Ziegenmilchproteine",
+ "Ziegenmilchpulver",
+ "Ziegenmilchreis",
+ "Ziegenmilchschokolade",
+ "Ziegenmilchzucker"
+ ]
prompt_claude.md ADDED
@@ -0,0 +1,8 @@
+ Im Bild ist die Rückseite einer Verpackung zu sehen. Auf dieser stehen die Zutaten (auch Inhaltsstoffe genannt) und wahrscheinlich auch die Verunreinigungen (auch "Enthält" oder "Kann Spuren von enthalten" genannt) geschrieben. Formatiere diese als JSON:
+ ```json
+ {
+ "Zutaten": ["Zutat1", "Zutat2"],
+ "Verunreinigungen": ["Verunreinigung1", "Verunreinigung2"]
+ }
+ ```
+ Stelle sicher, dass die drei "`" am Anfang und am Ende vorhanden sind. Prozentangaben sind wegzulassen. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
prompt_llm.md ADDED
@@ -0,0 +1,11 @@
+ ```
+ {{old_data}}
+ ```
+ Diese Daten stammen von einer OCR-Engine und enthalten wahrscheinlich Fehler. Formatiere sie als JSON:
+ ```json
+ {
+ "Zutaten": ["Zutat1", "Zutat2"],
+ "Verunreinigungen": ["Verunreinigung1", "Verunreinigung2"]
+ }
+ ```
+ Stelle sicher, dass die drei "`" am Anfang und am Ende vorhanden sind. Prozentangaben sind wegzulassen. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
prompt_vision.md ADDED
@@ -0,0 +1 @@
+ Im Bild ist die Rückseite einer Verpackung zu sehen. Auf dieser stehen die Zutaten (auch Inhaltsstoffe genannt) und wahrscheinlich auch die Verunreinigungen (auch "Enthält" oder "Kann Spuren von enthalten" genannt) geschrieben. Liste diese auf. Falls mehrere Sprachen vorhanden sind, verwende Deutsch.
sometimes_animal.json ADDED
@@ -0,0 +1,86 @@
+ [
+ "E101",
+ "E104",
+ "E153",
+ "E160",
+ "E161",
+ "E236",
+ "E237",
+ "E238",
+ "E252",
+ "E270",
+ "E304",
+ "E322",
+ "E325",
+ "E326",
+ "E327",
+ "E328",
+ "E329",
+ "E422",
+ "E430",
+ "E431",
+ "E432",
+ "E433",
+ "E434",
+ "E435",
+ "E436",
+ "E442",
+ "E445",
+ "E470",
+ "E471",
+ "E472",
+ "E473",
+ "E474",
+ "E475",
+ "E476",
+ "E477",
+ "E478",
+ "E479",
+ "E481",
+ "E482",
+ "E483",
+ "E484",
+ "E491",
+ "E492",
+ "E493",
+ "E494",
+ "E495",
+ "E570",
+ "E572",
+ "E585",
+ "E626",
+ "E627",
+ "E628",
+ "E629",
+ "E630",
+ "E631",
+ "E632",
+ "E633",
+ "E634",
+ "E635",
+ "E636",
+ "E637",
+ "E640",
+ "E910",
+ "E920",
+ "E921",
+ "E966",
+ "E1000",
+ "E1105",
+ "E1518",
+ "Milchsäure",
+ "Omega-3",
+ "Vitamin D2",
+ "Vitamin D3",
+ "Bier",
+ "Wein",
+ "Pommes",
+ "Pommes Frites",
+ "Marshmallows",
+ "Gummy Bärchen",
+ "Schokolade",
+ "Trüffel",
+ "Kaugummi",
+ "Pesto",
+ "Worcestershire Sauce"
+ ]
utils.py ADDED
@@ -0,0 +1,139 @@
+ import torch
+ import torchvision
+ import os
+ import json
+ from PIL import Image
+ from datetime import datetime
+
+
+ __all__ = [
+     'current_time',
+     'relative_path',
+     'NeuralNet',
+     'DEVICE',
+     'IMAGE_SIZE',
+     'TRANSFORM',
+     'MARGIN',
+     'GRID_SIZE',
+     'decrease_size',
+     'PROMPT_LLM',
+     'PROMPT_CLAUDE',
+     'PROMPT_VISION',
+     'EOS',
+     'GRAMMAR',
+     'SYSTEM_PROMPT',
+     'ANIMAL',
+     'SOMETIMES_ANIMAL',
+     'MILK',
+     'GLUTEN',
+     'LEGAL_NOTICE',
+ ]
+
+
+ MARGIN = 0.1
+ GRID_SIZE = 4096
+ DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+ IMAGE_SIZE = (224, 224)
+ TRANSFORM = torchvision.transforms.Compose([
+     torchvision.transforms.ToTensor(),
+ ])
+ with open('prompt_llm.md', 'r', encoding='utf-8') as _f:
+     PROMPT_LLM = _f.read()
+ with open('prompt_claude.md', 'r', encoding='utf-8') as _f:
+     PROMPT_CLAUDE = _f.read()
+ with open('prompt_vision.md', 'r', encoding='utf-8') as _f:
+     PROMPT_VISION = _f.read()
+ EOS = '\n<|im_end|>'
+ SYSTEM_PROMPT = 'Du bist ein hilfreicher Assistent.'
+ with open('grammar.gbnf', 'r', encoding='utf-8') as _f:
+     GRAMMAR = _f.read()
+ with open('animal.json', 'r', encoding='utf-8') as _f:
+     ANIMAL = json.load(_f)
+ with open('sometimes_animal.json', 'r', encoding='utf-8') as _f:
+     SOMETIMES_ANIMAL = json.load(_f)
+ with open('milk.json', 'r', encoding='utf-8') as _f:
+     MILK = json.load(_f)
+ with open('gluten.json', 'r', encoding='utf-8') as _f:
+     GLUTEN = json.load(_f)
+
+
+ LEGAL_NOTICE = ('Dieses Programm ist nur für Forschungszwecke gedacht. Fehler können nicht ausgeschlossen werden und '
+                 'sind wahrscheinlich vorhanden. Die Erkennung von Zutaten und Verunreinigungen ist nur zum schnellen '
+                 'Aussortieren und nicht zum Überprüfen gedacht.')
+
+
+ def current_time() -> str:
+     return datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+
+
+ def relative_path(string: str) -> str:
+     return os.path.join(os.path.dirname(__file__), string)
+
+
+ class NeuralNet(torch.nn.Module):
+     def __init__(self):
+         super(NeuralNet, self).__init__()
+
+         # Load pre-trained ResNet model
+         self.backbone = torchvision.models.resnet18(pretrained=True)
+
+         # Modify the last layer to output 12 values
+         self.backbone.fc = torch.nn.Linear(self.backbone.fc.in_features, 12)
+
+         # Add a custom head for key-point detection
+         self.head = torch.nn.Sequential(
+             torch.nn.Conv2d(512, 256, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(256, 128, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(128, 64, kernel_size=3, padding=1),
+             torch.nn.ReLU(inplace=True),
+             torch.nn.Conv2d(64, 12, kernel_size=1),
+             torch.nn.AdaptiveAvgPool2d(1)
+         )
+
+     def forward(self, x):
+         # Check if we need to unsqueeze
+         if len(x.shape) == 3:  # Shape [C, H, W]
+             x = x.unsqueeze(0)  # Shape [1, C, H, W]
+
+         # Resize input to match ResNet input size if necessary
+         if x.shape[-2:] != (224, 224):
+             x = torch.nn.functional.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)
+
+         # Pass input through the backbone
+         x = self.backbone.conv1(x)
+         x = self.backbone.bn1(x)
+         x = self.backbone.relu(x)
+         x = self.backbone.maxpool(x)
+
+         x = self.backbone.layer1(x)
+         x = self.backbone.layer2(x)
+         x = self.backbone.layer3(x)
+         x = self.backbone.layer4(x)
+
+         # Pass input through the custom head
+         x = self.head(x)
+
+         # Flatten the output
+         x = x.view(x.size(0), -1)
+
+         return x
+
+
+ def decrease_size(input_path, output_path, max_size, max_side):
+     with Image.open(input_path) as img:
+         original_size = os.path.getsize(input_path)
+         width, height = img.size
+         if original_size <= max_size and width <= max_side and height <= max_side:
+             img.save(output_path, format=output_path.split('.')[-1].upper())
+             print("Image is already below the maximum size.")
+             return
+         while width > 24 and height > 24:
+             img_resized = img.resize((width, height), Image.Resampling.LANCZOS)
+             img_resized.save(output_path, format=output_path.split('.')[-1].upper())
+             if os.path.getsize(output_path) <= max_size and width <= max_side and height <= max_side:
+                 print(f"Reduced image size to {os.path.getsize(output_path)} bytes.")
+                 break
+             width, height = int(width * 0.9), int(height * 0.9)
+         if os.path.getsize(output_path) > max_size:
+             raise ValueError("Could not reduce PNG size below max_size by reducing resolution.")
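For reference, a short usage sketch of `decrease_size` as main.py calls it: the function re-saves the image, shrinking both sides by 10% per iteration until the output is below `max_size` bytes and `max_side` pixels per side. The file names below are placeholders:

```python
from utils import decrease_size

# Convert a large PNG scan into a WEBP that fits the 5 MB / 8000 px limits used in main.py.
decrease_size('label_scan.png', 'label_scan.webp', max_size=5_000_000, max_side=8_000)
```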
vision_model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cff3b777fa2d32b7e317838db03bf5bd99033b949ee987feb6ea7b052f3cb029
+ size 51013558