nbaldwin committed on
Commit
c296fdd
·
1 Parent(s): ef59935

vision module first version

Browse files
Files changed (5) hide show
  1. .gitignore +443 -0
  2. VisionAtomicFlow.py +87 -0
  3. VisionAtomicFlow.yaml +49 -0
  4. __init__.py +7 -0
  5. pip_requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python,java,c++,pycharm,visualstudiocode,macos,linux,windows
3
+
4
+ ### C++ ###
5
+ # Prerequisites
6
+ *.d
7
+
8
+ # Compiled Object files
9
+ *.slo
10
+ *.lo
11
+ *.o
12
+ *.obj
13
+
14
+ # Precompiled Headers
15
+ *.gch
16
+ *.pch
17
+
18
+ # Compiled Dynamic libraries
19
+ *.so
20
+ *.dylib
21
+ *.dll
22
+
23
+ # Fortran module files
24
+ *.mod
25
+ *.smod
26
+
27
+ # Compiled Static libraries
28
+ *.lai
29
+ *.la
30
+ *.a
31
+ *.lib
32
+
33
+ # Executables
34
+ *.exe
35
+ *.out
36
+ *.app
37
+
38
+ ### Java ###
39
+ # Compiled class file
40
+ *.class
41
+
42
+ # Log file
43
+ *.log
44
+
45
+ # BlueJ files
46
+ *.ctxt
47
+
48
+ # Mobile Tools for Java (J2ME)
49
+ .mtj.tmp/
50
+
51
+ # Package Files #
52
+ *.jar
53
+ *.war
54
+ *.nar
55
+ *.ear
56
+ *.zip
57
+ *.tar.gz
58
+ *.rar
59
+
60
+ # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
61
+ hs_err_pid*
62
+ replay_pid*
63
+
64
+ ### Linux ###
65
+ *~
66
+
67
+ # temporary files which can be created if a process still has a handle open of a deleted file
68
+ .fuse_hidden*
69
+
70
+ # KDE directory preferences
71
+ .directory
72
+
73
+ # Linux trash folder which might appear on any partition or disk
74
+ .Trash-*
75
+
76
+ # .nfs files are created when an open file is removed but is still being accessed
77
+ .nfs*
78
+
79
+ ### macOS ###
80
+ # General
81
+ .DS_Store
82
+ .AppleDouble
83
+ .LSOverride
84
+
85
+ # Icon must end with two \r
86
+ Icon
87
+
88
+
89
+ # Thumbnails
90
+ ._*
91
+
92
+ # Files that might appear in the root of a volume
93
+ .DocumentRevisions-V100
94
+ .fseventsd
95
+ .Spotlight-V100
96
+ .TemporaryItems
97
+ .Trashes
98
+ .VolumeIcon.icns
99
+ .com.apple.timemachine.donotpresent
100
+
101
+ # Directories potentially created on remote AFP share
102
+ .AppleDB
103
+ .AppleDesktop
104
+ Network Trash Folder
105
+ Temporary Items
106
+ .apdisk
107
+
108
+ ### macOS Patch ###
109
+ # iCloud generated files
110
+ *.icloud
111
+
112
+ ### PyCharm ###
113
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
114
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
115
+
116
+ # User-specific stuff
117
+ .idea/**/workspace.xml
118
+ .idea/**/tasks.xml
119
+ .idea/**/usage.statistics.xml
120
+ .idea/**/dictionaries
121
+ .idea/**/shelf
122
+
123
+ # AWS User-specific
124
+ .idea/**/aws.xml
125
+
126
+ # Generated files
127
+ .idea/**/contentModel.xml
128
+
129
+ # Sensitive or high-churn files
130
+ .idea/**/dataSources/
131
+ .idea/**/dataSources.ids
132
+ .idea/**/dataSources.local.xml
133
+ .idea/**/sqlDataSources.xml
134
+ .idea/**/dynamic.xml
135
+ .idea/**/uiDesigner.xml
136
+ .idea/**/dbnavigator.xml
137
+
138
+ # Gradle
139
+ .idea/**/gradle.xml
140
+ .idea/**/libraries
141
+
142
+ # Gradle and Maven with auto-import
143
+ # When using Gradle or Maven with auto-import, you should exclude module files,
144
+ # since they will be recreated, and may cause churn. Uncomment if using
145
+ # auto-import.
146
+ # .idea/artifacts
147
+ # .idea/compiler.xml
148
+ # .idea/jarRepositories.xml
149
+ # .idea/modules.xml
150
+ # .idea/*.iml
151
+ # .idea/modules
152
+ # *.iml
153
+ # *.ipr
154
+
155
+ # CMake
156
+ cmake-build-*/
157
+
158
+ # Mongo Explorer plugin
159
+ .idea/**/mongoSettings.xml
160
+
161
+ # File-based project format
162
+ *.iws
163
+
164
+ # IntelliJ
165
+ out/
166
+
167
+ # mpeltonen/sbt-idea plugin
168
+ .idea_modules/
169
+
170
+ # JIRA plugin
171
+ atlassian-ide-plugin.xml
172
+
173
+ # Cursive Clojure plugin
174
+ .idea/replstate.xml
175
+
176
+ # SonarLint plugin
177
+ .idea/sonarlint/
178
+
179
+ # Crashlytics plugin (for Android Studio and IntelliJ)
180
+ com_crashlytics_export_strings.xml
181
+ crashlytics.properties
182
+ crashlytics-build.properties
183
+ fabric.properties
184
+
185
+ # Editor-based Rest Client
186
+ .idea/httpRequests
187
+
188
+ # Android studio 3.1+ serialized cache file
189
+ .idea/caches/build_file_checksums.ser
190
+
191
+ ### PyCharm Patch ###
192
+ # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
193
+
194
+ # *.iml
195
+ # modules.xml
196
+ # .idea/misc.xml
197
+ # *.ipr
198
+
199
+ # Sonarlint plugin
200
+ # https://plugins.jetbrains.com/plugin/7973-sonarlint
201
+ .idea/**/sonarlint/
202
+
203
+ # SonarQube Plugin
204
+ # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
205
+ .idea/**/sonarIssues.xml
206
+
207
+ # Markdown Navigator plugin
208
+ # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
209
+ .idea/**/markdown-navigator.xml
210
+ .idea/**/markdown-navigator-enh.xml
211
+ .idea/**/markdown-navigator/
212
+
213
+ # Cache file creation bug
214
+ # See https://youtrack.jetbrains.com/issue/JBR-2257
215
+ .idea/$CACHE_FILE$
216
+
217
+ # CodeStream plugin
218
+ # https://plugins.jetbrains.com/plugin/12206-codestream
219
+ .idea/codestream.xml
220
+
221
+ # Azure Toolkit for IntelliJ plugin
222
+ # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
223
+ .idea/**/azureSettings.xml
224
+
225
+ ### Python ###
226
+ # Byte-compiled / optimized / DLL files
227
+ __pycache__/
228
+ *.py[cod]
229
+ *$py.class
230
+
231
+ # C extensions
232
+
233
+ # Distribution / packaging
234
+ .Python
235
+ build/
236
+ develop-eggs/
237
+ dist/
238
+ downloads/
239
+ eggs/
240
+ .eggs/
241
+ lib/
242
+ lib64/
243
+ parts/
244
+ sdist/
245
+ var/
246
+ wheels/
247
+ share/python-wheels/
248
+ *.egg-info/
249
+ .installed.cfg
250
+ *.egg
251
+ MANIFEST
252
+
253
+ # PyInstaller
254
+ # Usually these files are written by a python script from a template
255
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
256
+ *.manifest
257
+ *.spec
258
+
259
+ # Installer logs
260
+ pip-log.txt
261
+ pip-delete-this-directory.txt
262
+
263
+ # Unit test / coverage reports
264
+ htmlcov/
265
+ .tox/
266
+ .nox/
267
+ .coverage
268
+ .coverage.*
269
+ .cache
270
+ nosetests.xml
271
+ coverage.xml
272
+ *.cover
273
+ *.py,cover
274
+ .hypothesis/
275
+ .pytest_cache/
276
+ cover/
277
+
278
+ # Translations
279
+ *.mo
280
+ *.pot
281
+
282
+ # Django stuff:
283
+ local_settings.py
284
+ db.sqlite3
285
+ db.sqlite3-journal
286
+
287
+ # Flask stuff:
288
+ instance/
289
+ .webassets-cache
290
+
291
+ # Scrapy stuff:
292
+ .scrapy
293
+
294
+ # Sphinx documentation
295
+ docs/_build/
296
+
297
+ # PyBuilder
298
+ .pybuilder/
299
+ target/
300
+
301
+ # Jupyter Notebook
302
+ .ipynb_checkpoints
303
+
304
+ # IPython
305
+ profile_default/
306
+ ipython_config.py
307
+
308
+ # pyenv
309
+ # For a library or package, you might want to ignore these files since the code is
310
+ # intended to run in multiple environments; otherwise, check them in:
311
+ # .python-version
312
+
313
+ # pipenv
314
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
315
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
316
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
317
+ # install all needed dependencies.
318
+ #Pipfile.lock
319
+
320
+ # poetry
321
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
322
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
323
+ # commonly ignored for libraries.
324
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
325
+ #poetry.lock
326
+
327
+ # pdm
328
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
329
+ #pdm.lock
330
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
331
+ # in version control.
332
+ # https://pdm.fming.dev/#use-with-ide
333
+ .pdm.toml
334
+
335
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
336
+ __pypackages__/
337
+
338
+ # Celery stuff
339
+ celerybeat-schedule
340
+ celerybeat.pid
341
+
342
+ # SageMath parsed files
343
+ *.sage.py
344
+
345
+ # Environments
346
+ .env
347
+ .venv
348
+ env/
349
+ venv/
350
+ ENV/
351
+ env.bak/
352
+ venv.bak/
353
+
354
+ # Spyder project settings
355
+ .spyderproject
356
+ .spyproject
357
+
358
+ # Rope project settings
359
+ .ropeproject
360
+
361
+ # mkdocs documentation
362
+ /site
363
+
364
+ # mypy
365
+ .mypy_cache/
366
+ .dmypy.json
367
+ dmypy.json
368
+
369
+ # Pyre type checker
370
+ .pyre/
371
+
372
+ # pytype static type analyzer
373
+ .pytype/
374
+
375
+ # Cython debug symbols
376
+ cython_debug/
377
+
378
+ # PyCharm
379
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
380
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
381
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
382
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
383
+ #.idea/
384
+
385
+ ### Python Patch ###
386
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
387
+ poetry.toml
388
+
389
+ # ruff
390
+ .ruff_cache/
391
+
392
+ # LSP config files
393
+ pyrightconfig.json
394
+
395
+ ### VisualStudioCode ###
396
+ .vscode/*
397
+ !.vscode/settings.json
398
+ !.vscode/tasks.json
399
+ !.vscode/launch.json
400
+ !.vscode/extensions.json
401
+ !.vscode/*.code-snippets
402
+
403
+ # Local History for Visual Studio Code
404
+ .history/
405
+
406
+ # Built Visual Studio Code Extensions
407
+ *.vsix
408
+
409
+ ### VisualStudioCode Patch ###
410
+ # Ignore all local history of files
411
+ .history
412
+ .ionide
413
+
414
+ ### Windows ###
415
+ # Windows thumbnail cache files
416
+ Thumbs.db
417
+ Thumbs.db:encryptable
418
+ ehthumbs.db
419
+ ehthumbs_vista.db
420
+
421
+ # Dump file
422
+ *.stackdump
423
+
424
+ # Folder config file
425
+ [Dd]esktop.ini
426
+
427
+ # Recycle Bin used on file shares
428
+ $RECYCLE.BIN/
429
+
430
+ # Windows Installer files
431
+ *.cab
432
+ *.msi
433
+ *.msix
434
+ *.msm
435
+ *.msp
436
+
437
+ # Windows shortcuts
438
+ *.lnk
439
+
440
+ # End of https://www.toptal.com/developers/gitignore/api/python,java,c++,pycharm,visualstudiocode,macos,linux,windows
441
+
442
+ .*
443
+ flow_modules/
VisionAtomicFlow.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Dict, Any
3
+ from flow_modules.aiflows.OpenAIChatFlowModule import OpenAIChatAtomicFlow
4
+ from flows.utils.general_helpers import encode_image,encode_from_buffer
5
+ import cv2
6
+
7
+
8
class VisionAtomicFlow(OpenAIChatAtomicFlow):
    """Chat flow whose user messages can carry images and video frames.

    A user message is built as a list of content parts: one ``text`` part
    rendered from a prompt template, optionally followed by video frames and
    ``image_url`` parts described by ``input_data["data"]``.
    """

    @staticmethod
    def get_image(image):
        """Convert one image description into an ``image_url`` content part.

        Args:
            image: Mapping with a ``"type"`` key (``"local_path"`` or
                ``"url"``) and an ``"image"`` key holding the path or URL.

        Returns:
            ``{"type": "image_url", "image_url": {"url": ...}}`` where the URL
            is either the given URL or a base64 ``data:`` URL built from the
            local file.

        Raises:
            ValueError: If the image type is not supported, or if a local
                file has an extension with no known image media type.
        """
        extension_dict = {
            "jpg": "jpeg",
            "jpeg": "jpeg",
            "png": "png",
            "webp": "webp",
            "gif": "gif",
        }
        supported_image_types = ["local_path", "url"]

        image_type = image.get("type", None)
        # Raise instead of assert: asserts are stripped when Python runs with -O.
        if image_type not in supported_image_types:
            raise ValueError(
                f"Must define a valid image type for every image \n your type: {image_type} \n supported types{supported_image_types} "
            )

        if image_type == "url":
            return {"type": "image_url", "image_url": {"url": image.get("image")}}

        image_path = image.get("image")
        # Lower-case the extension so that e.g. "photo.JPG" is accepted too.
        extension = image_path.split(".")[-1].lower()
        if extension not in extension_dict:
            raise ValueError(
                f"Unsupported image extension '{extension}' for '{image_path}'; "
                f"supported extensions: {sorted(extension_dict)}"
            )

        # encode_image is a project helper; presumably it returns the file
        # content as a base64 string -- TODO confirm.
        encoded_image = encode_image(image_path)
        # RFC 2397 data URL: no whitespace between "base64," and the payload.
        url = f"data:image/{extension_dict[extension]};base64,{encoded_image}"
        return {"type": "image_url", "image_url": {"url": url}}

    @staticmethod
    def _encode_video_frames(video_path, start_frame, end_frame, frame_step_size):
        """Return the encoded frames ``all_frames[start_frame:end_frame:frame_step_size]``.

        Each selected frame is JPEG-encoded and passed through
        ``encode_from_buffer``. When the slice can be resolved without knowing
        the total number of frames (non-negative bounds, positive step), only
        the selected frames are encoded and reading stops at ``end_frame``.
        Otherwise every frame is encoded and the list is sliced afterwards.
        """

        def _is_forward_index(value):
            return value is None or (isinstance(value, int) and value >= 0)

        can_stream = (
            _is_forward_index(start_frame)
            and _is_forward_index(end_frame)
            and (
                frame_step_size is None
                or (isinstance(frame_step_size, int) and frame_step_size > 0)
            )
        )
        if can_stream:
            first = start_frame or 0
            step = frame_step_size or 1

        encoded_frames = []
        capture = cv2.VideoCapture(video_path)
        try:
            frame_index = 0
            # If the video cannot be opened the loop body never runs and no
            # frames are returned.
            while capture.isOpened():
                if can_stream and end_frame is not None and frame_index >= end_frame:
                    break
                success, frame = capture.read()
                if not success:
                    break
                selected = not can_stream or (
                    frame_index >= first and (frame_index - first) % step == 0
                )
                if selected:
                    _, buffer = cv2.imencode(".jpg", frame)
                    encoded_frames.append(encode_from_buffer(buffer))
                frame_index += 1
        finally:
            # Always free the capture handle, even if encoding raised.
            capture.release()

        if can_stream:
            return encoded_frames
        # Negative bounds/steps need the full list; let slicing resolve them
        # (and raise its usual errors for invalid values).
        return encoded_frames[start_frame:end_frame:frame_step_size]

    @staticmethod
    def get_video(video):
        """Turn a video description into an iterable of frame content parts.

        Args:
            video: Mapping with a required ``"video_path"`` key and the
                optional keys ``"resize"`` (default 768), ``"frame_step_size"``
                (default 10), ``"start_frame"`` (default 0) and
                ``"end_frame"`` (default ``None``). The three frame keys are
                used as a slice over the video's frames.

        Returns:
            A ``map`` iterator yielding ``{"image": <encoded frame>,
            "resize": <resize>}`` for every selected frame.
        """
        video_path = video["video_path"]
        resize = video.get("resize", 768)
        frame_step_size = video.get("frame_step_size", 10)
        start_frame = video.get("start_frame", 0)
        end_frame = video.get("end_frame", None)

        selected_frames = VisionAtomicFlow._encode_video_frames(
            video_path, start_frame, end_frame, frame_step_size
        )
        return map(lambda encoded: {"image": encoded, "resize": resize}, selected_frames)

    @staticmethod
    def get_user_message(prompt_template, input_data: Dict[str, Any]):
        """Build the content parts of a user message.

        Args:
            prompt_template: Template used to render the text part.
            input_data: Input of the flow. Its optional ``"data"`` entry may
                contain a ``"video"`` description and/or an ``"images"`` list.

        Returns:
            A list starting with the text part, followed by the video frames
            (if any) and then the image parts (if any).
        """
        content = VisionAtomicFlow._get_message(
            prompt_template=prompt_template, input_data=input_data
        )
        # A text-only message carries no media; treat a missing entry as empty.
        media_data = input_data.get("data") or {}
        if "video" in media_data:
            content = [content[0], *VisionAtomicFlow.get_video(media_data["video"])]
        if "images" in media_data:
            images = [VisionAtomicFlow.get_image(image) for image in media_data["images"]]
            content.extend(images)
        return content

    @staticmethod
    def _get_message(prompt_template, input_data: Dict[str, Any]):
        """Render ``prompt_template`` with ``input_data`` into a single text part.

        Raises:
            KeyError: If one of the template's input variables is missing
                from ``input_data``.
        """
        template_kwargs = {
            input_variable: input_data[input_variable]
            for input_variable in prompt_template.input_variables
        }
        msg_content = prompt_template.format(**template_kwargs)
        return [{"type": "text", "text": msg_content}]

    def _process_input(self, input_data: Dict[str, Any]):
        """Add the user message built from ``input_data`` to the chat history.

        On the first call the conversation is initialized first, and the
        ``init_human_message_prompt_template`` is used when it is set; in all
        other cases the ``human_message_prompt_template`` is used.
        """
        if self._is_conversation_initialized():
            prompt_template = self.human_message_prompt_template
        else:
            # Initialize the conversation (add the system message, and potentially the demonstrations)
            self._initialize_conversation(input_data)
            prompt_template = getattr(self, "init_human_message_prompt_template", None)
            if prompt_template is None:
                # No dedicated first-message template: use the regular one.
                prompt_template = self.human_message_prompt_template

        user_message_content = self.get_user_message(prompt_template, input_data)
        self._state_update_add_chat_message(
            role=self.flow_config["user_name"],
            content=user_message_content,
        )
VisionAtomicFlow.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This is an abstract flow, therefore some required fields are not defined (and must be defined by the concrete flow)
2
+ enable_cache: True
3
+
4
+ n_api_retries: 6
5
+ wait_time_between_retries: 20
6
+
7
+ system_name: system
8
+ user_name: user
9
+ assistant_name: assistant
10
+
11
+ backend:
12
+ _target_: flows.backends.llm_lite.LiteLLMBackend
13
+ api_infos: ???
14
+ model_name: "gpt-4-vision-preview"
15
+ n: 1
16
+ max_tokens: 2000
17
+ temperature: 0.3
18
+ top_p: 0.2
19
+ frequency_penalty: 0
20
+ presence_penalty: 0
21
+
22
+ system_message_prompt_template:
23
+ _target_: flows.prompt_template.JinjaPrompt
24
+
25
+ init_human_message_prompt_template:
26
+ _target_: flows.prompt_template.JinjaPrompt
27
+
28
+ human_message_prompt_template:
29
+ _target_: flows.prompt_template.JinjaPrompt
30
+ template: "{{query}}"
31
+ input_variables:
32
+ - "query"
33
+ input_interface_initialized:
34
+ - "query"
35
+ - "data"
36
+
37
+ query_message_prompt_template:
38
+ _target_: flows.prompt_template.JinjaPrompt
39
+
40
+
41
+ previous_messages:
42
+ first_k: null # Note that the first message is the system prompt
43
+ last_k: null
44
+
45
+ demonstrations: null
46
+ demonstrations_response_template: null
47
+
48
+ output_interface:
49
+ - "api_output"
__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# ~~~ Dependencies of this flow module ~~~
# They are handed to flow_verse.sync_dependencies before the flow class is
# imported, because VisionAtomicFlow imports from
# flow_modules.aiflows.OpenAIChatFlowModule.
dependencies = [
    {
        "url": "aiflows/OpenAIChatFlowModule",
        "revision": "eeec09b71e967ce426553e2300c5689f6ea6a662",
    },
]
from flows import flow_verse

flow_verse.sync_dependencies(dependencies)

from .VisionAtomicFlow import VisionAtomicFlow
pip_requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ opencv-python==4.8.1.78