Vision-CAIR committed on
Commit
be10055
•
1 Parent(s): 639067b

MiniGPT-Med

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.1495.1517874291.249176.jpg +0 -0
  3. Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.16254.1517874395.786150.jpg +0 -0
  4. Med_examples_v2/1.2.840.113654.2.55.48339325922382839066544590341580673064.png +0 -0
  5. Med_examples_v2/1.3.6.1.4.1.14519.5.2.1.7009.9004.242286124999058976921785904029.png +0 -0
  6. Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg +3 -0
  7. Med_examples_v2/synpic33889.jpg +0 -0
  8. Med_examples_v2/synpic50958.jpg +0 -0
  9. Med_examples_v2/synpic56061.jpg +0 -0
  10. Med_examples_v2/synpic58547.jpg +0 -0
  11. Med_examples_v2/synpic60423.jpg +0 -0
  12. Med_examples_v2/synpic676.jpg +0 -0
  13. Med_examples_v2/xmlab149/source.jpg +0 -0
  14. Med_examples_v2/xmlab589/source.jpg +0 -0
  15. README.md +51 -13
  16. dcgm/bash/34649895/dcgm-gpu-stats-gpu202-02-r-34649895.out +39 -0
  17. dcgm/bash/34673507/dcgm-gpu-stats-gpu201-23-l-34673507.out +39 -0
  18. dcgm/bash/34676162/dcgm-gpu-stats-gpu201-23-l-34676162.out +39 -0
  19. dcgm/bash/34691276/dcgm-gpu-stats-gpu201-09-l-34691276.out +42 -0
  20. dcgm/bash/34709014/dcgm-gpu-stats-gpu109-16-l-34709014.out +39 -0
  21. dcgm/bash/34721198/dcgm-gpu-stats-gpu203-23-r-34721198.out +57 -0
  22. dcgm/bash/34734121/dcgm-gpu-stats-gpu201-23-l-34734121.out +35 -0
  23. dcgm/bash/34738689/dcgm-gpu-stats-gpu201-16-r-34738689.out +35 -0
  24. dcgm/bash/34757693/dcgm-gpu-stats-gpu202-16-r-34757693.out +42 -0
  25. demo_v2.py +648 -0
  26. environment.yml +35 -0
  27. eval_configs/minigptv2_benchmark_evaluation.yaml +69 -0
  28. eval_configs/minigptv2_eval.yaml +24 -0
  29. eval_scripts/.DS_Store +0 -0
  30. eval_scripts/__pycache__/IoU.cpython-39.pyc +0 -0
  31. eval_scripts/__pycache__/clean_json.cpython-39.pyc +0 -0
  32. eval_scripts/__pycache__/metrics.cpython-39.pyc +0 -0
  33. eval_scripts/clean_json.py +74 -0
  34. eval_scripts/metrics.py +164 -0
  35. eval_scripts/model_evaluation.py +274 -0
  36. miniGPTV2.yml +35 -0
  37. miniGPT_Med_.pth +3 -0
  38. minigpt4/.DS_Store +0 -0
  39. minigpt4/__init__.py +31 -0
  40. minigpt4/__pycache__/__init__.cpython-310.pyc +0 -0
  41. minigpt4/__pycache__/__init__.cpython-39.pyc +0 -0
  42. minigpt4/common/.DS_Store +0 -0
  43. minigpt4/common/__init__.py +0 -0
  44. minigpt4/common/__pycache__/__init__.cpython-310.pyc +0 -0
  45. minigpt4/common/__pycache__/__init__.cpython-39.pyc +0 -0
  46. minigpt4/common/__pycache__/config.cpython-310.pyc +0 -0
  47. minigpt4/common/__pycache__/config.cpython-39.pyc +0 -0
  48. minigpt4/common/__pycache__/dist_utils.cpython-310.pyc +0 -0
  49. minigpt4/common/__pycache__/dist_utils.cpython-39.pyc +0 -0
  50. minigpt4/common/__pycache__/eval_utils.cpython-39.pyc +0 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  MiniGPT-Med-github/Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  MiniGPT-Med-github/Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg filter=lfs diff=lfs merge=lfs -text
37
+ Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg filter=lfs diff=lfs merge=lfs -text
Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.1495.1517874291.249176.jpg ADDED
Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.16254.1517874395.786150.jpg ADDED
Med_examples_v2/1.2.840.113654.2.55.48339325922382839066544590341580673064.png ADDED
Med_examples_v2/1.3.6.1.4.1.14519.5.2.1.7009.9004.242286124999058976921785904029.png ADDED
Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg ADDED

Git LFS Details

  • SHA256: 94a8259f7b596eb34fd18375913ec0b17d0ae1e2bdb56467a236aa5cc7557ec1
  • Pointer size: 132 Bytes
  • Size of remote file: 1.95 MB
Med_examples_v2/synpic33889.jpg ADDED
Med_examples_v2/synpic50958.jpg ADDED
Med_examples_v2/synpic56061.jpg ADDED
Med_examples_v2/synpic58547.jpg ADDED
Med_examples_v2/synpic60423.jpg ADDED
Med_examples_v2/synpic676.jpg ADDED
Med_examples_v2/xmlab149/source.jpg ADDED
Med_examples_v2/xmlab589/source.jpg ADDED
README.md CHANGED
@@ -1,13 +1,51 @@
1
- ---
2
- title: MiniGPT Med
3
- emoji: 🌍
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 4.41.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiniGPT-Med: Large Language Model as a General Interface for Radiology Diagnosis
2
+ Asma Alkhaldi, Raneem Alnajim, Layan Alabdullatef, Rawan Alyahya, Jun Chen, Deyao Zhu, Ahmed Alsinan, Mohamed Elhoseiny
3
+
4
+ *Saudi Data and Artificial Intelligence Authority (SDAIA) and King Abdullah University of Science and Technology (KAUST)*
5
+
6
+ ## Installation
7
+ ```
8
+ git clone https://github.com/Vision-CAIR/MiniGPT-Med
9
+ cd MiniGPT-Med
10
+ conda env create -f environment.yml
11
+ conda activate miniGPT-Med
12
+ ```
13
+
14
+ ## Download miniGPT-Med trained model weights
15
+
16
+ * miniGPT-Med's weights [miniGPT-Med Model](https://drive.google.com/file/d/1kjGLk6s9LsBmXfLWQFCdlwF3aul08Cl8/view?usp=sharing)
17
+
18
+ * Then modify line 8 at miniGPT-Med/eval_configs/minigptv2_eval.yaml to be the path of miniGPT-Med weight.
19
+
20
+ ## Prepare weight for LLMs
21
+
22
+ ### Llama2 Version
23
+
24
+ ```shell
25
+ git clone https://huggingface.co/meta-llama/Llama-2-13b-chat-hf
26
+ ```
27
+
28
+ Then modify line 14 at miniGPT-Med/minigpt4/configs/models/minigpt_v2.yaml to be the path of Llama-2-13b-chat-hf.
29
+
30
+ ## Launching Demo Locally
31
+
32
+ ```
33
+ python demo_v2.py --cfg-path eval_configs/minigptv2_eval.yaml --gpu-id 0
34
+ ```
35
+
36
+ ## Dataset
37
+ | Dataset | Images | json file|
38
+ |---------|---------|----------|
39
+ | MIMIC |[Download](https://physionet.org/content/mimiciii/1.4/) | [Download](https://drive.google.com/drive/folders/1nZhdfNoh7fkx7CWvf0_47_OLv3tA2m3o?usp=sharing) |
40
+ | NLST |[Download](https://wiki.cancerimagingarchive.net/display/NLST)| [Download](https://drive.google.com/drive/folders/1OKgMTaGLu_dWRuco6JipYzezw3oNwgaz?usp=sharing) |
41
+ |SLAKE |[Download](https://www.med-vqa.com/slake/) |[Download](https://drive.google.com/drive/folders/1vstjmfRbKahSAsi_b6FmTQiuolvgO8oC?usp=sharing)|
42
+ |RSNA |[Download](https://www.rsna.org/rsnai/ai-image-challenge/rsna-pneumonia-detection-challenge-2018) | [Download](https://drive.google.com/drive/folders/1wkXPvUNqda6jWAIduyiVJkS3Tx7P7td8?usp=sharing) |
43
+ |Rad-VQA |[Download](https://osf.io/89kps/) |[Download](https://drive.google.com/drive/folders/1ING6Dodwk2DU_t4GHQYudNFMMg9OMfBQ?usp=sharing) |
44
+
45
+ ## Acknowledgement
46
+
47
+ - MiniGPT-4
48
+ - Lavis
49
+ - Vicuna
50
+ - Falcon
51
+ - Llama 2
dcgm/bash/34649895/dcgm-gpu-stats-gpu202-02-r-34649895.out ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34649895.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 0 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Tue Jul 9 09:29:46 2024 |
7
+ | End Time | Wed Jul 10 09:30:32 2024 |
8
+ | Total Execution Time (sec) | 86445.3 |
9
+ | No. of Processes | 1 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 232291 |
12
+ | Power Usage (Watts) | Avg: 65.6704, Max: 84.315, Min: 61.555 |
13
+ | Max GPU Memory Used (bytes) | 10104078336 |
14
+ | SM Clock (MHz) | Avg: 595, Max: 1155, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 1548651 |
34
+ | Avg SM Utilization (%) | 0 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ +----- Overall Health -------------+-----------------------------------------+
37
+ | Overall Health | Healthy |
38
+ +------------------------------------+-----------------------------------------+
39
+
dcgm/bash/34673507/dcgm-gpu-stats-gpu201-23-l-34673507.out ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34673507.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 1 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Fri Jul 12 11:48:45 2024 |
7
+ | End Time | Sat Jul 13 11:49:39 2024 |
8
+ | Total Execution Time (sec) | 86454.5 |
9
+ | No. of Processes | 1 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 252136 |
12
+ | Power Usage (Watts) | Avg: 69.7762, Max: 70.022, Min: 69.151 |
13
+ | Max GPU Memory Used (bytes) | 10104078336 |
14
+ | SM Clock (MHz) | Avg: 1157, Max: 1410, Min: 1155 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 2527521 |
34
+ | Avg SM Utilization (%) | 0 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ +----- Overall Health -------------+-----------------------------------------+
37
+ | Overall Health | Healthy |
38
+ +------------------------------------+-----------------------------------------+
39
+
dcgm/bash/34676162/dcgm-gpu-stats-gpu201-23-l-34676162.out ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34676162.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 3 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Sun Jul 14 07:57:08 2024 |
7
+ | End Time | Mon Jul 15 07:57:59 2024 |
8
+ | Total Execution Time (sec) | 86450.6 |
9
+ | No. of Processes | 1 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 249997 |
12
+ | Power Usage (Watts) | Avg: 82.8167, Max: 86.615, Min: 70.491 |
13
+ | Max GPU Memory Used (bytes) | 10104078336 |
14
+ | SM Clock (MHz) | Avg: 1352, Max: 1410, Min: 1080 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 3048225 |
34
+ | Avg SM Utilization (%) | 0 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ +----- Overall Health -------------+-----------------------------------------+
37
+ | Overall Health | Healthy |
38
+ +------------------------------------+-----------------------------------------+
39
+
dcgm/bash/34691276/dcgm-gpu-stats-gpu201-09-l-34691276.out ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34691276.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 0 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Tue Jul 16 08:21:43 2024 |
7
+ | End Time | Tue Jul 16 21:44:34 2024 |
8
+ | Total Execution Time (sec) | 48170.9 |
9
+ | No. of Processes | 2 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 222759 |
12
+ | Power Usage (Watts) | Avg: 61.4158, Max: 61.683, Min: 61.349 |
13
+ | Max GPU Memory Used (bytes) | 10806624256 |
14
+ | SM Clock (MHz) | Avg: 210, Max: 225, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 1958147 |
34
+ | Avg SM Utilization (%) | 1 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ | PID | 2068287 |
37
+ | Avg SM Utilization (%) | 0 |
38
+ | Avg Memory Utilization (%) | 0 |
39
+ +----- Overall Health -------------+-----------------------------------------+
40
+ | Overall Health | Healthy |
41
+ +------------------------------------+-----------------------------------------+
42
+
dcgm/bash/34709014/dcgm-gpu-stats-gpu109-16-l-34709014.out ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34709014.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 3 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Thu Jul 18 07:54:11 2024 |
7
+ | End Time | Fri Jul 19 07:55:07 2024 |
8
+ | Total Execution Time (sec) | 86456.3 |
9
+ | No. of Processes | 1 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 245376 |
12
+ | Power Usage (Watts) | Avg: 67.8347, Max: 68.156, Min: 67.563 |
13
+ | Max GPU Memory Used (bytes) | 10582228992 |
14
+ | SM Clock (MHz) | Avg: 1161, Max: 1410, Min: 1155 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 4005887 |
34
+ | Avg SM Utilization (%) | 0 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ +----- Overall Health -------------+-----------------------------------------+
37
+ | Overall Health | Healthy |
38
+ +------------------------------------+-----------------------------------------+
39
+
dcgm/bash/34721198/dcgm-gpu-stats-gpu203-23-r-34721198.out ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34721198.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 1 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Fri Jul 19 21:34:44 2024 |
7
+ | End Time | Sat Jul 20 00:01:06 2024 |
8
+ | Total Execution Time (sec) | 8782.23 |
9
+ | No. of Processes | 7 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 225540 |
12
+ | Power Usage (Watts) | Avg: 75.9496, Max: 87.541, Min: 65.792 |
13
+ | Max GPU Memory Used (bytes) | 13356761088 |
14
+ | SM Clock (MHz) | Avg: 210, Max: 210, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 866309 |
34
+ | Avg SM Utilization (%) | 0 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ | PID | 866955 |
37
+ | Avg SM Utilization (%) | 1 |
38
+ | Avg Memory Utilization (%) | 0 |
39
+ | PID | 868076 |
40
+ | Avg SM Utilization (%) | 0 |
41
+ | Avg Memory Utilization (%) | 0 |
42
+ | PID | 868638 |
43
+ | Avg SM Utilization (%) | 5 |
44
+ | Avg Memory Utilization (%) | 0 |
45
+ | PID | 869519 |
46
+ | Avg SM Utilization (%) | 0 |
47
+ | Avg Memory Utilization (%) | 0 |
48
+ | PID | 871043 |
49
+ | Avg SM Utilization (%) | 1 |
50
+ | Avg Memory Utilization (%) | 0 |
51
+ | PID | 871322 |
52
+ | Avg SM Utilization (%) | 0 |
53
+ | Avg Memory Utilization (%) | 0 |
54
+ +----- Overall Health -------------+-----------------------------------------+
55
+ | Overall Health | Healthy |
56
+ +------------------------------------+-----------------------------------------+
57
+
dcgm/bash/34734121/dcgm-gpu-stats-gpu201-23-l-34734121.out ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34734121.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 3 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Tue Jul 23 11:47:49 2024 |
7
+ | End Time | Tue Jul 23 13:47:51 2024 |
8
+ | Total Execution Time (sec) | 7202.22 |
9
+ | No. of Processes | 0 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 226384 |
12
+ | Power Usage (Watts) | Avg: 62.6807, Max: 81.445, Min: 62.015 |
13
+ | Max GPU Memory Used (bytes) | 0 |
14
+ | SM Clock (MHz) | Avg: 220, Max: 1410, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +----- Overall Health -------------+-----------------------------------------+
33
+ | Overall Health | Healthy |
34
+ +------------------------------------+-----------------------------------------+
35
+
dcgm/bash/34738689/dcgm-gpu-stats-gpu201-16-r-34738689.out ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34738689.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 3 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Wed Jul 24 10:14:38 2024 |
7
+ | End Time | Wed Jul 24 11:45:33 2024 |
8
+ | Total Execution Time (sec) | 5454.69 |
9
+ | No. of Processes | 0 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 232516 |
12
+ | Power Usage (Watts) | Avg: 64.2532, Max: 64.329, Min: 63.938 |
13
+ | Max GPU Memory Used (bytes) | 0 |
14
+ | SM Clock (MHz) | Avg: 210, Max: 210, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +----- Overall Health -------------+-----------------------------------------+
33
+ | Overall Health | Healthy |
34
+ +------------------------------------+-----------------------------------------+
35
+
dcgm/bash/34757693/dcgm-gpu-stats-gpu202-16-r-34757693.out ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Successfully retrieved statistics for job: 34757693.
2
+ +------------------------------------------------------------------------------+
3
+ | GPU ID: 2 |
4
+ +====================================+=========================================+
5
+ |----- Execution Stats ------------+-----------------------------------------|
6
+ | Start Time | Thu Jul 25 15:38:16 2024 |
7
+ | End Time | Thu Jul 25 17:08:59 2024 |
8
+ | Total Execution Time (sec) | 5442.54 |
9
+ | No. of Processes | 2 |
10
+ +----- Performance Stats ----------+-----------------------------------------+
11
+ | Energy Consumed (Joules) | 214029 |
12
+ | Power Usage (Watts) | Avg: 59.2012, Max: 67.659, Min: 59.026 |
13
+ | Max GPU Memory Used (bytes) | 7616856064 |
14
+ | SM Clock (MHz) | Avg: 243, Max: 1080, Min: 210 |
15
+ | Memory Clock (MHz) | Avg: 1593, Max: 1593, Min: 1593 |
16
+ | SM Utilization (%) | Avg: 0, Max: 0, Min: 0 |
17
+ | Memory Utilization (%) | Avg: 0, Max: 0, Min: 0 |
18
+ | PCIe Rx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
19
+ | PCIe Tx Bandwidth (megabytes) | Avg: N/A, Max: N/A, Min: N/A |
20
+ +----- Event Stats ----------------+-----------------------------------------+
21
+ | Single Bit ECC Errors | 0 |
22
+ | Double Bit ECC Errors | 0 |
23
+ | PCIe Replay Warnings | 0 |
24
+ | Critical XID Errors | 0 |
25
+ +----- Slowdown Stats -------------+-----------------------------------------+
26
+ | Due to - Power (%) | 0 |
27
+ | - Thermal (%) | 0 |
28
+ | - Reliability (%) | Not Supported |
29
+ | - Board Limit (%) | Not Supported |
30
+ | - Low Utilization (%) | Not Supported |
31
+ | - Sync Boost (%) | 0 |
32
+ +-- Compute Process Utilization ---+-----------------------------------------+
33
+ | PID | 1095606 |
34
+ | Avg SM Utilization (%) | 3 |
35
+ | Avg Memory Utilization (%) | 0 |
36
+ | PID | 1096190 |
37
+ | Avg SM Utilization (%) | 14 |
38
+ | Avg Memory Utilization (%) | 2 |
39
+ +----- Overall Health -------------+-----------------------------------------+
40
+ | Overall Health | Healthy |
41
+ +------------------------------------+-----------------------------------------+
42
+
demo_v2.py ADDED
@@ -0,0 +1,648 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python demo_v2.py --cfg-path eval_configs/minigptv2_eval.yaml --gpu-id 0
2
+
3
+ import argparse
4
+ import os
5
+ import random
6
+ from collections import defaultdict
7
+
8
+ import cv2
9
+ import re
10
+
11
+ import numpy as np
12
+ from PIL import Image
13
+ import torch
14
+ import html
15
+ import gradio as gr
16
+
17
+ import torchvision.transforms as T
18
+ import torch.backends.cudnn as cudnn
19
+
20
+ from minigpt4.common.config import Config
21
+
22
+ from minigpt4.common.registry import registry
23
+ from minigpt4.conversation.conversation import Conversation, SeparatorStyle, Chat
24
+
25
+ # imports modules for registration
26
+ from minigpt4.datasets.builders import *
27
+ from minigpt4.models import *
28
+ from minigpt4.processors import *
29
+ from minigpt4.runners import *
30
+ from minigpt4.tasks import *
31
+
32
+
33
def parse_args():
    """Parse the demo's command-line options.

    Returns:
        argparse.Namespace carrying ``cfg_path`` (str), ``gpu_id`` (int) and
        ``options`` (list of str, or ``None`` when the flag is not given).
    """
    cli = argparse.ArgumentParser(description="Demo")
    cli.add_argument(
        "--cfg-path",
        default='eval_configs/minigptv2_eval.yaml',
        help="path to configuration file.",
    )
    cli.add_argument(
        "--gpu-id",
        type=int,
        default=0,
        help="specify the gpu to load the model.",
    )
    # One or more free-form overrides; the help text itself marks this flag
    # as deprecated.
    cli.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config, the key-value pair "
        "in xxx=yyy format will be merged into config file (deprecate), "
        "change to --cfg-options instead.",
    )
    return cli.parse_args()
47
+
48
+
49
# Module-level start-up for the demo: runs once at import time, in this order.
# 1. Seed the Python, NumPy and PyTorch RNGs with the fixed value 42 and switch
#    cuDNN to non-benchmark / deterministic mode (PyTorch reproducibility knobs).
# 2. Parse the CLI and build the project Config from it.
# 3. Look up the model class named by model_cfg.arch in the registry, build it
#    from the config and move it to 'cuda:<gpu_id>'.
#    NOTE(review): device_8bit presumably tells the model which GPU to use for
#    8-bit weight loading -- confirm in the model class.
# 4. bounding_box_size is only defined here; its users are outside this view.
# 5. Build the image pre-processor from the cc_sbu_align dataset's *train*
#    vis_processor config.
#    NOTE(review): a train-time processor in an inference demo -- confirm intended.
# 6. Put the model in eval mode.
+ random.seed(42)
50
+ np.random.seed(42)
51
+ torch.manual_seed(42)
52
+
53
+ cudnn.benchmark = False
54
+ cudnn.deterministic = True
55
+
56
+ print('Initializing Chat')
57
+ args = parse_args()
58
+ cfg = Config(args)
59
+
60
+ device = 'cuda:{}'.format(args.gpu_id)
61
+
62
+ model_config = cfg.model_cfg
63
+ model_config.device_8bit = args.gpu_id
64
+ model_cls = registry.get_model_class(model_config.arch)
65
+ model = model_cls.from_config(model_config).to(device)
66
+ bounding_box_size = 100
67
+
68
+ vis_processor_cfg = cfg.datasets_cfg.cc_sbu_align.vis_processor.train
69
+ vis_processor = registry.get_processor_class(vis_processor_cfg.name).from_config(vis_processor_cfg)
70
+
71
+ model = model.eval()
72
+
73
# Conversation template shared by the demo: empty system prompt, the two role
# markers "<s>[INST] " and " [/INST]", no initial messages, and an empty
# separator with SeparatorStyle.SINGLE.
# NOTE(review): the role markers look like the Llama-2 chat instruction format,
# and offset=2 presumably skips two seed messages -- confirm against the
# Conversation class, which is not visible here.
+ CONV_VISION = Conversation(
74
+ system="",
75
+ roles=(r"<s>[INST] ", r" [/INST]"),
76
+ messages=[],
77
+ offset=2,
78
+ sep_style=SeparatorStyle.SINGLE,
79
+ sep="",
80
+ )
81
+
82
+
83
def extract_substrings(string):
    """Return every ``<p>...}`` segment found in *string*.

    Args:
        string: Text to scan.

    Returns:
        A list with one entry per match: the text between a ``<p>`` tag and
        the first following ``}`` that is not immediately followed by ``<``.
        The ``<p>`` and the closing ``}`` themselves are not included. The
        list is empty when nothing matches.
    """
    # Discard everything after the last '}' so that a trailing, unfinished
    # bracket group cannot take part in a match.
    last_close = string.rfind('}')
    if last_close != -1:
        string = string[:last_close + 1]

    # re.findall already returns a fresh list of the captured groups, so no
    # extra copy is needed.
    return re.findall(r'<p>(.*?)\}(?!<)', string)
94
+
95
+
96
def is_overlapping(rect1, rect2):
    """Tell whether two ``(x1, y1, x2, y2)`` rectangles intersect.

    Rectangles that merely touch along an edge or at a corner count as
    overlapping, because only strict ``<`` / ``>`` comparisons separate them.
    """
    left_a, top_a, right_a, bottom_a = rect1
    left_b, top_b, right_b, bottom_b = rect2

    # Separated horizontally: one rectangle lies entirely to the side of the other.
    if right_a < left_b or left_a > right_b:
        return False
    # Separated vertically.
    if bottom_a < top_b or top_a > bottom_b:
        return False
    return True
100
+
101
+
102
def computeIoU(bbox1, bbox2):
    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

    Coordinates are treated as inclusive: every width and height has 1 added,
    so a box whose two corners coincide has area 1.

    Args:
        bbox1: First box as ``(x1, y1, x2, y2)``.
        bbox2: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        ``intersection_area / union_area``. Returns ``0.0`` when the union
        area is not positive (degenerate boxes), where the plain division
        would raise ``ZeroDivisionError``.
    """
    x1, y1, x2, y2 = bbox1
    x3, y3, x4, y4 = bbox2

    # Clamp each extent at 0 so disjoint boxes give an empty intersection.
    inter_w = max(0, min(x2, x4) - max(x1, x3) + 1)
    inter_h = max(0, min(y2, y4) - max(y1, y3) + 1)
    intersection_area = inter_w * inter_h

    bbox1_area = (x2 - x1 + 1) * (y2 - y1 + 1)
    bbox2_area = (x4 - x3 + 1) * (y4 - y3 + 1)
    union_area = bbox1_area + bbox2_area - intersection_area

    # Degenerate input (e.g. two zero-area boxes) would otherwise divide by 0.
    if union_area <= 0:
        return 0.0
    return intersection_area / union_area
115
+
116
+
117
def save_tmp_img(visual_img):
    """Save *visual_img* to a fresh ``.jpg`` file in the temp directory.

    Args:
        visual_img: object exposing ``save(path)`` (e.g. a PIL image).

    Returns:
        The path of the file that was written.
    """
    # Local imports keep this block self-contained.
    import os
    import tempfile

    # mkstemp guarantees a unique name.  The previous scheme drew five digits
    # from the global ``random`` module, which this script seeds with a fixed
    # value, so names repeated across restarts and could collide.
    fd, file_path = tempfile.mkstemp(prefix="gradio", suffix=".jpg")
    os.close(fd)
    visual_img.save(file_path)
    return file_path
122
+
123
+
124
def mask2bbox(mask):
    """Convert a sketch mask into a ``{<x1><y1><x2><y2>}`` box string.

    The mask is resized to 100x100 with nearest-neighbour sampling and only
    its first channel is inspected.  Returns ``''`` when *mask* is ``None``
    or contains no non-zero pixel.
    """
    if mask is None:
        return ''

    small_mask = mask.resize([100, 100], resample=Image.NEAREST)
    first_channel = np.array(small_mask)[:, :, 0]

    row_hits = np.any(first_channel, axis=1)
    col_hits = np.any(first_channel, axis=0)

    if not row_hits.sum():
        return ''

    # First and last non-empty row/column give the box boundaries.
    rmin, rmax = np.where(row_hits)[0][[0, -1]]
    cmin, cmax = np.where(col_hits)[0][[0, -1]]
    return '{{<{}><{}><{}><{}>}}'.format(cmin, rmin, cmax, rmax)
142
+
143
+
144
def escape_markdown(text):
    """Prefix every ``<`` and ``>`` in *text* with a backslash."""
    return text.replace('<', '\\<').replace('>', '\\>')
153
+
154
+
155
def reverse_escape(text):
    """Undo escape_markdown: turn ``\\<`` and ``\\>`` back into ``<`` and ``>``."""
    return text.replace('\\<', '<').replace('\\>', '>')
162
+
163
+
164
# Palette of 3-component colour tuples used when drawing boxes.
colors = [
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (210, 210, 0),
    (255, 0, 255),
    (0, 255, 255),
    (114, 128, 250),
    (0, 165, 255),
    (0, 128, 0),
    (144, 238, 144),
    (238, 238, 175),
    (255, 191, 0),
    (0, 128, 0),
    (226, 43, 138),
    (255, 0, 255),
    (0, 215, 255),
]

# Maps the palette index (as a string) to "#rrggbb"-style hex text built from
# the tuple's components in reverse order (index 2, then 1, then 0).
color_map = {
    str(color_id): "#{:02x}{:02x}{:02x}".format(third, second, first)
    for color_id, (first, second, third) in enumerate(colors)
}

used_colors = colors
189
+
190
+
191
def visualize_all_bbox_together(image, generation):
    """Draw the bounding boxes described in *generation* onto *image*.

    Args:
        image: image to annotate; ``.size`` and ``.resize`` are called on it
            before any type check, so it is used like a PIL image.
        generation: text that may contain ``<p>phrase</p> {<x1><y1><x2><y2>}``
            groups, or just four integers.

    Returns:
        ``(PIL image, html string)``.  The html string is *generation* with
        the box markup removed and each ``<p>`` phrase wrapped in a coloured
        ``<span>`` (empty in single-box mode).  ``(None, '')`` is returned
        when *image* is None or no usable box is found.
    """
    if image is None:
        return None, ''

    generation = html.unescape(generation)
    print('gen begin', generation)
    # Resize to a fixed width of 500 px, keeping the aspect ratio.
    image_width, image_height = image.size
    image = image.resize([500, int(500 / image_width * image_height)])
    image_width, image_height = image.size

    string_list = extract_substrings(generation)
    if string_list:  # it is grounding or detection
        mode = 'all'
        # phrase -> list of [x1, y1, x2, y2] boxes in pixel coordinates
        entities = defaultdict(list)
        # i and j only count phrases/boxes; they are not read afterwards.
        i = 0
        j = 0
        for string in string_list:
            try:
                obj, string = string.split('</p>')
            except ValueError:
                # split did not yield exactly two parts
                print('wrong string: ', string)
                continue
            bbox_list = string.split('<delim>')
            flag = False
            for bbox_string in bbox_list:
                integers = re.findall(r'-?\d+', bbox_string)
                if len(integers) == 4:
                    x0, y0, x1, y1 = int(integers[0]), int(integers[1]), int(integers[2]), int(integers[3])
                    # Scale from the bounding_box_size grid to pixels.
                    left = x0 / bounding_box_size * image_width
                    bottom = y0 / bounding_box_size * image_height
                    right = x1 / bounding_box_size * image_width
                    top = y1 / bounding_box_size * image_height

                    entities[obj].append([left, bottom, right, top])

                    j += 1
                    flag = True
            if flag:
                i += 1
    else:
        integers = re.findall(r'-?\d+', generation)

        if len(integers) == 4:  # it is refer
            mode = 'single'

            # In single mode `entities` is a plain list holding one box.
            entities = list()
            x0, y0, x1, y1 = int(integers[0]), int(integers[1]), int(integers[2]), int(integers[3])
            left = x0 / bounding_box_size * image_width
            bottom = y0 / bounding_box_size * image_height
            right = x1 / bounding_box_size * image_width
            top = y1 / bounding_box_size * image_height
            entities.append([left, bottom, right, top])
        else:
            # don't detect any valid bbox to visualize
            return None, ''

    if len(entities) == 0:
        return None, ''

    # NOTE(review): `.size` / `.resize` were already called on `image` above,
    # so the str and Tensor branches below look unreachable for those input
    # types -- confirm before relying on them.
    if isinstance(image, Image.Image):
        image_h = image.height
        image_w = image.width
        image = np.array(image)

    elif isinstance(image, str):
        if os.path.exists(image):
            pil_img = Image.open(image).convert("RGB")
            image = np.array(pil_img)[:, :, [2, 1, 0]]
            image_h = pil_img.height
            image_w = pil_img.width
        else:
            raise ValueError(f"invaild image path, {image}")
    elif isinstance(image, torch.Tensor):

        image_tensor = image.cpu()
        # Undo a per-channel normalisation with these mean/std values.
        reverse_norm_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073])[:, None, None]
        reverse_norm_std = torch.tensor([0.26862954, 0.26130258, 0.27577711])[:, None, None]
        image_tensor = image_tensor * reverse_norm_std + reverse_norm_mean
        pil_img = T.ToPILImage()(image_tensor)
        image_h = pil_img.height
        image_w = pil_img.width
        image = np.array(pil_img)[:, :, [2, 1, 0]]
    else:
        raise ValueError(f"invaild image format, {type(image)} for {image}")

    # `indices` is not read afterwards.
    indices = list(range(len(entities)))

    new_image = image.copy()

    # Label backgrounds already drawn, used to avoid duplicates and overlaps.
    previous_bboxes = []
    # size of text
    text_size = 0.5
    # thickness of text
    text_line = 1  # int(max(1 * min(image_h, image_w) / 512, 1))
    box_line = 2
    (c_width, text_height), _ = cv2.getTextSize("F", cv2.FONT_HERSHEY_COMPLEX, text_size, text_line)
    base_height = int(text_height * 0.675)
    text_offset_original = text_height - base_height
    text_spaces = 2

    # num_bboxes = sum(len(x[-1]) for x in entities)
    used_colors = colors  # random.sample(colors, k=num_bboxes)

    # `color_id` is incremented per entity but not read; the colour is picked
    # from entity_idx below.
    color_id = -1
    for entity_idx, entity_name in enumerate(entities):
        if mode == 'single' or mode == 'identify':
            # Here each item of `entities` is itself a box.
            bboxes = entity_name
            bboxes = [bboxes]
        else:
            bboxes = entities[entity_name]
        color_id += 1
        for bbox_id, (x1_norm, y1_norm, x2_norm, y2_norm) in enumerate(bboxes):
            skip_flag = False
            orig_x1, orig_y1, orig_x2, orig_y2 = int(x1_norm), int(y1_norm), int(x2_norm), int(y2_norm)

            color = used_colors[entity_idx % len(used_colors)]  # tuple(np.random.randint(0, 255, size=3).tolist())
            new_image = cv2.rectangle(new_image, (orig_x1, orig_y1), (orig_x2, orig_y2), color, box_line)

            if mode == 'all':
                l_o, r_o = box_line // 2 + box_line % 2, box_line // 2 + box_line % 2 + 1

                x1 = orig_x1 - l_o
                y1 = orig_y1 - l_o

                # Not enough room above the box: place the label inside it.
                if y1 < text_height + text_offset_original + 2 * text_spaces:
                    y1 = orig_y1 + r_o + text_height + text_offset_original + 2 * text_spaces
                    x1 = orig_x1 + r_o

                # add text background
                (text_width, text_height), _ = cv2.getTextSize(f"  {entity_name}", cv2.FONT_HERSHEY_COMPLEX, text_size,
                                                               text_line)
                text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2 = x1, y1 - (
                        text_height + text_offset_original + 2 * text_spaces), x1 + text_width, y1

                for prev_bbox in previous_bboxes:
                    # Same phrase at (almost) the same place: skip the label.
                    if computeIoU((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox['bbox']) > 0.95 and \
                            prev_bbox['phrase'] == entity_name:
                        skip_flag = True
                        break
                    # Push the label down one label height at a time until it
                    # no longer overlaps this earlier label.
                    while is_overlapping((text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), prev_bbox['bbox']):
                        text_bg_y1 += (text_height + text_offset_original + 2 * text_spaces)
                        text_bg_y2 += (text_height + text_offset_original + 2 * text_spaces)
                        y1 += (text_height + text_offset_original + 2 * text_spaces)

                        # Reached the bottom edge: pin the label there.
                        if text_bg_y2 >= image_h:
                            text_bg_y1 = max(0, image_h - (text_height + text_offset_original + 2 * text_spaces))
                            text_bg_y2 = image_h
                            y1 = image_h
                            break
                if not skip_flag:
                    # Blend the label background 50/50 with the image.
                    alpha = 0.5
                    for i in range(text_bg_y1, text_bg_y2):
                        for j in range(text_bg_x1, text_bg_x2):
                            if i < image_h and j < image_w:
                                if j < text_bg_x1 + 1.35 * c_width:
                                    # original color
                                    bg_color = color
                                else:
                                    # white
                                    bg_color = [255, 255, 255]
                                new_image[i, j] = (alpha * new_image[i, j] + (1 - alpha) * np.array(bg_color)).astype(
                                    np.uint8)

                    cv2.putText(
                        new_image, f"  {entity_name}", (x1, y1 - text_offset_original - 1 * text_spaces),
                        cv2.FONT_HERSHEY_COMPLEX, text_size, (0, 0, 0), text_line, cv2.LINE_AA
                    )

                    previous_bboxes.append(
                        {'bbox': (text_bg_x1, text_bg_y1, text_bg_x2, text_bg_y2), 'phrase': entity_name})

    if mode == 'all':
        def color_iterator(colors):
            # Endlessly cycle through the palette.
            while True:
                for color in colors:
                    yield color

        color_gen = color_iterator(colors)

        # Add colors to phrases and remove <p></p>
        def colored_phrases(match):
            phrase = match.group(1)
            color = next(color_gen)
            return f'<span style="color:rgb{color}">{phrase}</span>'

        generation = re.sub(r'{<\d+><\d+><\d+><\d+>}|<delim>', '', generation)
        generation_colored = re.sub(r'<p>(.*?)</p>', colored_phrases, generation)
    else:
        generation_colored = ''

    pil_image = Image.fromarray(new_image)
    return pil_image, generation_colored
383
+
384
+
385
def gradio_reset(chat_state, img_list):
    """Clear the conversation and return reset values for the UI outputs.

    Returns a 5-tuple: ``None``, an update clearing the image, an update
    restoring the textbox placeholder, the emptied *chat_state* and the
    emptied image list.
    """
    if chat_state is not None:
        chat_state.messages = []
    cleared_img_list = img_list if img_list is None else []

    image_update = gr.update(value=None, interactive=True)
    textbox_update = gr.update(placeholder='Upload your image and chat', interactive=True)
    return None, image_update, textbox_update, chat_state, cleared_img_list
392
+
393
+
394
def image_upload_trigger(upload_flag, replace_flag, img_list):
    """Return the ``(upload_flag, replace_flag)`` pair after an image upload.

    The upload flag is always set to 1.  The replace flag becomes 1 when an
    older image is already present (non-empty *img_list*), so the
    conversation is reset later; otherwise it is passed through unchanged.
    """
    return 1, (1 if img_list else replace_flag)
401
+
402
+
403
def example_trigger(text_input, image, upload_flag, replace_flag, img_list):
    """Return the ``(upload_flag, replace_flag)`` pair after an example is picked.

    The upload flag is always set to 1.  The replace flag is 1 when an older
    image exists or the flag was already 1; otherwise it is returned as given.
    *text_input* and *image* are accepted but not used.
    """
    if not img_list and not replace_flag == 1:
        return 1, replace_flag
    return 1, 1
411
+
412
+
413
def gradio_ask(user_message, chatbot, chat_state, gr_img, img_list, upload_flag, replace_flag):
    """Register the user's message (and a newly uploaded image) with the chat.

    Returns ``(text_box_show, chatbot, chat_state, img_list, upload_flag,
    replace_flag)``; the first item is the text to show in the input box.
    """
    # NOTE(review): an empty message only sets the warning text; the rest of
    # the function still runs and chat.ask is called with it -- confirm intended.
    if len(user_message) == 0:
        text_box_show = 'Input should not be empty!'
    else:
        text_box_show = ''

    # The sketch tool delivers a dict holding the image and the drawn mask.
    if isinstance(gr_img, dict):
        gr_img, mask = gr_img['image'], gr_img['mask']
    else:
        mask = None

    if '[identify]' in user_message:
        # check if user provide bbox in the text input
        integers = re.findall(r'-?\d+', user_message)
        if len(integers) != 4:  # no bbox in text
            # Fall back to the box derived from the drawn mask.
            bbox = mask2bbox(mask)
            user_message = user_message + bbox

    if chat_state is None:
        chat_state = CONV_VISION.copy()

    if upload_flag:
        if replace_flag:
            chat_state = CONV_VISION.copy()  # new image, reset everything
            replace_flag = 0
            chatbot = []
        img_list = []
        # The return value is stored but not used afterwards.
        llm_message = chat.upload_img(gr_img, chat_state, img_list)
        upload_flag = 0

    chat.ask(user_message, chat_state)

    chatbot = chatbot + [[user_message, None]]

    if '[identify]' in user_message:
        # Echo the queried box back to the user as an annotated image.
        visual_img, _ = visualize_all_bbox_together(gr_img, user_message)
        if visual_img is not None:
            file_path = save_tmp_img(visual_img)
            chatbot = chatbot + [[(file_path,), None]]

    return text_box_show, chatbot, chat_state, img_list, upload_flag, replace_flag
454
+
455
+
456
def gradio_answer(chatbot, chat_state, img_list, temperature):
    """Ask the chat backend for a full answer and store it in the last turn.

    Returns the updated ``(chatbot, chat_state)`` pair.
    """
    answer_outputs = chat.answer(
        conv=chat_state,
        img_list=img_list,
        temperature=temperature,
        max_new_tokens=500,
        max_length=2000,
    )
    # Only the first element of the backend's result is the message text.
    chatbot[-1][1] = answer_outputs[0]
    return chatbot, chat_state
464
+
465
+
466
def gradio_stream_answer(chatbot, chat_state, img_list, temperature):
    """Stream the model answer into the last chatbot turn.

    Yields ``(chatbot, chat_state)`` after every streamed piece so the UI can
    refresh.  Once streaming ends, the last conversation message is set to
    ``'</s>'``.
    """
    needs_encoding = len(img_list) > 0 and not isinstance(img_list[0], torch.Tensor)
    if needs_encoding:
        chat.encode_img(img_list)

    token_stream = chat.stream_answer(
        conv=chat_state,
        img_list=img_list,
        temperature=temperature,
        max_new_tokens=500,
        max_length=2000,
    )

    shown_text = ''
    for piece in token_stream:
        # Escape angle brackets so the chatbot shows them literally.
        shown_text += escape_markdown(piece)
        chatbot[-1][1] = shown_text
        yield chatbot, chat_state

    chat_state.messages[-1][1] = '</s>'
    return chatbot, chat_state
483
+
484
+
485
def gradio_visualize(chatbot, gr_img):
    """Append an annotated image for the boxes found in the last answer.

    The last answer is un-escaped and passed to visualize_all_bbox_together.
    When an image comes back it is saved and appended as a new chatbot turn,
    and the answer text is replaced by the coloured version if there is one.
    """
    if isinstance(gr_img, dict):
        # Both keys are read, as before; the mask itself is not needed here.
        gr_img, _mask = gr_img['image'], gr_img['mask']

    answer_text = reverse_escape(chatbot[-1][1])
    visual_img, generation_color = visualize_all_bbox_together(gr_img, answer_text)
    if visual_img is None:
        return chatbot

    if len(generation_color):
        chatbot[-1][1] = generation_color
    saved_path = save_tmp_img(visual_img)
    return chatbot + [[None, (saved_path,)]]
498
+
499
+
500
def gradio_taskselect(idx):
    """Return the ``(prompt prefix, hint text)`` pair for task shortcut *idx*."""
    tasks = [
        ('', '**Hint:** Type in whatever you want'),
        ('[grounding] describe this image in detail',
         '**Hint:** Send the command to generate a grounded image description'),
        ('[refer] ',
         '**Hint:** Type in a phrase about an object in the image and send the command'),
        ('[detection] ',
         '**Hint:** Type in a caption or phrase, and see object locations in the image'),
        ('[identify] what is this ',
         '**Hint:** Draw a bounding box on the uploaded image then send the command. Click the "clear" botton on the top right of the image before redraw'),
        ('[vqa] ', '**Hint:** Send a question to get a short answer'),
    ]
    prompt_list = [prompt for prompt, _ in tasks]
    instruct_list = [hint for _, hint in tasks]
    return prompt_list[idx], instruct_list[idx]
518
+
519
+
520
+
521
+
522
# Chat backend used by all gradio_* callbacks.
chat = Chat(model, vis_processor, device=device)

title = """<h1 align="center">MiniGPT-Med Demo</h1>"""
description = 'Welcome to Our MiniGPT-Med Chatbot Demo!'
# article = """<p><a href='https://minigpt-v2.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a></p><p><a href='https://github.com/Vision-CAIR/MiniGPT-4/blob/main/MiniGPTv2.pdf'><img src='https://img.shields.io/badge/Paper-PDF-red'></a></p><p><a href='https://github.com/Vision-CAIR/MiniGPT-4'><img src='https://img.shields.io/badge/GitHub-Repo-blue'></a></p><p><a href='https://www.youtube.com/watch?v=atFCwV2hSY4'><img src='https://img.shields.io/badge/YouTube-Video-red'></a></p>"""
article = """<p><a href='https://minigpt-med.github.io/'><img src='https://img.shields.io/badge/Project-Page-Green'></a></p>"""

# Usage notes rendered under the image column.
introduction = '''
For Abilities Involving Visual Grounding:
1. Grounding: CLICK **Send** to generate a grounded image description.
2. Refer: Input a referring object and CLICK **Send**.
3. Detection: Write a caption or phrase, and CLICK **Send**.
4. Identify: Draw the bounding box on the uploaded image window and CLICK **Send** to generate the bounding box. (CLICK "clear" button before re-drawing next time).
5. VQA: Input a visual question and CLICK **Send**.
6. No Tag: Input whatever you want and CLICK **Send** without any tagging

You can also simply chat in free form!
'''

# Created outside the Blocks context and placed later with text_input.render().
text_input = gr.Textbox(placeholder='Upload your image and chat', interactive=True, show_label=False, container=False,
                        scale=8)
with gr.Blocks() as demo:
    gr.Markdown(title)
    # gr.Markdown(description)
    gr.Markdown(article)

    with gr.Row():
        # Left column: image input, temperature, restart button, usage notes.
        with gr.Column(scale=0.5):
            image = gr.Image(type="pil", tool='sketch', brush_radius=20)

            temperature = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                value=0.6,
                step=0.1,
                interactive=True,
                label="Temperature",
            )

            clear = gr.Button("Restart")

            gr.Markdown(introduction)

        # Right column: conversation state, chat window, task shortcuts, input row.
        with gr.Column():
            chat_state = gr.State(value=None)
            img_list = gr.State(value=[])
            chatbot = gr.Chatbot(label='MiniGPT-Med')

            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                samples=[['No Tag'], ['Grounding'], ['Refer'], ['Detection'], ['Identify'], ['VQA']],
                type="index",
                label='Task Shortcuts',
            )
            task_inst = gr.Markdown('**Hint:** Upload your image and chat')
            with gr.Row():
                text_input.render()
                send = gr.Button("Send", variant='primary', size='sm', scale=1)

    # Flags read by gradio_ask to decide whether to (re)load the image.
    upload_flag = gr.State(value=0)
    replace_flag = gr.State(value=0)
    image.upload(image_upload_trigger, [upload_flag, replace_flag, img_list], [upload_flag, replace_flag])
    # [29, 44, 42, 56]
    # NOTE(review): `inputs` lists image before text_input, while
    # example_trigger names its parameters (text_input, image, ...).  The
    # function ignores both, so this looks harmless -- confirm.
    with gr.Row():
        with gr.Column():
            gr.Examples(examples=[
                ["Med_examples_v2/xmlab149/source.jpg", "[identify] what is this {<56><16><84><58>}", upload_flag,
                 replace_flag, img_list],
                ["Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.1495.1517874291.249176.jpg", "[detection] pneumonia", upload_flag, replace_flag, img_list],
                ["Med_examples_v2/1.2.840.113654.2.55.48339325922382839066544590341580673064.png", "[refer] the nodule in the left lung", upload_flag, replace_flag,
                 img_list],
                ["Med_examples_v2/xmlab589/source.jpg", "[grounding] describe this image in detail", upload_flag, replace_flag, img_list],
            ], inputs=[image, text_input, upload_flag, replace_flag, img_list], fn=example_trigger,
                outputs=[upload_flag, replace_flag])
        with gr.Column():
            gr.Examples(examples=[
                ["Med_examples_v2/synpic50958.jpg", "[vqa] What does the small white lesions in the aorta mean?",
                 upload_flag, replace_flag, img_list],
                ["Med_examples_v2/5f4e8079-8225a5d2-1b0c3c46-4394a094-f285db0e.jpg", "Please provide a detailed description of the picture", upload_flag, replace_flag, img_list],
                ["Med_examples_v2/1.2.276.0.7230010.3.1.4.8323329.16254.1517874395.786150.jpg", "Diagnose this image", upload_flag, replace_flag, img_list],
                ["Med_examples_v2/synpic58547.jpg", "Could you describe the contents of this image for me?", upload_flag,
                 replace_flag, img_list],
            ], inputs=[image, text_input, upload_flag, replace_flag, img_list], fn=example_trigger,
                outputs=[upload_flag, replace_flag])

    # Clicking a task shortcut fills the textbox with its prompt prefix and shows the hint.
    dataset.click(
        gradio_taskselect,
        inputs=[dataset],
        outputs=[text_input, task_inst],
        show_progress="hidden",
        postprocess=False,
        queue=False,
    )

    # Enter key and Send button run the same chain: ask -> stream answer -> visualize.
    text_input.submit(
        gradio_ask,
        [text_input, chatbot, chat_state, image, img_list, upload_flag, replace_flag],
        [text_input, chatbot, chat_state, img_list, upload_flag, replace_flag], queue=False
    ).success(
        gradio_stream_answer,
        [chatbot, chat_state, img_list, temperature],
        [chatbot, chat_state]
    ).success(
        gradio_visualize,
        [chatbot, image],
        [chatbot],
        queue=False,
    )

    send.click(
        gradio_ask,
        [text_input, chatbot, chat_state, image, img_list, upload_flag, replace_flag],
        [text_input, chatbot, chat_state, img_list, upload_flag, replace_flag], queue=False
    ).success(
        gradio_stream_answer,
        [chatbot, chat_state, img_list, temperature],
        [chatbot, chat_state]
    ).success(
        gradio_visualize,
        [chatbot, image],
        [chatbot],
        queue=False,
    )

    clear.click(gradio_reset, [chat_state, img_list], [chatbot, image, text_input, chat_state, img_list], queue=False)

demo.launch(share=True, enable_queue=True)
environment.yml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: miniGPT-Med
2
+ channels:
3
+ - pytorch
4
+ - defaults
5
+ - anaconda
6
+ dependencies:
7
+ - python=3.9
8
+ - cudatoolkit
9
+ - pip
10
+ - pip:
11
+ - torch==2.0.0
12
+ - torchaudio
13
+ - torchvision
14
+ - huggingface-hub==0.18.0
15
+ - matplotlib==3.7.0
16
+ - psutil==5.9.4
17
+ - iopath
18
+ - pyyaml==6.0
19
+ - regex==2022.10.31
20
+ - tokenizers==0.13.2
21
+ - tqdm==4.64.1
22
+ - transformers==4.30.0
23
+ - timm==0.6.13
24
+ - webdataset==0.2.48
25
+ - omegaconf==2.3.0
26
+ - opencv-python==4.7.0.72
27
+ - decord==0.6.0
28
+ - peft==0.2.0
29
+ - sentence-transformers
30
+ - gradio==3.47.1
31
+ - accelerate==0.20.3
32
+ - bitsandbytes==0.37.0
33
+ - scikit-image
34
+ - visual-genome
35
+ - wandb
eval_configs/minigptv2_benchmark_evaluation.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ arch: minigpt_v2
3
+ model_type: pretrain
4
+ max_txt_len: 500
5
+ end_sym: "</s>"
6
+ low_resource: False
7
+ prompt_template: '[INST] {} [/INST]'
8
+ llama_model: "/ibex/project/c2106/RadGPT/MiniGPT4-v2/llama-2-7b-chat-hf"
9
+ ckpt: "/ibex/project/c2106/RadGPT/MiniGPT-Med-github/miniGPT_Med_.pth"
10
+ lora_r: 64
11
+ lora_alpha: 16
12
+
13
+ datasets:
14
+ cc_sbu_align:
15
+ vis_processor:
16
+ train:
17
+ name: "blip2_image_eval"
18
+ image_size: 448
19
+ text_processor:
20
+ train:
21
+ name: "blip_caption"
22
+
23
+ evaluation_datasets:
24
+ rsna:
25
+ eval_file_path: miniGPT-Med/json_files/RSNA/full_RSNA_1024.json
26
+ img_path: miniGPT-Med/RSNA/RSNA-bbox-1024
27
+ max_new_tokens: 100
28
+ batch_size: 10
29
+
30
+ radvqa:
31
+ eval_file_path: /miniGPT-Med/json_files/vqa/full_radVQA.json
32
+ img_path: /miniGPT-Med/radVQA/VQA_RAD_Images
33
+ max_new_tokens: 300
34
+ batch_size: 10
35
+
36
+ mimic_cxr:
37
+ eval_file_path: /miniGPT-Med/json_files/mimic/MIMIC_test.json
38
+ img_path: /miniGPT-Med/mimic-cxr-dataset/image
39
+ max_new_tokens: 300
40
+ batch_size: 10
41
+
42
+ nlst:
43
+ eval_file_path: /miniGPT-Med/json_files/NLST/NLST_test.json
44
+ img_path: /miniGPT-Med/NLST/NLST_images
45
+ max_new_tokens: 100
46
+ batch_size: 10
47
+
48
+ detect_mimic:
49
+ eval_file_path: /miniGPT-Med/json_files/MIMIC-bbox/MIMIC-benchmarck.json
50
+ img_path: /miniGPT-Med/mimic-cxr-dataset/image
51
+ max_new_tokens: 100
52
+ batch_size: 10
53
+
54
+ SLAKE:
55
+ eval_file_path: /miniGPT-Med/json_files/SLAKE/grounding_test_SLAKE.json
56
+ img_path: /miniGPT-Med/SLAKE_images/imgs
57
+ max_new_tokens: 100
58
+ batch_size: 10
59
+
60
+
61
+ run:
62
+ task: image_text_pretrain
63
+ name: minigptv2_evaluation
64
+ save_path: /miniGPT-Med/expermints
65
+
66
+
67
+
68
+
69
+
eval_configs/minigptv2_eval.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ arch: minigpt_v2
3
+ model_type: pretrain
4
+ max_txt_len: 500
5
+ end_sym: "</s>"
6
+ low_resource: True
7
+ prompt_template: '[INST] {} [/INST]'
8
+ ckpt: "/ibex/project/c2106/RadGPT/MiniGPT-Med-github/miniGPT_Med_.pth"
9
+ lora_r: 64
10
+ lora_alpha: 16
11
+
12
+
13
+ datasets:
14
+ cc_sbu_align:
15
+ vis_processor:
16
+ train:
17
+ name: "blip2_image_eval"
18
+ image_size: 448
19
+ text_processor:
20
+ train:
21
+ name: "blip_caption"
22
+
23
+ run:
24
+ task: image_text_pretrain
eval_scripts/.DS_Store ADDED
Binary file (6.15 kB). View file
 
eval_scripts/__pycache__/IoU.cpython-39.pyc ADDED
Binary file (1.77 kB). View file
 
eval_scripts/__pycache__/clean_json.cpython-39.pyc ADDED
Binary file (2.08 kB). View file
 
eval_scripts/__pycache__/metrics.cpython-39.pyc ADDED
Binary file (4.28 kB). View file
 
eval_scripts/clean_json.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+
4
def clean_mimic_json(messy_json, cleaned_output):
    """Flatten a ``{image_file: [caption parts]}`` JSON file into a list.

    Each output item has ``image_id`` (the key up to its first ``.``) and
    ``caption`` (the parts joined with single spaces).  The list is written
    to *cleaned_output* as JSON with an indent of 2.
    """
    with open(messy_json, 'r') as f:
        raw_entries = json.load(f)

    cleaned_entries = [
        {
            "image_id": image_name.split('.')[0],
            "caption": ' '.join(caption_parts),
        }
        for image_name, caption_parts in raw_entries.items()
    ]

    with open(cleaned_output, 'w') as outfile:
        json.dump(cleaned_entries, outfile, indent=2)
22
+
23
+
24
def clean_vqa_json(messy_json, cleaned_output):
    """Reduce every VQA entry to its ``question`` and ``answer`` fields.

    The input maps a key to a list of dicts; the output keeps the same keys
    and order, and is written to *cleaned_output* as JSON with an indent of 4.
    """
    with open(messy_json, "r") as file:
        raw_entries = json.load(file)

    organized_json = {
        key: [
            {"question": qa["question"], "answer": qa["answer"]}
            for qa in qa_items
        ]
        for key, qa_items in raw_entries.items()
    }

    with open(cleaned_output, "w") as outfile:
        json.dump(organized_json, outfile, indent=4)
40
+
41
+
42
+
43
def clean_detection_json(messy_json, cleaned_output):
    """Extract one object name and one box per key from detection output.

    Only the first caption of each non-empty list is read.  An entry is kept
    when the caption contains exactly one ``<p>`` tag with a non-empty phrase
    and exactly four ``<number>`` groups.  Kept entries are written to
    *cleaned_output* as a JSON list (indent 4) of ``key`` / ``objects`` /
    ``bbox`` dicts.
    """
    with open(messy_json, "r") as input_file:
        raw_entries = json.load(input_file)

    organized_data = []

    for key, value in raw_entries.items():
        if not (value and isinstance(value, list) and len(value) > 0):
            continue

        caption = value[0]
        pieces = caption.split("<p>")
        # Exactly one "<p>" splits the caption into two pieces.
        object_part = pieces[1].split("</p>")[0].strip() if len(pieces) == 2 else ""

        coords = re.findall(r'<(\d+)>', caption)
        if not (object_part and coords and len(coords) == 4):
            continue

        organized_data.append({
            "key": key.split(".png")[0],
            "objects": [object_part],
            "bbox": [[float(val) for val in coords]],
        })

    with open(cleaned_output, "w") as output_file:
        json.dump(organized_data, output_file, indent=4)
eval_scripts/metrics.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ sys.path.append('.')
3
+
4
+ import json
5
+ import pandas as pd
6
+ import csv
7
+ from sentence_transformers import SentenceTransformer, util
8
+ from minigpt4.common.eval_utils import computeIoU
9
+
10
+ # Load pre-trained BERT model
11
+ model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
12
+
13
+
14
+ # BERT similarity function will be utilized in the two following functions
15
def compute_bert_similarity(prediction_caption, ground_truth_caption):
    """Return the cosine similarity between the embeddings of two captions.

    Both captions are encoded with the module-level sentence-transformer
    ``model``; the single score is returned as a Python number.
    """
    prediction_vec = model.encode([prediction_caption])
    ground_truth_vec = model.encode([ground_truth_caption])
    score_matrix = util.pytorch_cos_sim(prediction_vec, ground_truth_vec)
    return score_matrix[0][0].item()
20
+
21
+
22
def MIMIC_BERT_Sim(gt_pth, pred_pth, output_csv):
    """Score predicted captions against ground truth with BERT similarity.

    Args:
        gt_pth: JSON file holding a list of ``{"image_id", "caption"}`` items.
        pred_pth: JSON file with the predictions, in the same layout.
        output_csv: destination CSV of per-image scores, sorted ascending by
            ``BERT_score``.

    Returns:
        The mean similarity over all predictions that have a ground-truth
        caption, or 0 when there is none.
    """
    with open(gt_pth, 'r') as f:
        ground_truth_data = json.load(f)

    with open(pred_pth, 'r') as f:
        prediction_data = json.load(f)

    # Index ground truth once instead of scanning the list for every
    # prediction.  setdefault keeps the FIRST caption for a repeated image_id,
    # matching the earlier first-match lookup.
    gt_captions = {}
    for gt_item in ground_truth_data:
        gt_captions.setdefault(gt_item["image_id"], gt_item.get("caption"))

    bert_similarity_data = []
    total_similarity = 0
    total_count = 0

    for item in prediction_data:
        image_id = item["image_id"]
        prediction_caption = item["caption"]

        ground_truth_caption = gt_captions.get(image_id)
        if ground_truth_caption is None:
            continue

        bert_similarity = compute_bert_similarity(prediction_caption, ground_truth_caption)
        bert_similarity_data.append({"image_id": image_id, "BERT_score": bert_similarity})
        total_similarity += bert_similarity
        total_count += 1

    average_similarity = total_similarity / total_count if total_count > 0 else 0

    # Explicit columns keep sort_values working (and still write the CSV
    # header) when no prediction matched any ground-truth item.
    df = pd.DataFrame(bert_similarity_data, columns=["image_id", "BERT_score"])
    df_sorted = df.sort_values(by="BERT_score", ascending=True)
    df_sorted.to_csv(output_csv, index=False)

    return average_similarity
64
+
65
def VQA_BERT_Sim(gt_pth, pred_pth, output_csv):
    """Score predicted VQA answers against ground truth with BERT similarity.

    Args:
        gt_pth: JSON file holding a list of entries with ``image_name``,
            ``question`` and ``answer``.
        pred_pth: JSON file mapping image name to a list of
            ``{"question", "answer"}`` dicts.
        output_csv: destination CSV of per-question scores, sorted ascending
            by ``BERT_score``.

    Returns:
        The mean similarity over all matched (image, question) pairs, or 0
        when none match.  The value is also printed.
    """
    with open(gt_pth, 'r') as file:
        gt_data = json.load(file)

    with open(pred_pth, 'r') as file:
        prediction_data = json.load(file)

    # Both sides are keyed by (image name, question).
    gt_qa_pairs = {(entry['image_name'], entry['question']): entry['answer'] for entry in gt_data}
    pred_qa_dict = {
        (image_name, qa['question']): qa['answer']
        for image_name, qa_list in prediction_data.items()
        for qa in qa_list
    }

    results = []
    for key, gt_answer in gt_qa_pairs.items():
        if key not in pred_qa_dict:
            continue
        # Answers may be numbers or booleans in the JSON; compare as text.
        pred_answer = str(pred_qa_dict[key])
        similarity_score = compute_bert_similarity(pred_answer, str(gt_answer))
        results.append({
            "img_name": key[0],
            "question": key[1],
            "answer": pred_answer,
            "BERT_score": similarity_score
        })

    average_similarity = sum(entry["BERT_score"] for entry in results) / len(results) if results else 0

    # Explicit columns keep sort_values working when nothing matched.
    df = pd.DataFrame(results, columns=["img_name", "question", "answer", "BERT_score"])
    df_sorted = df.sort_values(by="BERT_score", ascending=True)
    df_sorted.to_csv(output_csv, index=False)
    print(f"Average BERT similarity score: {average_similarity}")

    # Returned for consistency with MIMIC_BERT_Sim.
    return average_similarity
111
+
112
+
113
+ #################################
114
+ ##############IoU################
115
+ #################################
116
+
117
def preprocess_bbox(bbox, original_size, image_size):
    """Rescale the four box coordinates from *original_size* to *image_size*.

    Each coordinate is divided by *original_size*, multiplied by *image_size*
    and truncated with ``int``.  Returns ``[x1, y1, x2, y2]``.
    """
    return [int((bbox[idx] / original_size) * image_size) for idx in range(4)]
123
+
124
def average_iou(gt_pth, pred_pth, original_size, image_size, dataset_name, csv_filename):
    """Compute the IoU of every ground-truth/predicted box pair per image.

    Args:
        gt_pth: JSON list of items with ``key``, ``bbox`` and, normally,
            ``height``.
        pred_pth: JSON list of items with ``key`` (an optional ``.png`` is
            removed before matching) and ``bbox``.
        original_size: fallback source size for items without ``height``.
        image_size: size the ground-truth boxes are rescaled to.
        dataset_name: label used in the printed summary.
        csv_filename: destination CSV with one ``image_name, IoU`` row per pair.

    Returns:
        The mean IoU over all pairs, or 0.0 when no pair was scored.
    """
    with open(gt_pth, 'r') as file:
        ground_truth = json.load(file)

    with open(pred_pth, 'r') as file:
        predictions = json.load(file)

    iou_list = []

    with open(csv_filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['image_name', 'IoU'])
        writer.writeheader()

        for gt_item in ground_truth:
            gt_key = gt_item['key']
            # Prefer the per-image height; fall back to the caller's value
            # instead of discarding the argument unconditionally.
            item_size = gt_item.get('height', original_size)
            gt_processed_bboxes = [preprocess_bbox(bbox, item_size, image_size) for bbox in gt_item['bbox']]

            for pred_item in predictions:
                if pred_item['key'].replace(".png", "") != gt_key:
                    continue

                pred_bboxes = pred_item['bbox']
                try:
                    for gt_bbox in gt_processed_bboxes:
                        for pred_bbox in pred_bboxes:
                            iou = computeIoU(gt_bbox, pred_bbox)
                            iou_list.append(iou)
                            writer.writerow({'image_name': gt_key, 'IoU': iou})
                            print(gt_key)
                            print(iou)
                except Exception as e:
                    # Best effort: one malformed box must not abort the run,
                    # but report what failed, with correctly labelled inputs.
                    print(f"failed to compute IoU for {gt_key}: {e!r}")
                    print("gt_bboxes: ", gt_processed_bboxes)
                    print("pred_bboxes: ", pred_bboxes)

    # Guard the empty case, which would otherwise divide by zero.
    mean_iou = sum(iou_list) / len(iou_list) if iou_list else 0.0
    print(f"Average IoU for dataset {dataset_name}: {mean_iou:.4f}")
    return mean_iou
eval_scripts/model_evaluation.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ '''
2
+ use this command in terminal to run the evaluation script
3
+ torchrun --master-port 8888 --nproc_per_node 1 eval_scripts/model_evaluation.py --cfg-path eval_configs/minigptv2_benchmark_evaluation.yaml --dataset DATASET[,DATASET...]   (supported names: mimic_cxr, radvqa, nlst, rsna, detect_mimic, SLAKE)
4
+
5
+
6
+ '''
7
+
8
+ import sys
9
+ sys.path.append('.')
10
+ import os
11
+ import re
12
+ import json
13
+ import argparse
14
+ from collections import defaultdict
15
+ import random
16
+ import numpy as np
17
+ from PIL import Image
18
+ from tqdm import tqdm
19
+ import torch
20
+ from torch.utils.data import DataLoader
21
+ from minigpt4.common.config import Config
22
+ from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser, computeIoU
23
+ from minigpt4.conversation.conversation import CONV_VISION_minigptv2
24
+
25
+ from minigpt4.datasets.datasets.mimic_cxr_dataset import evalMIMICDataset, evalDetectMimicDataset
26
+ from minigpt4.datasets.datasets.radvqa_dataset import evalRadVQADataset
27
+ from minigpt4.datasets.datasets.nlst_dataset import eval_NLST_Dataset
28
+ from minigpt4.datasets.datasets.rsna_dataset import evalRSNADataset
29
+ from minigpt4.datasets.datasets.SLAKE_dataset import evalSLAKEDataset
30
+ #import cleaning classes
31
+ from eval_scripts.clean_json import clean_mimic_json, clean_vqa_json, clean_detection_json
32
+ from eval_scripts.metrics import MIMIC_BERT_Sim, VQA_BERT_Sim, average_iou
33
+
34
def list_of_str(arg):
    """Split a comma-separated command-line value into a list of names.

    Whitespace around each name is stripped and empty entries (for example
    from a trailing comma or ``"a, b"``) are dropped, so the resulting names
    can be compared directly against the supported dataset names.

    Args:
        arg: Raw string as received from the command line.

    Returns:
        List of non-empty, stripped names in their original order.
    """
    stripped = (name.strip() for name in arg.split(','))
    return [name for name in stripped if name]
36
+
37
# Command-line handling: extend the shared evaluation parser with the list of
# datasets to run. The flag is required because the dispatch loop at the end
# of this script iterates over args.dataset; without it the script would only
# fail after the model had already been loaded.
parser = eval_parser()
parser.add_argument("--dataset", type=list_of_str, required=True, help="dataset to evaluate")

args = parser.parse_args()

cfg = Config(args)

# Build the model once and switch it to evaluation mode.
model, vis_processor = init_model(args)
model.eval()

# Conversation template shared by every process_* function; the system
# prompt is blanked on a copy so the imported template is left untouched.
CONV_VISION = CONV_VISION_minigptv2
conv_temp = CONV_VISION.copy()
conv_temp.system = ""

# Directory that receives the inference JSON files and IoU CSV files.
save_path = cfg.run_cfg.save_path
52
+
53
def process_mimic_dataset():
    """Run inference on the MIMIC evaluation set and report BERT similarity.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    The generated answers are dumped to ``MIMIC_inference_results_stage3.json``
    under ``save_path``; that file is then given to ``clean_mimic_json`` as
    both input and output, and finally scored with ``MIMIC_BERT_Sim``.
    """
    settings = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = settings["eval_file_path"]
    img_path = settings["img_path"]
    batch_size = settings["batch_size"]
    max_new_tokens = settings["max_new_tokens"]

    with open(eval_file_path, 'r') as handle:
        samples = json.load(handle)

    loader = DataLoader(
        evalMIMICDataset(samples, vis_processor, img_path),
        batch_size=batch_size,
        shuffle=False,
    )
    answers_by_image = defaultdict(list)

    for images, questions, img_ids in tqdm(loader):
        # Wrap the raw questions in the conversation template.
        prompts = prepare_texts(questions, conv_temp)
        generated = model.generate(images, prompts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, _question in zip(generated, img_ids, questions):
            answers_by_image[img_id].append(answer)

    file_save_path = os.path.join(save_path, "MIMIC_inference_results_stage3.json")
    with open(file_save_path, 'w') as handle:
        json.dump(answers_by_image, handle)
    clean_mimic_json(file_save_path, file_save_path)

    # CSV that receives the per-case BERT similarity scores.
    # NOTE(review): absolute path, unlike the save_path-based outputs above.
    output_csv_path = '/miniGPT-Med/metric_results/bert_similarity_scores.csv'

    # Arguments: ground-truth file, cleaned inference file, output CSV.
    average_similarity = MIMIC_BERT_Sim(eval_file_path, file_save_path, output_csv_path)
    print("Average BERT Similarity:", average_similarity)
84
+
85
def process_vqa_dataset():
    """Run inference on the RadVQA evaluation set and score it with BERT similarity.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    Each generated answer is stored, together with its question (with the
    ``[vqa]`` tag removed) and its own image id, in
    ``radVQA_inference_results.json`` under ``save_path``. That file is then
    given to ``clean_vqa_json`` as both input and output and scored with
    ``VQA_BERT_Sim``.
    """
    dataset_cfg = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        radVQA = json.load(f)

    data = evalRadVQADataset(radVQA, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = defaultdict(list)

    for images, questions, img_ids in tqdm(eval_dataloader):
        # Wrap the raw questions in the conversation template.
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            # "key" is this sample's id; the previous code stored the whole
            # batch (img_ids) in every record.
            # NOTE(review): confirm clean_vqa_json does not rely on the old
            # list-valued "key".
            minigpt4_predict[img_id].append({
                "key": img_id,
                "question": question.replace("[vqa]", "").strip(),
                "answer": answer,
            })

    file_save_path = os.path.join(save_path, "radVQA_inference_results.json")
    # NOTE(review): absolute path, unlike the save_path-based JSON output.
    output_csv_path = '/miniGPT-Med/BERT_Sim_results/vqa_bert_similarity_scores.csv'

    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    clean_vqa_json(file_save_path, file_save_path)
    VQA_BERT_Sim(eval_file_path, file_save_path, output_csv_path)
112
+
113
def process_nlst_dataset():
    """Run detection inference on the NLST evaluation set and score it with IoU.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    Raw answers are dumped to ``NLST_inference_result.json`` under
    ``save_path``; that file is given to ``clean_detection_json`` as both
    input and output, then ``average_iou`` is called with
    ``NLST_IoU_results.csv`` as its CSV target.
    """
    dataset_cfg = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        nlst = json.load(f)

    data = eval_NLST_Dataset(nlst, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = defaultdict(list)

    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, img_id, _question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "NLST_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "NLST_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 512, 100, "NLST", csv_pth)
145
+
146
+
147
+
148
def process_rsna_dataset():
    """Run detection inference on the RSNA evaluation set and score it with IoU.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    Each image id and answer is printed as it is produced. Raw answers are
    dumped to ``RSNA_inference_result.json`` under ``save_path``; that file is
    given to ``clean_detection_json`` as both input and output, then
    ``average_iou`` is called with ``RSNA_IoU_results.csv`` as its CSV target.
    """
    dataset_cfg = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = dataset_cfg["eval_file_path"]
    print(eval_file_path)
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]
    print("----config----")
    with open(eval_file_path, 'r') as f:
        rsna = json.load(f)

    data = evalRSNADataset(rsna, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = defaultdict(list)

    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, img_id, _question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)
            print(img_id)
            print(answer)

    file_save_path = os.path.join(save_path, "RSNA_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "RSNA_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 1024, 100, "rsna", csv_pth)
182
+
183
+
184
def process_detect_mimic():
    """Run detection inference on the MIMIC detection set and score it with IoU.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    Raw answers are dumped to ``Detect_MIMIC_inference_result.json`` under
    ``save_path``; that file is given to ``clean_detection_json`` as both
    input and output, then ``average_iou`` is called with
    ``MIMIC_IoU_results.csv`` as its CSV target.
    """
    dataset_cfg = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        detect_mimic = json.load(f)

    data = evalDetectMimicDataset(detect_mimic, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = defaultdict(list)

    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, img_id, _question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "Detect_MIMIC_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "MIMIC_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    # NOTE(review): the third argument (original image size) is still a
    # placeholder string, not a number. Replace it with the real size once
    # known, and confirm average_iou never uses it arithmetically.
    average_iou(eval_file_path, file_save_path, "to be specified soon", 100, "MIMIC", csv_pth)
215
+
216
+
217
+
218
def process_SLAKE_dataset():
    """Run detection inference on the SLAKE evaluation set and score it with IoU.

    Settings are read from ``cfg.evaluation_datasets_cfg[dataset]``.
    ``dataset`` is not a parameter: it is looked up in module scope, so this
    function must be called while that name is bound to the matching config
    key.

    Raw answers are dumped to ``SLAKE_inference_result.json`` under
    ``save_path``; that file is given to ``clean_detection_json`` as both
    input and output, then ``average_iou`` is called with
    ``SLAKE_IoU_results.csv`` as its CSV target.
    """
    dataset_cfg = cfg.evaluation_datasets_cfg[dataset]
    eval_file_path = dataset_cfg["eval_file_path"]
    img_path = dataset_cfg["img_path"]
    batch_size = dataset_cfg["batch_size"]
    max_new_tokens = dataset_cfg["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        SLAKE = json.load(f)

    data = evalSLAKEDataset(SLAKE, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)
    minigpt4_predict = defaultdict(list)

    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)

        for answer, img_id, _question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "SLAKE_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "SLAKE_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 100, 100, "SLAKE", csv_pth)
250
+
251
+
252
+
253
+ ############################################################################
254
# Maps each supported --dataset name to the function that evaluates it.
_DATASET_HANDLERS = {
    'mimic_cxr': process_mimic_dataset,
    'radvqa': process_vqa_dataset,
    'nlst': process_nlst_dataset,
    'rsna': process_rsna_dataset,
    'detect_mimic': process_detect_mimic,
    'SLAKE': process_SLAKE_dataset,
}

# The loop variable must stay named `dataset`: the process_* functions take no
# arguments and read this module-level name to pick their config section.
for dataset in args.dataset:
    handler = _DATASET_HANDLERS.get(dataset)
    if handler is not None:
        handler()
    else:
        print(f"Dataset '{dataset}' is not supported.")
miniGPTV2.yml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GPTv2
2
+ channels:
3
+ - pytorch
4
+ - defaults
5
+ - anaconda
6
+ dependencies:
7
+ - python=3.9
8
+ - cudatoolkit
9
+ - pip
10
+ - pip:
11
+ - torch==2.0.0
12
+ - torchaudio
13
+ - torchvision
14
+ - huggingface-hub==0.18.0
15
+ - matplotlib==3.7.0
16
+ - psutil==5.9.4
17
+ - iopath
18
+ - pyyaml==6.0
19
+ - regex==2022.10.31
20
+ - tokenizers==0.13.2
21
+ - tqdm==4.64.1
22
+ - transformers==4.30.0
23
+ - timm==0.6.13
24
+ - webdataset==0.2.48
25
+ - omegaconf==2.3.0
26
+ - opencv-python==4.7.0.72
27
+ - decord==0.6.0
28
+ - peft==0.2.0
29
+ - sentence-transformers
30
+ - gradio==3.47.1
31
+ - accelerate==0.20.3
32
+ - bitsandbytes==0.37.0
33
+ - scikit-image
34
+ - visual-genome
35
+ - wandb
miniGPT_Med_.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2d7fc37dc5330cdae927c8a3ff649c5919c726eccb05cae921fb997028b08e
3
+ size 679780138
minigpt4/.DS_Store ADDED
Binary file (6.15 kB). View file
 
minigpt4/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Copyright (c) 2022, salesforce.com, inc.
3
+ All rights reserved.
4
+ SPDX-License-Identifier: BSD-3-Clause
5
+ For full license text, see the LICENSE_Lavis file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6
+ """
7
+
8
+ import os
9
+ import sys
10
+
11
+ from omegaconf import OmegaConf
12
+
13
+ from minigpt4.common.registry import registry
14
+
15
+ from minigpt4.datasets.builders import *
16
+ from minigpt4.models import *
17
+ from minigpt4.processors import *
18
+ from minigpt4.tasks import *
19
+
20
+
21
# Filesystem locations, all derived from the directory of this file.
root_dir = os.path.dirname(os.path.abspath(__file__))
repo_root = os.path.join(root_dir, "..")

# Package defaults; env.cache_root is joined onto the repository root.
default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))
cache_root = os.path.join(repo_root, default_cfg.env.cache_root)

# Publish the paths through the shared registry.
registry.register_path("library_root", root_dir)
registry.register_path("repo_root", repo_root)
registry.register_path("cache_root", cache_root)

# Package-wide constants.
registry.register("MAX_INT", sys.maxsize)
registry.register("SPLIT_NAMES", ["train", "val", "test"])
minigpt4/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (1.03 kB). View file
 
minigpt4/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.03 kB). View file
 
minigpt4/common/.DS_Store ADDED
Binary file (6.15 kB). View file
 
minigpt4/common/__init__.py ADDED
File without changes
minigpt4/common/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (166 Bytes). View file
 
minigpt4/common/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (164 Bytes). View file
 
minigpt4/common/__pycache__/config.cpython-310.pyc ADDED
Binary file (12.6 kB). View file
 
minigpt4/common/__pycache__/config.cpython-39.pyc ADDED
Binary file (12.7 kB). View file
 
minigpt4/common/__pycache__/dist_utils.cpython-310.pyc ADDED
Binary file (3.81 kB). View file
 
minigpt4/common/__pycache__/dist_utils.cpython-39.pyc ADDED
Binary file (3.82 kB). View file
 
minigpt4/common/__pycache__/eval_utils.cpython-39.pyc ADDED
Binary file (3.17 kB). View file