Merge pull request #25 from soumik12345/feat/streamlit-gui

Adds a Streamlit chat GUI for the MedQA assistant, a `medrag` console-script CLI, documentation for the app, and import-ordering cleanups across the loader and retrieval modules.

Files changed:

- app.py +91 -0
- docs/app.md +61 -0
- medrag_multi_modal/cli.py +17 -0
- medrag_multi_modal/document_loader/image_loader/base_img_loader.py +1 -1
- medrag_multi_modal/retrieval/bm25s_retrieval.py +1 -2
- medrag_multi_modal/retrieval/colpali_retrieval.py +1 -2
- medrag_multi_modal/retrieval/common.py +0 -1
- medrag_multi_modal/utils.py +1 -2
- mkdocs.yml +2 -0
- pyproject.toml +16 -5
app.py
ADDED

```python
import streamlit as st
import weave

from medrag_multi_modal.assistant import (
    FigureAnnotatorFromPageImage,
    LLMClient,
    MedQAAssistant,
)
from medrag_multi_modal.assistant.llm_client import (
    GOOGLE_MODELS,
    MISTRAL_MODELS,
    OPENAI_MODELS,
)
from medrag_multi_modal.retrieval import MedCPTRetriever

# Define constants
ALL_AVAILABLE_MODELS = GOOGLE_MODELS + MISTRAL_MODELS + OPENAI_MODELS

# Sidebar for configuration settings
st.sidebar.title("Configuration Settings")
project_name = st.sidebar.text_input(
    label="Project Name",
    value="ml-colabs/medrag-multi-modal",
    placeholder="wandb project name",
    help="format: wandb_username/wandb_project_name",
)
chunk_dataset_name = st.sidebar.text_input(
    label="Text Chunk WandB Dataset Name",
    value="grays-anatomy-chunks:v0",
    placeholder="wandb dataset name",
    help="format: wandb_dataset_name:version",
)
index_artifact_address = st.sidebar.text_input(
    label="WandB Index Artifact Address",
    value="ml-colabs/medrag-multi-modal/grays-anatomy-medcpt:v0",
    placeholder="wandb artifact address",
    help="format: wandb_username/wandb_project_name/wandb_artifact_name:version",
)
image_artifact_address = st.sidebar.text_input(
    label="WandB Image Artifact Address",
    value="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
    placeholder="wandb artifact address",
    help="format: wandb_username/wandb_project_name/wandb_artifact_name:version",
)
llm_client_model_name = st.sidebar.selectbox(
    label="LLM Client Model Name",
    options=ALL_AVAILABLE_MODELS,
    index=ALL_AVAILABLE_MODELS.index("gemini-1.5-flash"),
    help="select a model from the list",
)
figure_extraction_model_name = st.sidebar.selectbox(
    label="Figure Extraction Model Name",
    options=ALL_AVAILABLE_MODELS,
    index=ALL_AVAILABLE_MODELS.index("pixtral-12b-2409"),
    help="select a model from the list",
)
structured_output_model_name = st.sidebar.selectbox(
    label="Structured Output Model Name",
    options=ALL_AVAILABLE_MODELS,
    index=ALL_AVAILABLE_MODELS.index("gpt-4o"),
    help="select a model from the list",
)

# Streamlit app layout
st.title("MedQA Assistant App")

# Initialize Weave
weave.init(project_name=project_name)

# Initialize clients and assistants
llm_client = LLMClient(model_name=llm_client_model_name)
retriever = MedCPTRetriever.from_wandb_artifact(
    chunk_dataset_name=chunk_dataset_name,
    index_artifact_address=index_artifact_address,
)
figure_annotator = FigureAnnotatorFromPageImage(
    figure_extraction_llm_client=LLMClient(model_name=figure_extraction_model_name),
    structured_output_llm_client=LLMClient(model_name=structured_output_model_name),
    image_artifact_address=image_artifact_address,
)
medqa_assistant = MedQAAssistant(
    llm_client=llm_client, retriever=retriever, figure_annotator=figure_annotator
)

query = st.chat_input("Enter your question here")
if query:
    with st.chat_message("user"):
        st.markdown(query)
    response = medqa_assistant.predict(query=query)
    with st.chat_message("assistant"):
        st.markdown(response)
```
docs/app.md
ADDED

````markdown
# MedQA Assistant App

The MedQA Assistant App is a Streamlit-based application that provides a chat interface for medical question answering. It leverages large language models (LLMs) and retrieval-augmented generation (RAG) to deliver accurate and informative responses to medical queries.

## Features

- **Interactive Chat Interface**: Engage with the app through a user-friendly chat interface.
- **Configurable Settings**: Customize model selection and data sources via the sidebar.
- **Retrieval-Augmented Generation**: Ensures precise and contextually relevant responses.
- **Figure Annotation Capabilities**: Extracts and annotates figures from medical texts.

## Usage

1. **Install the Package**:
   ```bash
   uv pip install .
   ```
2. **Launch the App**: Start the application using Streamlit:
   ```bash
   medrag run
   ```
3. **Configure Settings**: Adjust configuration settings in the sidebar to suit your needs.
4. **Ask a Question**: Enter your medical question in the chat input field.
5. **Receive a Response**: Get a detailed answer from the MedQA Assistant.

## Configuration

The app allows users to customize various settings through the sidebar:

- **Project Name**: Specify the WandB project name.
- **Text Chunk WandB Dataset Name**: Define the dataset containing text chunks.
- **WandB Index Artifact Address**: Provide the address of the index artifact.
- **WandB Image Artifact Address**: Provide the address of the image artifact.
- **LLM Client Model Name**: Choose a language model for generating responses.
- **Figure Extraction Model Name**: Select a model for extracting figures from images.
- **Structured Output Model Name**: Choose a model for generating structured outputs.

## Technical Details

The app is built from the following components:

- **Streamlit**: For the user interface.
- **Weave**: For project initialization and artifact management.
- **MedQAAssistant**: For processing queries and generating responses.
- **LLMClient**: For interacting with language models.
- **MedCPTRetriever**: For retrieving relevant text chunks.
- **FigureAnnotatorFromPageImage**: For annotating figures in medical texts.

## Development and Deployment

- **Environment Setup**: Ensure all dependencies are installed as specified in `pyproject.toml`.
- **Running the App**: Use Streamlit to run the app locally.
- **Deployment**: coming soon...

## Additional Resources

For more detailed information on the components and their usage, refer to the following documentation sections:

- [MedQA Assistant](/assistant/medqa_assistant)
- [LLM Client](/assistant/llm_client)
- [Figure Annotation](/assistant/figure_annotation)
````
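A note on the components listed under Technical Details: they can presumably also be driven headlessly, outside Streamlit. A minimal sketch, assuming the default artifact addresses from `app.py` above and that `MedQAAssistant.predict` returns a plain string (as its use with `st.markdown` suggests); the example question is hypothetical:

```python
import weave

from medrag_multi_modal.assistant import (
    FigureAnnotatorFromPageImage,
    LLMClient,
    MedQAAssistant,
)
from medrag_multi_modal.retrieval import MedCPTRetriever

# Defaults lifted from app.py's sidebar; substitute your own wandb
# project and artifact addresses.
weave.init(project_name="ml-colabs/medrag-multi-modal")
retriever = MedCPTRetriever.from_wandb_artifact(
    chunk_dataset_name="grays-anatomy-chunks:v0",
    index_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-medcpt:v0",
)
figure_annotator = FigureAnnotatorFromPageImage(
    figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
    structured_output_llm_client=LLMClient(model_name="gpt-4o"),
    image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
)
assistant = MedQAAssistant(
    llm_client=LLMClient(model_name="gemini-1.5-flash"),
    retriever=retriever,
    figure_annotator=figure_annotator,
)
# Hypothetical query, mirroring what the chat input would send.
print(assistant.predict(query="What is the function of the mitral valve?"))
```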
medrag_multi_modal/cli.py
ADDED

```python
import argparse
import subprocess
import sys


def main():
    parser = argparse.ArgumentParser(description="MedRAG Multi-Modal CLI")
    parser.add_argument("command", choices=["run"], help="Command to execute")
    args = parser.parse_args()

    if args.command == "run":
        # Assuming your Streamlit app is in app.py
        subprocess.run([sys.executable, "-m", "streamlit", "run", "app.py"])


if __name__ == "__main__":
    main()
```
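As the comment in `cli.py` itself concedes, `medrag run` assumes `app.py` sits in the current working directory. A hypothetical hardening sketch, not part of this PR, that resolves the script relative to the module instead, assuming the repository layout where `app.py` lives one level above the package:

```python
import pathlib
import subprocess
import sys

# Hypothetical: an absolute path to app.py, derived from this module's
# location rather than the caller's working directory.
APP_PATH = pathlib.Path(__file__).resolve().parent.parent / "app.py"


def run_app() -> None:
    # Same invocation as the PR's CLI, but immune to `cd`.
    subprocess.run([sys.executable, "-m", "streamlit", "run", str(APP_PATH)])
```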
medrag_multi_modal/document_loader/image_loader/base_img_loader.py
CHANGED

```diff
@@ -5,8 +5,8 @@ from typing import Dict, List, Optional
 
 import jsonlines
 import rich
-
 import wandb
+
 from medrag_multi_modal.document_loader.text_loader.base_text_loader import (
     BaseTextLoader,
 )
```
medrag_multi_modal/retrieval/bm25s_retrieval.py
CHANGED

```diff
@@ -3,11 +3,10 @@ from glob import glob
 from typing import Optional
 
 import bm25s
+import wandb
 import weave
 from Stemmer import Stemmer
 
-import wandb
-
 LANGUAGE_DICT = {
     "english": "en",
     "french": "fr",
```
medrag_multi_modal/retrieval/colpali_retrieval.py
CHANGED

```diff
@@ -6,9 +6,8 @@ import weave
 if TYPE_CHECKING:
     from byaldi import RAGMultiModalModel
 
-from PIL import Image
-
 import wandb
+from PIL import Image
 
 from ..utils import get_wandb_artifact
 
```
medrag_multi_modal/retrieval/common.py
CHANGED

```diff
@@ -3,7 +3,6 @@ from enum import Enum
 import safetensors
 import safetensors.torch
 import torch
-
 import wandb
 
 
```
medrag_multi_modal/utils.py
CHANGED

```diff
@@ -3,9 +3,8 @@ import io
 
 import jsonlines
 import torch
-from PIL import Image
-
 import wandb
+from PIL import Image
 
 
 def get_wandb_artifact(
```
mkdocs.yml
CHANGED

```diff
@@ -62,6 +62,8 @@ nav:
   - Setup:
     - Installation: 'installation/install.md'
     - Development: 'installation/development.md'
+  - App:
+    - MedQA Assistant: 'app.md'
   - Document Loader:
     - Text Loader:
       - Base: 'document_loader/text_loader/base_text_loader.md'
```
pyproject.toml
CHANGED

```diff
@@ -44,9 +44,13 @@ dependencies = [
     "jsonlines>=4.0.0",
     "opencv-python>=4.10.0.84",
     "openai>=1.52.2",
+    "streamlit>=1.39.0",
 ]
 
 [project.optional-dependencies]
+app = [
+    "streamlit>=1.39.0",
+]
 core = [
     "adapters>=1.0.0",
     "bm25s[full]>=0.2.2",
@@ -74,10 +78,12 @@ core = [
     "opencv-python>=4.10.0.84",
     "openai>=1.52.2",
 ]
-
-
-
-
+dev = [
+    "pytest>=8.3.3",
+    "isort>=5.13.2",
+    "black>=24.10.0",
+    "ruff>=0.6.9",
+]
 docs = [
     "mkdocs>=1.6.1",
     "mkdocstrings>=0.26.1",
@@ -89,6 +95,11 @@ docs = [
     "jupyter>=1.1.1",
 ]
 
+[project.scripts]
+medrag = "medrag_multi_modal.cli:main"
 
 [tool.pytest.ini_options]
-pythonpath = "."
+pythonpath = "."
+
+[tool.setuptools]
+py-modules = ["medrag_multi_modal"]
```