Atanu Sarkar committed
Commit 4793def
2 Parent(s): a19b9c3 33deb8d

Merge pull request #25 from soumik12345/feat/streamlit-gui

app.py ADDED
@@ -0,0 +1,91 @@
+ import streamlit as st
+ import weave
+
+ from medrag_multi_modal.assistant import (
+     FigureAnnotatorFromPageImage,
+     LLMClient,
+     MedQAAssistant,
+ )
+ from medrag_multi_modal.assistant.llm_client import (
+     GOOGLE_MODELS,
+     MISTRAL_MODELS,
+     OPENAI_MODELS,
+ )
+ from medrag_multi_modal.retrieval import MedCPTRetriever
+
+ # Define constants
+ ALL_AVAILABLE_MODELS = GOOGLE_MODELS + MISTRAL_MODELS + OPENAI_MODELS
+
+ # Sidebar for configuration settings
+ st.sidebar.title("Configuration Settings")
+ project_name = st.sidebar.text_input(
+     label="Project Name",
+     value="ml-colabs/medrag-multi-modal",
+     placeholder="wandb project name",
+     help="format: wandb_username/wandb_project_name",
+ )
+ chunk_dataset_name = st.sidebar.text_input(
+     label="Text Chunk WandB Dataset Name",
+     value="grays-anatomy-chunks:v0",
+     placeholder="wandb dataset name",
+     help="format: wandb_dataset_name:version",
+ )
+ index_artifact_address = st.sidebar.text_input(
+     label="WandB Index Artifact Address",
+     value="ml-colabs/medrag-multi-modal/grays-anatomy-medcpt:v0",
+     placeholder="wandb artifact address",
+     help="format: wandb_username/wandb_project_name/wandb_artifact_name:version",
+ )
+ image_artifact_address = st.sidebar.text_input(
+     label="WandB Image Artifact Address",
+     value="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
+     placeholder="wandb artifact address",
+     help="format: wandb_username/wandb_project_name/wandb_artifact_name:version",
+ )
+ llm_client_model_name = st.sidebar.selectbox(
+     label="LLM Client Model Name",
+     options=ALL_AVAILABLE_MODELS,
+     index=ALL_AVAILABLE_MODELS.index("gemini-1.5-flash"),
+     help="select a model from the list",
+ )
+ figure_extraction_model_name = st.sidebar.selectbox(
+     label="Figure Extraction Model Name",
+     options=ALL_AVAILABLE_MODELS,
+     index=ALL_AVAILABLE_MODELS.index("pixtral-12b-2409"),
+     help="select a model from the list",
+ )
+ structured_output_model_name = st.sidebar.selectbox(
+     label="Structured Output Model Name",
+     options=ALL_AVAILABLE_MODELS,
+     index=ALL_AVAILABLE_MODELS.index("gpt-4o"),
+     help="select a model from the list",
+ )
+
+ # Streamlit app layout
+ st.title("MedQA Assistant App")
+
+ # Initialize Weave
+ weave.init(project_name=project_name)
+
+ # Initialize clients and assistants
+ llm_client = LLMClient(model_name=llm_client_model_name)
+ retriever = MedCPTRetriever.from_wandb_artifact(
+     chunk_dataset_name=chunk_dataset_name,
+     index_artifact_address=index_artifact_address,
+ )
+ figure_annotator = FigureAnnotatorFromPageImage(
+     figure_extraction_llm_client=LLMClient(model_name=figure_extraction_model_name),
+     structured_output_llm_client=LLMClient(model_name=structured_output_model_name),
+     image_artifact_address=image_artifact_address,
+ )
+ medqa_assistant = MedQAAssistant(
+     llm_client=llm_client, retriever=retriever, figure_annotator=figure_annotator
+ )
+
+ query = st.chat_input("Enter your question here")
+ if query:
+     with st.chat_message("user"):
+         st.markdown(query)
+     response = medqa_assistant.predict(query=query)
+     with st.chat_message("assistant"):
+         st.markdown(response)
docs/app.md ADDED
@@ -0,0 +1,61 @@
+ # MedQA Assistant App
+
+ The MedQA Assistant App is a Streamlit-based application that provides a chat interface for medical question answering. It leverages large language models (LLMs) and retrieval-augmented generation (RAG) to deliver accurate and informative responses to medical queries.
+
+ ## Features
+
+ - **Interactive Chat Interface**: Engage with the app through a user-friendly chat interface.
+ - **Configurable Settings**: Customize model selection and data sources via the sidebar.
+ - **Retrieval-Augmented Generation**: Ensures precise and contextually relevant responses.
+ - **Figure Annotation Capabilities**: Extracts and annotates figures from medical texts.
+
+ ## Usage
+
+ 1. **Install the Package**: Install the package with:
+    ```bash
+    uv pip install .
+    ```
+ 2. **Launch the App**: Start the application using Streamlit:
+    ```bash
+    medrag run
+    ```
+ 3. **Configure Settings**: Adjust configuration settings in the sidebar to suit your needs.
+ 4. **Ask a Question**: Enter your medical question in the chat input field.
+ 5. **Receive a Response**: Get a detailed answer from the MedQA Assistant.
+
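+ Under the hood, `medrag run` shells out to `python -m streamlit run app.py` (see `medrag_multi_modal/cli.py`), so the app can also be launched directly with Streamlit from the repository root.
+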
+ ## Configuration
+
+ The app allows users to customize various settings through the sidebar:
+
+ - **Project Name**: Specify the WandB project name.
+ - **Text Chunk WandB Dataset Name**: Define the dataset containing text chunks.
+ - **WandB Index Artifact Address**: Provide the address of the index artifact.
+ - **WandB Image Artifact Address**: Provide the address of the image artifact.
+ - **LLM Client Model Name**: Choose a language model for generating responses.
+ - **Figure Extraction Model Name**: Select a model for extracting figures from images.
+ - **Structured Output Model Name**: Choose a model for generating structured outputs.
+
+ ## Technical Details
+
+ The app is built from the following components, wired together as sketched below:
+
+ - **Streamlit**: For the user interface.
+ - **Weave**: For project initialization and artifact management.
+ - **MedQAAssistant**: For processing queries and generating responses.
+ - **LLMClient**: For interacting with language models.
+ - **MedCPTRetriever**: For retrieving relevant text chunks.
+ - **FigureAnnotatorFromPageImage**: For annotating figures in medical texts.
+
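+ A minimal sketch of the wiring in `app.py` (argument values mirror the sidebar defaults above; the query string is illustrative, not a stable API):
+
+ ```python
+ import weave
+
+ from medrag_multi_modal.assistant import (
+     FigureAnnotatorFromPageImage,
+     LLMClient,
+     MedQAAssistant,
+ )
+ from medrag_multi_modal.retrieval import MedCPTRetriever
+
+ # Same wiring as app.py, minus the Streamlit UI.
+ weave.init(project_name="ml-colabs/medrag-multi-modal")
+ retriever = MedCPTRetriever.from_wandb_artifact(
+     chunk_dataset_name="grays-anatomy-chunks:v0",
+     index_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-medcpt:v0",
+ )
+ figure_annotator = FigureAnnotatorFromPageImage(
+     figure_extraction_llm_client=LLMClient(model_name="pixtral-12b-2409"),
+     structured_output_llm_client=LLMClient(model_name="gpt-4o"),
+     image_artifact_address="ml-colabs/medrag-multi-modal/grays-anatomy-images-marker:v6",
+ )
+ medqa_assistant = MedQAAssistant(
+     llm_client=LLMClient(model_name="gemini-1.5-flash"),
+     retriever=retriever,
+     figure_annotator=figure_annotator,
+ )
+ response = medqa_assistant.predict(query="Describe the basic structure of the hyoid bone.")
+ ```
+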
+ ## Development and Deployment
+
+ - **Environment Setup**: Ensure all dependencies are installed as per the `pyproject.toml`.
+ - **Running the App**: Use Streamlit to run the app locally.
+ - **Deployment**: coming soon...
+
+ ## Additional Resources
+
+ For more detailed information on the components and their usage, refer to the following documentation sections:
+
+ - [MedQA Assistant](/assistant/medqa_assistant)
+ - [LLM Client](/assistant/llm_client)
+ - [Figure Annotation](/assistant/figure_annotation)
medrag_multi_modal/cli.py ADDED
@@ -0,0 +1,17 @@
+ import argparse
+ import subprocess
+ import sys
+
+
+ def main():
+     parser = argparse.ArgumentParser(description="MedRAG Multi-Modal CLI")
+     parser.add_argument("command", choices=["run"], help="Command to execute")
+     args = parser.parse_args()
+
+     if args.command == "run":
+         # Assuming your Streamlit app is in app.py
+         subprocess.run([sys.executable, "-m", "streamlit", "run", "app.py"])
+
+
+ if __name__ == "__main__":
+     main()
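
Since `main()` only dispatches to `subprocess.run`, the command wiring can be sanity-checked without spawning a real Streamlit process; a minimal sketch (hypothetical test, not part of this commit):

```python
import sys
from unittest import mock

from medrag_multi_modal.cli import main

# Patch subprocess.run so no Streamlit process is actually started.
with mock.patch("subprocess.run") as mocked_run, mock.patch.object(
    sys, "argv", ["medrag", "run"]
):
    main()

# `medrag run` should delegate to `python -m streamlit run app.py`.
assert mocked_run.call_args[0][0][1:] == ["-m", "streamlit", "run", "app.py"]
```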
medrag_multi_modal/document_loader/image_loader/base_img_loader.py CHANGED
@@ -5,8 +5,8 @@ from typing import Dict, List, Optional
 
 import jsonlines
 import rich
-
 import wandb
+
 from medrag_multi_modal.document_loader.text_loader.base_text_loader import (
     BaseTextLoader,
 )
medrag_multi_modal/retrieval/bm25s_retrieval.py CHANGED
@@ -3,11 +3,10 @@ from glob import glob
 from typing import Optional
 
 import bm25s
+import wandb
 import weave
 from Stemmer import Stemmer
 
-import wandb
-
 LANGUAGE_DICT = {
     "english": "en",
     "french": "fr",
medrag_multi_modal/retrieval/colpali_retrieval.py CHANGED
@@ -6,9 +6,8 @@ import weave
 if TYPE_CHECKING:
     from byaldi import RAGMultiModalModel
 
-from PIL import Image
-
 import wandb
+from PIL import Image
 
 from ..utils import get_wandb_artifact
 
medrag_multi_modal/retrieval/common.py CHANGED
@@ -3,7 +3,6 @@ from enum import Enum
 import safetensors
 import safetensors.torch
 import torch
-
 import wandb
 
 
medrag_multi_modal/utils.py CHANGED
@@ -3,9 +3,8 @@ import io
 
 import jsonlines
 import torch
-from PIL import Image
-
 import wandb
+from PIL import Image
 
 
 def get_wandb_artifact(
mkdocs.yml CHANGED
@@ -62,6 +62,8 @@ nav:
   - Setup:
     - Installation: 'installation/install.md'
     - Development: 'installation/development.md'
+  - App:
+    - MedQA Assistant: 'app.md'
   - Document Loader:
     - Text Loader:
       - Base: 'document_loader/text_loader/base_text_loader.md'
pyproject.toml CHANGED
@@ -44,9 +44,13 @@ dependencies = [
     "jsonlines>=4.0.0",
     "opencv-python>=4.10.0.84",
     "openai>=1.52.2",
+    "streamlit>=1.39.0",
 ]
 
 [project.optional-dependencies]
+app = [
+    "streamlit>=1.39.0",
+]
 core = [
     "adapters>=1.0.0",
     "bm25s[full]>=0.2.2",
@@ -74,10 +78,12 @@ core = [
     "opencv-python>=4.10.0.84",
     "openai>=1.52.2",
 ]
-
-dev = ["pytest>=8.3.3", "isort>=5.13.2", "black>=24.10.0", "ruff>=0.6.9"]
-
-
+dev = [
+    "pytest>=8.3.3",
+    "isort>=5.13.2",
+    "black>=24.10.0",
+    "ruff>=0.6.9",
+]
 docs = [
     "mkdocs>=1.6.1",
     "mkdocstrings>=0.26.1",
@@ -89,6 +95,11 @@ docs = [
     "jupyter>=1.1.1",
 ]
 
+[project.scripts]
+medrag = "medrag_multi_modal.cli:main"
 
 [tool.pytest.ini_options]
-pythonpath = "."
+pythonpath = "."
+
+[tool.setuptools]
+py-modules = ["medrag_multi_modal"]
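
The new `[project.scripts]` table is what exposes the `medrag` command after installation; on install, pip/uv generate a console-script wrapper roughly equivalent to this sketch (simplified; real wrappers resolve the entry point via importlib.metadata):

```python
# Simplified stand-in for the console script generated for
# `medrag = "medrag_multi_modal.cli:main"`.
import sys

from medrag_multi_modal.cli import main

if __name__ == "__main__":
    sys.exit(main())
```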