Soutrik committed
Commit 1b0bd15
1 Parent(s): 6053557

added: client and server on docker compose, CPU tested

Dockerfile CHANGED
@@ -29,6 +29,10 @@ RUN --mount=type=cache,target=/tmp/poetry_cache poetry install --only main --no-
 # Stage 2: Runtime environment
 FROM python:3.10.15-slim as runner
 
+# Install curl for health check script
+RUN apt-get update && apt-get install -y --no-install-recommends curl && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
 # Copy application source code and necessary files
 COPY src /app/src
 COPY configs /app/configs
@@ -38,6 +42,9 @@ COPY main.py /app/main.py
 # Copy virtual environment from the builder stage
 COPY --from=builder /app/.venv /app/.venv
 
+# Copy the client files
+COPY run_client.sh /app/run_client.sh
+
 # Set the working directory to /app
 WORKDIR /app
 
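The curl install above exists so the serving container can be probed by a health check; the check itself is not part of this diff. A minimal sketch of such a probe, assuming the LitServe default /health route and the 8080 port used elsewhere in this commit:

    # hypothetical health probe (route and port are assumptions, not shown in this commit)
    curl --fail --silent http://localhost:8080/health || exit 1

In Compose, a command like this would typically sit under a healthcheck: block of the server service.
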
artifacts/image_prediction.png DELETED
Binary file (390 kB)
 
docker-compose-old.yaml ADDED
@@ -0,0 +1,74 @@
+version: '3.8'
+
+services:
+  train:
+    build:
+      context: .
+    command: |
+      python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=train ++train=True ++test=False && \
+      python -m src.create_artifacts && \
+      touch ./checkpoints/train_done.flag
+    volumes:
+      - ./data:/app/data
+      - ./checkpoints:/app/checkpoints
+      - ./artifacts:/app/artifacts
+      - ./logs:/app/logs
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app
+    shm_size: '4g'
+    networks:
+      - default
+    env_file:
+      - .env
+
+  eval:
+    build:
+      context: .
+    command: |
+      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.train_optuna_callbacks experiment=catdog_experiment ++task_name=test ++train=False ++test=True'
+    volumes:
+      - ./data:/app/data
+      - ./checkpoints:/app/checkpoints
+      - ./artifacts:/app/artifacts
+      - ./logs:/app/logs
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app
+    shm_size: '4g'
+    networks:
+      - default
+    env_file:
+      - .env
+    depends_on:
+      - train
+
+  inference:
+    build:
+      context: .
+    command: |
+      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.infer experiment=catdog_experiment'
+    volumes:
+      - ./data:/app/data
+      - ./checkpoints:/app/checkpoints
+      - ./artifacts:/app/artifacts
+      - ./logs:/app/logs
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app
+    shm_size: '4g'
+    networks:
+      - default
+    env_file:
+      - .env
+    depends_on:
+      - train
+
+volumes:
+  data:
+  checkpoints:
+  artifacts:
+  logs:
+
+networks:
+  default:
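
The file above is the previous train/eval/inference pipeline, kept for reference under the new name. If needed, it can presumably still be run on its own by pointing Compose at it explicitly (a sketch; the service names come from the file itself):

    # bring up the old three-stage pipeline instead of the new client/server stack
    docker compose -f docker-compose-old.yaml up --build train eval inference
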
docker-compose.yaml CHANGED
@@ -1,5 +1,3 @@
-version: '3.8'
-
 services:
   train:
     build:
@@ -21,7 +19,7 @@ services:
       - default
     env_file:
       - .env
-
+
   eval:
     build:
       context: .
@@ -40,14 +38,13 @@ services:
       - default
     env_file:
       - .env
-    depends_on:
-      - train
+
 
-  inference:
+  server:
     build:
       context: .
     command: |
-      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.infer experiment=catdog_experiment'
+      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && python -m src.server'
     volumes:
       - ./data:/app/data
       - ./checkpoints:/app/checkpoints
@@ -56,14 +53,36 @@ services:
     environment:
       - PYTHONUNBUFFERED=1
       - PYTHONPATH=/app
+      - SERVER_URL=http://localhost:8080
     shm_size: '4g'
     networks:
      - default
     env_file:
-      - .env
-    depends_on:
-      - train
+      - .env
+    ports:
+      - "8080:8080"
+
+  client:
+    build:
+      context: .
+    command: |
+      sh -c 'while [ ! -f /app/checkpoints/train_done.flag ]; do sleep 10; done && ./run_client.sh'
+    volumes:
+      - ./data:/app/data
+      - ./checkpoints:/app/checkpoints
+      - ./artifacts:/app/artifacts
+      - ./logs:/app/logs
+    environment:
+      - PYTHONUNBUFFERED=1
+      - PYTHONPATH=/app
+      - SERVER_URL=http://server:8080
+    shm_size: '4g'
+    networks:
+      - default
+    env_file:
+      - .env
 
+
 volumes:
   data:
   checkpoints:
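
Inside the Compose network the client reaches the server by service name (SERVER_URL=http://server:8080), while the published port exposes the same endpoint on the host. A rough manual test once the stack is up and training has finished (the image path is a placeholder; base64 -w0 assumes GNU coreutils):

    # start the full stack, then post a base64-encoded image to the published /predict endpoint
    docker compose up --build
    IMG_B64=$(base64 -w0 path/to/some_image.jpg)
    curl -s -X POST http://localhost:8080/predict \
         -H "Content-Type: application/json" \
         -d "{\"image\": \"${IMG_B64}\"}"
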
run_client.sh ADDED
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Run the client script
+echo "Running the client script..."
+python -m src.client
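
The script is a thin wrapper around the Python client, so it can also be run outside of Compose against a locally published server (SERVER_URL defaults to http://localhost:8080 in src/client.py):

    # run the client wrapper directly from the repo root
    chmod +x run_client.sh
    SERVER_URL=http://localhost:8080 ./run_client.sh
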
src/client.py CHANGED
@@ -1,3 +1,4 @@
+from loguru import logger
 import requests
 from urllib.request import urlopen
 import base64
@@ -8,45 +9,52 @@ def fetch_image(url):
     """
     Fetch image data from a URL.
     """
-    return urlopen(url).read()
+    try:
+        return urlopen(url).read()
+    except Exception as e:
+        logger.error(f"Failed to fetch image from {url}: {e}")
+        raise
 
 
 def encode_image_to_base64(img_data):
     """
     Encode image bytes to a base64 string.
     """
-    return base64.b64encode(img_data).decode("utf-8")
-
-
-def send_prediction_request(base64_image, server_url):
-    """
-    Send a single base64 image to the prediction API and retrieve predictions.
-    """
     try:
-        response = requests.post(f"{server_url}/predict", json={"image": base64_image})
-        return response
-    except requests.exceptions.RequestException as e:
-        print(f"Error connecting to the server: {e}")
-        return None
+        return base64.b64encode(img_data).decode("utf-8")
+    except Exception as e:
+        logger.error(f"Failed to encode image to base64: {e}")
+        raise
 
 
-def send_batch_prediction_request(base64_images, server_url):
+def send_prediction_request(base64_image, server_urls):
     """
-    Send a batch of base64 images to the prediction API and retrieve predictions.
+    Send a single base64 image to the prediction API and retrieve predictions.
+    Tries multiple server URLs in order.
     """
-    try:
-        response = requests.post(
-            f"{server_url}/predict", json=[{"image": img} for img in base64_images]
-        )
-        return response
-    except requests.exceptions.RequestException as e:
-        print(f"Error connecting to the server: {e}")
-        return None
+    for server_url in server_urls:
+        try:
+            logger.info(f"Attempting to send prediction request to {server_url}...")
+            response = requests.post(
+                f"{server_url}/predict", json={"image": base64_image}
+            )
+            if response.status_code == 200:
+                logger.info(f"Successfully connected to {server_url}")
+                return response
+            else:
+                logger.warning(
+                    f"Server at {server_url} returned status code {response.status_code}"
+                )
+        except requests.exceptions.RequestException as e:
+            logger.error(f"Error connecting to the server at {server_url}: {e}")
+    logger.error("Failed to connect to any server.")
+    return None
 
 
 def main():
-    # Server URL (default or from environment)
-    server_url = os.getenv("SERVER_URL", "http://localhost:8080")
+    # Server URLs to try
+    server_url_env = os.getenv("SERVER_URL", "http://localhost:8080")
+    server_urls = [server_url_env]
 
     # Example URLs for testing
     image_urls = [
@@ -55,30 +63,32 @@ def main():
 
     # Fetch and encode images
     try:
-        print("Fetching and encoding images...")
+        logger.info("Fetching and encoding images...")
         base64_images = [encode_image_to_base64(fetch_image(url)) for url in image_urls]
-        print("Images fetched and encoded successfully.")
+        logger.info("Images fetched and encoded successfully.")
     except Exception as e:
-        print(f"Error fetching or encoding images: {e}")
+        logger.error(f"Error fetching or encoding images: {e}")
         return
 
     # Test single image prediction
     try:
-        print("\n--- Single Image Prediction ---")
-        single_response = send_prediction_request(base64_images[0], server_url)
+        logger.info("--- Single Image Prediction ---")
+        single_response = send_prediction_request(base64_images[0], server_urls)
         if single_response and single_response.status_code == 200:
             predictions = single_response.json().get("predictions", [])
            if predictions:
-                print("Top 5 Predictions:")
+                logger.info("Top Predictions:")
                for pred in predictions:
-                    print(f"{pred['label']}: {pred['probability']:.2%}")
+                    logger.info(f"{pred['label']}: {pred['probability']:.2%}")
            else:
-                print("No predictions returned.")
+                logger.warning("No predictions returned.")
        elif single_response:
-            print(f"Error: {single_response.status_code}")
-            print(single_response.text)
+            logger.error(f"Error: {single_response.status_code}")
+            logger.error(single_response.text)
+        else:
+            logger.error("Failed to get a response from any server.")
     except Exception as e:
-        print(f"Error sending single prediction request: {e}")
 
 
 if __name__ == "__main__":
src/server.py CHANGED
@@ -82,7 +82,7 @@ class ImageClassifierAPI(lit.LitAPI):
 
     def decode_request(self, request):
         """Handle both single and batch inputs."""
-        logger.info(f"decode_request received: {request}")
+        # logger.info(f"decode_request received: {request}")
         if not isinstance(request, dict) or "image" not in request:
             logger.error(
                 "Invalid request format. Expected a dictionary with key 'image'."
@@ -94,7 +94,7 @@ class ImageClassifierAPI(lit.LitAPI):
 
     def batch(self, inputs):
         """Batch process images."""
-        logger.info(f"batch received inputs: {inputs}")
+        # logger.info(f"batch received inputs: {inputs}")
         if not isinstance(inputs, list):
             raise ValueError("Input to batch must be a list.")
 
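
With the per-request logging commented out, the server output stays limited to warnings and errors during batch serving. For local debugging outside of Compose, the same entrypoint used by the server service can presumably be started directly once a trained checkpoint exists (a sketch, assuming the repo root mirrors the /app volume layout):

    # start the LitServe-based server on its own, against an existing ./checkpoints directory
    PYTHONPATH=. python -m src.server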